/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "v3dv_private.h"
27
num_subpass_attachments(const VkSubpassDescription2 *desc)
29
return desc->inputAttachmentCount +
30
desc->colorAttachmentCount +
31
(desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
32
(desc->pDepthStencilAttachment != NULL);
36
set_try_tlb_resolve(struct v3dv_device *device,
37
struct v3dv_render_pass_attachment *att)
39
const struct v3dv_format *format = v3dv_X(device, get_format)(att->desc.format);
40
att->try_tlb_resolve = v3dv_X(device, format_supports_tlb_resolve)(format);
44
pass_find_subpass_range_for_attachments(struct v3dv_device *device,
45
struct v3dv_render_pass *pass)
47
for (uint32_t i = 0; i < pass->attachment_count; i++) {
48
pass->attachments[i].first_subpass = pass->subpass_count - 1;
49
pass->attachments[i].last_subpass = 0;
50
if (pass->multiview_enabled) {
51
for (uint32_t j = 0; j < MAX_MULTIVIEW_VIEW_COUNT; j++) {
52
pass->attachments[i].views[j].first_subpass = pass->subpass_count - 1;
53
pass->attachments[i].views[j].last_subpass = 0;
58
for (uint32_t i = 0; i < pass->subpass_count; i++) {
59
const struct v3dv_subpass *subpass = &pass->subpasses[i];
61
for (uint32_t j = 0; j < subpass->color_count; j++) {
62
uint32_t attachment_idx = subpass->color_attachments[j].attachment;
63
if (attachment_idx == VK_ATTACHMENT_UNUSED)
66
struct v3dv_render_pass_attachment *att =
67
&pass->attachments[attachment_idx];
69
if (i < att->first_subpass)
70
att->first_subpass = i;
71
if (i > att->last_subpass)
72
att->last_subpass = i;
74
uint32_t view_mask = subpass->view_mask;
76
uint32_t view_index = u_bit_scan(&view_mask);
77
if (i < att->views[view_index].first_subpass)
78
att->views[view_index].first_subpass = i;
79
if (i > att->views[view_index].last_subpass)
80
att->views[view_index].last_subpass = i;
83
if (subpass->resolve_attachments &&
84
subpass->resolve_attachments[j].attachment != VK_ATTACHMENT_UNUSED) {
85
set_try_tlb_resolve(device, att);
89
uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
90
if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
91
if (i < pass->attachments[ds_attachment_idx].first_subpass)
92
pass->attachments[ds_attachment_idx].first_subpass = i;
93
if (i > pass->attachments[ds_attachment_idx].last_subpass)
94
pass->attachments[ds_attachment_idx].last_subpass = i;
96
if (subpass->ds_resolve_attachment.attachment != VK_ATTACHMENT_UNUSED)
97
set_try_tlb_resolve(device, &pass->attachments[ds_attachment_idx]);
100
for (uint32_t j = 0; j < subpass->input_count; j++) {
101
uint32_t input_attachment_idx = subpass->input_attachments[j].attachment;
102
if (input_attachment_idx == VK_ATTACHMENT_UNUSED)
104
if (i < pass->attachments[input_attachment_idx].first_subpass)
105
pass->attachments[input_attachment_idx].first_subpass = i;
106
if (i > pass->attachments[input_attachment_idx].last_subpass)
107
pass->attachments[input_attachment_idx].last_subpass = i;
110
if (subpass->resolve_attachments) {
111
for (uint32_t j = 0; j < subpass->color_count; j++) {
112
uint32_t attachment_idx = subpass->resolve_attachments[j].attachment;
113
if (attachment_idx == VK_ATTACHMENT_UNUSED)
115
if (i < pass->attachments[attachment_idx].first_subpass)
116
pass->attachments[attachment_idx].first_subpass = i;
117
if (i > pass->attachments[attachment_idx].last_subpass)
118
pass->attachments[attachment_idx].last_subpass = i;
125
VKAPI_ATTR VkResult VKAPI_CALL
126
v3dv_CreateRenderPass2(VkDevice _device,
127
const VkRenderPassCreateInfo2 *pCreateInfo,
128
const VkAllocationCallbacks *pAllocator,
129
VkRenderPass *pRenderPass)
131
V3DV_FROM_HANDLE(v3dv_device, device, _device);
132
struct v3dv_render_pass *pass;
134
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2);
136
/* From the VK_KHR_multiview spec:
138
* When a subpass uses a non-zero view mask, multiview functionality is
139
* considered to be enabled. Multiview is all-or-nothing for a render
140
* pass - that is, either all subpasses must have a non-zero view mask
141
* (though some subpasses may have only one view) or all must be zero.
143
bool multiview_enabled = pCreateInfo->subpassCount &&
144
pCreateInfo->pSubpasses[0].viewMask;
146
size_t size = sizeof(*pass);
147
size_t subpasses_offset = size;
148
size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
149
size_t attachments_offset = size;
150
size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
152
pass = vk_object_zalloc(&device->vk, pAllocator, size,
153
VK_OBJECT_TYPE_RENDER_PASS);
155
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
157
pass->multiview_enabled = multiview_enabled;
158
pass->attachment_count = pCreateInfo->attachmentCount;
159
pass->attachments = (void *) pass + attachments_offset;
160
pass->subpass_count = pCreateInfo->subpassCount;
161
pass->subpasses = (void *) pass + subpasses_offset;
163
for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++)
164
pass->attachments[i].desc = pCreateInfo->pAttachments[i];
166
uint32_t subpass_attachment_count = 0;
167
for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
168
const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
169
subpass_attachment_count += num_subpass_attachments(desc);
172
if (subpass_attachment_count) {
173
const size_t subpass_attachment_bytes =
174
subpass_attachment_count * sizeof(struct v3dv_subpass_attachment);
175
pass->subpass_attachments =
176
vk_alloc2(&device->vk.alloc, pAllocator, subpass_attachment_bytes, 8,
177
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
178
if (pass->subpass_attachments == NULL) {
179
vk_object_free(&device->vk, pAllocator, pass);
180
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
183
pass->subpass_attachments = NULL;
186
struct v3dv_subpass_attachment *p = pass->subpass_attachments;
187
for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
188
const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
189
struct v3dv_subpass *subpass = &pass->subpasses[i];
191
subpass->input_count = desc->inputAttachmentCount;
192
subpass->color_count = desc->colorAttachmentCount;
193
subpass->view_mask = desc->viewMask;
195
if (desc->inputAttachmentCount > 0) {
196
subpass->input_attachments = p;
197
p += desc->inputAttachmentCount;
199
for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
200
subpass->input_attachments[j] = (struct v3dv_subpass_attachment) {
201
.attachment = desc->pInputAttachments[j].attachment,
202
.layout = desc->pInputAttachments[j].layout,
207
if (desc->colorAttachmentCount > 0) {
208
subpass->color_attachments = p;
209
p += desc->colorAttachmentCount;
211
for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
212
subpass->color_attachments[j] = (struct v3dv_subpass_attachment) {
213
.attachment = desc->pColorAttachments[j].attachment,
214
.layout = desc->pColorAttachments[j].layout,
219
if (desc->pResolveAttachments) {
220
subpass->resolve_attachments = p;
221
p += desc->colorAttachmentCount;
223
for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
224
subpass->resolve_attachments[j] = (struct v3dv_subpass_attachment) {
225
.attachment = desc->pResolveAttachments[j].attachment,
226
.layout = desc->pResolveAttachments[j].layout,
231
if (desc->pDepthStencilAttachment) {
232
subpass->ds_attachment = (struct v3dv_subpass_attachment) {
233
.attachment = desc->pDepthStencilAttachment->attachment,
234
.layout = desc->pDepthStencilAttachment->layout,
237
/* GFXH-1461: if depth is cleared but stencil is loaded (or viceversa),
238
* the clear might get lost. If a subpass has this then we can't emit
239
* the clear using the TLB and we have to do it as a draw call.
241
* FIXME: separate stencil.
243
if (subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) {
244
struct v3dv_render_pass_attachment *att =
245
&pass->attachments[subpass->ds_attachment.attachment];
246
if (att->desc.format == VK_FORMAT_D24_UNORM_S8_UINT) {
247
if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
248
att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
249
subpass->do_depth_clear_with_draw = true;
250
} else if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD &&
251
att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
252
subpass->do_stencil_clear_with_draw = true;
257
/* VK_KHR_depth_stencil_resolve */
258
const VkSubpassDescriptionDepthStencilResolveKHR *resolve_desc =
259
vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
260
const VkAttachmentReference2 *resolve_att =
261
resolve_desc && resolve_desc->pDepthStencilResolveAttachment &&
262
resolve_desc->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED ?
263
resolve_desc->pDepthStencilResolveAttachment : NULL;
265
subpass->ds_resolve_attachment = (struct v3dv_subpass_attachment) {
266
.attachment = resolve_att->attachment,
267
.layout = resolve_att->layout,
269
assert(resolve_desc->depthResolveMode == VK_RESOLVE_MODE_SAMPLE_ZERO_BIT ||
270
resolve_desc->stencilResolveMode == VK_RESOLVE_MODE_SAMPLE_ZERO_BIT);
271
subpass->resolve_depth =
272
resolve_desc->depthResolveMode != VK_RESOLVE_MODE_NONE &&
273
resolve_att->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
274
subpass->resolve_stencil =
275
resolve_desc->stencilResolveMode != VK_RESOLVE_MODE_NONE &&
276
resolve_att->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;
278
subpass->ds_resolve_attachment.attachment = VK_ATTACHMENT_UNUSED;
279
subpass->resolve_depth = false;
280
subpass->resolve_stencil = false;
283
subpass->ds_attachment.attachment = VK_ATTACHMENT_UNUSED;
284
subpass->ds_resolve_attachment.attachment = VK_ATTACHMENT_UNUSED;
285
subpass->resolve_depth = false;
286
subpass->resolve_stencil = false;
290
pass_find_subpass_range_for_attachments(device, pass);
292
/* FIXME: handle subpass dependencies */
294
*pRenderPass = v3dv_render_pass_to_handle(pass);
299
VKAPI_ATTR void VKAPI_CALL
300
v3dv_DestroyRenderPass(VkDevice _device,
302
const VkAllocationCallbacks *pAllocator)
304
V3DV_FROM_HANDLE(v3dv_device, device, _device);
305
V3DV_FROM_HANDLE(v3dv_render_pass, pass, _pass);
310
vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
311
vk_object_free(&device->vk, pAllocator, pass);
315
subpass_get_granularity(struct v3dv_device *device,
316
struct v3dv_render_pass *pass,
317
uint32_t subpass_idx,
318
VkExtent2D *granularity)
320
/* Granularity is defined by the tile size */
321
assert(subpass_idx < pass->subpass_count);
322
struct v3dv_subpass *subpass = &pass->subpasses[subpass_idx];
323
const uint32_t color_attachment_count = subpass->color_count;
326
uint32_t max_bpp = 0;
327
for (uint32_t i = 0; i < color_attachment_count; i++) {
328
uint32_t attachment_idx = subpass->color_attachments[i].attachment;
329
if (attachment_idx == VK_ATTACHMENT_UNUSED)
331
const VkAttachmentDescription2 *desc =
332
&pass->attachments[attachment_idx].desc;
333
const struct v3dv_format *format = v3dv_X(device, get_format)(desc->format);
334
uint32_t internal_type, internal_bpp;
335
v3dv_X(device, get_internal_type_bpp_for_output_format)
336
(format->rt_type, &internal_type, &internal_bpp);
338
max_bpp = MAX2(max_bpp, internal_bpp);
340
if (desc->samples > VK_SAMPLE_COUNT_1_BIT)
344
uint32_t width, height;
345
bool double_buffer = (V3D_DEBUG & V3D_DEBUG_DOUBLE_BUFFER) && !msaa;
346
v3d_choose_tile_size(color_attachment_count, max_bpp, msaa,
347
double_buffer, &width, &height);
348
*granularity = (VkExtent2D) {
354
VKAPI_ATTR void VKAPI_CALL
355
v3dv_GetRenderAreaGranularity(VkDevice _device,
356
VkRenderPass renderPass,
357
VkExtent2D *pGranularity)
359
V3DV_FROM_HANDLE(v3dv_render_pass, pass, renderPass);
360
V3DV_FROM_HANDLE(v3dv_device, device, _device);
362
*pGranularity = (VkExtent2D) {
367
for (uint32_t i = 0; i < pass->subpass_count; i++) {
369
subpass_get_granularity(device, pass, i, &sg);
370
pGranularity->width = MIN2(pGranularity->width, sg.width);
371
pGranularity->height = MIN2(pGranularity->height, sg.height);
/* Checks whether the render area rectangle covers a region that is aligned to
 * tile boundaries. This means that we are writing to all pixels covered by
 * all tiles in that area (except for pixels on edge tiles that are outside
 * the framebuffer dimensions).
 *
 * When our framebuffer is aligned to tile boundaries we know we are writing
 * valid data to all pixels in each tile and we can apply certain
 * optimizations, like avoiding tile loads, since we know that none of the
 * original pixel values in each tile for that area need to be preserved.
 * We also use this to decide if we can use TLB clears, as these clear whole
 * tiles so we can't use them if the render area is not aligned.
 *
 * Note that when an image is created it will possibly include padding blocks
 * depending on its tiling layout. When the framebuffer dimensions are not
 * aligned to tile boundaries then edge tiles are only partially covered by the
 * framebuffer pixels, but tile stores still seem to store full tiles
 * writing to the padded sections. This is important when the framebuffer
 * is aliasing a smaller section of a larger image, as in that case the edge
 * tiles of the framebuffer would overwrite valid pixels in the larger image.
 * In that case, we can't flag the area as being aligned.
 */
v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
398
const VkRect2D *area,
399
struct v3dv_framebuffer *fb,
400
struct v3dv_render_pass *pass,
401
uint32_t subpass_idx)
403
assert(subpass_idx < pass->subpass_count);
405
VkExtent2D granularity;
406
subpass_get_granularity(device, pass, subpass_idx, &granularity);
408
return area->offset.x % granularity.width == 0 &&
409
area->offset.y % granularity.height == 0 &&
410
(area->extent.width % granularity.width == 0 ||
411
(fb->has_edge_padding &&
412
area->offset.x + area->extent.width >= fb->width)) &&
413
(area->extent.height % granularity.height == 0 ||
414
(fb->has_edge_padding &&
415
area->offset.y + area->extent.height >= fb->height));