/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "v3dv_private.h"
#include "v3dv_meta_common.h"

#include "compiler/nir/nir_builder.h"
#include "util/u_pack_color.h"
#include "vulkan/runtime/vk_common_entrypoints.h"
static uint32_t
meta_blit_key_hash(const void *key)
{
   return _mesa_hash_data(key, V3DV_META_BLIT_CACHE_KEY_SIZE);
}

static bool
meta_blit_key_compare(const void *key1, const void *key2)
{
   return memcmp(key1, key2, V3DV_META_BLIT_CACHE_KEY_SIZE) == 0;
}
static bool
create_blit_pipeline_layout(struct v3dv_device *device,
                            VkDescriptorSetLayout *descriptor_set_layout,
                            VkPipelineLayout *pipeline_layout)
{
   VkResult result;

   if (*descriptor_set_layout == 0) {
      VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
      };
      VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
         .bindingCount = 1,
         .pBindings = &descriptor_set_layout_binding,
      };
      result =
         v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
                                        &descriptor_set_layout_info,
                                        &device->vk.alloc,
                                        descriptor_set_layout);
      if (result != VK_SUCCESS)
         return false;
   }

   assert(*pipeline_layout == 0);
   VkPipelineLayoutCreateInfo pipeline_layout_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = descriptor_set_layout,
      .pushConstantRangeCount = 1,
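      /* VkPushConstantRange is { stageFlags, offset, size }: the range below
       * exposes bytes [0, 20) of the push constant block to the vertex
       * shader.
       */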
      .pPushConstantRanges =
         &(VkPushConstantRange) { VK_SHADER_STAGE_VERTEX_BIT, 0, 20 },
   };

   result =
      v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
                                &pipeline_layout_info,
                                &device->vk.alloc,
                                pipeline_layout);
   return result == VK_SUCCESS;
}
void
v3dv_meta_blit_init(struct v3dv_device *device)
{
   for (uint32_t i = 0; i < 3; i++) {
      device->meta.blit.cache[i] =
         _mesa_hash_table_create(NULL,
                                 meta_blit_key_hash,
                                 meta_blit_key_compare);
   }

   create_blit_pipeline_layout(device,
                               &device->meta.blit.ds_layout,
                               &device->meta.blit.p_layout);
}
void
v3dv_meta_blit_finish(struct v3dv_device *device)
{
   VkDevice _device = v3dv_device_to_handle(device);

   for (uint32_t i = 0; i < 3; i++) {
      hash_table_foreach(device->meta.blit.cache[i], entry) {
         struct v3dv_meta_blit_pipeline *item = entry->data;
         v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
         vk_free(&device->vk.alloc, item);
      }
      _mesa_hash_table_destroy(device->meta.blit.cache[i], NULL);
   }

   if (device->meta.blit.p_layout) {
      v3dv_DestroyPipelineLayout(_device, device->meta.blit.p_layout,
                                 &device->vk.alloc);
   }

   if (device->meta.blit.ds_layout) {
      v3dv_DestroyDescriptorSetLayout(_device, device->meta.blit.ds_layout,
                                      &device->vk.alloc);
   }
}
static uint32_t
meta_texel_buffer_copy_key_hash(const void *key)
{
   return _mesa_hash_data(key, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
}

static bool
meta_texel_buffer_copy_key_compare(const void *key1, const void *key2)
{
   return memcmp(key1, key2, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE) == 0;
}
static bool
create_texel_buffer_copy_pipeline_layout(struct v3dv_device *device,
                                         VkDescriptorSetLayout *ds_layout,
                                         VkPipelineLayout *p_layout)
{
   VkResult result;

   if (*ds_layout == 0) {
      VkDescriptorSetLayoutBinding ds_layout_binding = {
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
      };
      VkDescriptorSetLayoutCreateInfo ds_layout_info = {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
         .bindingCount = 1,
         .pBindings = &ds_layout_binding,
      };
      result =
         v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
                                        &ds_layout_info,
                                        &device->vk.alloc,
                                        ds_layout);
      if (result != VK_SUCCESS)
         return false;
   }

   assert(*p_layout == 0);
   /* FIXME: this is abusing the API a bit, since not all of our copy
    * pipelines have a geometry shader. We could create 2 different pipeline
    * layouts, but this works for us for now.
    */
#define TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET     0
#define TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET 16
#define TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET 20
#define TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET  24
   VkPushConstantRange ranges[2] = {
      { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 24 },
      { VK_SHADER_STAGE_GEOMETRY_BIT, 24, 4 },
   };

   VkPipelineLayoutCreateInfo p_layout_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = ds_layout,
      .pushConstantRangeCount = 2,
      .pPushConstantRanges = ranges,
   };

   result =
      v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
                                &p_layout_info,
                                &device->vk.alloc,
                                p_layout);
   return result == VK_SUCCESS;
}
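/* The push constant map set up above is:
 *
 *   bytes  0-15: fragment shader copy box
 *   bytes 16-19: fragment shader buffer stride, in texels
 *   bytes 20-23: fragment shader buffer offset, in texels
 *   bytes 24-27: geometry shader layer index
 */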
void
v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device)
{
   for (uint32_t i = 0; i < 3; i++) {
      device->meta.texel_buffer_copy.cache[i] =
         _mesa_hash_table_create(NULL,
                                 meta_texel_buffer_copy_key_hash,
                                 meta_texel_buffer_copy_key_compare);
   }

   create_texel_buffer_copy_pipeline_layout(
      device,
      &device->meta.texel_buffer_copy.ds_layout,
      &device->meta.texel_buffer_copy.p_layout);
}
void
v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device)
{
   VkDevice _device = v3dv_device_to_handle(device);

   for (uint32_t i = 0; i < 3; i++) {
      hash_table_foreach(device->meta.texel_buffer_copy.cache[i], entry) {
         struct v3dv_meta_texel_buffer_copy_pipeline *item = entry->data;
         v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
         vk_free(&device->vk.alloc, item);
      }
      _mesa_hash_table_destroy(device->meta.texel_buffer_copy.cache[i], NULL);
   }

   if (device->meta.texel_buffer_copy.p_layout) {
      v3dv_DestroyPipelineLayout(_device, device->meta.texel_buffer_copy.p_layout,
                                 &device->vk.alloc);
   }

   if (device->meta.texel_buffer_copy.ds_layout) {
      v3dv_DestroyDescriptorSetLayout(_device, device->meta.texel_buffer_copy.ds_layout,
                                      &device->vk.alloc);
   }
}
static VkFormat
get_compatible_tlb_format(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_R8G8B8A8_SNORM:
      return VK_FORMAT_R8G8B8A8_UINT;

   case VK_FORMAT_R8G8_SNORM:
      return VK_FORMAT_R8G8_UINT;

   case VK_FORMAT_R8_SNORM:
      return VK_FORMAT_R8_UINT;

   case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
      return VK_FORMAT_A8B8G8R8_UINT_PACK32;

   case VK_FORMAT_R16_UNORM:
   case VK_FORMAT_R16_SNORM:
      return VK_FORMAT_R16_UINT;

   case VK_FORMAT_R16G16_UNORM:
   case VK_FORMAT_R16G16_SNORM:
      return VK_FORMAT_R16G16_UINT;

   case VK_FORMAT_R16G16B16A16_UNORM:
   case VK_FORMAT_R16G16B16A16_SNORM:
      return VK_FORMAT_R16G16B16A16_UINT;

   case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
      return VK_FORMAT_R32_SFLOAT;

   /* We can't render to compressed formats using the TLB so instead we use
    * a compatible format with the same bpp as the compressed format. Because
    * the compressed format's bpp is for a full block (i.e. 4x4 pixels in the
    * case of ETC), when we implement copies with the compatible format we
    * will have to divide offsets and dimensions on the compressed image by
    * the compressed block size.
    */
   case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
   case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
   case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
   case VK_FORMAT_BC2_UNORM_BLOCK:
   case VK_FORMAT_BC2_SRGB_BLOCK:
   case VK_FORMAT_BC3_SRGB_BLOCK:
   case VK_FORMAT_BC3_UNORM_BLOCK:
   case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
   case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
   case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
   case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
   case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
   case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
   case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
   case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
   case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
   case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
   case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
   case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
   case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
   case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
      return VK_FORMAT_R32G32B32A32_UINT;

   case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
   case VK_FORMAT_EAC_R11_UNORM_BLOCK:
   case VK_FORMAT_EAC_R11_SNORM_BLOCK:
   case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
   case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
   case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
   case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
      return VK_FORMAT_R16G16B16A16_UINT;

   default:
      return VK_FORMAT_UNDEFINED;
   }
}
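/* Example: a VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK image (128-bit 4x4 blocks)
 * maps to VK_FORMAT_R32G32B32A32_UINT above, so a raw copy stores one uint4
 * texel per compressed block and the copy offsets and dimensions are divided
 * by the 4x4 block size, as described in the comment above.
 */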
/**
 * Checks if we can implement an image copy or clear operation using the TLB
 * hardware.
 */
bool
v3dv_meta_can_use_tlb(struct v3dv_image *image,
                      const VkOffset3D *offset,
                      VkFormat *compat_format)
{
   if (offset->x != 0 || offset->y != 0)
      return false;

   if (image->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) {
      if (compat_format)
         *compat_format = image->vk.format;
      return true;
   }

   /* If the image format is not TLB-supported, then check if we can use
    * a compatible format instead.
    */
   if (compat_format) {
      *compat_format = get_compatible_tlb_format(image->vk.format);
      if (*compat_format != VK_FORMAT_UNDEFINED)
         return true;
   }

   return false;
}
/**
 * Implements a copy using the TLB.
 *
 * This only works if we are copying from offset (0,0), since a TLB store for
 * tile (x,y) will be written at the same tile offset into the destination.
 * When this requirement is not met, we need to use a blit instead.
 *
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_buffer *buffer,
                         struct v3dv_image *image,
                         const VkBufferImageCopy2KHR *region)
{
   VkFormat fb_format;
   if (!v3dv_meta_can_use_tlb(image, &region->imageOffset, &fb_format))
      return false;

   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, region->imageSubresource.aspectMask,
       &internal_type, &internal_bpp);

   uint32_t num_layers;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
   if (!job)
      return true;

   /* Handle copy from compressed format using a compatible format */
   const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
   const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);

   v3dv_job_start_frame(job, width, height, num_layers, false,
                        1, internal_bpp, false);

   struct v3dv_meta_framebuffer framebuffer;
   v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
                                              internal_type, &job->frame_tiling);

   v3dv_X(job->device, job_emit_binning_flush)(job);
   v3dv_X(job->device, meta_emit_copy_image_to_buffer_rcl)
      (job, buffer, image, &framebuffer, region);

   v3dv_cmd_buffer_finish_job(cmd_buffer);

   return true;
}
static bool
blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
            struct v3dv_image *dst,
            VkFormat dst_format,
            struct v3dv_image *src,
            VkFormat src_format,
            VkColorComponentFlags cmask,
            VkComponentMapping *cswizzle,
            const VkImageBlit2KHR *region,
            VkFilter filter,
            bool dst_is_padded_image);
/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
                          struct v3dv_buffer *buffer,
                          struct v3dv_image *image,
                          const VkBufferImageCopy2KHR *region)
{
   bool handled = false;

   /* This path uses a shader blit which doesn't support linear images. Return
    * early to avoid all the heavy lifting in preparation for the blit_shader()
    * call that is bound to fail in that scenario.
    */
   if (image->vk.tiling == VK_IMAGE_TILING_LINEAR &&
       image->vk.image_type != VK_IMAGE_TYPE_1D) {
      return handled;
   }

   /* Generally, the bpp of the data in the buffer matches that of the
    * source image. The exception is the case where we are copying
    * stencil (8bpp) to a combined d24s8 image (32bpp).
    */
   uint32_t buffer_bpp = image->cpp;

   VkImageAspectFlags copy_aspect = region->imageSubresource.aspectMask;

   /* Because we are going to implement the copy as a blit, we need to create
    * a linear image from the destination buffer and we also want our blit
    * source and destination formats to be the same (to avoid any format
    * conversions), so we choose a canonical format that matches the
    * source image bpp.
    *
    * The exception to the above is copying from combined depth/stencil images
    * because we are copying only one aspect of the image, so we need to setup
    * our formats, color write mask and source swizzle mask to match that.
    */
   VkFormat dst_format;
   VkFormat src_format;
   VkColorComponentFlags cmask = 0; /* All components */
   VkComponentMapping cswizzle = {
      .r = VK_COMPONENT_SWIZZLE_IDENTITY,
      .g = VK_COMPONENT_SWIZZLE_IDENTITY,
      .b = VK_COMPONENT_SWIZZLE_IDENTITY,
      .a = VK_COMPONENT_SWIZZLE_IDENTITY,
   };
   switch (buffer_bpp) {
   case 16:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R32G32B32A32_UINT;
      src_format = dst_format;
      break;
   case 8:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R16G16B16A16_UINT;
      src_format = dst_format;
      break;
   case 4:
      switch (copy_aspect) {
      case VK_IMAGE_ASPECT_COLOR_BIT:
         src_format = VK_FORMAT_R8G8B8A8_UINT;
         dst_format = VK_FORMAT_R8G8B8A8_UINT;
         break;
      case VK_IMAGE_ASPECT_DEPTH_BIT:
         assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
                image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
                image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
         if (image->vk.format == VK_FORMAT_D32_SFLOAT) {
            src_format = VK_FORMAT_R32_UINT;
            dst_format = VK_FORMAT_R32_UINT;
         } else {
            /* We want to write depth in the buffer in the first 24 bits,
             * however, the hardware has depth in bits 8-31, so swizzle the
             * source components to match what we want. Also, we don't
             * want to write bits 24-31 in the destination.
             */
            src_format = VK_FORMAT_R8G8B8A8_UINT;
            dst_format = VK_FORMAT_R8G8B8A8_UINT;
            cmask = VK_COLOR_COMPONENT_R_BIT |
                    VK_COLOR_COMPONENT_G_BIT |
                    VK_COLOR_COMPONENT_B_BIT;
            cswizzle.r = VK_COMPONENT_SWIZZLE_G;
            cswizzle.g = VK_COMPONENT_SWIZZLE_B;
            cswizzle.b = VK_COMPONENT_SWIZZLE_A;
            cswizzle.a = VK_COMPONENT_SWIZZLE_ZERO;
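            /* E.g. reading a D24S8 word as RGBA8UI yields r = stencil
             * (bits 0-7) and g/b/a = depth (bits 8-31), so the (G,B,A,0)
             * swizzle plus the RGB-only write mask above land exactly the
             * 24 depth bits in the first three bytes of each buffer texel.
             */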
         }
         break;
      case VK_IMAGE_ASPECT_STENCIL_BIT:
         assert(copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT);
         assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
         /* Copying from S8D24. We want to write 8-bit stencil values only,
          * so adjust the buffer bpp for that. Since the hardware stores stencil
          * in the LSB, we can just do a RGBA8UI to R8UI blit.
          */
         src_format = VK_FORMAT_R8G8B8A8_UINT;
         dst_format = VK_FORMAT_R8_UINT;
         buffer_bpp = 1;
         break;
      default:
         unreachable("unsupported aspect");
      }
      break;
   case 2:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
             copy_aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
      dst_format = VK_FORMAT_R16_UINT;
      src_format = dst_format;
      break;
   case 1:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R8_UINT;
      src_format = dst_format;
      break;
   default:
      unreachable("unsupported bit-size");
   }
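   /* Summary of the canonical formats chosen above, by buffer bpp:
    * 16 -> RGBA32UI, 8 -> RGBA16UI, 4 -> RGBA8UI (R32UI for D32 depth),
    * 2 -> R16UI, 1 -> R8UI. Source and destination always share a texel
    * size, so the blit below never converts.
    */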
   /* The hardware doesn't support linear depth/stencil stores, so we
    * implement copies of depth/stencil aspect as color copies using a
    * compatible color format.
    */
   assert(vk_format_is_color(src_format));
   assert(vk_format_is_color(dst_format));
   copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;

   /* We should be able to handle the blit if we got this far */
   handled = true;

   /* Obtain the 2D buffer region spec */
   uint32_t buf_width, buf_height;
   if (region->bufferRowLength == 0)
      buf_width = region->imageExtent.width;
   else
      buf_width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      buf_height = region->imageExtent.height;
   else
      buf_height = region->bufferImageHeight;

   /* If the image is compressed, the bpp refers to blocks, not pixels */
   uint32_t block_width = vk_format_get_blockwidth(image->vk.format);
   uint32_t block_height = vk_format_get_blockheight(image->vk.format);
   buf_width = buf_width / block_width;
   buf_height = buf_height / block_height;
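   /* For instance, with a bufferRowLength of 256 texels on an ETC2 image
    * (4x4 blocks), buf_width becomes 64: the linear image created below is
    * sized in blocks, not texels.
    */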
   /* Compute layers to copy */
   uint32_t num_layers;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   /* Our blit interface can see the real format of the images to detect
    * copies between compressed and uncompressed images and adapt the
    * blit region accordingly. Here we are just doing a raw copy of
    * compressed data, but we are passing an uncompressed view of the
    * buffer for the blit destination image (since compressed formats are
    * not renderable), so we also want to provide an uncompressed view of
    * the source image.
    */
   VkResult result;
   struct v3dv_device *device = cmd_buffer->device;
   VkDevice _device = v3dv_device_to_handle(device);
   if (vk_format_is_compressed(image->vk.format)) {
      VkImage uiview;
      VkImageCreateInfo uiview_info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
         .imageType = VK_IMAGE_TYPE_3D,
         .format = dst_format,
         .extent = { buf_width, buf_height, image->vk.extent.depth },
         .mipLevels = image->vk.mip_levels,
         .arrayLayers = image->vk.array_layers,
         .samples = image->vk.samples,
         .tiling = image->vk.tiling,
         .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         .queueFamilyIndexCount = 0,
         .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
      };
      result = v3dv_CreateImage(_device, &uiview_info, &device->vk.alloc, &uiview);
      if (result != VK_SUCCESS)
         return handled;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)uiview,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);

      result =
         vk_common_BindImageMemory(_device, uiview,
                                   v3dv_device_memory_to_handle(image->mem),
                                   image->mem_offset);
      if (result != VK_SUCCESS)
         return handled;

      image = v3dv_image_from_handle(uiview);
   }

   /* Copy requested layers */
   for (uint32_t i = 0; i < num_layers; i++) {
      /* Create the destination blit image from the destination buffer */
      VkImageCreateInfo image_info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
         .imageType = VK_IMAGE_TYPE_2D,
         .format = dst_format,
         .extent = { buf_width, buf_height, 1 },
         .mipLevels = 1,
         .arrayLayers = 1,
         .samples = VK_SAMPLE_COUNT_1_BIT,
         .tiling = VK_IMAGE_TILING_LINEAR,
         .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         .queueFamilyIndexCount = 0,
         .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
      };

      VkImage buffer_image;
      result =
         v3dv_CreateImage(_device, &image_info, &device->vk.alloc, &buffer_image);
      if (result != VK_SUCCESS)
         return handled;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)buffer_image,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);

      /* Bind the buffer memory to the image */
      VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset +
         i * buf_width * buf_height * buffer_bpp;
      result =
         vk_common_BindImageMemory(_device, buffer_image,
                                   v3dv_device_memory_to_handle(buffer->mem),
                                   buffer_offset);
      if (result != VK_SUCCESS)
         return handled;

      /* Blit-copy the requested image extent.
       *
       * Since we are copying, the blit must use the same format on the
       * destination and source images to avoid format conversions. The
       * only exception is copying stencil, which we upload to a R8UI source
       * image, but that we need to blit to a S8D24 destination (the only
       * stencil format we support).
       */
      const VkImageBlit2KHR blit_region = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
         .srcSubresource = {
            .aspectMask = copy_aspect,
            .mipLevel = region->imageSubresource.mipLevel,
            .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
            .layerCount = 1,
         },
         .srcOffsets = {
            {
               DIV_ROUND_UP(region->imageOffset.x, block_width),
               DIV_ROUND_UP(region->imageOffset.y, block_height),
               region->imageOffset.z + i,
            },
            {
               DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
                            block_width),
               DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
                            block_height),
               region->imageOffset.z + i + 1,
            },
         },
         .dstSubresource = {
            .aspectMask = copy_aspect,
            .mipLevel = 0,
            .baseArrayLayer = 0,
            .layerCount = 1,
         },
         .dstOffsets = {
            { 0, 0, 0 },
            {
               DIV_ROUND_UP(region->imageExtent.width, block_width),
               DIV_ROUND_UP(region->imageExtent.height, block_height),
               1
            },
         },
      };

      handled = blit_shader(cmd_buffer,
                            v3dv_image_from_handle(buffer_image), dst_format,
                            image, src_format,
                            cmask, &cswizzle,
                            &blit_region, VK_FILTER_NEAREST, false);
      if (!handled) {
         /* This is unexpected, we should have a supported blit spec */
         unreachable("Unable to blit buffer to destination image");
         return false;
      }
   }

   return handled;
}
VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,
                              const VkCopyImageToBufferInfo2KHR *info)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_image, image, info->srcImage);
   V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->dstBuffer);

   assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);

   for (uint32_t i = 0; i < info->regionCount; i++) {
      if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, &info->pRegions[i]))
         continue;
      if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, &info->pRegions[i]))
         continue;
      unreachable("Unsupported image to buffer copy.");
   }
}
/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
               struct v3dv_image *dst,
               struct v3dv_image *src,
               const VkImageCopy2KHR *region)
{
   /* Destination can't be raster format */
   if (dst->vk.tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* We can only do full copies, so if the format is D24S8 both aspects need
    * to be copied. We only need to check the dst format because the spec
    * states that depth/stencil formats must match exactly.
    */
   if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
      const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
                                            VK_IMAGE_ASPECT_STENCIL_BIT;
      if (region->dstSubresource.aspectMask != ds_aspects)
         return false;
   }

   /* Don't handle copies between uncompressed and compressed formats for now.
    *
    * FIXME: we should be able to handle these easily but there is no coverage
    * in CTS at the moment that makes such copies with full images (which we
    * require here), only partial copies. Also, in that case the code below that
    * checks for "dst image complete" requires some changes, since it is
    * checking against the region dimensions, which are in units of the source
    * image format.
    */
   if (vk_format_is_compressed(dst->vk.format) !=
       vk_format_is_compressed(src->vk.format)) {
      return false;
   }

   /* Source region must start at (0,0) */
   if (region->srcOffset.x != 0 || region->srcOffset.y != 0)
      return false;

   /* Destination image must be complete */
   if (region->dstOffset.x != 0 || region->dstOffset.y != 0)
      return false;

   const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
   uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level);
   uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level);
   if (region->extent.width != dst_width || region->extent.height != dst_height)
      return false;

   /* From vkCmdCopyImage:
    *
    *   "When copying between compressed and uncompressed formats the extent
    *    members represent the texel dimensions of the source image and not
    *    the destination."
    */
   const uint32_t block_w = vk_format_get_blockwidth(src->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(src->vk.format);
   uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
   uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);

   /* Account for sample count */
   assert(dst->vk.samples == src->vk.samples);
   if (dst->vk.samples > VK_SAMPLE_COUNT_1_BIT) {
      assert(dst->vk.samples == VK_SAMPLE_COUNT_4_BIT);
      width *= 2;
      height *= 2;
   }

   /* The TFU unit doesn't handle format conversions so we need the formats to
    * match. On the other hand, vkCmdCopyImage allows different color formats
    * on the source and destination images, but only if they are texel
    * compatible. For us, this means that we can effectively ignore different
    * formats and just make the copy using either of them, since we are just
    * moving raw data and not making any conversions.
    *
    * Also, the formats supported by the TFU unit are limited, but again, since
    * we are only doing raw copies here without interpreting or converting
    * the underlying pixel data according to its format, we can always choose
    * to use compatible formats that are supported with the TFU unit.
    */
   assert(dst->cpp == src->cpp);
   const struct v3dv_format *format =
      v3dv_get_compatible_tfu_format(cmd_buffer->device,
                                     dst->cpp, NULL);

   /* Emit a TFU job for each layer to blit */
   const uint32_t layer_count = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
                                region->dstSubresource.layerCount :
                                region->extent.depth;
   const uint32_t src_mip_level = region->srcSubresource.mipLevel;

   const uint32_t base_src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->srcSubresource.baseArrayLayer : region->srcOffset.z;
   const uint32_t base_dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->dstSubresource.baseArrayLayer : region->dstOffset.z;
   for (uint32_t i = 0; i < layer_count; i++) {
      const uint32_t dst_offset =
         dst->mem->bo->offset +
         v3dv_layer_offset(dst, dst_mip_level, base_dst_layer + i);
      const uint32_t src_offset =
         src->mem->bo->offset +
         v3dv_layer_offset(src, src_mip_level, base_src_layer + i);

      const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level];
      const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level];

      v3dv_X(cmd_buffer->device, meta_emit_tfu_job)(
         cmd_buffer,
         dst->mem->bo->handle,
         dst_offset,
         dst_slice->tiling,
         dst_slice->padded_height,
         dst->cpp,
         src->mem->bo->handle,
         src_offset,
         src_slice->tiling,
         src_slice->tiling == V3D_TILING_RASTER ?
            src_slice->stride : src_slice->padded_height,
         src->cpp,
         width, height, format);
   }

   return true;
}
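/* For instance, a copy between VK_FORMAT_R8G8B8A8_SRGB and
 * VK_FORMAT_R8G8B8A8_UNORM images (both 4 cpp) is just a 32bpp raw move, so
 * any 32bpp format returned by v3dv_get_compatible_tfu_format() works for
 * the job above without interpreting the pixel data.
 */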
/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
               struct v3dv_image *dst,
               struct v3dv_image *src,
               const VkImageCopy2KHR *region)
{
   VkFormat fb_format;
   if (!v3dv_meta_can_use_tlb(src, &region->srcOffset, &fb_format) ||
       !v3dv_meta_can_use_tlb(dst, &region->dstOffset, &fb_format)) {
      return false;
   }

   /* From the Vulkan spec, VkImageCopy valid usage:
    *
    *   "If neither the calling command’s srcImage nor the calling command’s
    *    dstImage has a multi-planar image format then the aspectMask member
    *    of srcSubresource and dstSubresource must match."
    */
   assert(region->dstSubresource.aspectMask ==
          region->srcSubresource.aspectMask);
   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, region->dstSubresource.aspectMask,
       &internal_type, &internal_bpp);

   /* From the Vulkan spec with VK_KHR_maintenance1, VkImageCopy valid usage:
    *
    *   "The number of slices of the extent (for 3D) or layers of the
    *    srcSubresource (for non-3D) must match the number of slices of the
    *    extent (for 3D) or layers of the dstSubresource (for non-3D)."
    */
   assert((src->vk.image_type != VK_IMAGE_TYPE_3D ?
           region->srcSubresource.layerCount : region->extent.depth) ==
          (dst->vk.image_type != VK_IMAGE_TYPE_3D ?
           region->dstSubresource.layerCount : region->extent.depth));

   uint32_t num_layers;
   if (dst->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->dstSubresource.layerCount;
   else
      num_layers = region->extent.depth;
   assert(num_layers > 0);

   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
   if (!job)
      return true;

   /* Handle copy to compressed image using compatible format */
   const uint32_t block_w = vk_format_get_blockwidth(dst->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(dst->vk.format);
   const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);

   v3dv_job_start_frame(job, width, height, num_layers, false, 1, internal_bpp,
                        src->vk.samples > VK_SAMPLE_COUNT_1_BIT);

   struct v3dv_meta_framebuffer framebuffer;
   v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
                                              internal_type, &job->frame_tiling);

   v3dv_X(job->device, job_emit_binning_flush)(job);
   v3dv_X(job->device, meta_emit_copy_image_rcl)(job, dst, src, &framebuffer, region);

   v3dv_cmd_buffer_finish_job(cmd_buffer);

   return true;
}
/**
 * Takes the image provided as argument and creates a new image that has
 * the same specification and aliases the same memory storage, except that:
 *
 *   - It has the uncompressed format passed in.
 *   - Its original width/height are scaled by the factors passed in.
 *
 * This is useful to implement copies from compressed images using the blit
 * path. The idea is that we create uncompressed "image views" of both the
 * source and destination images using the uncompressed format and then we
 * define the copy blit in terms of that format.
 */
static struct v3dv_image *
create_image_alias(struct v3dv_cmd_buffer *cmd_buffer,
                   struct v3dv_image *src,
                   float width_scale,
                   float height_scale,
                   VkFormat format)
{
   assert(!vk_format_is_compressed(format));

   VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);

   VkImageCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
      .imageType = src->vk.image_type,
      .format = format,
      .extent = {
         .width = src->vk.extent.width * width_scale,
         .height = src->vk.extent.height * height_scale,
         .depth = src->vk.extent.depth,
      },
      .mipLevels = src->vk.mip_levels,
      .arrayLayers = src->vk.array_layers,
      .samples = src->vk.samples,
      .tiling = src->vk.tiling,
      .usage = src->vk.usage,
   };

   VkImage _image;
   VkResult result =
      v3dv_CreateImage(_device, &info, &cmd_buffer->device->vk.alloc, &_image);
   if (result != VK_SUCCESS) {
      v3dv_flag_oom(cmd_buffer, NULL);
      return NULL;
   }

   struct v3dv_image *image = v3dv_image_from_handle(_image);
   image->mem = src->mem;
   image->mem_offset = src->mem_offset;
   return image;
}
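/* For example, in the compressed copy path below, a 64x64
 * ETC2_R8G8B8A8_UNORM image can be aliased as a 16x16 R32G32B32A32_UINT
 * image (width_scale = height_scale = 1/4): both views span the same
 * memory, with one uint4 texel per 4x4 compressed block.
 */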
/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
                struct v3dv_image *dst,
                struct v3dv_image *src,
                const VkImageCopy2KHR *region)
{
   const uint32_t src_block_w = vk_format_get_blockwidth(src->vk.format);
   const uint32_t src_block_h = vk_format_get_blockheight(src->vk.format);
   const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk.format);
   const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk.format);
   const float block_scale_w = (float)src_block_w / (float)dst_block_w;
   const float block_scale_h = (float)src_block_h / (float)dst_block_h;

   /* We need to choose a single format for the blit to ensure that this is
    * really a copy and there are no format conversions going on. Since we are
    * going to blit, we need to make sure that the selected format can be
    * both rendered to and textured from.
    */
   VkFormat format;
   float src_scale_w = 1.0f;
   float src_scale_h = 1.0f;
   float dst_scale_w = block_scale_w;
   float dst_scale_h = block_scale_h;
   if (vk_format_is_compressed(src->vk.format)) {
      /* If we are copying from a compressed format we should be aware that we
       * are going to texture from the source image, and the texture setup
       * knows the actual size of the image, so we need to choose a format
       * that has a per-texel (not per-block) bpp that is compatible for that
       * image size. For example, for a source image with size Bw*WxBh*H
       * and format ETC2_RGBA8_UNORM copied to a WxH image of format RGBA32UI,
       * each of the Bw*WxBh*H texels in the compressed source image is 8-bit
       * (which translates to a 128-bit 4x4 RGBA32 block when uncompressed),
       * so we could specify a blit with size Bw*WxBh*H and a format with
       * a bpp of 8-bit per texel (R8_UINT).
       *
       * Unfortunately, when copying from a format like ETC2_RGB8A1_UNORM,
       * which is 64-bit per texel, then we would need a 4-bit format, which
       * we don't have, so instead we still choose an 8-bit format, but we
       * apply a divisor to the row dimensions of the blit, since we are
       * copying two texels per item.
       *
       * Generally, we can choose any format so long as we compute appropriate
       * divisors for the width and height depending on the source image's
       * bpp.
       */
      assert(src->cpp == dst->cpp);

      format = VK_FORMAT_R32G32_UINT;
      switch (src->cpp) {
      case 16:
         format = VK_FORMAT_R32G32B32A32_UINT;
         break;
      case 8:
         format = VK_FORMAT_R16G16B16A16_UINT;
         break;
      default:
         unreachable("Unsupported compressed format");
      }

      /* Create image views of the src/dst images that we can interpret in
       * terms of the canonical format.
       */
      src_scale_w /= src_block_w;
      src_scale_h /= src_block_h;
      dst_scale_w /= src_block_w;
      dst_scale_h /= src_block_h;

      src = create_image_alias(cmd_buffer, src,
                               src_scale_w, src_scale_h, format);

      dst = create_image_alias(cmd_buffer, dst,
                               dst_scale_w, dst_scale_h, format);
   } else {
      format = src->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO ?
         src->vk.format : get_compatible_tlb_format(src->vk.format);
      if (format == VK_FORMAT_UNDEFINED)
         return false;

      const struct v3dv_format *f = v3dv_X(cmd_buffer->device, get_format)(format);
      if (!f->supported || f->tex_type == TEXTURE_DATA_FORMAT_NO)
         return false;
   }

   /* Given an uncompressed image with size WxH, if we copy it to a compressed
    * image, it will result in an image with size W*bWxH*bH, where bW and bH
    * are the compressed format's block width and height. This means that
    * copies between compressed and uncompressed images involve different
    * image sizes, and therefore, we need to take that into account when
    * setting up the source and destination blit regions below, so they are
    * consistent from the point of view of the single compatible format
    * selected for the copy.
    *
    * We should take into account that the dimensions of the region provided
    * to the copy command are specified in terms of the source image. With that
    * in mind, below we adjust the blit destination region to be consistent with
    * the source region for the compatible format, so basically, we apply
    * the block scale factor to the destination offset provided by the copy
    * command (because it is specified in terms of the destination image, not
    * the source), and then we just add the region copy dimensions to that
    * (since the region dimensions are already specified in terms of the source
    * image).
    */
   const VkOffset3D src_start = {
      region->srcOffset.x * src_scale_w,
      region->srcOffset.y * src_scale_h,
      region->srcOffset.z,
   };
   const VkOffset3D src_end = {
      src_start.x + region->extent.width * src_scale_w,
      src_start.y + region->extent.height * src_scale_h,
      src_start.z + region->extent.depth,
   };

   const VkOffset3D dst_start = {
      region->dstOffset.x * dst_scale_w,
      region->dstOffset.y * dst_scale_h,
      region->dstOffset.z,
   };
   const VkOffset3D dst_end = {
      dst_start.x + region->extent.width * src_scale_w,
      dst_start.y + region->extent.height * src_scale_h,
      dst_start.z + region->extent.depth,
   };

   const VkImageBlit2KHR blit_region = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
      .srcSubresource = region->srcSubresource,
      .srcOffsets = { src_start, src_end },
      .dstSubresource = region->dstSubresource,
      .dstOffsets = { dst_start, dst_end },
   };
   bool handled = blit_shader(cmd_buffer,
                              dst, format,
                              src, format,
                              0, NULL,
                              &blit_region, VK_FILTER_NEAREST, true);

   /* We should have selected formats that we can blit */
   assert(handled);
   return handled;
}
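/* Worked example for the region math above: copying a 16x16 texel region
 * from an ETC2 image (4x4 blocks, 16 cpp) to an uncompressed RGBA32UI image
 * gives block_scale = 4 and src_scale = 1/4, so the blit is emitted as a
 * 4x4 copy in R32G32B32A32_UINT and the destination offset is left unscaled
 * (dst_scale = 1).
 */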
VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyImage2KHR(VkCommandBuffer commandBuffer,
                      const VkCopyImageInfo2KHR *info)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
   V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);

   assert(src->vk.samples == dst->vk.samples);

   for (uint32_t i = 0; i < info->regionCount; i++) {
      if (copy_image_tfu(cmd_buffer, dst, src, &info->pRegions[i]))
         continue;
      if (copy_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))
         continue;
      if (copy_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))
         continue;
      unreachable("Image copy not supported");
   }
}
VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
                       const VkCopyBufferInfo2KHR *pCopyBufferInfo)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
   V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);

   for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {
      v3dv_X(cmd_buffer->device, meta_copy_buffer)
         (cmd_buffer,
          dst_buffer->mem->bo, dst_buffer->mem_offset,
          src_buffer->mem->bo, src_buffer->mem_offset,
          &pCopyBufferInfo->pRegions[i]);
   }
}
static void
destroy_update_buffer_cb(VkDevice _device,
                         uint64_t pobj,
                         VkAllocationCallbacks *alloc)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   struct v3dv_bo *bo = (struct v3dv_bo *)((uintptr_t) pobj);
   v3dv_bo_free(device, bo);
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
                     VkBuffer dstBuffer,
                     VkDeviceSize dstOffset,
                     VkDeviceSize dataSize,
                     const void *pData)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);

   struct v3dv_bo *src_bo =
      v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer", true);
   if (!src_bo) {
      fprintf(stderr, "Failed to allocate BO for vkCmdUpdateBuffer.\n");
      return;
   }

   bool ok = v3dv_bo_map(cmd_buffer->device, src_bo, src_bo->size);
   if (!ok) {
      fprintf(stderr, "Failed to map BO for vkCmdUpdateBuffer.\n");
      return;
   }

   memcpy(src_bo->map, pData, dataSize);

   v3dv_bo_unmap(cmd_buffer->device, src_bo);

   VkBufferCopy2KHR region = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2_KHR,
      .srcOffset = 0,
      .dstOffset = dstOffset,
      .size = dataSize,
   };

   struct v3dv_job *copy_job =
      v3dv_X(cmd_buffer->device, meta_copy_buffer)
      (cmd_buffer, dst_buffer->mem->bo, dst_buffer->mem_offset,
       src_bo, 0, &region);

   if (!copy_job)
      return;

   v3dv_cmd_buffer_add_private_obj(
      cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
}
VKAPI_ATTR void VKAPI_CALL
v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
                   VkBuffer dstBuffer,
                   VkDeviceSize dstOffset,
                   VkDeviceSize size,
                   uint32_t data)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);

   struct v3dv_bo *bo = dst_buffer->mem->bo;

   /* From the Vulkan spec:
    *
    *   "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not
    *    a multiple of 4, then the nearest smaller multiple is used."
    */
   if (size == VK_WHOLE_SIZE) {
      size = dst_buffer->size - dstOffset;
      size -= size % 4;
   }

   v3dv_X(cmd_buffer->device, meta_fill_buffer)
      (cmd_buffer, bo, dstOffset, size, data);
}
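/* E.g. filling a 105-byte buffer with dstOffset = 3 and size = VK_WHOLE_SIZE:
 * the 102 remaining bytes are rounded down to a 100-byte fill per the spec
 * rule quoted above.
 */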
/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_image *image,
                         struct v3dv_buffer *buffer,
                         const VkBufferImageCopy2KHR *region)
{
   assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);

   /* Destination can't be raster format */
   if (image->vk.tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* We can't copy D24S8 because buffer to image copies only copy one aspect
    * at a time, and the TFU copies full images. Also, V3D depth bits for
    * both D24S8 and D24X8 are stored in the 24-bit MSB of each 32-bit word, but
    * the Vulkan spec has the buffer data specified the other way around, so it
    * is not a straight copy, we would have to swizzle the channels, which the
    * TFU doesn't support.
    */
   if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
       image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
      return false;
   }

   /* Region must include full slice */
   const uint32_t offset_x = region->imageOffset.x;
   const uint32_t offset_y = region->imageOffset.y;
   if (offset_x != 0 || offset_y != 0)
      return false;

   uint32_t width, height;
   if (region->bufferRowLength == 0)
      width = region->imageExtent.width;
   else
      width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      height = region->imageExtent.height;
   else
      height = region->bufferImageHeight;

   if (width != image->vk.extent.width || height != image->vk.extent.height)
      return false;

   /* Handle region semantics for compressed images */
   const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
   width = DIV_ROUND_UP(width, block_w);
   height = DIV_ROUND_UP(height, block_h);

   /* Format must be supported for texturing via the TFU. Since we are just
    * copying raw data and not converting between pixel formats, we can ignore
    * the image's format and choose a compatible TFU format for the image
    * texel size instead, which expands the list of formats we can handle here.
    */
   const struct v3dv_format *format =
      v3dv_get_compatible_tfu_format(cmd_buffer->device,
                                     image->cpp, NULL);

   const uint32_t mip_level = region->imageSubresource.mipLevel;
   const struct v3d_resource_slice *slice = &image->slices[mip_level];

   uint32_t num_layers;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   assert(image->mem && image->mem->bo);
   const struct v3dv_bo *dst_bo = image->mem->bo;

   assert(buffer->mem && buffer->mem->bo);
   const struct v3dv_bo *src_bo = buffer->mem->bo;

   /* Emit a TFU job per layer to copy */
   const uint32_t buffer_stride = width * image->cpp;
   for (int i = 0; i < num_layers; i++) {
      uint32_t layer;
      if (image->vk.image_type != VK_IMAGE_TYPE_3D)
         layer = region->imageSubresource.baseArrayLayer + i;
      else
         layer = region->imageOffset.z + i;

      const uint32_t buffer_offset =
         buffer->mem_offset + region->bufferOffset +
         height * buffer_stride * i;
      const uint32_t src_offset = src_bo->offset + buffer_offset;

      const uint32_t dst_offset =
         dst_bo->offset + v3dv_layer_offset(image, mip_level, layer);

      v3dv_X(cmd_buffer->device, meta_emit_tfu_job)(
         cmd_buffer,
         dst_bo->handle,
         dst_offset,
         slice->tiling,
         slice->padded_height,
         image->cpp,
         src_bo->handle,
         src_offset,
         V3D_TILING_RASTER,
         buffer_stride,
         image->cpp,
         width, height, format);
   }

   return true;
}
/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_image *image,
                         struct v3dv_buffer *buffer,
                         const VkBufferImageCopy2KHR *region)
{
   VkFormat fb_format;
   if (!v3dv_meta_can_use_tlb(image, &region->imageOffset, &fb_format))
      return false;

   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, region->imageSubresource.aspectMask,
       &internal_type, &internal_bpp);

   uint32_t num_layers;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
   if (!job)
      return true;

   /* Handle copy to compressed format using a compatible format */
   const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
   const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);

   v3dv_job_start_frame(job, width, height, num_layers, false,
                        1, internal_bpp, false);

   struct v3dv_meta_framebuffer framebuffer;
   v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
                                              internal_type, &job->frame_tiling);

   v3dv_X(job->device, job_emit_binning_flush)(job);
   v3dv_X(job->device, meta_emit_copy_buffer_to_image_rcl)
      (job, image, buffer, &framebuffer, region);

   v3dv_cmd_buffer_finish_job(cmd_buffer);

   return true;
}
static bool
create_tiled_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_image *image,
                               struct v3dv_buffer *buffer,
                               const VkBufferImageCopy2KHR *region)
{
   if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, region))
      return true;
   if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, region))
      return true;
   return false;
}
static VkResult
create_texel_buffer_copy_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
{
   /* If this is not the first pool we create for this command buffer,
    * size it based on the size of the currently exhausted pool.
    */
   uint32_t descriptor_count = 64;
   if (cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE) {
      struct v3dv_descriptor_pool *exhausted_pool =
         v3dv_descriptor_pool_from_handle(cmd_buffer->meta.texel_buffer_copy.dspool);
      descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
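      /* e.g. 64 -> 128 -> 256 -> 512 -> 1024 sets, capped thereafter */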
   }

   /* Create the descriptor pool */
   cmd_buffer->meta.texel_buffer_copy.dspool = VK_NULL_HANDLE;
   VkDescriptorPoolSize pool_size = {
      .type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
      .descriptorCount = descriptor_count,
   };
   VkDescriptorPoolCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
      .maxSets = descriptor_count,
      .poolSizeCount = 1,
      .pPoolSizes = &pool_size,
      .flags = 0,
   };
   VkResult result =
      v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
                                &info,
                                &cmd_buffer->device->vk.alloc,
                                &cmd_buffer->meta.texel_buffer_copy.dspool);

   if (result == VK_SUCCESS) {
      assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
      const VkDescriptorPool _pool = cmd_buffer->meta.texel_buffer_copy.dspool;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t) _pool,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);

      struct v3dv_descriptor_pool *pool =
         v3dv_descriptor_pool_from_handle(_pool);
      pool->is_driver_internal = true;
   }

   return result;
}
static VkResult
allocate_texel_buffer_copy_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
                                          VkDescriptorSet *set)
{
   /* Make sure we have a descriptor pool */
   VkResult result;
   if (cmd_buffer->meta.texel_buffer_copy.dspool == VK_NULL_HANDLE) {
      result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
      if (result != VK_SUCCESS)
         return result;
   }
   assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);

   /* Allocate descriptor set */
   struct v3dv_device *device = cmd_buffer->device;
   VkDevice _device = v3dv_device_to_handle(device);
   VkDescriptorSetAllocateInfo info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
      .descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool,
      .descriptorSetCount = 1,
      .pSetLayouts = &device->meta.texel_buffer_copy.ds_layout,
   };
   result = v3dv_AllocateDescriptorSets(_device, &info, set);

   /* If we ran out of pool space, grow the pool and try again */
   if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
      result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
      if (result == VK_SUCCESS) {
         info.descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool;
         result = v3dv_AllocateDescriptorSets(_device, &info, set);
      }
   }

   return result;
}
static void
get_texel_buffer_copy_pipeline_cache_key(VkFormat format,
                                         VkColorComponentFlags cmask,
                                         VkComponentMapping *cswizzle,
                                         bool is_layered,
                                         uint8_t *key)
{
   memset(key, 0, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);

   uint32_t *p = (uint32_t *) key;

   *p = format;
   p++;

   *p = cmask;
   p++;

   /* Note that we are using a single byte for this, so we could pack
    * more data into this 32-bit slot in the future.
    */
   *p = is_layered ? 1 : 0;
   p++;

   memcpy(p, cswizzle, sizeof(VkComponentMapping));
   p += sizeof(VkComponentMapping) / sizeof(uint32_t);

   assert(((uint8_t*)p - key) == V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
}
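/* With the packing above, the key layout is: bytes 0-3 format, 4-7 color
 * write mask, 8-11 layered flag, 12-27 component mapping, i.e. the
 * 28 bytes (4 + 4 + 4 + 16) that the assert checks against
 * V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE.
 */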
static bool
create_blit_render_pass(struct v3dv_device *device,
                        VkFormat dst_format,
                        VkFormat src_format,
                        VkRenderPass *pass_load,
                        VkRenderPass *pass_no_load);

static nir_ssa_def *gen_rect_vertices(nir_builder *b);

static bool
create_pipeline(struct v3dv_device *device,
                struct v3dv_render_pass *pass,
                struct nir_shader *vs_nir,
                struct nir_shader *gs_nir,
                struct nir_shader *fs_nir,
                const VkPipelineVertexInputStateCreateInfo *vi_state,
                const VkPipelineDepthStencilStateCreateInfo *ds_state,
                const VkPipelineColorBlendStateCreateInfo *cb_state,
                const VkPipelineMultisampleStateCreateInfo *ms_state,
                const VkPipelineLayout layout,
                VkPipeline *pipeline);
static nir_shader *
get_texel_buffer_copy_vs()
{
   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
                                                  "meta texel buffer copy vs");
   nir_variable *vs_out_pos =
      nir_variable_create(b.shader, nir_var_shader_out,
                          glsl_vec4_type(), "gl_Position");
   vs_out_pos->data.location = VARYING_SLOT_POS;

   nir_ssa_def *pos = gen_rect_vertices(&b);
   nir_store_var(&b, vs_out_pos, pos, 0xf);

   return b.shader;
}
static nir_shader *
get_texel_buffer_copy_gs()
{
   /* FIXME: this creates a geometry shader that takes the index of a single
    * layer to clear from push constants, so we need to emit a draw call for
    * each layer that we want to clear. We could actually do better and have it
    * take a range of layers, however, if we were to do this, we would need to
    * be careful not to exceed the maximum number of output vertices allowed in
    * a geometry shader.
    */
   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
                                                  "meta texel buffer copy gs");
   nir_shader *nir = b.shader;
   nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
   nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
                               (1ull << VARYING_SLOT_LAYER);
   nir->info.gs.input_primitive = SHADER_PRIM_TRIANGLES;
   nir->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
   nir->info.gs.vertices_in = 3;
   nir->info.gs.vertices_out = 3;
   nir->info.gs.invocations = 1;
   nir->info.gs.active_stream_mask = 0x1;

   /* in vec4 gl_Position[3] */
   nir_variable *gs_in_pos =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_array_type(glsl_vec4_type(), 3, 0),
                          "gl_Position");
   gs_in_pos->data.location = VARYING_SLOT_POS;

   /* out vec4 gl_Position */
   nir_variable *gs_out_pos =
      nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
                          "gl_Position");
   gs_out_pos->data.location = VARYING_SLOT_POS;

   /* out float gl_Layer */
   nir_variable *gs_out_layer =
      nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
                          "gl_Layer");
   gs_out_layer->data.location = VARYING_SLOT_LAYER;

   /* Emit output triangle */
   for (uint32_t i = 0; i < 3; i++) {
      /* gl_Position from shader input */
      nir_deref_instr *in_pos_i =
         nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
      nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);

      /* gl_Layer from push constants */
      nir_ssa_def *layer =
         nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
                                .base = TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET,
                                .range = 4);
      nir_store_var(&b, gs_out_layer, layer, 0x1);

      nir_emit_vertex(&b, 0);
   }

   nir_end_primitive(&b, 0);

   return nir;
}
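/* Note the draw pattern this implies: a layered copy emits one draw per
 * layer, updating the layer push constant in between, e.g. six draws to
 * copy a 6-layer array image.
 */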
static nir_ssa_def *
load_frag_coord(nir_builder *b)
{
   nir_foreach_shader_in_variable(var, b->shader) {
      if (var->data.location == VARYING_SLOT_POS)
         return nir_load_var(b, var);
   }

   nir_variable *pos = nir_variable_create(b->shader, nir_var_shader_in,
                                           glsl_vec4_type(), NULL);
   pos->data.location = VARYING_SLOT_POS;
   return nir_load_var(b, pos);
}
static uint32_t
component_swizzle_to_nir_swizzle(VkComponentSwizzle comp, VkComponentSwizzle swz)
{
   if (swz == VK_COMPONENT_SWIZZLE_IDENTITY)
      swz = comp;

   switch (swz) {
   case VK_COMPONENT_SWIZZLE_R:
      return 0;
   case VK_COMPONENT_SWIZZLE_G:
      return 1;
   case VK_COMPONENT_SWIZZLE_B:
      return 2;
   case VK_COMPONENT_SWIZZLE_A:
      return 3;
   default:
      unreachable("Invalid swizzle");
   }
}
static nir_shader *
get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format,
                         VkComponentMapping *cswizzle)
{
   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
                                                  "meta texel buffer copy fs");

   /* We only use the copy from texel buffer shader to implement
    * copy_buffer_to_image_shader, which always selects a compatible integer
    * format for the copy.
    */
   assert(vk_format_is_int(format));

   /* Fragment shader output color */
   nir_variable *fs_out_color =
      nir_variable_create(b.shader, nir_var_shader_out,
                          glsl_uvec4_type(), "out_color");
   fs_out_color->data.location = FRAG_RESULT_DATA0;

   /* Texel buffer input */
   const struct glsl_type *sampler_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
   nir_variable *sampler =
      nir_variable_create(b.shader, nir_var_uniform, sampler_type, "texel_buf");
   sampler->data.descriptor_set = 0;
   sampler->data.binding = 0;

   /* Load the box describing the pixel region we want to copy from the
    * texel buffer.
    */
   nir_ssa_def *box =
      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0),
                             .base = TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET,
                             .range = 16);

   /* Load the buffer stride (this comes in texel units) */
   nir_ssa_def *stride =
      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
                             .base = TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET,
                             .range = 4);

   /* Load the buffer offset (this comes in texel units) */
   nir_ssa_def *offset =
      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
                             .base = TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET,
                             .range = 4);

   nir_ssa_def *coord = nir_f2i32(&b, load_frag_coord(&b));

   /* Load pixel data from texel buffer based on the x,y offset of the pixel
    * within the box. Texel buffers are 1D arrays of texels.
    *
    * Notice that we already make sure that we only generate fragments that are
    * inside the box through the scissor/viewport state, so our offset into the
    * texel buffer should always be within its bounds and we don't need
    * to add a check for that here.
    */
   nir_ssa_def *x_offset =
      nir_isub(&b, nir_channel(&b, coord, 0),
               nir_channel(&b, box, 0));
   nir_ssa_def *y_offset =
      nir_isub(&b, nir_channel(&b, coord, 1),
               nir_channel(&b, box, 1));
   nir_ssa_def *texel_offset =
      nir_iadd(&b, nir_iadd(&b, offset, x_offset),
               nir_imul(&b, y_offset, stride));

   nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
   tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
   tex->op = nir_texop_txf;
   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(texel_offset);
   tex->src[1].src_type = nir_tex_src_texture_deref;
   tex->src[1].src = nir_src_for_ssa(tex_deref);
   tex->dest_type = nir_type_uint32;
   tex->is_array = false;
   tex->coord_components = 1;
   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "texel buffer result");
   nir_builder_instr_insert(&b, &tex->instr);

   uint32_t swiz[4];
   swiz[0] =
      component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_R, cswizzle->r);
   swiz[1] =
      component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_G, cswizzle->g);
   swiz[2] =
      component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_B, cswizzle->b);
   swiz[3] =
      component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_A, cswizzle->a);
   nir_ssa_def *s = nir_swizzle(&b, &tex->dest.ssa, swiz, 4);
   nir_store_var(&b, fs_out_color, s, 0xf);

   return b.shader;
}
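/* Worked example of the addressing above: with box origin (8, 8),
 * stride = 64 and offset = 0, the fragment at (10, 9) fetches texel
 * 0 + (10 - 8) + (9 - 8) * 64 = 66 from the texel buffer.
 */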
static bool
create_texel_buffer_copy_pipeline(struct v3dv_device *device,
                                  VkFormat format,
                                  VkColorComponentFlags cmask,
                                  VkComponentMapping *cswizzle,
                                  bool is_layered,
                                  VkRenderPass _pass,
                                  VkPipelineLayout pipeline_layout,
                                  VkPipeline *pipeline)
{
   struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);

   assert(vk_format_is_color(format));

   nir_shader *vs_nir = get_texel_buffer_copy_vs();
   nir_shader *fs_nir = get_texel_buffer_copy_fs(device, format, cswizzle);
   nir_shader *gs_nir = is_layered ? get_texel_buffer_copy_gs() : NULL;

   const VkPipelineVertexInputStateCreateInfo vi_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
      .vertexBindingDescriptionCount = 0,
      .vertexAttributeDescriptionCount = 0,
   };

   VkPipelineDepthStencilStateCreateInfo ds_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
   };

   VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
   blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
      .blendEnable = false,
      .colorWriteMask = cmask,
   };

   const VkPipelineColorBlendStateCreateInfo cb_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
      .logicOpEnable = false,
      .attachmentCount = 1,
      .pAttachments = blend_att_state
   };

   const VkPipelineMultisampleStateCreateInfo ms_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
      .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
      .sampleShadingEnable = false,
      .pSampleMask = NULL,
      .alphaToCoverageEnable = false,
      .alphaToOneEnable = false,
   };

   return create_pipeline(device,
                          pass,
                          vs_nir, gs_nir, fs_nir,
                          &vi_state,
                          &ds_state,
                          &cb_state,
                          &ms_state,
                          pipeline_layout,
                          pipeline);
}
1870
get_copy_texel_buffer_pipeline(
   struct v3dv_device *device,
   VkFormat format,
   VkColorComponentFlags cmask,
   VkComponentMapping *cswizzle,
   VkImageType image_type,
   bool is_layered,
   struct v3dv_meta_texel_buffer_copy_pipeline **pipeline)
{
   bool ok = true;

   uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
   get_texel_buffer_copy_pipeline_cache_key(format, cmask, cswizzle, is_layered,
                                            key);

   mtx_lock(&device->meta.mtx);
   struct hash_entry *entry =
      _mesa_hash_table_search(device->meta.texel_buffer_copy.cache[image_type],
                              &key);
   if (entry) {
      mtx_unlock(&device->meta.mtx);
      *pipeline = entry->data;
      return true;
   }

   *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
                          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (*pipeline == NULL)
      goto fail;

   /* The blit render pass is compatible */
   ok = create_blit_render_pass(device, format, format,
                                &(*pipeline)->pass,
                                &(*pipeline)->pass_no_load);
   if (!ok)
      goto fail;

   ok =
      create_texel_buffer_copy_pipeline(device,
                                        format, cmask, cswizzle, is_layered,
                                        (*pipeline)->pass,
                                        device->meta.texel_buffer_copy.p_layout,
                                        &(*pipeline)->pipeline);
   if (!ok)
      goto fail;

   memcpy((*pipeline)->key, key, sizeof((*pipeline)->key));
   _mesa_hash_table_insert(device->meta.texel_buffer_copy.cache[image_type],
                           &(*pipeline)->key, *pipeline);

   mtx_unlock(&device->meta.mtx);
   return true;

fail:
   mtx_unlock(&device->meta.mtx);

   VkDevice _device = v3dv_device_to_handle(device);
   if (*pipeline) {
      if ((*pipeline)->pass)
         v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
      if ((*pipeline)->pipeline)
         v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
      vk_free(&device->vk.alloc, *pipeline);
      *pipeline = NULL;
   }

   return false;
}

static bool
texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
                         VkImageAspectFlags aspect,
                         struct v3dv_image *image,
                         VkFormat dst_format,
                         VkFormat src_format,
                         struct v3dv_buffer *buffer,
                         uint32_t buffer_bpp,
                         VkColorComponentFlags cmask,
                         VkComponentMapping *cswizzle,
                         uint32_t region_count,
                         const VkBufferImageCopy2KHR *regions)
{
   VkResult result;
   bool handled = false;

   /* This is a copy path, so we don't handle format conversions. The only
    * exception are stencil to D24S8 copies, which are handled as a color
    * masked R8->RGBA8 copy.
    */
   assert(src_format == dst_format ||
          (dst_format == VK_FORMAT_R8G8B8A8_UINT &&
           src_format == VK_FORMAT_R8_UINT &&
           cmask == VK_COLOR_COMPONENT_R_BIT));

   /* We only handle color copies. Callers can copy D/S aspects by using
    * a compatible color format and maybe a cmask/cswizzle for D24 formats.
    */
   if (aspect != VK_IMAGE_ASPECT_COLOR_BIT)
      return handled;

   /* FIXME: we only handle uncompressed images for now. */
   if (vk_format_is_compressed(image->vk.format))
      return handled;

   const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
                                            VK_COLOR_COMPONENT_G_BIT |
                                            VK_COLOR_COMPONENT_B_BIT |
                                            VK_COLOR_COMPONENT_A_BIT;
   if (cmask == 0)
      cmask = full_cmask;

   /* The buffer needs to have VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT
    * so we can bind it as a texel buffer. Otherwise, the buffer view
    * we create below won't setup the texture state that we need for this.
    */
   if (!(buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT)) {
      if (v3dv_buffer_format_supports_features(
             cmd_buffer->device, src_format,
             VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT)) {
         buffer->usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
      } else {
         return handled;
      }
   }

   /* At this point we should be able to handle the copy unless an unexpected
    * error occurs, such as an OOM.
    */
   handled = true;

   /* Compute the number of layers to copy.
    *
    * If we are batching (region_count > 1) all our regions have the same
    * image subresource so we can take this from the first region. For 3D
    * images we require the same depth extent.
    */
   const VkImageSubresourceLayers *resource = &regions[0].imageSubresource;
   uint32_t num_layers;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
      num_layers = resource->layerCount;
   } else {
      assert(region_count == 1);
      num_layers = regions[0].imageExtent.depth;
   }
   assert(num_layers > 0);

   /* Get the texel buffer copy pipeline */
   struct v3dv_meta_texel_buffer_copy_pipeline *pipeline = NULL;
   bool ok = get_copy_texel_buffer_pipeline(cmd_buffer->device,
                                            dst_format, cmask, cswizzle,
                                            image->vk.image_type, num_layers > 1,
                                            &pipeline);
   if (!ok)
      return handled;
   assert(pipeline && pipeline->pipeline && pipeline->pass);

   /* Setup descriptor set for the source texel buffer. We don't have to
    * register the descriptor as a private command buffer object since
    * all descriptors will be freed automatically with the descriptor
    * pool.
    */
   VkDescriptorSet set;
   result = allocate_texel_buffer_copy_descriptor_set(cmd_buffer, &set);
   if (result != VK_SUCCESS)
      return handled;

   /* FIXME: for some reason passing region->bufferOffset here for the
    * offset field doesn't work, making the following CTS tests fail:
    *
    * dEQP-VK.api.copy_and_blit.core.buffer_to_image.*buffer_offset*
    *
    * So instead we pass 0 here and we pass the offset in texels as a push
    * constant to the shader, which seems to work correctly.
    */
   VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
   VkBufferViewCreateInfo buffer_view_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
      .buffer = v3dv_buffer_to_handle(buffer),
      .format = src_format,
      .offset = 0,
      .range = VK_WHOLE_SIZE,
   };

   VkBufferView texel_buffer_view;
   result = v3dv_CreateBufferView(_device, &buffer_view_info,
                                  &cmd_buffer->device->vk.alloc,
                                  &texel_buffer_view);
   if (result != VK_SUCCESS)
      return handled;

   v3dv_cmd_buffer_add_private_obj(
      cmd_buffer, (uintptr_t)texel_buffer_view,
      (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyBufferView);

   VkWriteDescriptorSet write = {
      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
      .dstSet = set,
      .dstBinding = 0,
      .dstArrayElement = 0,
      .descriptorCount = 1,
      .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
      .pTexelBufferView = &texel_buffer_view,
   };
   v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);

   /* Push command buffer state before starting meta operation */
   v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
   uint32_t dirty_dynamic_state = 0;

   /* Bind common state for all layers and regions */
   VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
   v3dv_CmdBindPipeline(_cmd_buffer,
                        VK_PIPELINE_BIND_POINT_GRAPHICS,
                        pipeline->pipeline);

   v3dv_CmdBindDescriptorSets(_cmd_buffer,
                              VK_PIPELINE_BIND_POINT_GRAPHICS,
                              cmd_buffer->device->meta.texel_buffer_copy.p_layout,
                              0, 1, &set, 0, NULL);

   /* Setup framebuffer.
    *
    * For 3D images, this creates a layered framebuffer with a number of
    * layers matching the depth extent of the 3D image.
    */
   uint32_t fb_width = u_minify(image->vk.extent.width, resource->mipLevel);
   uint32_t fb_height = u_minify(image->vk.extent.height, resource->mipLevel);
   VkImageViewCreateInfo image_view_info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
      .image = v3dv_image_to_handle(image),
      .viewType = v3dv_image_type_to_view_type(image->vk.image_type),
      .format = dst_format,
      .subresourceRange = {
         .aspectMask = aspect,
         .baseMipLevel = resource->mipLevel,
         .levelCount = 1,
         .baseArrayLayer = resource->baseArrayLayer,
         .layerCount = num_layers,
      },
   };
   VkImageView image_view;
   result = v3dv_CreateImageView(_device, &image_view_info,
                                 &cmd_buffer->device->vk.alloc, &image_view);
   if (result != VK_SUCCESS)
      return handled;

   v3dv_cmd_buffer_add_private_obj(
      cmd_buffer, (uintptr_t)image_view,
      (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);

   VkFramebufferCreateInfo fb_info = {
      .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
      .renderPass = pipeline->pass,
      .attachmentCount = 1,
      .pAttachments = &image_view,
      .width = fb_width,
      .height = fb_height,
      .layers = num_layers,
   };

   VkFramebuffer fb;
   result = v3dv_CreateFramebuffer(_device, &fb_info,
                                   &cmd_buffer->device->vk.alloc, &fb);
   if (result != VK_SUCCESS)
      return handled;

   v3dv_cmd_buffer_add_private_obj(
      cmd_buffer, (uintptr_t)fb,
      (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);

   /* For each layer */
   for (uint32_t l = 0; l < num_layers; l++) {
      /* Start render pass for this layer.
       *
       * If we only have one region to copy, then we might be able to
       * skip the TLB load if it is aligned to tile boundaries. All layers
       * copy the same area, so we only need to check this once.
       */
      bool can_skip_tlb_load = false;
      VkRect2D render_area;
      if (region_count == 1) {
         render_area.offset.x = regions[0].imageOffset.x;
         render_area.offset.y = regions[0].imageOffset.y;
         render_area.extent.width = regions[0].imageExtent.width;
         render_area.extent.height = regions[0].imageExtent.height;

         struct v3dv_render_pass *pipeline_pass =
            v3dv_render_pass_from_handle(pipeline->pass);
         can_skip_tlb_load =
            cmask == full_cmask &&
            v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
                                              v3dv_framebuffer_from_handle(fb),
                                              pipeline_pass, 0);
      } else {
         render_area.offset.x = 0;
         render_area.offset.y = 0;
         render_area.extent.width = fb_width;
         render_area.extent.height = fb_height;
      }

      VkRenderPassBeginInfo rp_info = {
         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
         .renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
                                           pipeline->pass,
         .framebuffer = fb,
         .renderArea = render_area,
         .clearValueCount = 0,
      };

      VkSubpassBeginInfo sp_info = {
         .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
         .contents = VK_SUBPASS_CONTENTS_INLINE,
      };

      v3dv_CmdBeginRenderPass2(_cmd_buffer, &rp_info, &sp_info);
      struct v3dv_job *job = cmd_buffer->state.job;
      if (!job)
         goto fail;

      /* If we are using a layered copy we need to specify the layer for the
       * Geometry Shader.
       */
      if (num_layers > 1) {
         uint32_t layer = resource->baseArrayLayer + l;
         v3dv_CmdPushConstants(_cmd_buffer,
                               cmd_buffer->device->meta.texel_buffer_copy.p_layout,
                               VK_SHADER_STAGE_GEOMETRY_BIT,
                               24, 4, &layer);
      }
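
      /* Note: the 24-byte offset used above for the GS layer push constant
       * is assumed to sit right after the 6-word (24-byte) fragment push
       * constant blob uploaded per region below.
       */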

      /* For each region */
      dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
      for (uint32_t r = 0; r < region_count; r++) {
         const VkBufferImageCopy2KHR *region = &regions[r];

         /* Obtain the 2D buffer region spec */
         uint32_t buf_width, buf_height;
         if (region->bufferRowLength == 0)
            buf_width = region->imageExtent.width;
         else
            buf_width = region->bufferRowLength;

         if (region->bufferImageHeight == 0)
            buf_height = region->imageExtent.height;
         else
            buf_height = region->bufferImageHeight;

         const VkViewport viewport = {
            .x = region->imageOffset.x,
            .y = region->imageOffset.y,
            .width = region->imageExtent.width,
            .height = region->imageExtent.height,
            .minDepth = 0.0f,
            .maxDepth = 1.0f
         };
         v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);
         const VkRect2D scissor = {
            .offset = { region->imageOffset.x, region->imageOffset.y },
            .extent = { region->imageExtent.width, region->imageExtent.height }
         };
         v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);

         const VkDeviceSize buf_offset =
            region->bufferOffset / buffer_bpp + l * buf_height * buf_width;
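
         /* buf_offset above is in texel units: the byte offset is divided
          * by the texel size and advanced by one full 2D slice per layer,
          * matching the stride the fragment shader uses for its fetches.
          */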
         uint32_t push_data[6] = {
            region->imageOffset.x,
            region->imageOffset.y,
            region->imageOffset.x + region->imageExtent.width - 1,
            region->imageOffset.y + region->imageExtent.height - 1,
            buf_width,
            buf_offset,
         };
         v3dv_CmdPushConstants(_cmd_buffer,
                               cmd_buffer->device->meta.texel_buffer_copy.p_layout,
                               VK_SHADER_STAGE_FRAGMENT_BIT,
                               0, sizeof(push_data), &push_data);

         v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);
      } /* For each region */

      VkSubpassEndInfo sp_end_info = {
         .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
      };
      v3dv_CmdEndRenderPass2(_cmd_buffer, &sp_end_info);
   } /* For each layer */

fail:
   v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true);

   return handled;
}

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
                          VkImageAspectFlags aspect,
                          struct v3dv_image *image,
                          VkFormat dst_format,
                          VkFormat src_format,
                          struct v3dv_buffer *buffer,
                          uint32_t buffer_bpp,
                          VkColorComponentFlags cmask,
                          VkComponentMapping *cswizzle,
                          uint32_t region_count,
                          const VkBufferImageCopy2KHR *regions)
{
   /* Since we can't sample linear images we need to upload the linear
    * buffer to a tiled image that we can use as a blit source, which
    * is slow.
    */
   perf_debug("Falling back to blit path for buffer to image copy.\n");

   struct v3dv_device *device = cmd_buffer->device;
   VkDevice _device = v3dv_device_to_handle(device);
   bool handled = true;

   /* Allocate memory for the tiled image. Since we copy layer by layer
    * we allocate memory to hold a full layer, which is the worst case.
    * For that we create a dummy image with that spec, get memory requirements
    * for it and use that information to create the memory allocation.
    * We will then reuse this memory store for all the regions we want to
    * copy.
    */
   VkImage dummy_image;
   VkImageCreateInfo dummy_info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
      .imageType = VK_IMAGE_TYPE_2D,
      .format = src_format,
      .extent = { image->vk.extent.width, image->vk.extent.height, 1 },
      .mipLevels = 1,
      .arrayLayers = 1,
      .samples = VK_SAMPLE_COUNT_1_BIT,
      .tiling = VK_IMAGE_TILING_OPTIMAL,
      .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
               VK_IMAGE_USAGE_TRANSFER_DST_BIT,
      .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
      .queueFamilyIndexCount = 0,
      .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
   };
   VkResult result =
      v3dv_CreateImage(_device, &dummy_info, &device->vk.alloc, &dummy_image);
   if (result != VK_SUCCESS)
      return handled;

   VkMemoryRequirements reqs;
   vk_common_GetImageMemoryRequirements(_device, dummy_image, &reqs);
   v3dv_DestroyImage(_device, dummy_image, &device->vk.alloc);

   VkDeviceMemory mem;
   VkMemoryAllocateInfo alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .allocationSize = reqs.size,
      .memoryTypeIndex = 0,
   };
   result = v3dv_AllocateMemory(_device, &alloc_info, &device->vk.alloc, &mem);
   if (result != VK_SUCCESS)
      return handled;

   v3dv_cmd_buffer_add_private_obj(
      cmd_buffer, (uintptr_t)mem,
      (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_FreeMemory);

   /* Obtain the layer count.
    *
    * If we are batching (region_count > 1) all our regions have the same
    * image subresource so we can take this from the first region.
    */
   uint32_t num_layers;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = regions[0].imageSubresource.layerCount;
   else
      num_layers = regions[0].imageExtent.depth;
   assert(num_layers > 0);

   /* Sanity check: we can only batch multiple regions together if they have
    * the same framebuffer (so the same layer).
    */
   assert(num_layers == 1 || region_count == 1);

   const uint32_t block_width = vk_format_get_blockwidth(image->vk.format);
   const uint32_t block_height = vk_format_get_blockheight(image->vk.format);

   /* Copy regions by uploading each region to a temporary tiled image using
    * the memory we have just allocated as storage.
    */
   for (uint32_t r = 0; r < region_count; r++) {
      const VkBufferImageCopy2KHR *region = &regions[r];

      /* Obtain the 2D buffer region spec */
      uint32_t buf_width, buf_height;
      if (region->bufferRowLength == 0)
         buf_width = region->imageExtent.width;
      else
         buf_width = region->bufferRowLength;

      if (region->bufferImageHeight == 0)
         buf_height = region->imageExtent.height;
      else
         buf_height = region->bufferImageHeight;

      /* If the image is compressed, the bpp refers to blocks, not pixels */
      buf_width = buf_width / block_width;
      buf_height = buf_height / block_height;

      for (uint32_t i = 0; i < num_layers; i++) {
         /* Create the tiled image */
         VkImageCreateInfo image_info = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
            .imageType = VK_IMAGE_TYPE_2D,
            .format = src_format,
            .extent = { buf_width, buf_height, 1 },
            .mipLevels = 1,
            .arrayLayers = 1,
            .samples = VK_SAMPLE_COUNT_1_BIT,
            .tiling = VK_IMAGE_TILING_OPTIMAL,
            .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
                     VK_IMAGE_USAGE_TRANSFER_DST_BIT,
            .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
            .queueFamilyIndexCount = 0,
            .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
         };

         VkImage buffer_image;
         result =
            v3dv_CreateImage(_device, &image_info, &device->vk.alloc,
                             &buffer_image);
         if (result != VK_SUCCESS)
            return handled;

         v3dv_cmd_buffer_add_private_obj(
            cmd_buffer, (uintptr_t)buffer_image,
            (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);

         result = vk_common_BindImageMemory(_device, buffer_image, mem, 0);
         if (result != VK_SUCCESS)
            return handled;

         /* Upload buffer contents for the selected layer */
         const VkDeviceSize buf_offset_bytes =
            region->bufferOffset + i * buf_height * buf_width * buffer_bpp;
         const VkBufferImageCopy2KHR buffer_image_copy = {
            .sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2_KHR,
            .bufferOffset = buf_offset_bytes,
            .bufferRowLength = region->bufferRowLength / block_width,
            .bufferImageHeight = region->bufferImageHeight / block_height,
            .imageSubresource = {
               .aspectMask = aspect,
               .mipLevel = 0,
               .baseArrayLayer = 0,
               .layerCount = 1,
            },
            .imageOffset = { 0, 0, 0 },
            .imageExtent = { buf_width, buf_height, 1 }
         };
         handled =
            create_tiled_image_from_buffer(cmd_buffer,
                                           v3dv_image_from_handle(buffer_image),
                                           buffer, &buffer_image_copy);
         if (!handled) {
            /* This is unexpected, we should have setup the upload to be
             * conformant to a TFU or TLB copy.
             */
            unreachable("Unable to copy buffer to image through TLB");
            return false;
         }

         /* Blit-copy the requested image extent from the buffer image to the
          * destination image.
          *
          * Since we are copying, the blit must use the same format on the
          * destination and source images to avoid format conversions. The
          * only exception is copying stencil, which we upload to a R8UI source
          * image, but that we need to blit to a S8D24 destination (the only
          * stencil format we support).
          */
         const VkImageBlit2KHR blit_region = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
            .srcSubresource = {
               .aspectMask = aspect,
               .mipLevel = 0,
               .baseArrayLayer = 0,
               .layerCount = 1,
            },
            .srcOffsets = {
               { 0, 0, 0 },
               { region->imageExtent.width, region->imageExtent.height, 1 },
            },
            .dstSubresource = {
               .aspectMask = aspect,
               .mipLevel = region->imageSubresource.mipLevel,
               .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
               .layerCount = 1,
            },
            .dstOffsets = {
               {
                  DIV_ROUND_UP(region->imageOffset.x, block_width),
                  DIV_ROUND_UP(region->imageOffset.y, block_height),
                  region->imageOffset.z + i,
               },
               {
                  DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
                               block_width),
                  DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
                               block_height),
                  region->imageOffset.z + i + 1,
               },
            },
         };
         handled = blit_shader(cmd_buffer,
                               image, dst_format,
                               v3dv_image_from_handle(buffer_image), src_format,
                               cmask, cswizzle,
                               &blit_region, VK_FILTER_NEAREST, true);
         if (!handled) {
            /* This is unexpected, we should have a supported blit spec */
            unreachable("Unable to blit buffer to destination image");
            return false;
         }
      }
   }

   return handled;
}

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer,
                            struct v3dv_image *image,
                            struct v3dv_buffer *buffer,
                            uint32_t region_count,
                            const VkBufferImageCopy2KHR *regions,
                            bool use_texel_buffer)
{
   /* We can only call this with region_count > 1 if we can batch the regions
    * together, in which case they share the same image subresource, and so
    * the same aspect.
    */
   VkImageAspectFlags aspect = regions[0].imageSubresource.aspectMask;

   /* Generally, the bpp of the data in the buffer matches that of the
    * destination image. The exception is the case where we are uploading
    * stencil (8bpp) to a combined d24s8 image (32bpp).
    */
   uint32_t buf_bpp = image->cpp;

   /* We are about to upload the buffer data to an image so we can then
    * blit that to our destination region. Because we are going to implement
    * the copy as a blit, we want our blit source and destination formats to be
    * the same (to avoid any format conversions), so we choose a canonical
    * format that matches the destination image bpp.
    */
   VkComponentMapping ident_swizzle = {
      .r = VK_COMPONENT_SWIZZLE_IDENTITY,
      .g = VK_COMPONENT_SWIZZLE_IDENTITY,
      .b = VK_COMPONENT_SWIZZLE_IDENTITY,
      .a = VK_COMPONENT_SWIZZLE_IDENTITY,
   };

   VkComponentMapping cswizzle = ident_swizzle;
   VkColorComponentFlags cmask = 0; /* Write all components */
   VkFormat src_format;
   VkFormat dst_format;
   switch (buf_bpp) {
   case 16:
      assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      src_format = VK_FORMAT_R32G32B32A32_UINT;
      dst_format = src_format;
      break;
   case 8:
      assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      src_format = VK_FORMAT_R16G16B16A16_UINT;
      dst_format = src_format;
      break;
   case 4:
      switch (aspect) {
      case VK_IMAGE_ASPECT_COLOR_BIT:
         src_format = VK_FORMAT_R8G8B8A8_UINT;
         dst_format = src_format;
         break;
      case VK_IMAGE_ASPECT_DEPTH_BIT:
         assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
                image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
                image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
         src_format = VK_FORMAT_R8G8B8A8_UINT;
         dst_format = src_format;
         aspect = VK_IMAGE_ASPECT_COLOR_BIT;

         /* For D24 formats, the Vulkan spec states that the depth component
          * in the buffer is stored in the 24-LSB, but V3D wants it in the
          * 24-MSB.
          */
         if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
             image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
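            /* Writing only G/B/A from source components R/G/B shifts the
             * three depth bytes up into the 24 MSB, while the masked-out
             * R byte (the stencil of a d24s8 destination) is preserved.
             */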
            cmask = VK_COLOR_COMPONENT_G_BIT |
                    VK_COLOR_COMPONENT_B_BIT |
                    VK_COLOR_COMPONENT_A_BIT;
            cswizzle.r = VK_COMPONENT_SWIZZLE_R;
            cswizzle.g = VK_COMPONENT_SWIZZLE_R;
            cswizzle.b = VK_COMPONENT_SWIZZLE_G;
            cswizzle.a = VK_COMPONENT_SWIZZLE_B;
         }
         break;
      case VK_IMAGE_ASPECT_STENCIL_BIT:
         /* Since we don't support separate stencil this is always a stencil
          * copy to a combined depth/stencil image. Because we don't support
          * separate stencil images, we interpret the buffer data as a
          * color R8UI image, and implement the blit as a compatible color
          * blit to an RGBA8UI destination masking out writes to components
          * GBA (which map to the D24 component of a S8D24 image).
          */
         assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
         buf_bpp = 1;
         src_format = VK_FORMAT_R8_UINT;
         dst_format = VK_FORMAT_R8G8B8A8_UINT;
         cmask = VK_COLOR_COMPONENT_R_BIT;
         aspect = VK_IMAGE_ASPECT_COLOR_BIT;
         break;
      default:
         unreachable("unsupported aspect");
      }
      break;
   case 2:
      aspect = VK_IMAGE_ASPECT_COLOR_BIT;
      src_format = VK_FORMAT_R16_UINT;
      dst_format = src_format;
      break;
   case 1:
      assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      src_format = VK_FORMAT_R8_UINT;
      dst_format = src_format;
      break;
   default:
      unreachable("unsupported bit-size");
   }

   if (use_texel_buffer) {
      return texel_buffer_shader_copy(cmd_buffer, aspect, image,
                                      dst_format, src_format,
                                      buffer, buf_bpp,
                                      cmask, &cswizzle,
                                      region_count, regions);
   } else {
      return copy_buffer_to_image_blit(cmd_buffer, aspect, image,
                                       dst_format, src_format,
                                       buffer, buf_bpp,
                                       cmask, &cswizzle,
                                       region_count, regions);
   }
}

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_image *image,
                         struct v3dv_buffer *buffer,
                         const VkBufferImageCopy2KHR *region)
{
   if (vk_format_is_depth_or_stencil(image->vk.format))
      return false;

   if (vk_format_is_compressed(image->vk.format))
      return false;

   if (image->vk.tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   uint32_t buffer_width, buffer_height;
   if (region->bufferRowLength == 0)
      buffer_width = region->imageExtent.width;
   else
      buffer_width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      buffer_height = region->imageExtent.height;
   else
      buffer_height = region->bufferImageHeight;

   uint32_t buffer_stride = buffer_width * image->cpp;
   uint32_t buffer_layer_stride = buffer_stride * buffer_height;

   uint32_t num_layers;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   struct v3dv_job *job =
      v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
                                     V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
                                     cmd_buffer, -1);
   if (!job)
      return true;

   job->cpu.copy_buffer_to_image.image = image;
   job->cpu.copy_buffer_to_image.buffer = buffer;
   job->cpu.copy_buffer_to_image.buffer_stride = buffer_stride;
   job->cpu.copy_buffer_to_image.buffer_layer_stride = buffer_layer_stride;
   job->cpu.copy_buffer_to_image.buffer_offset = region->bufferOffset;
   job->cpu.copy_buffer_to_image.image_extent = region->imageExtent;
   job->cpu.copy_buffer_to_image.image_offset = region->imageOffset;
   job->cpu.copy_buffer_to_image.mip_level =
      region->imageSubresource.mipLevel;
   job->cpu.copy_buffer_to_image.base_layer =
      region->imageSubresource.baseArrayLayer;
   job->cpu.copy_buffer_to_image.layer_count = num_layers;

   list_addtail(&job->list_link, &cmd_buffer->jobs);

   return true;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,
                              const VkCopyBufferToImageInfo2KHR *info)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->srcBuffer);
   V3DV_FROM_HANDLE(v3dv_image, image, info->dstImage);

   assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
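
   /* Copy strategies, in decreasing order of expected performance: TFU,
    * TLB, batched texel-buffer shader copies, CPU staging, and the
    * blit-based shader path as a last resort.
    */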
   uint32_t r = 0;
   while (r < info->regionCount) {
      /* The TFU and TLB paths can only copy one region at a time and the region
       * needs to start at the origin. We try these first for the common case
       * where we are copying full images, since they should be the fastest.
       */
      uint32_t batch_size = 1;
      if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, &info->pRegions[r]))
         goto handled;

      if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &info->pRegions[r]))
         goto handled;

      /* Otherwise, we are copying subrects, so we fallback to copying
       * via shader and texel buffers and we try to batch the regions
       * if possible. We can only batch copies if they have the same
       * framebuffer spec, which is mostly determined by the image
       * subresource of the region.
       */
      const VkImageSubresourceLayers *rsc = &info->pRegions[r].imageSubresource;
      for (uint32_t s = r + 1; s < info->regionCount; s++) {
         const VkImageSubresourceLayers *rsc_s =
            &info->pRegions[s].imageSubresource;

         if (memcmp(rsc, rsc_s, sizeof(VkImageSubresourceLayers)) != 0)
            break;

         /* For 3D images we also need to check the depth extent */
         if (image->vk.image_type == VK_IMAGE_TYPE_3D &&
             info->pRegions[s].imageExtent.depth !=
             info->pRegions[r].imageExtent.depth) {
            break;
         }

         batch_size++;
      }

      if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
                                      batch_size, &info->pRegions[r], true)) {
         goto handled;
      }

      /* If we still could not copy, fallback to slower paths.
       *
       * FIXME: we could try to batch these too, but since they are bound to be
       * slow it might not be worth it and we should instead put more effort
       * in handling more cases with the other paths.
       */
      if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer,
                                   &info->pRegions[r])) {
         batch_size = 1;
         goto handled;
      }

      if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
                                      batch_size, &info->pRegions[r], false)) {
         goto handled;
      }

      unreachable("Unsupported buffer to image copy.");

handled:
      r += batch_size;
   }
}

static void
compute_blit_3d_layers(const VkOffset3D *offsets,
                       uint32_t *min_layer, uint32_t *max_layer,
                       bool *mirror_z);

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 *
 * The TFU blit path doesn't handle scaling so the blit filter parameter can
 * be ignored.
 */
static bool
blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
         struct v3dv_image *dst,
         struct v3dv_image *src,
         const VkImageBlit2KHR *region)
{
   assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT);
   assert(src->vk.samples == VK_SAMPLE_COUNT_1_BIT);

   /* Format must match */
   if (src->vk.format != dst->vk.format)
      return false;

   /* Destination can't be raster format */
   if (dst->vk.tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* Source region must start at (0,0) */
   if (region->srcOffsets[0].x != 0 || region->srcOffsets[0].y != 0)
      return false;

   /* Destination image must be complete */
   if (region->dstOffsets[0].x != 0 || region->dstOffsets[0].y != 0)
      return false;

   const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
   const uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level);
   const uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level);
   if (region->dstOffsets[1].x < dst_width - 1 ||
       region->dstOffsets[1].y < dst_height - 1) {
      return false;
   }

   /* No XY scaling */
   if (region->srcOffsets[1].x != region->dstOffsets[1].x ||
       region->srcOffsets[1].y != region->dstOffsets[1].y) {
      return false;
   }

   /* If the format is D24S8 both aspects need to be copied, since the TFU
    * can't be programmed to copy only one aspect of the image.
    */
   if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
      const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
                                            VK_IMAGE_ASPECT_STENCIL_BIT;
      if (region->dstSubresource.aspectMask != ds_aspects)
         return false;
   }

   /* Our TFU blits only handle exact copies (it requires same formats
    * on input and output, no scaling, etc), so there is no pixel format
    * conversions and we can rewrite the format to use one that is TFU
    * compatible based on its texel size.
    */
   const struct v3dv_format *format =
      v3dv_get_compatible_tfu_format(cmd_buffer->device,
                                     dst->cpp, NULL);

   /* Emit a TFU job for each layer to blit */
   assert(region->dstSubresource.layerCount ==
          region->srcSubresource.layerCount);

   uint32_t min_dst_layer;
   uint32_t max_dst_layer;
   bool dst_mirror_z = false;
   if (dst->vk.image_type == VK_IMAGE_TYPE_3D) {
      compute_blit_3d_layers(region->dstOffsets,
                             &min_dst_layer, &max_dst_layer,
                             &dst_mirror_z);
   } else {
      min_dst_layer = region->dstSubresource.baseArrayLayer;
      max_dst_layer = min_dst_layer + region->dstSubresource.layerCount;
   }

   uint32_t min_src_layer;
   uint32_t max_src_layer;
   bool src_mirror_z = false;
   if (src->vk.image_type == VK_IMAGE_TYPE_3D) {
      compute_blit_3d_layers(region->srcOffsets,
                             &min_src_layer, &max_src_layer,
                             &src_mirror_z);
   } else {
      min_src_layer = region->srcSubresource.baseArrayLayer;
      max_src_layer = min_src_layer + region->srcSubresource.layerCount;
   }

   /* No Z scaling for 3D images (for non-3D images both src and dst must
    * have the same layerCount).
    */
   if (max_dst_layer - min_dst_layer != max_src_layer - min_src_layer)
      return false;

   const uint32_t layer_count = max_dst_layer - min_dst_layer;
   const uint32_t src_mip_level = region->srcSubresource.mipLevel;
   for (uint32_t i = 0; i < layer_count; i++) {
      /* Since the TFU path doesn't handle scaling, Z mirroring for 3D images
       * only involves reversing the order of the slices.
       */
      const uint32_t dst_layer =
         dst_mirror_z ? max_dst_layer - i - 1 : min_dst_layer + i;
      const uint32_t src_layer =
         src_mirror_z ? max_src_layer - i - 1 : min_src_layer + i;

      const uint32_t dst_offset =
         dst->mem->bo->offset + v3dv_layer_offset(dst, dst_mip_level, dst_layer);
      const uint32_t src_offset =
         src->mem->bo->offset + v3dv_layer_offset(src, src_mip_level, src_layer);

      const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level];
      const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level];
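
      /* The TFU job below describes a raster (linear) slice by its byte
       * stride and a tiled slice by its padded height in rows, hence the
       * tiling check on the source; the destination is known to be tiled
       * at this point.
       */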
      v3dv_X(cmd_buffer->device, meta_emit_tfu_job)(
         cmd_buffer,
         dst->mem->bo->handle,
         dst_offset,
         dst_slice->tiling,
         dst_slice->padded_height,
         dst->cpp,
         src->mem->bo->handle,
         src_offset,
         src_slice->tiling,
         src_slice->tiling == V3D_TILING_RASTER ?
            src_slice->stride : src_slice->padded_height,
         src->cpp,
         dst_width, dst_height, format);
   }

   return true;
}

static bool
format_needs_software_int_clamp(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_A2R10G10B10_UINT_PACK32:
   case VK_FORMAT_A2R10G10B10_SINT_PACK32:
   case VK_FORMAT_A2B10G10R10_UINT_PACK32:
   case VK_FORMAT_A2B10G10R10_SINT_PACK32:
      return true;
   default:
      return false;
   }
}

static void
get_blit_pipeline_cache_key(VkFormat dst_format,
                            VkFormat src_format,
                            VkColorComponentFlags cmask,
                            VkSampleCountFlagBits dst_samples,
                            VkSampleCountFlagBits src_samples,
                            uint8_t *key)
{
   memset(key, 0, V3DV_META_BLIT_CACHE_KEY_SIZE);

   uint32_t *p = (uint32_t *) key;
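
   /* Key layout (4 x 32-bit words): dst_format; src_format (only when the
    * destination needs software integer clamping, see below); cmask; and
    * the packed dst/src sample counts.
    */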
   *p = dst_format;
   p++;

   /* Generally, when blitting from a larger format to a smaller format
    * the hardware takes care of clamping the source to the RT range.
    * Specifically, for integer formats, this is done by using
    * V3D_RENDER_TARGET_CLAMP_INT in the render target setup, however, this
    * clamps to the bit-size of the render type, and some formats, such as
    * rgb10a2_uint have a 16-bit type, so it won't do what we need and we
    * require to clamp in software. In these cases, we need to amend the blit
    * shader with clamp code that depends on both the src and dst formats, so
    * we need the src format to be part of the key.
    */
   *p = format_needs_software_int_clamp(dst_format) ? src_format : 0;
   p++;

   *p = cmask;
   p++;

   *p = (dst_samples << 8) | src_samples;
   p++;

   assert(((uint8_t*)p - key) == V3DV_META_BLIT_CACHE_KEY_SIZE);
}

static bool
create_blit_render_pass(struct v3dv_device *device,
                        VkFormat dst_format,
                        VkFormat src_format,
                        VkRenderPass *pass_load,
                        VkRenderPass *pass_no_load)
{
   const bool is_color_blit = vk_format_is_color(dst_format);

   /* Attachment load operation is specified below */
   VkAttachmentDescription2 att = {
      .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
      .format = dst_format,
      .samples = VK_SAMPLE_COUNT_1_BIT,
      .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
      .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
      .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
   };

   VkAttachmentReference2 att_ref = {
      .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
      .attachment = 0,
      .layout = VK_IMAGE_LAYOUT_GENERAL,
   };

   VkSubpassDescription2 subpass = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
      .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
      .inputAttachmentCount = 0,
      .colorAttachmentCount = is_color_blit ? 1 : 0,
      .pColorAttachments = is_color_blit ? &att_ref : NULL,
      .pResolveAttachments = NULL,
      .pDepthStencilAttachment = is_color_blit ? NULL : &att_ref,
      .preserveAttachmentCount = 0,
      .pPreserveAttachments = NULL,
   };

   VkRenderPassCreateInfo2 info = {
      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
      .attachmentCount = 1,
      .pAttachments = &att,
      .subpassCount = 1,
      .pSubpasses = &subpass,
      .dependencyCount = 0,
      .pDependencies = NULL,
   };
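
   /* Create two variants that differ only in the attachment load op: under
    * the Vulkan render pass compatibility rules they remain compatible with
    * the same pipelines, so callers can pick either one at draw time.
    */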
   VkResult result;

   att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
   result = v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
                                   &info, &device->vk.alloc, pass_load);
   if (result != VK_SUCCESS)
      return false;

   att.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
   result = v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
                                   &info, &device->vk.alloc, pass_no_load);
   return result == VK_SUCCESS;
}

static nir_ssa_def *
gen_rect_vertices(nir_builder *b)
{
   nir_ssa_def *vertex_id = nir_load_vertex_id(b);

   /* vertex 0: -1.0, -1.0
    * vertex 1: -1.0,  1.0
    * vertex 2:  1.0, -1.0
    * vertex 3:  1.0,  1.0
    *
    * so:
    *
    * channel 0 is vertex_id < 2 ? -1.0 :  1.0
    * channel 1 is vertex id & 1 ?  1.0 : -1.0
    */
   nir_ssa_def *one = nir_imm_int(b, 1);
   nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2));
   nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);

   nir_ssa_def *comp[4];
   comp[0] = nir_bcsel(b, c0cmp,
                       nir_imm_float(b, -1.0f),
                       nir_imm_float(b, 1.0f));

   comp[1] = nir_bcsel(b, c1cmp,
                       nir_imm_float(b, 1.0f),
                       nir_imm_float(b, -1.0f));
   comp[2] = nir_imm_float(b, 0.0f);
   comp[3] = nir_imm_float(b, 1.0f);
   return nir_vec(b, comp, 4);
}

static nir_ssa_def *
gen_tex_coords(nir_builder *b)
{
   nir_ssa_def *tex_box =
      nir_load_push_constant(b, 4, 32, nir_imm_int(b, 0), .base = 0, .range = 16);

   nir_ssa_def *tex_z =
      nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);

   nir_ssa_def *vertex_id = nir_load_vertex_id(b);

   /* vertex 0: src0_x, src0_y
    * vertex 1: src0_x, src1_y
    * vertex 2: src1_x, src0_y
    * vertex 3: src1_x, src1_y
    *
    * So:
    *
    * channel 0 is vertex_id < 2 ? src0_x : src1_x
    * channel 1 is vertex id & 1 ? src1_y : src0_y
    */
   nir_ssa_def *one = nir_imm_int(b, 1);
   nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2));
   nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);

   nir_ssa_def *comp[4];
   comp[0] = nir_bcsel(b, c0cmp,
                       nir_channel(b, tex_box, 0),
                       nir_channel(b, tex_box, 2));

   comp[1] = nir_bcsel(b, c1cmp,
                       nir_channel(b, tex_box, 3),
                       nir_channel(b, tex_box, 1));
   comp[2] = tex_z;
   comp[3] = nir_imm_float(b, 1.0f);
   return nir_vec(b, comp, 4);
}

static nir_ssa_def *
build_nir_tex_op_read(struct nir_builder *b,
                      nir_ssa_def *tex_pos,
                      enum glsl_base_type tex_type,
                      enum glsl_sampler_dim dim)
{
   assert(dim != GLSL_SAMPLER_DIM_MS);

   const struct glsl_type *sampler_type =
      glsl_sampler_type(dim, false, false, tex_type);
   nir_variable *sampler =
      nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
   sampler->data.descriptor_set = 0;
   sampler->data.binding = 0;

   nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
   nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
   tex->sampler_dim = dim;
   tex->op = nir_texop_tex;
   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(tex_pos);
   tex->src[1].src_type = nir_tex_src_texture_deref;
   tex->src[1].src = nir_src_for_ssa(tex_deref);
   tex->src[2].src_type = nir_tex_src_sampler_deref;
   tex->src[2].src = nir_src_for_ssa(tex_deref);
   tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
   tex->is_array = glsl_sampler_type_is_array(sampler_type);
   tex->coord_components = tex_pos->num_components;

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
   nir_builder_instr_insert(b, &tex->instr);
   return &tex->dest.ssa;
}

static nir_ssa_def *
build_nir_tex_op_ms_fetch_sample(struct nir_builder *b,
                                 nir_variable *sampler,
                                 nir_ssa_def *tex_deref,
                                 enum glsl_base_type tex_type,
                                 nir_ssa_def *tex_pos,
                                 nir_ssa_def *sample_idx)
{
   nir_tex_instr *tex = nir_tex_instr_create(b->shader, 4);
   tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
   tex->op = nir_texop_txf_ms;
   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(tex_pos);
   tex->src[1].src_type = nir_tex_src_texture_deref;
   tex->src[1].src = nir_src_for_ssa(tex_deref);
   tex->src[2].src_type = nir_tex_src_sampler_deref;
   tex->src[2].src = nir_src_for_ssa(tex_deref);
   tex->src[3].src_type = nir_tex_src_ms_index;
   tex->src[3].src = nir_src_for_ssa(sample_idx);
   tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
   tex->is_array = false;
   tex->coord_components = tex_pos->num_components;

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
   nir_builder_instr_insert(b, &tex->instr);
   return &tex->dest.ssa;
}

/* Fetches all samples at the given position and averages them */
static nir_ssa_def *
build_nir_tex_op_ms_resolve(struct nir_builder *b,
                            nir_ssa_def *tex_pos,
                            enum glsl_base_type tex_type,
                            VkSampleCountFlagBits src_samples)
{
   assert(src_samples > VK_SAMPLE_COUNT_1_BIT);
   const struct glsl_type *sampler_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
   nir_variable *sampler =
      nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
   sampler->data.descriptor_set = 0;
   sampler->data.binding = 0;

   const bool is_int = glsl_base_type_is_integer(tex_type);

   nir_ssa_def *tmp = NULL;
   nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
   for (uint32_t i = 0; i < src_samples; i++) {
      nir_ssa_def *s =
         build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
                                          tex_type, tex_pos,
                                          nir_imm_int(b, i));

      /* For integer formats, the multisample resolve operation is expected to
       * return one of the samples, we just return the first one.
       */
      if (is_int)
         return s;

      tmp = i == 0 ? s : nir_fadd(b, tmp, s);
   }

   assert(!is_int);
   return nir_fmul(b, tmp, nir_imm_float(b, 1.0f / src_samples));
}

/* Fetches the current sample (gl_SampleID) at the given position */
static nir_ssa_def *
build_nir_tex_op_ms_read(struct nir_builder *b,
                         nir_ssa_def *tex_pos,
                         enum glsl_base_type tex_type)
{
   const struct glsl_type *sampler_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
   nir_variable *sampler =
      nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
   sampler->data.descriptor_set = 0;
   sampler->data.binding = 0;

   nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;

   return build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
                                           tex_type, tex_pos,
                                           nir_load_sample_id(b));
}

static nir_ssa_def *
build_nir_tex_op(struct nir_builder *b,
                 struct v3dv_device *device,
                 nir_ssa_def *tex_pos,
                 enum glsl_base_type tex_type,
                 VkSampleCountFlagBits dst_samples,
                 VkSampleCountFlagBits src_samples,
                 enum glsl_sampler_dim dim)
{
   switch (dim) {
   case GLSL_SAMPLER_DIM_MS:
      assert(src_samples == VK_SAMPLE_COUNT_4_BIT);
      /* For multisampled texture sources we need to use fetching instead of
       * normalized texture coordinates. We already configured our blit
       * coordinates to be in texel units, but here we still need to convert
       * them from floating point to integer.
       */
      tex_pos = nir_f2i32(b, tex_pos);

      if (dst_samples == VK_SAMPLE_COUNT_1_BIT)
         return build_nir_tex_op_ms_resolve(b, tex_pos, tex_type, src_samples);
      else
         return build_nir_tex_op_ms_read(b, tex_pos, tex_type);
   default:
      assert(src_samples == VK_SAMPLE_COUNT_1_BIT);
      return build_nir_tex_op_read(b, tex_pos, tex_type, dim);
   }
}

static nir_shader *
get_blit_vs(void)
{
   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
                                                  "meta blit vs");

   const struct glsl_type *vec4 = glsl_vec4_type();

   nir_variable *vs_out_pos =
      nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
   vs_out_pos->data.location = VARYING_SLOT_POS;

   nir_variable *vs_out_tex_coord =
      nir_variable_create(b.shader, nir_var_shader_out, vec4, "out_tex_coord");
   vs_out_tex_coord->data.location = VARYING_SLOT_VAR0;
   vs_out_tex_coord->data.interpolation = INTERP_MODE_SMOOTH;

   nir_ssa_def *pos = gen_rect_vertices(&b);
   nir_store_var(&b, vs_out_pos, pos, 0xf);

   nir_ssa_def *tex_coord = gen_tex_coords(&b);
   nir_store_var(&b, vs_out_tex_coord, tex_coord, 0xf);

   return b.shader;
}

static uint32_t
get_channel_mask_for_sampler_dim(enum glsl_sampler_dim sampler_dim)
{
   switch (sampler_dim) {
   case GLSL_SAMPLER_DIM_1D: return 0x1;
   case GLSL_SAMPLER_DIM_2D: return 0x3;
   case GLSL_SAMPLER_DIM_MS: return 0x3;
   case GLSL_SAMPLER_DIM_3D: return 0x7;
   default:
      unreachable("invalid sampler dim");
   }
}

static nir_shader *
get_color_blit_fs(struct v3dv_device *device,
                  VkFormat dst_format,
                  VkFormat src_format,
                  VkSampleCountFlagBits dst_samples,
                  VkSampleCountFlagBits src_samples,
                  enum glsl_sampler_dim sampler_dim)
{
   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
                                                  "meta blit fs");

   const struct glsl_type *vec4 = glsl_vec4_type();

   nir_variable *fs_in_tex_coord =
      nir_variable_create(b.shader, nir_var_shader_in, vec4, "in_tex_coord");
   fs_in_tex_coord->data.location = VARYING_SLOT_VAR0;

   const struct glsl_type *fs_out_type =
      vk_format_is_sint(dst_format) ? glsl_ivec4_type() :
      vk_format_is_uint(dst_format) ? glsl_uvec4_type() :
                                      vec4;

   enum glsl_base_type src_base_type =
      vk_format_is_sint(src_format) ? GLSL_TYPE_INT :
      vk_format_is_uint(src_format) ? GLSL_TYPE_UINT :
                                      GLSL_TYPE_FLOAT;

   nir_variable *fs_out_color =
      nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
   fs_out_color->data.location = FRAG_RESULT_DATA0;

   nir_ssa_def *tex_coord = nir_load_var(&b, fs_in_tex_coord);
   const uint32_t channel_mask = get_channel_mask_for_sampler_dim(sampler_dim);
   tex_coord = nir_channels(&b, tex_coord, channel_mask);

   nir_ssa_def *color = build_nir_tex_op(&b, device, tex_coord, src_base_type,
                                         dst_samples, src_samples, sampler_dim);

   /* For integer textures, if the bit-size of the destination is too small to
    * hold source value, Vulkan (CTS) expects the implementation to clamp to the
    * maximum value the destination can hold. The hardware can clamp to the
    * render target type, which usually matches the component bit-size, but
    * there are some cases that won't match, such as rgb10a2, which has a 16-bit
    * render target type, so in these cases we need to clamp manually.
    */
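   /* For example, blitting R32G32B32A32_UINT to A2B10G10R10_UINT_PACK32
    * clamps the color channels to 1023 (10 bits) and alpha to 3 (2 bits)
    * through the umin path below.
    */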
   if (format_needs_software_int_clamp(dst_format)) {
      assert(vk_format_is_int(dst_format));
      enum pipe_format src_pformat = vk_format_to_pipe_format(src_format);
      enum pipe_format dst_pformat = vk_format_to_pipe_format(dst_format);

      nir_ssa_def *c[4];
      for (uint32_t i = 0; i < 4; i++) {
         c[i] = nir_channel(&b, color, i);

         const uint32_t src_bit_size =
            util_format_get_component_bits(src_pformat,
                                           UTIL_FORMAT_COLORSPACE_RGB,
                                           i);
         const uint32_t dst_bit_size =
            util_format_get_component_bits(dst_pformat,
                                           UTIL_FORMAT_COLORSPACE_RGB,
                                           i);

         if (dst_bit_size >= src_bit_size)
            continue;

         assert(dst_bit_size > 0);
         if (util_format_is_pure_uint(dst_pformat)) {
            nir_ssa_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);
            c[i] = nir_umin(&b, c[i], max);
         } else {
            nir_ssa_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1);
            nir_ssa_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1)));
            c[i] = nir_imax(&b, nir_imin(&b, c[i], max), min);
         }
      }

      color = nir_vec4(&b, c[0], c[1], c[2], c[3]);
   }

   nir_store_var(&b, fs_out_color, color, 0xf);

   return b.shader;
}

static bool
create_pipeline(struct v3dv_device *device,
                struct v3dv_render_pass *pass,
                struct nir_shader *vs_nir,
                struct nir_shader *gs_nir,
                struct nir_shader *fs_nir,
                const VkPipelineVertexInputStateCreateInfo *vi_state,
                const VkPipelineDepthStencilStateCreateInfo *ds_state,
                const VkPipelineColorBlendStateCreateInfo *cb_state,
                const VkPipelineMultisampleStateCreateInfo *ms_state,
                const VkPipelineLayout layout,
                VkPipeline *pipeline)
{
   struct vk_shader_module vs_m;
   struct vk_shader_module gs_m;
   struct vk_shader_module fs_m;

   uint32_t num_stages = gs_nir ? 3 : 2;

   v3dv_shader_module_internal_init(device, &vs_m, vs_nir);
   v3dv_shader_module_internal_init(device, &fs_m, fs_nir);

   VkPipelineShaderStageCreateInfo stages[3] = {
      {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
         .stage = VK_SHADER_STAGE_VERTEX_BIT,
         .module = vk_shader_module_to_handle(&vs_m),
         .pName = "main",
      },
      {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
         .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
         .module = vk_shader_module_to_handle(&fs_m),
         .pName = "main",
      },
      {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
         .stage = VK_SHADER_STAGE_GEOMETRY_BIT,
         .module = VK_NULL_HANDLE,
         .pName = "main",
      },
   };

   if (gs_nir) {
      v3dv_shader_module_internal_init(device, &gs_m, gs_nir);
      stages[2].module = vk_shader_module_to_handle(&gs_m);
   }

   VkGraphicsPipelineCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,

      .stageCount = num_stages,
      .pStages = stages,

      .pVertexInputState = vi_state,

      .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
         .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
         .primitiveRestartEnable = false,
      },

      .pViewportState = &(VkPipelineViewportStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
         .viewportCount = 1,
         .scissorCount = 1,
      },

      .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
         .rasterizerDiscardEnable = false,
         .polygonMode = VK_POLYGON_MODE_FILL,
         .cullMode = VK_CULL_MODE_NONE,
         .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
         .depthBiasEnable = false,
      },

      .pMultisampleState = ms_state,

      .pDepthStencilState = ds_state,

      .pColorBlendState = cb_state,

      /* The meta clear pipeline declares all state as dynamic.
       * As a consequence, vkCmdBindPipeline writes no dynamic state
       * to the cmd buffer. Therefore, at the end of the meta clear,
       * we need only restore dynamic state that was vkCmdSet.
       */
      .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
         .dynamicStateCount = 6,
         .pDynamicStates = (VkDynamicState[]) {
            VK_DYNAMIC_STATE_VIEWPORT,
            VK_DYNAMIC_STATE_SCISSOR,
            VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
            VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
            VK_DYNAMIC_STATE_STENCIL_REFERENCE,
            VK_DYNAMIC_STATE_BLEND_CONSTANTS,
            VK_DYNAMIC_STATE_DEPTH_BIAS,
            VK_DYNAMIC_STATE_LINE_WIDTH,
         },
      },

      .flags = 0,
      .layout = layout,
      .renderPass = v3dv_render_pass_to_handle(pass),
      .subpass = 0,
   };

   VkResult result =
      v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
                                   VK_NULL_HANDLE,
                                   1, &info,
                                   &device->vk.alloc,
                                   pipeline);

   ralloc_free(vs_nir);
   ralloc_free(fs_nir);

   return result == VK_SUCCESS;
}

static enum glsl_sampler_dim
get_sampler_dim(VkImageType type, VkSampleCountFlagBits src_samples)
{
   /* From the Vulkan 1.0 spec, VkImageCreateInfo Valid Usage:
    *
    *   "If samples is not VK_SAMPLE_COUNT_1_BIT, then imageType must be
    *    VK_IMAGE_TYPE_2D, ..."
    */
   assert(src_samples == VK_SAMPLE_COUNT_1_BIT || type == VK_IMAGE_TYPE_2D);

   switch (type) {
   case VK_IMAGE_TYPE_1D: return GLSL_SAMPLER_DIM_1D;
   case VK_IMAGE_TYPE_2D:
      return src_samples == VK_SAMPLE_COUNT_1_BIT ? GLSL_SAMPLER_DIM_2D :
                                                    GLSL_SAMPLER_DIM_MS;
   case VK_IMAGE_TYPE_3D: return GLSL_SAMPLER_DIM_3D;
   default:
      unreachable("Invalid image type");
   }
}

static bool
create_blit_pipeline(struct v3dv_device *device,
                     VkFormat dst_format,
                     VkFormat src_format,
                     VkColorComponentFlags cmask,
                     VkImageType src_type,
                     VkSampleCountFlagBits dst_samples,
                     VkSampleCountFlagBits src_samples,
                     VkRenderPass _pass,
                     VkPipelineLayout pipeline_layout,
                     VkPipeline *pipeline)
{
   struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);

   /* We always rewrite depth/stencil blits to compatible color blits */
   assert(vk_format_is_color(dst_format));
   assert(vk_format_is_color(src_format));

   const enum glsl_sampler_dim sampler_dim =
      get_sampler_dim(src_type, src_samples);

   nir_shader *vs_nir = get_blit_vs();
   nir_shader *fs_nir =
      get_color_blit_fs(device, dst_format, src_format,
                        dst_samples, src_samples, sampler_dim);

   const VkPipelineVertexInputStateCreateInfo vi_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
      .vertexBindingDescriptionCount = 0,
      .vertexAttributeDescriptionCount = 0,
   };

   VkPipelineDepthStencilStateCreateInfo ds_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
   };

   VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
   blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
      .blendEnable = false,
      .colorWriteMask = cmask,
   };

   const VkPipelineColorBlendStateCreateInfo cb_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
      .logicOpEnable = false,
      .attachmentCount = 1,
      .pAttachments = blend_att_state
   };

   const VkPipelineMultisampleStateCreateInfo ms_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
      .rasterizationSamples = dst_samples,
      .sampleShadingEnable = dst_samples > VK_SAMPLE_COUNT_1_BIT,
      .pSampleMask = NULL,
      .alphaToCoverageEnable = false,
      .alphaToOneEnable = false,
   };

   return create_pipeline(device,
                          pass,
                          vs_nir, NULL, fs_nir,
                          &vi_state,
                          &ds_state,
                          &cb_state,
                          &ms_state,
                          pipeline_layout,
                          pipeline);
}

/**
 * Return a pipeline suitable for blitting the requested aspect given the
 * destination and source formats.
 */
static bool
get_blit_pipeline(struct v3dv_device *device,
                  VkFormat dst_format,
                  VkFormat src_format,
                  VkColorComponentFlags cmask,
                  VkImageType src_type,
                  VkSampleCountFlagBits dst_samples,
                  VkSampleCountFlagBits src_samples,
                  struct v3dv_meta_blit_pipeline **pipeline)
{
   bool ok = true;

   uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
   get_blit_pipeline_cache_key(dst_format, src_format, cmask,
                               dst_samples, src_samples, key);
   mtx_lock(&device->meta.mtx);
   struct hash_entry *entry =
      _mesa_hash_table_search(device->meta.blit.cache[src_type], &key);
   if (entry) {
      mtx_unlock(&device->meta.mtx);
      *pipeline = entry->data;
      return true;
   }

   *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
                          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (*pipeline == NULL)
      goto fail;

   ok = create_blit_render_pass(device, dst_format, src_format,
                                &(*pipeline)->pass,
                                &(*pipeline)->pass_no_load);
   if (!ok)
      goto fail;

   /* Create the pipeline using one of the render passes, they are both
    * compatible, so we don't care which one we use here.
    */
   ok = create_blit_pipeline(device,
                             dst_format,
                             src_format,
                             cmask,
                             src_type,
                             dst_samples,
                             src_samples,
                             (*pipeline)->pass,
                             device->meta.blit.p_layout,
                             &(*pipeline)->pipeline);
   if (!ok)
      goto fail;

   memcpy((*pipeline)->key, key, sizeof((*pipeline)->key));
   _mesa_hash_table_insert(device->meta.blit.cache[src_type],
                           &(*pipeline)->key, *pipeline);

   mtx_unlock(&device->meta.mtx);
   return true;

fail:
   mtx_unlock(&device->meta.mtx);

   VkDevice _device = v3dv_device_to_handle(device);
   if (*pipeline) {
      if ((*pipeline)->pass)
         v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
      if ((*pipeline)->pass_no_load)
         v3dv_DestroyRenderPass(_device, (*pipeline)->pass_no_load, &device->vk.alloc);
      if ((*pipeline)->pipeline)
         v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
      vk_free(&device->vk.alloc, *pipeline);
      *pipeline = NULL;
   }

   return false;
}
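
/* Computes the (x, y, width, height) box of a blit from its two corner
 * offsets, clamped to the image dimensions, and reports whether the blit
 * mirrors along each axis (i.e. the offsets were given in reverse order).
 */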
static void
compute_blit_box(const VkOffset3D *offsets,
                 uint32_t image_w, uint32_t image_h,
                 uint32_t *x, uint32_t *y, uint32_t *w, uint32_t *h,
                 bool *mirror_x, bool *mirror_y)
{
   if (offsets[1].x >= offsets[0].x) {
      *mirror_x = false;
      *x = MIN2(offsets[0].x, image_w - 1);
      *w = MIN2(offsets[1].x - offsets[0].x, image_w - offsets[0].x);
   } else {
      *mirror_x = true;
      *x = MIN2(offsets[1].x, image_w - 1);
      *w = MIN2(offsets[0].x - offsets[1].x, image_w - offsets[1].x);
   }
   if (offsets[1].y >= offsets[0].y) {
      *mirror_y = false;
      *y = MIN2(offsets[0].y, image_h - 1);
      *h = MIN2(offsets[1].y - offsets[0].y, image_h - offsets[0].y);
   } else {
      *mirror_y = true;
      *y = MIN2(offsets[1].y, image_h - 1);
      *h = MIN2(offsets[0].y - offsets[1].y, image_h - offsets[1].y);
   }
}

static void
compute_blit_3d_layers(const VkOffset3D *offsets,
                       uint32_t *min_layer, uint32_t *max_layer,
                       bool *mirror_z)
{
   if (offsets[1].z >= offsets[0].z) {
      *mirror_z = false;
      *min_layer = offsets[0].z;
      *max_layer = offsets[1].z;
   } else {
      *mirror_z = true;
      *min_layer = offsets[1].z;
      *max_layer = offsets[0].z;
   }
}

static VkResult
create_blit_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
3716
/* If this is not the first pool we create for this command buffer
3717
* size it based on the size of the currently exhausted pool.
3719
uint32_t descriptor_count = 64;
3720
if (cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE) {
3721
struct v3dv_descriptor_pool *exhausted_pool =
3722
v3dv_descriptor_pool_from_handle(cmd_buffer->meta.blit.dspool);
3723
descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
3726
/* Create the descriptor pool */
3727
cmd_buffer->meta.blit.dspool = VK_NULL_HANDLE;
3728
VkDescriptorPoolSize pool_size = {
3729
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
3730
.descriptorCount = descriptor_count,
3732
VkDescriptorPoolCreateInfo info = {
3733
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
3734
.maxSets = descriptor_count,
3736
.pPoolSizes = &pool_size,
3740
v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
3742
&cmd_buffer->device->vk.alloc,
3743
&cmd_buffer->meta.blit.dspool);
3745
if (result == VK_SUCCESS) {
3746
assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);
3747
const VkDescriptorPool _pool = cmd_buffer->meta.blit.dspool;
3749
v3dv_cmd_buffer_add_private_obj(
3750
cmd_buffer, (uintptr_t) _pool,
3751
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
3753
struct v3dv_descriptor_pool *pool =
3754
v3dv_descriptor_pool_from_handle(_pool);
3755
pool->is_driver_internal = true;
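
/* Sizing note for create_blit_descriptor_pool() above (illustrative): the
 * first pool in a command buffer holds 64 combined image/sampler
 * descriptors; each replacement pool doubles the exhausted pool's size
 * (128, 256, ...) up to a 1024 cap, and every pool is registered as a
 * command buffer private object so it is destroyed with the command buffer.
 */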

static VkResult
allocate_blit_source_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
                                    VkDescriptorSet *set)
{
   /* Make sure we have a descriptor pool */
   VkResult result;
   if (cmd_buffer->meta.blit.dspool == VK_NULL_HANDLE) {
      result = create_blit_descriptor_pool(cmd_buffer);
      if (result != VK_SUCCESS)
         return result;
   }
   assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);

   /* Allocate descriptor set */
   struct v3dv_device *device = cmd_buffer->device;
   VkDevice _device = v3dv_device_to_handle(device);
   VkDescriptorSetAllocateInfo info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
      .descriptorPool = cmd_buffer->meta.blit.dspool,
      .descriptorSetCount = 1,
      .pSetLayouts = &device->meta.blit.ds_layout,
   };
   result = v3dv_AllocateDescriptorSets(_device, &info, set);

   /* If we ran out of pool space, grow the pool and try again */
   if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
      result = create_blit_descriptor_pool(cmd_buffer);
      if (result == VK_SUCCESS) {
         info.descriptorPool = cmd_buffer->meta.blit.dspool;
         result = v3dv_AllocateDescriptorSets(_device, &info, set);
      }
   }

   return result;
}
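
/* Note on the above (illustrative): on VK_ERROR_OUT_OF_POOL_MEMORY the
 * helper swaps in a fresh, larger pool and retries once, so callers such as
 * the per-layer loop in blit_shader() can keep allocating one set per layer
 * without tracking pool capacity themselves.
 */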

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 *
 * The caller can specify the channels on the destination to be written via the
 * cmask parameter (which can be 0 to default to all channels), as well as a
 * swizzle to apply to the source via the cswizzle parameter (which can be NULL
 * to use the default identity swizzle).
 */
static bool
blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
            struct v3dv_image *dst,
            VkFormat dst_format,
            struct v3dv_image *src,
            VkFormat src_format,
            VkColorComponentFlags cmask,
            VkComponentMapping *cswizzle,
            const VkImageBlit2KHR *_region,
            VkFilter filter,
            bool dst_is_padded_image)
{
   bool handled = true;
   VkResult result;
   uint32_t dirty_dynamic_state = 0;

   /* We don't support rendering to linear depth/stencil, this should have
    * been rewritten to a compatible color blit by the caller.
    */
   assert(dst->vk.tiling != VK_IMAGE_TILING_LINEAR ||
          !vk_format_is_depth_or_stencil(dst_format));

   /* Can't sample from linear images */
   if (src->vk.tiling == VK_IMAGE_TILING_LINEAR &&
       src->vk.image_type != VK_IMAGE_TYPE_1D) {
      return false;
   }

   VkImageBlit2KHR region = *_region;
   /* Rewrite combined D/S blits to compatible color blits */
   if (vk_format_is_depth_or_stencil(dst_format)) {
      assert(src_format == dst_format);
      assert(cmask == 0);
      switch(dst_format) {
      case VK_FORMAT_D16_UNORM:
         dst_format = VK_FORMAT_R16_UINT;
         break;
      case VK_FORMAT_D32_SFLOAT:
         dst_format = VK_FORMAT_R32_UINT;
         break;
      case VK_FORMAT_X8_D24_UNORM_PACK32:
      case VK_FORMAT_D24_UNORM_S8_UINT:
         if (region.srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
            cmask |= VK_COLOR_COMPONENT_G_BIT |
                     VK_COLOR_COMPONENT_B_BIT |
                     VK_COLOR_COMPONENT_A_BIT;
         }
         if (region.srcSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
            assert(dst_format == VK_FORMAT_D24_UNORM_S8_UINT);
            cmask |= VK_COLOR_COMPONENT_R_BIT;
         }
         dst_format = VK_FORMAT_R8G8B8A8_UINT;
         break;
      default:
         unreachable("Unsupported depth/stencil format");
      }
      src_format = dst_format;
      region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
      region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
   }
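
   /* Example (illustrative): a depth-only blit of VK_FORMAT_D24_UNORM_S8_UINT
    * becomes a VK_FORMAT_R8G8B8A8_UINT color blit with cmask = G|B|A, so the
    * three bytes aliasing the 24-bit depth value are written while the R
    * byte, which aliases stencil, keeps its previous contents.
    */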

   const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
                                            VK_COLOR_COMPONENT_G_BIT |
                                            VK_COLOR_COMPONENT_B_BIT |
                                            VK_COLOR_COMPONENT_A_BIT;
   if (cmask == 0)
      cmask = full_cmask;

   VkComponentMapping ident_swizzle = {
      .r = VK_COMPONENT_SWIZZLE_IDENTITY,
      .g = VK_COMPONENT_SWIZZLE_IDENTITY,
      .b = VK_COMPONENT_SWIZZLE_IDENTITY,
      .a = VK_COMPONENT_SWIZZLE_IDENTITY,
   };
   if (!cswizzle)
      cswizzle = &ident_swizzle;

   /* When we get here from a copy between compressed / uncompressed images
    * we choose to specify the destination blit region based on the size
    * semantics of the source image of the copy (see copy_image_blit), so we
    * need to apply those same semantics here when we compute the size of the
    * destination image level.
    */
   const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk.format);
   const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk.format);
   const uint32_t src_block_w = vk_format_get_blockwidth(src->vk.format);
   const uint32_t src_block_h = vk_format_get_blockheight(src->vk.format);
   const uint32_t dst_level_w =
      u_minify(DIV_ROUND_UP(dst->vk.extent.width * src_block_w, dst_block_w),
               region.dstSubresource.mipLevel);
   const uint32_t dst_level_h =
      u_minify(DIV_ROUND_UP(dst->vk.extent.height * src_block_h, dst_block_h),
               region.dstSubresource.mipLevel);

   const uint32_t src_level_w =
      u_minify(src->vk.extent.width, region.srcSubresource.mipLevel);
   const uint32_t src_level_h =
      u_minify(src->vk.extent.height, region.srcSubresource.mipLevel);
   const uint32_t src_level_d =
      u_minify(src->vk.extent.depth, region.srcSubresource.mipLevel);
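
   /* Worked example (illustrative): copying a 64x64 ETC2 source (4x4 blocks)
    * into a 16x16 uncompressed destination whose texels each hold one block
    * gives dst_level_w = DIV_ROUND_UP(16 * 4, 1) = 64 at mip 0, i.e. the
    * destination level size is expressed in the source's texel-size
    * semantics, matching the region that copy_image_blit produced.
    */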

   uint32_t dst_x, dst_y, dst_w, dst_h;
   bool dst_mirror_x, dst_mirror_y;
   compute_blit_box(region.dstOffsets,
                    dst_level_w, dst_level_h,
                    &dst_x, &dst_y, &dst_w, &dst_h,
                    &dst_mirror_x, &dst_mirror_y);

   uint32_t src_x, src_y, src_w, src_h;
   bool src_mirror_x, src_mirror_y;
   compute_blit_box(region.srcOffsets,
                    src_level_w, src_level_h,
                    &src_x, &src_y, &src_w, &src_h,
                    &src_mirror_x, &src_mirror_y);

   uint32_t min_dst_layer;
   uint32_t max_dst_layer;
   bool dst_mirror_z = false;
   if (dst->vk.image_type != VK_IMAGE_TYPE_3D) {
      min_dst_layer = region.dstSubresource.baseArrayLayer;
      max_dst_layer = min_dst_layer + region.dstSubresource.layerCount;
   } else {
      compute_blit_3d_layers(region.dstOffsets,
                             &min_dst_layer, &max_dst_layer,
                             &dst_mirror_z);
   }

   uint32_t min_src_layer;
   uint32_t max_src_layer;
   bool src_mirror_z = false;
   if (src->vk.image_type != VK_IMAGE_TYPE_3D) {
      min_src_layer = region.srcSubresource.baseArrayLayer;
      max_src_layer = min_src_layer + region.srcSubresource.layerCount;
   } else {
      compute_blit_3d_layers(region.srcOffsets,
                             &min_src_layer, &max_src_layer,
                             &src_mirror_z);
   }

   uint32_t layer_count = max_dst_layer - min_dst_layer;

   /* Translate source blit coordinates to normalized texture coordinates for
    * single sampled textures. For multisampled textures we require
    * unnormalized coordinates, since we can only do texelFetch on them.
    */
   float coords[4] = {
      (float)src_x,
      (float)src_y,
      (float)(src_x + src_w),
      (float)(src_y + src_h),
   };
   if (src->vk.samples == VK_SAMPLE_COUNT_1_BIT) {
      coords[0] /= (float)src_level_w;
      coords[1] /= (float)src_level_h;
      coords[2] /= (float)src_level_w;
      coords[3] /= (float)src_level_h;
   }
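
   /* Example (illustrative): a 256x256 single-sampled source level with a
    * source box spanning x = 64..192 normalizes coords[0] and coords[2] to
    * 64/256 = 0.25 and 192/256 = 0.75; a multisampled source would keep the
    * raw texel values, since its shader reads with texelFetch.
    */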

   /* Handle mirroring */
   const bool mirror_x = dst_mirror_x != src_mirror_x;
   const bool mirror_y = dst_mirror_y != src_mirror_y;
   const bool mirror_z = dst_mirror_z != src_mirror_z;
   float tex_coords[5] = {
      !mirror_x ? coords[0] : coords[2],
      !mirror_y ? coords[1] : coords[3],
      !mirror_x ? coords[2] : coords[0],
      !mirror_y ? coords[3] : coords[1],
      0, /* Z coordinate for 3D blit sources, to be filled for each
          * layer.
          */
   };

   /* For blits from 3D images we also need to compute the slice coordinate to
    * sample from, which will change for each layer in the destination.
    * Compute the step to advance by on each iteration.
    */
   const float src_z_step =
      (float)(max_src_layer - min_src_layer) / (float)layer_count;
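
   /* Worked example (illustrative): blitting an 8-slice 3D source range into
    * a 2-layer destination gives src_z_step = 8 / 2 = 4, so the two layers
    * sample at slice coordinates 0.5 * 4 = 2 and 1.5 * 4 = 6, the middle of
    * each step, normalized to 2/8 and 6/8 in the per-layer loop below.
    */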

   /* Get the blit pipeline */
   struct v3dv_meta_blit_pipeline *pipeline = NULL;
   bool ok = get_blit_pipeline(cmd_buffer->device,
                               dst_format, src_format, cmask, src->vk.image_type,
                               dst->vk.samples, src->vk.samples,
                               &pipeline);
   if (!ok)
      return handled;
   assert(pipeline && pipeline->pipeline &&
          pipeline->pass && pipeline->pass_no_load);

   struct v3dv_device *device = cmd_buffer->device;
   assert(device->meta.blit.ds_layout);

   VkDevice _device = v3dv_device_to_handle(device);
   VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);

   /* Create sampler for blit source image */
   VkSamplerCreateInfo sampler_info = {
      .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
      .magFilter = filter,
      .minFilter = filter,
      .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
      .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
      .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
      .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
   };
   VkSampler sampler;
   result = v3dv_CreateSampler(_device, &sampler_info, &device->vk.alloc,
                               &sampler);
   if (result != VK_SUCCESS)
      goto fail;

   v3dv_cmd_buffer_add_private_obj(
      cmd_buffer, (uintptr_t)sampler,
      (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroySampler);

   /* Push command buffer state before starting meta operation */
   v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);

   /* Push state that is common for all layers */
   v3dv_CmdBindPipeline(_cmd_buffer,
                        VK_PIPELINE_BIND_POINT_GRAPHICS,
                        pipeline->pipeline);

   const VkViewport viewport = {
      .x = dst_x,
      .y = dst_y,
      .width = dst_w,
      .height = dst_h,
      .minDepth = 0.0f,
      .maxDepth = 1.0f
   };
   v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);

   const VkRect2D scissor = {
      .offset = { dst_x, dst_y },
      .extent = { dst_w, dst_h }
   };
   v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);

   bool can_skip_tlb_load = false;
   const VkRect2D render_area = {
      .offset = { dst_x, dst_y },
      .extent = { dst_w, dst_h },
   };

   /* Record per-layer commands */
   VkImageAspectFlags aspects = region.dstSubresource.aspectMask;
   for (uint32_t i = 0; i < layer_count; i++) {
      /* Setup framebuffer */
      VkImageViewCreateInfo dst_image_view_info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
         .image = v3dv_image_to_handle(dst),
         .viewType = v3dv_image_type_to_view_type(dst->vk.image_type),
         .format = dst_format,
         .subresourceRange = {
            .aspectMask = aspects,
            .baseMipLevel = region.dstSubresource.mipLevel,
            .levelCount = 1,
            .baseArrayLayer = min_dst_layer + i,
            .layerCount = 1
         },
      };
      VkImageView dst_image_view;
      result = v3dv_CreateImageView(_device, &dst_image_view_info,
                                    &device->vk.alloc, &dst_image_view);
      if (result != VK_SUCCESS)
         goto fail;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)dst_image_view,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);

      VkFramebufferCreateInfo fb_info = {
         .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
         .renderPass = pipeline->pass,
         .attachmentCount = 1,
         .pAttachments = &dst_image_view,
         .width = dst_x + dst_w,
         .height = dst_y + dst_h,
         .layers = 1,
      };

      VkFramebuffer fb;
      result = v3dv_CreateFramebuffer(_device, &fb_info,
                                      &cmd_buffer->device->vk.alloc, &fb);
      if (result != VK_SUCCESS)
         goto fail;

      struct v3dv_framebuffer *framebuffer = v3dv_framebuffer_from_handle(fb);
      framebuffer->has_edge_padding = fb_info.width == dst_level_w &&
                                      fb_info.height == dst_level_h &&
                                      dst_is_padded_image;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)fb,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);

      /* Setup descriptor set for blit source texture. We don't have to
       * register the descriptor as a private command buffer object since
       * all descriptors will be freed automatically with the descriptor
       * pool.
       */
      VkDescriptorSet set;
      result = allocate_blit_source_descriptor_set(cmd_buffer, &set);
      if (result != VK_SUCCESS)
         goto fail;

      VkImageViewCreateInfo src_image_view_info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
         .image = v3dv_image_to_handle(src),
         .viewType = v3dv_image_type_to_view_type(src->vk.image_type),
         .format = src_format,
         .components = *cswizzle,
         .subresourceRange = {
            .aspectMask = aspects,
            .baseMipLevel = region.srcSubresource.mipLevel,
            .levelCount = 1,
            .baseArrayLayer =
               src->vk.image_type == VK_IMAGE_TYPE_3D ? 0 : min_src_layer + i,
            .layerCount = 1
         },
      };
      VkImageView src_image_view;
      result = v3dv_CreateImageView(_device, &src_image_view_info,
                                    &device->vk.alloc, &src_image_view);
      if (result != VK_SUCCESS)
         goto fail;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)src_image_view,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);

      VkDescriptorImageInfo image_info = {
         .sampler = sampler,
         .imageView = src_image_view,
         .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
      };
      VkWriteDescriptorSet write = {
         .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
         .dstSet = set,
         .dstBinding = 0,
         .dstArrayElement = 0,
         .descriptorCount = 1,
         .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
         .pImageInfo = &image_info,
      };
      v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);

      v3dv_CmdBindDescriptorSets(_cmd_buffer,
                                 VK_PIPELINE_BIND_POINT_GRAPHICS,
                                 device->meta.blit.p_layout,
                                 0, 1, &set,
                                 0, NULL);

      /* If the region we are about to blit is tile-aligned, then we can
       * use the render pass version that won't pre-load the tile buffer
       * with the dst image contents before the blit. The exception is when we
       * don't have a full color mask, since in that case we need to preserve
       * the original value of some of the color components.
       *
       * Since all layers have the same area, we only need to compute this for
       * the first.
       */
      if (i == 0) {
         struct v3dv_render_pass *pipeline_pass =
            v3dv_render_pass_from_handle(pipeline->pass);
         can_skip_tlb_load =
            cmask == full_cmask &&
            v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
                                              framebuffer, pipeline_pass, 0);
      }
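
      /* Note (illustrative): with a full color mask and a render area that
       * lines up with the tile grid (e.g. a blit overwriting a whole
       * tile-aligned destination region), every tile is fully rewritten, so
       * the cheaper pass_no_load variant can skip loading the previous
       * destination contents into the tile buffer.
       */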

      VkRenderPassBeginInfo rp_info = {
         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
         .renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
                                           pipeline->pass,
         .framebuffer = fb,
         .renderArea = render_area,
         .clearValueCount = 0,
      };

      VkSubpassBeginInfo sp_info = {
         .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
         .contents = VK_SUBPASS_CONTENTS_INLINE,
      };

      v3dv_CmdBeginRenderPass2(_cmd_buffer, &rp_info, &sp_info);
      struct v3dv_job *job = cmd_buffer->state.job;
      if (!job)
         goto fail;

      /* For 3D blits we need to compute the source slice to blit from (the Z
       * coordinate of the source sample operation). We want to choose this
       * based on the ratio of the depth of the source and the destination
       * images, picking the coordinate in the middle of each step.
       */
      if (src->vk.image_type == VK_IMAGE_TYPE_3D) {
         tex_coords[4] =
            !mirror_z ?
            (min_src_layer + (i + 0.5f) * src_z_step) / (float)src_level_d :
            (max_src_layer - (i + 0.5f) * src_z_step) / (float)src_level_d;
      }

      v3dv_CmdPushConstants(_cmd_buffer,
                            device->meta.blit.p_layout,
                            VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
                            &tex_coords);

      v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);

      VkSubpassEndInfo sp_end_info = {
         .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
      };

      v3dv_CmdEndRenderPass2(_cmd_buffer, &sp_end_info);
      dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
   }

fail:
   v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true);

   return handled;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdBlitImage2KHR(VkCommandBuffer commandBuffer,
                      const VkBlitImageInfo2KHR *pBlitImageInfo)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_image, src, pBlitImageInfo->srcImage);
   V3DV_FROM_HANDLE(v3dv_image, dst, pBlitImageInfo->dstImage);

   /* This command can only happen outside a render pass */
   assert(cmd_buffer->state.pass == NULL);
   assert(cmd_buffer->state.job == NULL);

   /* From the Vulkan 1.0 spec, vkCmdBlitImage valid usage */
   assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT &&
          src->vk.samples == VK_SAMPLE_COUNT_1_BIT);

   /* We don't export VK_FORMAT_FEATURE_BLIT_DST_BIT on compressed formats */
   assert(!vk_format_is_compressed(dst->vk.format));

   for (uint32_t i = 0; i < pBlitImageInfo->regionCount; i++) {
      if (blit_tfu(cmd_buffer, dst, src, &pBlitImageInfo->pRegions[i]))
         continue;
      if (blit_shader(cmd_buffer,
                      dst, dst->vk.format,
                      src, src->vk.format,
                      0, NULL,
                      &pBlitImageInfo->pRegions[i],
                      pBlitImageInfo->filter, true)) {
         continue;
      }
      unreachable("Unsupported blit operation");
   }
}

static bool
resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                  struct v3dv_image *dst,
                  struct v3dv_image *src,
                  const VkImageResolve2KHR *region)
{
   if (!v3dv_meta_can_use_tlb(src, &region->srcOffset, NULL) ||
       !v3dv_meta_can_use_tlb(dst, &region->dstOffset, NULL)) {
      return false;
   }

   if (!v3dv_X(cmd_buffer->device, format_supports_tlb_resolve)(src->format))
      return false;

   const VkFormat fb_format = src->vk.format;

   uint32_t num_layers;
   if (dst->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->dstSubresource.layerCount;
   else
      num_layers = region->extent.depth;
   assert(num_layers > 0);

   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
   if (!job)
      return true;

   const uint32_t block_w = vk_format_get_blockwidth(dst->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(dst->vk.format);
   const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);

   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, region->srcSubresource.aspectMask,
       &internal_type, &internal_bpp);

   v3dv_job_start_frame(job, width, height, num_layers, false,
                        1, internal_bpp, true);

   struct v3dv_meta_framebuffer framebuffer;
   v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
                                              internal_type, &job->frame_tiling);

   v3dv_X(job->device, job_emit_binning_flush)(job);
   v3dv_X(job->device, meta_emit_resolve_image_rcl)(job, dst, src,
                                                    &framebuffer, region);

   v3dv_cmd_buffer_finish_job(cmd_buffer);
   return true;
}
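
/* Note on resolve_image_tlb() above (illustrative): the resolve runs as a
 * single CL job covering the whole region, with one layer per array layer
 * (or per depth slice for a 3D destination), and the multisampled tiles are
 * resolved as they pass through the tile buffer (see
 * meta_emit_resolve_image_rcl).
 */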

static bool
resolve_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
                   struct v3dv_image *dst,
                   struct v3dv_image *src,
                   const VkImageResolve2KHR *region)
{
   const VkImageBlit2KHR blit_region = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
      .srcSubresource = region->srcSubresource,
      .srcOffsets = {
         region->srcOffset,
         {
            region->srcOffset.x + region->extent.width,
            region->srcOffset.y + region->extent.height,
            region->srcOffset.z + region->extent.depth },
      },
      .dstSubresource = region->dstSubresource,
      .dstOffsets = {
         region->dstOffset,
         {
            region->dstOffset.x + region->extent.width,
            region->dstOffset.y + region->extent.height,
            region->dstOffset.z + region->extent.depth },
      },
   };
   return blit_shader(cmd_buffer,
                      dst, dst->vk.format,
                      src, src->vk.format,
                      0, NULL,
                      &blit_region, VK_FILTER_NEAREST, true);
}
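
/* Note on the above (illustrative): the resolve is expressed as an unscaled
 * blit (the dst and src boxes both span exactly the resolve extent), so
 * VK_FILTER_NEAREST drops no information here; blit_shader selects a
 * pipeline keyed on the multisampled source to read the MSAA data.
 */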

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
                         const VkResolveImageInfo2KHR *info)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
   V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);

   /* This command can only happen outside a render pass */
   assert(cmd_buffer->state.pass == NULL);
   assert(cmd_buffer->state.job == NULL);

   assert(src->vk.samples == VK_SAMPLE_COUNT_4_BIT);
   assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT);

   for (uint32_t i = 0; i < info->regionCount; i++) {
      if (resolve_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))
         continue;
      if (resolve_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))
         continue;
      unreachable("Unsupported multisample resolve operation");
   }
}