/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <vulkan/vulkan.h>

#include "c11_compat.h"
#include "hwdef/rogue_hw_defs.h"
#include "hwdef/rogue_hw_utils.h"
#include "pvr_device_info.h"
#include "pvr_end_of_tile.h"
#include "pvr_formats.h"
#include "pvr_hw_pass.h"
#include "pvr_job_common.h"
#include "pvr_job_render.h"
#include "pvr_limits.h"
#include "pvr_private.h"
#include "pvr_winsys.h"
#include "util/compiler.h"
#include "util/list.h"
#include "util/macros.h"
#include "util/u_dynarray.h"
#include "util/u_pack_color.h"
#include "vk_command_buffer.h"
#include "vk_command_pool.h"
#include "vk_format.h"
#include "vk_object.h"

/* Structure used to pass data into pvr_compute_generate_control_stream()
 * function.
 */
struct pvr_compute_kernel_info {
   pvr_dev_addr_t indirect_buffer_addr;
   bool global_offsets_present;
   uint32_t usc_common_size;
   uint32_t usc_unified_size;
   uint32_t pds_temp_size;
   uint32_t pds_data_size;
   bool is_fence;
   bool usc_target_any;
   uint32_t pds_data_offset;
   uint32_t pds_code_offset;
   enum PVRX(CDMCTRL_SD_TYPE) sd_type;
   bool usc_common_shared;
   uint32_t local_size[3];
   uint32_t global_size[3];
   uint32_t max_instances;
};
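
/* Illustrative sketch (not from the original source): a trivial fence-style
 * fill-in of this struct would look roughly like
 *
 *    struct pvr_compute_kernel_info info = {
 *       .indirect_buffer_addr.addr = 0ULL,
 *       .sd_type = PVRX(CDMCTRL_SD_TYPE_PDS),
 *       .global_size = { 1U, 1U, 1U },
 *       .local_size = { 1U, 1U, 1U },
 *    };
 *
 * pvr_compute_generate_fence() below populates it this way for real.
 */
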
static void pvr_cmd_buffer_free_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
                                        struct pvr_sub_cmd *sub_cmd)
{
   switch (sub_cmd->type) {
   case PVR_SUB_CMD_TYPE_GRAPHICS:
      pvr_csb_finish(&sub_cmd->gfx.control_stream);
      pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.depth_bias_bo);
      pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.scissor_bo);
      break;

   case PVR_SUB_CMD_TYPE_COMPUTE:
      pvr_csb_finish(&sub_cmd->compute.control_stream);
      break;

   case PVR_SUB_CMD_TYPE_TRANSFER:
      list_for_each_entry_safe (struct pvr_transfer_cmd,
                                transfer_cmd,
                                &sub_cmd->transfer.transfer_cmds,
                                link) {
         list_del(&transfer_cmd->link);
         vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
      }
      break;

   default:
      pvr_finishme("Unsupported sub-command type %d", sub_cmd->type);
      break;
   }

   list_del(&sub_cmd->link);
   vk_free(&cmd_buffer->vk.pool->alloc, sub_cmd);
}

static void pvr_cmd_buffer_free_sub_cmds(struct pvr_cmd_buffer *cmd_buffer)
{
   list_for_each_entry_safe (struct pvr_sub_cmd,
                             sub_cmd,
                             &cmd_buffer->sub_cmds,
                             link) {
      pvr_cmd_buffer_free_sub_cmd(cmd_buffer, sub_cmd);
   }
}

static void pvr_cmd_buffer_destroy(struct vk_command_buffer *vk_cmd_buffer)
{
   struct pvr_cmd_buffer *cmd_buffer =
      container_of(vk_cmd_buffer, struct pvr_cmd_buffer, vk);

   vk_free(&cmd_buffer->vk.pool->alloc,
           cmd_buffer->state.render_pass_info.attachments);
   vk_free(&cmd_buffer->vk.pool->alloc,
           cmd_buffer->state.render_pass_info.clear_values);

   pvr_cmd_buffer_free_sub_cmds(cmd_buffer);

   list_for_each_entry_safe (struct pvr_bo, bo, &cmd_buffer->bo_list, link) {
      list_del(&bo->link);
      pvr_bo_free(cmd_buffer->device, bo);
   }

   util_dynarray_fini(&cmd_buffer->scissor_array);
   util_dynarray_fini(&cmd_buffer->depth_bias_array);

   vk_command_buffer_finish(&cmd_buffer->vk);
   vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer);
}

static VkResult pvr_cmd_buffer_create(struct pvr_device *device,
                                      struct vk_command_pool *pool,
                                      VkCommandBufferLevel level,
                                      VkCommandBuffer *pCommandBuffer)
{
   struct pvr_cmd_buffer *cmd_buffer;
   VkResult result;

   cmd_buffer = vk_zalloc(&pool->alloc,
                          sizeof(*cmd_buffer),
                          8U,
                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!cmd_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = vk_command_buffer_init(&cmd_buffer->vk, pool, level);
   if (result != VK_SUCCESS) {
      vk_free(&pool->alloc, cmd_buffer);
      return result;
   }

   cmd_buffer->vk.destroy = pvr_cmd_buffer_destroy;
   cmd_buffer->device = device;

   util_dynarray_init(&cmd_buffer->depth_bias_array, NULL);
   util_dynarray_init(&cmd_buffer->scissor_array, NULL);

   cmd_buffer->state.status = VK_SUCCESS;
   cmd_buffer->status = PVR_CMD_BUFFER_STATUS_INITIAL;

   list_inithead(&cmd_buffer->sub_cmds);
   list_inithead(&cmd_buffer->bo_list);

   *pCommandBuffer = pvr_cmd_buffer_to_handle(cmd_buffer);

   return VK_SUCCESS;
}

VkResult
pvr_AllocateCommandBuffers(VkDevice _device,
                           const VkCommandBufferAllocateInfo *pAllocateInfo,
                           VkCommandBuffer *pCommandBuffers)
{
   VK_FROM_HANDLE(vk_command_pool, pool, pAllocateInfo->commandPool);
   PVR_FROM_HANDLE(pvr_device, device, _device);
   VkResult result = VK_SUCCESS;
   uint32_t i;

   for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
      result = pvr_cmd_buffer_create(device,
                                     pool,
                                     pAllocateInfo->level,
                                     &pCommandBuffers[i]);
      if (result != VK_SUCCESS)
         break;
   }

   if (result != VK_SUCCESS) {
      while (i--) {
         VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, pCommandBuffers[i]);
         pvr_cmd_buffer_destroy(cmd_buffer);
      }

      for (i = 0; i < pAllocateInfo->commandBufferCount; i++)
         pCommandBuffers[i] = VK_NULL_HANDLE;
   }

   return result;
}

static void pvr_cmd_buffer_update_barriers(struct pvr_cmd_buffer *cmd_buffer,
                                           enum pvr_sub_cmd_type type)
{
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   uint32_t barriers;

   switch (type) {
   case PVR_SUB_CMD_TYPE_GRAPHICS:
      barriers = PVR_PIPELINE_STAGE_GEOM_BIT | PVR_PIPELINE_STAGE_FRAG_BIT;
      break;

   case PVR_SUB_CMD_TYPE_COMPUTE:
      barriers = PVR_PIPELINE_STAGE_COMPUTE_BIT;
      break;

   case PVR_SUB_CMD_TYPE_TRANSFER:
      barriers = PVR_PIPELINE_STAGE_TRANSFER_BIT;
      break;

   default:
      barriers = 0;
      pvr_finishme("Unsupported sub-command type %d", type);
      break;
   }

   for (uint32_t i = 0; i < ARRAY_SIZE(state->barriers_needed); i++)
      state->barriers_needed[i] |= barriers;
}

static VkResult pvr_cmd_buffer_upload_tables(struct pvr_device *device,
                                             struct pvr_cmd_buffer *cmd_buffer)
{
   struct pvr_sub_cmd *sub_cmd = cmd_buffer->state.current_sub_cmd;
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   VkResult result;

   assert(!sub_cmd->gfx.depth_bias_bo && !sub_cmd->gfx.scissor_bo);

   if (cmd_buffer->depth_bias_array.size > 0) {
      result =
         pvr_gpu_upload(device,
                        device->heaps.general_heap,
                        util_dynarray_begin(&cmd_buffer->depth_bias_array),
                        cmd_buffer->depth_bias_array.size,
                        cache_line_size,
                        &sub_cmd->gfx.depth_bias_bo);
      if (result != VK_SUCCESS)
         return result;
   }

   if (cmd_buffer->scissor_array.size > 0) {
      result = pvr_gpu_upload(device,
                              device->heaps.general_heap,
                              util_dynarray_begin(&cmd_buffer->scissor_array),
                              cmd_buffer->scissor_array.size,
                              cache_line_size,
                              &sub_cmd->gfx.scissor_bo);
      if (result != VK_SUCCESS)
         goto err_free_depth_bias_bo;
   }

   util_dynarray_clear(&cmd_buffer->depth_bias_array);
   util_dynarray_clear(&cmd_buffer->scissor_array);

   return VK_SUCCESS;

err_free_depth_bias_bo:
   pvr_bo_free(device, sub_cmd->gfx.depth_bias_bo);
   sub_cmd->gfx.depth_bias_bo = NULL;

   return result;
}

static VkResult pvr_cmd_buffer_emit_ppp_state(struct pvr_cmd_buffer *cmd_buffer)
{
   struct pvr_sub_cmd *sub_cmd = cmd_buffer->state.current_sub_cmd;
   struct pvr_framebuffer *framebuffer =
      cmd_buffer->state.render_pass_info.framebuffer;

   pvr_csb_emit (&sub_cmd->gfx.control_stream, VDMCTRL_PPP_STATE0, state0) {
      state0.addrmsb = framebuffer->ppp_state_bo->vma->dev_addr;
      state0.word_count = framebuffer->ppp_state_size;
   }

   pvr_csb_emit (&sub_cmd->gfx.control_stream, VDMCTRL_PPP_STATE1, state1) {
      state1.addrlsb = framebuffer->ppp_state_bo->vma->dev_addr;
   }

   return VK_SUCCESS;
}

static VkResult
pvr_cmd_buffer_upload_general(struct pvr_cmd_buffer *const cmd_buffer,
                              const void *const data,
                              const size_t size,
                              struct pvr_bo **const pvr_bo_out)
{
   struct pvr_device *const device = cmd_buffer->device;
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   struct pvr_bo *pvr_bo;
   VkResult result;

   result = pvr_gpu_upload(device,
                           device->heaps.general_heap,
                           data,
                           size,
                           cache_line_size,
                           &pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   list_add(&pvr_bo->link, &cmd_buffer->bo_list);

   *pvr_bo_out = pvr_bo;

   return VK_SUCCESS;
}

static VkResult
pvr_cmd_buffer_upload_usc(struct pvr_cmd_buffer *const cmd_buffer,
                          const void *const code,
                          const size_t code_size,
                          uint64_t code_alignment,
                          struct pvr_bo **const pvr_bo_out)
{
   struct pvr_device *const device = cmd_buffer->device;
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   struct pvr_bo *pvr_bo;
   VkResult result;

   code_alignment = MAX2(code_alignment, cache_line_size);

   result =
      pvr_gpu_upload_usc(device, code, code_size, code_alignment, &pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   list_add(&pvr_bo->link, &cmd_buffer->bo_list);

   *pvr_bo_out = pvr_bo;

   return VK_SUCCESS;
}

static VkResult
pvr_cmd_buffer_upload_pds(struct pvr_cmd_buffer *const cmd_buffer,
                          const uint32_t *data,
                          uint32_t data_size_dwords,
                          uint32_t data_alignment,
                          const uint32_t *code,
                          uint32_t code_size_dwords,
                          uint32_t code_alignment,
                          uint64_t min_alignment,
                          struct pvr_pds_upload *const pds_upload_out)
{
   struct pvr_device *const device = cmd_buffer->device;
   VkResult result;

   result = pvr_gpu_upload_pds(device,
                               data,
                               data_size_dwords,
                               data_alignment,
                               code,
                               code_size_dwords,
                               code_alignment,
                               min_alignment,
                               pds_upload_out);
   if (result != VK_SUCCESS)
      return result;

   list_add(&pds_upload_out->pvr_bo->link, &cmd_buffer->bo_list);

   return VK_SUCCESS;
}

static inline VkResult
pvr_cmd_buffer_upload_pds_data(struct pvr_cmd_buffer *const cmd_buffer,
                               const uint32_t *data,
                               uint32_t data_size_dwords,
                               uint32_t data_alignment,
                               struct pvr_pds_upload *const pds_upload_out)
{
   return pvr_cmd_buffer_upload_pds(cmd_buffer,
                                    data,
                                    data_size_dwords,
                                    data_alignment,
                                    NULL,
                                    0,
                                    0,
                                    data_alignment,
                                    pds_upload_out);
}

static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
   struct pvr_cmd_buffer *const cmd_buffer,
   const uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS],
   struct pvr_pds_upload *const pds_upload_out)
{
   struct pvr_pds_event_program pixel_event_program = {
      /* No data to DMA, just a DOUTU needed. */
      .num_emit_word_pairs = 0,
   };
   const uint32_t staging_buffer_size =
      cmd_buffer->device->pixel_event_data_size_in_dwords * sizeof(uint32_t);
   const VkAllocationCallbacks *const allocator = &cmd_buffer->vk.pool->alloc;
   struct pvr_device *const device = cmd_buffer->device;
   /* FIXME: This should come from the compiler for the USC pixel program. */
   const uint32_t usc_temp_count = 0;
   struct pvr_bo *usc_eot_program;
   uint8_t *usc_eot_program_ptr;
   uint32_t *staging_buffer;
   VkResult result;

   result = pvr_cmd_buffer_upload_usc(cmd_buffer,
                                      pvr_end_of_tile_program,
                                      sizeof(pvr_end_of_tile_program),
                                      4,
                                      &usc_eot_program);
   if (result != VK_SUCCESS)
      return result;

   assert((pbe_cs_words[1] & 0x3F) == 0x20);

   /* FIXME: Stop patching the framebuffer address (this will require the
    * end-of-tile program to be generated at run-time).
    */
   pvr_bo_cpu_map(device, usc_eot_program);
   usc_eot_program_ptr = usc_eot_program->bo->map;
   usc_eot_program_ptr[6] = (pbe_cs_words[0] >> 0) & 0xFF;
   usc_eot_program_ptr[7] = (pbe_cs_words[0] >> 8) & 0xFF;
   usc_eot_program_ptr[8] = (pbe_cs_words[0] >> 16) & 0xFF;
   usc_eot_program_ptr[9] = (pbe_cs_words[0] >> 24) & 0xFF;
   pvr_bo_cpu_unmap(device, usc_eot_program);

   pvr_pds_setup_doutu(&pixel_event_program.task_control,
                       usc_eot_program->vma->dev_addr.addr,
                       usc_temp_count,
                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                       false);

   /* TODO: We could skip allocating this and generate directly into the device
    * buffer thus removing one allocation and memcpy() per job. Would this
    * speed up things in a noticeable way?
    */
   staging_buffer = vk_alloc(allocator,
                             staging_buffer_size,
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_usc_pixel_program;
   }

   /* Generate the data segment. The code segment was uploaded earlier when
    * setting up the PDS static heap data.
    */
   pvr_pds_generate_pixel_event_data_segment(&pixel_event_program,
                                             staging_buffer,
                                             &device->pdevice->dev_info);

   result = pvr_cmd_buffer_upload_pds_data(
      cmd_buffer,
      staging_buffer,
      cmd_buffer->device->pixel_event_data_size_in_dwords,
      4,
      pds_upload_out);
   if (result != VK_SUCCESS)
      goto err_free_pixel_event_staging_buffer;

   vk_free(allocator, staging_buffer);

   return VK_SUCCESS;

err_free_pixel_event_staging_buffer:
   vk_free(allocator, staging_buffer);

err_free_usc_pixel_program:
   list_del(&usc_eot_program->link);
   pvr_bo_free(device, usc_eot_program);

   return result;
}

static uint32_t pvr_get_hw_clear_color(VkFormat vk_format,
                                       const VkClearValue *clear_value)
{
   union util_color uc = { .ui = 0 };

   switch (vk_format) {
   case VK_FORMAT_B8G8R8A8_UNORM:
      util_pack_color(clear_value->color.float32,
                      PIPE_FORMAT_R8G8B8A8_UNORM,
                      &uc);
      break;

   default:
      assert(!"Unsupported format");
      break;
   }

   return uc.ui[0];
}

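/* Worked example (not from the original source): clearing
 * VK_FORMAT_B8G8R8A8_UNORM to (r, g, b, a) = (1.0, 0.0, 0.0, 1.0) converts
 * each float to an 8-bit UNORM channel (1.0f -> 0xFF, 0.0f -> 0x00), so the
 * packed clear word holds the bytes { 0xFF, 0x00, 0x00, 0xFF } in R, G, B, A
 * order.
 */
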
static VkResult
pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
                                        uint32_t idx,
                                        pvr_dev_addr_t *const addr_out)
{
   const struct pvr_render_pass_info *render_pass_info =
      &cmd_buffer->state.render_pass_info;
   const struct pvr_render_pass *pass = render_pass_info->pass;
   const struct pvr_renderpass_hwsetup_render *hw_render =
      &pass->hw_setup->renders[idx];
   ASSERTED const struct pvr_load_op *load_op = hw_render->client_data;
   const struct pvr_renderpass_colorinit *color_init =
      &hw_render->color_init[0];
   const struct pvr_render_pass_attachment *attachment =
      &pass->attachments[color_init->driver_id];
   const VkClearValue *clear_value =
      &render_pass_info->clear_values[color_init->driver_id];
   uint32_t hw_clear_value;
   struct pvr_bo *clear_bo;
   VkResult result;

   pvr_finishme("Add missing load op data support");

   assert(load_op->is_hw_object);
   assert(hw_render->color_init_count == 1);

   /* FIXME: add support for RENDERPASS_SURFACE_INITOP_LOAD. */
   assert(color_init->op == RENDERPASS_SURFACE_INITOP_CLEAR);

   /* FIXME: do this at the point we store the clear values? */
   hw_clear_value = pvr_get_hw_clear_color(attachment->vk_format, clear_value);

   result = pvr_cmd_buffer_upload_general(cmd_buffer,
                                          &hw_clear_value,
                                          sizeof(hw_clear_value),
                                          &clear_bo);
   if (result != VK_SUCCESS)
      return result;

   *addr_out = clear_bo->vma->dev_addr;

   return VK_SUCCESS;
}

static VkResult pvr_load_op_pds_data_create_and_upload(
   struct pvr_cmd_buffer *cmd_buffer,
   uint32_t idx,
   pvr_dev_addr_t constants_addr,
   struct pvr_pds_upload *const pds_upload_out)
{
   const struct pvr_render_pass_info *render_pass_info =
      &cmd_buffer->state.render_pass_info;
   const struct pvr_load_op *load_op =
      render_pass_info->pass->hw_setup->renders[idx].client_data;
   struct pvr_device *device = cmd_buffer->device;
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   struct pvr_pds_pixel_shader_sa_program program = { 0 };
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   program.num_texture_dma_kicks = 1;

   pvr_csb_pack (&program.texture_dma_address[0],
                 PDSINST_DOUT_FIELDS_DOUTD_SRC0,
                 value) {
      value.sbase = constants_addr;
   }

   pvr_csb_pack (&program.texture_dma_control[0],
                 PDSINST_DOUT_FIELDS_DOUTD_SRC1,
                 value) {
      value.dest = PVRX(PDSINST_DOUTD_DEST_COMMON_STORE);
      value.a0 = load_op->shareds_dest_offset;
      value.bsize = load_op->shareds_count;
   }

   pvr_pds_set_sizes_pixel_shader_sa_texture_data(&program, dev_info);

   staging_buffer_size = program.data_size * sizeof(*staging_buffer);

   staging_buffer = vk_alloc(&cmd_buffer->vk.pool->alloc,
                             staging_buffer_size,
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   pvr_pds_generate_pixel_shader_sa_texture_state_data(&program,
                                                       staging_buffer,
                                                       dev_info);

   result = pvr_cmd_buffer_upload_pds_data(cmd_buffer,
                                           staging_buffer,
                                           program.data_size,
                                           1,
                                           pds_upload_out);
   if (result != VK_SUCCESS) {
      vk_free(&cmd_buffer->vk.pool->alloc, staging_buffer);
      return result;
   }

   vk_free(&cmd_buffer->vk.pool->alloc, staging_buffer);

   return VK_SUCCESS;
}

/* FIXME: Should this function be specific to the HW background object, in
 * which case its name should be changed, or should it have the load op
 * structure passed in?
 */
static VkResult
pvr_load_op_data_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
                                   uint32_t idx,
                                   struct pvr_pds_upload *const pds_upload_out)
{
   pvr_dev_addr_t constants_addr;
   VkResult result;

   result =
      pvr_load_op_constants_create_and_upload(cmd_buffer, idx, &constants_addr);
   if (result != VK_SUCCESS)
      return result;

   return pvr_load_op_pds_data_create_and_upload(cmd_buffer,
                                                 idx,
                                                 constants_addr,
                                                 pds_upload_out);
}

static void pvr_pds_bgnd_pack_state(
   const struct pvr_load_op *load_op,
   const struct pvr_pds_upload *load_op_program,
   uint64_t pds_reg_values[static const ROGUE_NUM_CR_PDS_BGRND_WORDS])
{
   pvr_csb_pack (&pds_reg_values[0], CR_PDS_BGRND0_BASE, value) {
      value.shader_addr.addr = load_op->pds_frag_prog.data_offset;
      value.texunicode_addr.addr = load_op->pds_tex_state_prog.code_offset;
   }

   pvr_csb_pack (&pds_reg_values[1], CR_PDS_BGRND1_BASE, value) {
      value.texturedata_addr.addr = load_op_program->data_offset;
   }

   pvr_csb_pack (&pds_reg_values[2], CR_PDS_BGRND3_SIZEINFO, value) {
      value.usc_sharedsize =
         DIV_ROUND_UP(load_op->const_shareds_count,
                      PVRX(CR_PDS_BGRND3_SIZEINFO_USC_SHAREDSIZE_UNIT_SIZE));
      value.pds_texturestatesize = DIV_ROUND_UP(
         load_op_program->data_size,
         PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE));
      value.pds_tempsize =
         DIV_ROUND_UP(load_op->temps_count,
                      PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEMPSIZE_UNIT_SIZE));
   }
}

/**
 * \brief Calculates the stride in pixels based on the pitch in bytes and pixel
 * format.
 *
 * \param[in] pitch     Width pitch in bytes.
 * \param[in] vk_format Vulkan image format.
 * \return Stride in pixels.
 */
static inline uint32_t pvr_stride_from_pitch(uint32_t pitch, VkFormat vk_format)
{
   const unsigned int cpp = vk_format_get_blocksize(vk_format);

   assert(pitch % cpp == 0);

   return pitch / cpp;
}

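/* Worked example (not from the original source): a VK_FORMAT_B8G8R8A8_UNORM
 * surface has a 4-byte block size, so a pitch of 1024 bytes corresponds to a
 * stride of 1024 / 4 = 256 pixels.
 */
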
static void pvr_setup_pbe_state(
   struct pvr_device *const device,
   struct pvr_framebuffer *framebuffer,
   uint32_t mrt_index,
   const struct usc_mrt_resource *mrt_resource,
   const struct pvr_image_view *const iview,
   const VkRect2D *render_area,
   const bool down_scale,
   const uint32_t samples,
   uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS],
   uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS])
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const struct pvr_image *image = iview->image;
   uint32_t level_pitch = image->mip_levels[iview->vk.base_mip_level].pitch;

   struct pvr_pbe_surf_params surface_params;
   struct pvr_pbe_render_params render_params;
   bool with_packed_usc_channel;
   const uint8_t *swizzle;
   uint32_t position;

   /* down_scale should be true when performing a resolve, in which case there
    * should be more than one sample.
    */
   assert((down_scale && samples > 1U) || (!down_scale && samples == 1U));

   /* Setup surface parameters. */

   if (PVR_HAS_FEATURE(dev_info, usc_f16sop_u8)) {
      switch (iview->vk.format) {
      case VK_FORMAT_B8G8R8A8_UNORM:
         with_packed_usc_channel = true;
         break;
      case VK_FORMAT_D32_SFLOAT:
         with_packed_usc_channel = false;
         break;
      default:
         unreachable("Unsupported Vulkan image format");
      }
   } else {
      with_packed_usc_channel = false;
   }

   swizzle = pvr_get_format_swizzle(iview->vk.format);
   memcpy(surface_params.swizzle, swizzle, sizeof(surface_params.swizzle));

   pvr_pbe_get_src_format_and_gamma(iview->vk.format,
                                    PVR_PBE_GAMMA_NONE,
                                    with_packed_usc_channel,
                                    &surface_params.source_format,
                                    &surface_params.gamma);

   surface_params.is_normalized = vk_format_is_normalized(iview->vk.format);
   surface_params.pbe_packmode = pvr_get_pbe_packmode(iview->vk.format);
   surface_params.nr_components = vk_format_get_nr_components(iview->vk.format);

   /* FIXME: Should we have an inline function to return the address of a mip
    * level?
    */
   surface_params.addr.addr =
      image->vma->dev_addr.addr +
      image->mip_levels[iview->vk.base_mip_level].offset;

   surface_params.mem_layout = image->memlayout;
   surface_params.stride = pvr_stride_from_pitch(level_pitch, iview->vk.format);
   surface_params.depth = iview->vk.extent.depth;
   surface_params.width = iview->vk.extent.width;
   surface_params.height = iview->vk.extent.height;
   surface_params.z_only_render = false;
   surface_params.down_scale = down_scale;
   surface_params.msaa_mode = samples;

   /* Setup render parameters. */

   if (mrt_resource->type == USC_MRT_RESOURCE_TYPE_MEMORY) {
      position = mrt_resource->u.mem.offset_in_dwords;
   } else {
      assert(mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER);
      assert(mrt_resource->u.reg.offset == 0);

      position = mrt_resource->u.reg.out_reg;
   }

   assert(position <= 3 || PVR_HAS_FEATURE(dev_info, eight_output_registers));

   switch (position) {
   case 0:
   case 1:
      render_params.source_start = PVR_PBE_STARTPOS_BIT0;
      break;
   case 2:
   case 3:
      render_params.source_start = PVR_PBE_STARTPOS_BIT32;
      break;
   case 4:
   case 5:
      render_params.source_start = PVR_PBE_STARTPOS_BIT64;
      break;
   case 6:
   case 7:
      render_params.source_start = PVR_PBE_STARTPOS_BIT96;
      break;
   default:
      assert(!"Invalid output register");
      break;
   }

   render_params.min_x_clip = MAX2(0, render_area->offset.x);
   render_params.min_y_clip = MAX2(0, render_area->offset.y);
   render_params.max_x_clip =
      MIN2(framebuffer->width,
           render_area->offset.x + render_area->extent.width) -
      1;
   render_params.max_y_clip =
      MIN2(framebuffer->height,
           render_area->offset.y + render_area->extent.height) -
      1;

   render_params.slice = 0;
   render_params.mrt_index = mrt_index;

   pvr_pbe_pack_state(device,
                      &surface_params,
                      &render_params,
                      pbe_cs_words,
                      pbe_reg_words);
}

static struct pvr_render_target *
pvr_get_render_target(const struct pvr_render_pass *pass,
                      const struct pvr_framebuffer *framebuffer,
                      uint32_t idx)
{
   const struct pvr_renderpass_hwsetup_render *hw_render =
      &pass->hw_setup->renders[idx];
   uint32_t rt_idx = 0;

   switch (hw_render->sample_count) {
   case 1:
   case 2:
   case 4:
   case 8:
      rt_idx = util_logbase2(hw_render->sample_count);
      break;

   default:
      unreachable("Unsupported sample count");
      break;
   }

   return &framebuffer->render_targets[rt_idx];
}

static uint32_t
pvr_pass_get_pixel_output_width(const struct pvr_render_pass *pass,
                                uint32_t idx,
                                const struct pvr_device_info *dev_info)
{
   const struct pvr_renderpass_hwsetup_render *hw_render =
      &pass->hw_setup->renders[idx];
   /* Default value based on the maximum value found in all existing cores. The
    * maximum is used as this is being treated as a lower bound, making it a
    * "safer" choice than the minimum value found in all existing cores.
    */
   const uint32_t min_output_regs =
      PVR_GET_FEATURE_VALUE(dev_info, usc_min_output_registers_per_pix, 2U);
   const uint32_t width = MAX2(hw_render->output_regs_count, min_output_regs);

   return util_next_power_of_two(width);
}

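/* Worked example (not from the original source): with a render using 3 output
 * registers and usc_min_output_registers_per_pix = 2, width = MAX2(3, 2) = 3
 * and the returned pixel output width is util_next_power_of_two(3) = 4.
 */
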
static VkResult pvr_sub_cmd_gfx_job_init(struct pvr_device *device,
                                         struct pvr_cmd_buffer *cmd_buffer,
                                         struct pvr_sub_cmd *sub_cmd)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   struct pvr_render_pass_info *render_pass_info =
      &cmd_buffer->state.render_pass_info;
   const struct pvr_renderpass_hwsetup_render *hw_render =
      &render_pass_info->pass->hw_setup->renders[sub_cmd->gfx.hw_render_idx];
   struct pvr_render_job *job = &sub_cmd->gfx.job;
   struct pvr_pds_upload pds_pixel_event_program;

   uint32_t pbe_cs_words[PVR_MAX_COLOR_ATTACHMENTS]
                        [ROGUE_NUM_PBESTATE_STATE_WORDS];
   struct pvr_render_target *render_target;
   VkResult result;

   assert(hw_render->eot_surface_count < ARRAY_SIZE(pbe_cs_words));

   for (uint32_t i = 0; i < hw_render->eot_surface_count; i++) {
      const struct pvr_renderpass_hwsetup_eot_surface *surface =
         &hw_render->eot_surfaces[i];
      const struct pvr_image_view *iview =
         render_pass_info->attachments[surface->attachment_index];
      const struct usc_mrt_resource *mrt_resource =
         &hw_render->eot_setup.mrt_resources[surface->mrt_index];
      uint32_t samples = 1;

      if (surface->need_resolve)
         pvr_finishme("Set up job resolve information.");

      pvr_setup_pbe_state(device,
                          render_pass_info->framebuffer,
                          surface->mrt_index,
                          mrt_resource,
                          iview,
                          &render_pass_info->render_area,
                          surface->need_resolve,
                          samples,
                          pbe_cs_words[i],
                          job->pbe_reg_words[i]);
   }

   /* FIXME: The fragment program only supports a single surface at present. */
   assert(hw_render->eot_surface_count == 1);
   result = pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
      cmd_buffer,
      pbe_cs_words[0],
      &pds_pixel_event_program);
   if (result != VK_SUCCESS)
      return result;

   job->pds_pixel_event_data_offset = pds_pixel_event_program.data_offset;

   /* FIXME: Don't do this if there is a barrier load. */
   if (render_pass_info->enable_bg_tag) {
      const struct pvr_load_op *load_op = hw_render->client_data;
      struct pvr_pds_upload load_op_program;

      /* FIXME: Should we free the PDS pixel event data or let it be freed
       * when the pool gets emptied?
       */
      result = pvr_load_op_data_create_and_upload(cmd_buffer,
                                                  sub_cmd->gfx.hw_render_idx,
                                                  &load_op_program);
      if (result != VK_SUCCESS)
         return result;

      pvr_pds_bgnd_pack_state(load_op,
                              &load_op_program,
                              job->pds_bgnd_reg_values);
   }

   job->enable_bg_tag = render_pass_info->enable_bg_tag;
   job->process_empty_tiles = render_pass_info->process_empty_tiles;

   render_target = pvr_get_render_target(render_pass_info->pass,
                                         render_pass_info->framebuffer,
                                         sub_cmd->gfx.hw_render_idx);
   job->rt_dataset = render_target->rt_dataset;

   job->ctrl_stream_addr =
      pvr_csb_get_start_address(&sub_cmd->gfx.control_stream);

   /* FIXME: Need to set up the border color table at device creation
    * time. Set to invalid for the time being.
    */
   job->border_colour_table_addr = PVR_DEV_ADDR_INVALID;

   if (sub_cmd->gfx.depth_bias_bo)
      job->depth_bias_table_addr = sub_cmd->gfx.depth_bias_bo->vma->dev_addr;
   else
      job->depth_bias_table_addr = PVR_DEV_ADDR_INVALID;

   if (sub_cmd->gfx.scissor_bo)
      job->scissor_table_addr = sub_cmd->gfx.scissor_bo->vma->dev_addr;
   else
      job->scissor_table_addr = PVR_DEV_ADDR_INVALID;

   job->pixel_output_width =
      pvr_pass_get_pixel_output_width(render_pass_info->pass,
                                      sub_cmd->gfx.hw_render_idx,
                                      dev_info);

   if (hw_render->ds_surface_id != -1) {
      struct pvr_image_view *iview =
         render_pass_info->attachments[hw_render->ds_surface_id];
      const struct pvr_image *image = iview->image;

      if (vk_format_has_depth(image->vk.format)) {
         uint32_t level_pitch =
            image->mip_levels[iview->vk.base_mip_level].pitch;

         /* FIXME: Is this sufficient for depth buffers? */
         job->depth_addr = image->dev_addr;

         job->depth_stride =
            pvr_stride_from_pitch(level_pitch, iview->vk.format);
         job->depth_height = iview->vk.extent.height;
         job->depth_physical_width =
            u_minify(image->physical_extent.width, iview->vk.base_mip_level);
         job->depth_physical_height =
            u_minify(image->physical_extent.height, iview->vk.base_mip_level);
         job->depth_layer_size = image->layer_size;

         if (hw_render->ds_surface_id < render_pass_info->clear_value_count) {
            VkClearValue *clear_values =
               &render_pass_info->clear_values[hw_render->ds_surface_id];

            job->depth_clear_value = clear_values->depthStencil.depth;
         } else {
            job->depth_clear_value = 1.0f;
         }

         job->depth_vk_format = iview->vk.format;

         job->depth_memlayout = image->memlayout;
      } else {
         job->depth_addr = PVR_DEV_ADDR_INVALID;
         job->depth_stride = 0;
         job->depth_height = 0;
         job->depth_physical_width = 0;
         job->depth_physical_height = 0;
         job->depth_layer_size = 0;
         job->depth_clear_value = 1.0f;
         job->depth_vk_format = VK_FORMAT_UNDEFINED;
         job->depth_memlayout = PVR_MEMLAYOUT_LINEAR;
      }

      if (vk_format_has_stencil(image->vk.format)) {
         /* FIXME: Is this sufficient for stencil buffers? */
         job->stencil_addr = image->dev_addr;
      } else {
         job->stencil_addr = PVR_DEV_ADDR_INVALID;
      }

      job->samples = image->vk.samples;
   } else {
      pvr_finishme("Set up correct number of samples for render job");

      job->depth_addr = PVR_DEV_ADDR_INVALID;
      job->depth_stride = 0;
      job->depth_height = 0;
      job->depth_physical_width = 0;
      job->depth_physical_height = 0;
      job->depth_layer_size = 0;
      job->depth_clear_value = 1.0f;
      job->depth_vk_format = VK_FORMAT_UNDEFINED;
      job->depth_memlayout = PVR_MEMLAYOUT_LINEAR;

      job->stencil_addr = PVR_DEV_ADDR_INVALID;

      job->samples = 1;
   }

   if (sub_cmd->gfx.max_tiles_in_flight ==
       PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U)) {
      /* Use the default limit based on the partition store. */
      job->max_tiles_in_flight = 0U;
   } else {
      job->max_tiles_in_flight = sub_cmd->gfx.max_tiles_in_flight;
   }

   job->frag_uses_atomic_ops = sub_cmd->gfx.frag_uses_atomic_ops;
   job->disable_compute_overlap = false;
   job->max_shared_registers = cmd_buffer->state.max_shared_regs;
   job->run_frag = true;
   job->geometry_terminate = true;

   return VK_SUCCESS;
}

/* Number of shareds used in the Issue Data Fence (IDF)/Wait Data Fence (WDF)
 * kick.
 */
#define PVR_IDF_WDF_IN_REGISTER_CONST_COUNT 12U

static void pvr_sub_cmd_compute_job_init(struct pvr_device *device,
                                         struct pvr_cmd_buffer *cmd_buffer,
                                         struct pvr_sub_cmd *sub_cmd)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;

   if (sub_cmd->compute.uses_barrier) {
      sub_cmd->compute.submit_info.flags |=
         PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP;
   }

   pvr_csb_pack (&sub_cmd->compute.submit_info.regs.cdm_ctrl_stream_base,
                 CR_CDM_CTRL_STREAM_BASE,
                 value) {
      value.addr = pvr_csb_get_start_address(&sub_cmd->compute.control_stream);
   }

   /* FIXME: Need to set up the border color table at device creation
    * time. Set to invalid for the time being.
    */
   pvr_csb_pack (&sub_cmd->compute.submit_info.regs.tpu_border_colour_table,
                 CR_TPU_BORDER_COLOUR_TABLE_CDM,
                 value) {
      value.border_colour_table_address = PVR_DEV_ADDR_INVALID;
   }

   sub_cmd->compute.num_shared_regs = MAX2(PVR_IDF_WDF_IN_REGISTER_CONST_COUNT,
                                           cmd_buffer->state.max_shared_regs);

   cmd_buffer->state.max_shared_regs = 0U;

   if (PVR_HAS_FEATURE(dev_info, compute_morton_capable))
      sub_cmd->compute.submit_info.regs.cdm_item = 0;

   pvr_csb_pack (&sub_cmd->compute.submit_info.regs.tpu, CR_TPU, value) {
      value.tag_cem_4k_face_packing = true;
   }

   if (PVR_HAS_FEATURE(dev_info, cluster_grouping) &&
       PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
       rogue_get_num_phantoms(dev_info) > 1 &&
       sub_cmd->compute.uses_atomic_ops) {
      /* Each phantom has its own MCU, so atomicity can only be guaranteed
       * when all work items are processed on the same phantom. This means we
       * need to disable all USCs other than those of the first phantom, which
       * has 4 clusters.
       */
      pvr_csb_pack (&sub_cmd->compute.submit_info.regs.compute_cluster,
                    CR_COMPUTE_CLUSTER_MASK,
                    value) {
         value.mask = 0xFU;
      }
   } else {
      pvr_csb_pack (&sub_cmd->compute.submit_info.regs.compute_cluster,
                    CR_COMPUTE_CLUSTER_MASK,
                    value) {
         value.mask = 0U;
      }
   }

   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support) &&
       sub_cmd->compute.uses_atomic_ops) {
      sub_cmd->compute.submit_info.flags |= PVR_WINSYS_COMPUTE_FLAG_SINGLE_CORE;
   }
}

#define PIXEL_ALLOCATION_SIZE_MAX_IN_BLOCKS \
   (1024 / PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE))

static uint32_t pvr_compute_slot_size(const struct pvr_device_info *dev_info,
                                      uint32_t coeff_regs_count,
                                      bool use_barrier,
                                      const uint32_t local_size[static 3U])
{
   uint32_t max_workgroups_per_task = ROGUE_CDM_MAX_PACKED_WORKGROUPS_PER_TASK;
   uint32_t max_avail_coeff_regs =
      rogue_get_cdm_max_local_mem_size_regs(dev_info);
   uint32_t localstore_chunks_count =
      DIV_ROUND_UP(coeff_regs_count << 2,
                   PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE));
   uint32_t total_workitems = local_size[0U] * local_size[1U] * local_size[2U];

   /* Ensure that we cannot have more workgroups in a slot than the available
    * number of coefficients allow us to have.
    */
   if (coeff_regs_count > 0U) {
      /* If TA or 3D can overlap with CDM, or if the TA is running a geometry
       * shader then we need to consider this in calculating max allowed
       * work-groups.
       */
      if (PVR_HAS_QUIRK(dev_info, 52354) &&
          (PVR_HAS_FEATURE(dev_info, compute_overlap) ||
           PVR_HAS_FEATURE(dev_info, gs_rta_support))) {
         /* Solve for n (number of work-groups per task). All values are in
          * size of common store alloc blocks:
          *
          * n + (2n + 7) * (local_memory_size_max - 1) =
          *    coefficient_memory_pool_size - 7 * pixel_allocation_size_max
          *
          * n + 2n * (local_memory_size_max - 1) =
          *    coefficient_memory_pool_size - 7 * pixel_allocation_size_max
          *    - 7 * (local_memory_size_max - 1)
          *
          * n * (1 + 2 * (local_memory_size_max - 1)) =
          *    coefficient_memory_pool_size - 7 * pixel_allocation_size_max
          *    - 7 * (local_memory_size_max - 1)
          *
          * n = (coefficient_memory_pool_size -
          *      7 * pixel_allocation_size_max -
          *      7 * (local_memory_size_max - 1)) /
          *     (1 + 2 * (local_memory_size_max - 1))
          */
         uint32_t max_common_store_blocks =
            DIV_ROUND_UP(max_avail_coeff_regs * 4U,
                         PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE));

         /* coefficient_memory_pool_size - 7 * pixel_allocation_size_max */
         max_common_store_blocks -= ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES *
                                    PIXEL_ALLOCATION_SIZE_MAX_IN_BLOCKS;

         /* - 7 * (local_memory_size_max - 1) */
         max_common_store_blocks -= (ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES *
                                     (localstore_chunks_count - 1U));

         /* Divide by (1 + 2 * (local_memory_size_max - 1)) */
         max_workgroups_per_task = max_common_store_blocks /
                                   (1U + 2U * (localstore_chunks_count - 1U));

         max_workgroups_per_task =
            MIN2(max_workgroups_per_task,
                 ROGUE_CDM_MAX_PACKED_WORKGROUPS_PER_TASK);
      } else {
         max_workgroups_per_task =
            MIN2((max_avail_coeff_regs / coeff_regs_count),
                 max_workgroups_per_task);
      }
   }

   /* max_workgroups_per_task should at least be one. */
   assert(max_workgroups_per_task >= 1U);

   if (total_workitems >= ROGUE_MAX_INSTANCES_PER_TASK) {
      /* In this case, the work group size will have been padded up to the
       * next ROGUE_MAX_INSTANCES_PER_TASK so we just set max instances to be
       * ROGUE_MAX_INSTANCES_PER_TASK.
       */
      return ROGUE_MAX_INSTANCES_PER_TASK;
   }

   /* In this case, the number of instances in the slot must be clamped to
    * accommodate whole work-groups only.
    */
   if (PVR_HAS_QUIRK(dev_info, 49032) || use_barrier) {
      max_workgroups_per_task =
         MIN2(max_workgroups_per_task,
              ROGUE_MAX_INSTANCES_PER_TASK / total_workitems);
      return total_workitems * max_workgroups_per_task;
   }

   return MIN2(total_workitems * max_workgroups_per_task,
               ROGUE_MAX_INSTANCES_PER_TASK);
}

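/* Worked example (not from the original source, illustrative numbers only):
 * for a 4x2x1 work-group, total_workitems = 8. If the available coefficient
 * registers clamp max_workgroups_per_task to 3 and the 49032 quirk or a
 * barrier applies, the slot size becomes
 * 8 * MIN2(3, ROGUE_MAX_INSTANCES_PER_TASK / 8) instances, i.e. whole
 * work-groups only.
 */
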
static void
pvr_compute_generate_control_stream(struct pvr_csb *csb,
                                    const struct pvr_compute_kernel_info *info)
{
   /* Compute kernel 0. */
   pvr_csb_emit (csb, CDMCTRL_KERNEL0, kernel0) {
      kernel0.indirect_present = !!info->indirect_buffer_addr.addr;
      kernel0.global_offsets_present = info->global_offsets_present;
      kernel0.usc_common_size = info->usc_common_size;
      kernel0.usc_unified_size = info->usc_unified_size;
      kernel0.pds_temp_size = info->pds_temp_size;
      kernel0.pds_data_size = info->pds_data_size;

      if (info->usc_target_any)
         kernel0.usc_target = PVRX(CDMCTRL_USC_TARGET_ANY);
      else
         kernel0.usc_target = PVRX(CDMCTRL_USC_TARGET_ALL);

      kernel0.fence = info->is_fence;
   }

   /* Compute kernel 1. */
   pvr_csb_emit (csb, CDMCTRL_KERNEL1, kernel1) {
      kernel1.data_addr.addr = info->pds_data_offset;
      kernel1.sd_type = info->sd_type;

      if (!info->is_fence)
         kernel1.usc_common_shared = info->usc_common_shared;
   }

   /* Compute kernel 2. */
   pvr_csb_emit (csb, CDMCTRL_KERNEL2, kernel2) {
      kernel2.code_addr.addr = info->pds_code_offset;
   }

   if (info->indirect_buffer_addr.addr) {
      /* Compute kernel 6. */
      pvr_csb_emit (csb, CDMCTRL_KERNEL6, kernel6) {
         kernel6.indirect_addrmsb = info->indirect_buffer_addr;
      }

      /* Compute kernel 7. */
      pvr_csb_emit (csb, CDMCTRL_KERNEL7, kernel7) {
         kernel7.indirect_addrlsb = info->indirect_buffer_addr;
      }
   } else {
      /* Compute kernel 3. */
      pvr_csb_emit (csb, CDMCTRL_KERNEL3, kernel3) {
         assert(info->global_size[0U] > 0U);
         kernel3.workgroup_x = info->global_size[0U] - 1U;
      }

      /* Compute kernel 4. */
      pvr_csb_emit (csb, CDMCTRL_KERNEL4, kernel4) {
         assert(info->global_size[1U] > 0U);
         kernel4.workgroup_y = info->global_size[1U] - 1U;
      }

      /* Compute kernel 5. */
      pvr_csb_emit (csb, CDMCTRL_KERNEL5, kernel5) {
         assert(info->global_size[2U] > 0U);
         kernel5.workgroup_z = info->global_size[2U] - 1U;
      }
   }

   /* Compute kernel 8. */
   pvr_csb_emit (csb, CDMCTRL_KERNEL8, kernel8) {
      if (info->max_instances == ROGUE_MAX_INSTANCES_PER_TASK)
         kernel8.max_instances = 0U;
      else
         kernel8.max_instances = info->max_instances;

      assert(info->local_size[0U] > 0U);
      kernel8.workgroup_size_x = info->local_size[0U] - 1U;
      assert(info->local_size[1U] > 0U);
      kernel8.workgroup_size_y = info->local_size[1U] - 1U;
      assert(info->local_size[2U] > 0U);
      kernel8.workgroup_size_z = info->local_size[2U] - 1U;
   }
}

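/* Encoding example (not from the original source): a dispatch with
 * global_size = { 4, 2, 1 } and local_size = { 8, 8, 1 } is emitted as
 * workgroup_x/y/z = 3/1/0 and workgroup_size_x/y/z = 7/7/0, since each
 * hardware field holds its count minus one.
 */
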
static void pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer,
                                       bool deallocate_shareds)
{
   const struct pvr_pds_upload *program =
      &cmd_buffer->device->pds_compute_fence_program;
   const struct pvr_device_info *dev_info =
      &cmd_buffer->device->pdevice->dev_info;
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   struct pvr_csb *csb = &state->current_sub_cmd->compute.control_stream;

   struct pvr_compute_kernel_info info = {
      .indirect_buffer_addr.addr = 0ULL,
      .global_offsets_present = false,
      .usc_common_size = 0U,
      .usc_unified_size = 0U,
      .pds_temp_size = 0U,
      .pds_data_size =
         DIV_ROUND_UP(program->data_size << 2,
                      PVRX(CDMCTRL_KERNEL0_PDS_DATA_SIZE_UNIT_SIZE)),
      .usc_target_any = true,
      .is_fence = true,
      .pds_data_offset = program->data_offset,
      .sd_type = PVRX(CDMCTRL_SD_TYPE_PDS),
      .usc_common_shared = deallocate_shareds,
      .pds_code_offset = program->code_offset,
      .global_size = { 1U, 1U, 1U },
      .local_size = { 1U, 1U, 1U },
   };

   /* We don't need to pad work-group size for this case. */
   /* Here we calculate the slot size. This can depend on the use of barriers,
    * local memory, BRN's or other factors.
    */
   info.max_instances =
      pvr_compute_slot_size(dev_info, 0U, false, info.local_size);

   pvr_compute_generate_control_stream(csb, &info);
}

static VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
{
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   struct pvr_sub_cmd *sub_cmd = state->current_sub_cmd;
   struct pvr_device *device = cmd_buffer->device;
   VkResult result;

   /* FIXME: Is this NULL check required because this function is called from
    * pvr_resolve_unemitted_resolve_attachments()? See comment about this
    * function being called twice in a row in pvr_CmdEndRenderPass().
    */
   if (!sub_cmd)
      return VK_SUCCESS;

   switch (sub_cmd->type) {
   case PVR_SUB_CMD_TYPE_GRAPHICS:
      if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
         result = pvr_csb_emit_return(&sub_cmd->gfx.control_stream);
         if (result != VK_SUCCESS) {
            state->status = result;
            return result;
         }

         break;
      }

      /* TODO: Check if the sub_cmd can be skipped based on
       * sub_cmd->gfx.empty_cmd flag.
       */
      result = pvr_cmd_buffer_upload_tables(device, cmd_buffer);
      if (result != VK_SUCCESS) {
         state->status = result;
         return result;
      }

      result = pvr_cmd_buffer_emit_ppp_state(cmd_buffer);
      if (result != VK_SUCCESS) {
         state->status = result;
         return result;
      }

      result = pvr_csb_emit_terminate(&sub_cmd->gfx.control_stream);
      if (result != VK_SUCCESS) {
         state->status = result;
         return result;
      }

      result = pvr_sub_cmd_gfx_job_init(device, cmd_buffer, sub_cmd);
      if (result != VK_SUCCESS) {
         state->status = result;
         return result;
      }

      break;

   case PVR_SUB_CMD_TYPE_COMPUTE:
      pvr_compute_generate_fence(cmd_buffer, true);

      result = pvr_csb_emit_terminate(&sub_cmd->compute.control_stream);
      if (result != VK_SUCCESS) {
         state->status = result;
         return result;
      }

      pvr_sub_cmd_compute_job_init(device, cmd_buffer, sub_cmd);
      break;

   case PVR_SUB_CMD_TYPE_TRANSFER:
      break;

   default:
      pvr_finishme("Unsupported sub-command type %d", sub_cmd->type);
      break;
   }

   state->current_sub_cmd = NULL;

   return VK_SUCCESS;
}

static void pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer_state *state,
                                           bool start_geom)
{
   if (start_geom) {
      /*
       * Initial geometry phase state.
       * It's the driver's responsibility to ensure that the state of the
       * hardware is correctly initialized at the start of every geometry
       * phase. This is required to prevent stale state from a previous
       * geometry phase erroneously affecting the next geometry phase. The
       * following fields in PPP State Header, and their corresponding state
       * words, must be supplied in the first PPP State Update of a geometry
       * phase that contains any geometry (draw calls). Any field not listed
       * below is safe to ignore.
       *
       *   TA_PRES_STREAM_OUT_SIZE
       *   TA_PRES_PPPCTRL
       *   TA_PRES_VARYING_WORD2
       *   TA_PRES_VARYING_WORD1
       *   TA_PRES_VARYING_WORD0
       *   TA_PRES_OUTSELECTS
       *   TA_PRES_WCLAMP
       *   TA_PRES_VIEWPORT
       *   TA_PRES_REGION_CLIP
       *   TA_PRES_PDSSTATEPTR0
       *   TA_PRES_ISPCTL_FB
       *   TA_PRES_ISPCTL
       *
       * If a geometry phase does not contain any geometry, this restriction
       * can be ignored. If the first draw call in a geometry phase will only
       * update the depth or stencil buffers i.e. ISP_TAGWRITEDISABLE is set
       * in the ISP State Control Word, the PDS State Pointers
       * (TA_PRES_PDSSTATEPTR*) in the first PPP State Update do not need to
       * be supplied, since they will never reach the PDS in the fragment
       * phase.
       */

      state->emit_state_bits = 0;

      state->emit_state.stream_out = true;
      state->emit_state.ppp_control = true;
      state->emit_state.varying_word2 = true;
      state->emit_state.varying_word1 = true;
      state->emit_state.varying_word0 = true;
      state->emit_state.output_selects = true;
      state->emit_state.wclamp = true;
      state->emit_state.viewport = true;
      state->emit_state.region_clip = true;
      state->emit_state.pds_fragment_stateptr0 = true;
      state->emit_state.isp_fb = true;
      state->emit_state.isp = true;
   } else {
      state->emit_state.ppp_control = true;
      state->emit_state.varying_word1 = true;
      state->emit_state.varying_word0 = true;
      state->emit_state.output_selects = true;
      state->emit_state.viewport = true;
      state->emit_state.region_clip = true;
      state->emit_state.pds_fragment_stateptr0 = true;
      state->emit_state.isp_fb = true;
      state->emit_state.isp = true;
   }

   memset(&state->ppp_state, 0U, sizeof(state->ppp_state));

   state->dirty.vertex_bindings = true;
   state->dirty.gfx_pipeline_binding = true;
   state->dirty.viewport = true;
}

static VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
                                             enum pvr_sub_cmd_type type)
{
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   struct pvr_device *device = cmd_buffer->device;
   struct pvr_sub_cmd *sub_cmd;
   VkResult result;

   /* Check the current status of the buffer. */
   if (state->status != VK_SUCCESS)
      return state->status;

   pvr_cmd_buffer_update_barriers(cmd_buffer, type);

   if (state->current_sub_cmd) {
      if (state->current_sub_cmd->type == type) {
         /* Continue adding to the current sub command. */
         return VK_SUCCESS;
      }

      /* End the current sub command. */
      result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
      if (result != VK_SUCCESS)
         return result;
   }

   sub_cmd = vk_zalloc(&cmd_buffer->vk.pool->alloc,
                       sizeof(*sub_cmd),
                       8,
                       VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!sub_cmd) {
      state->status = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
      return state->status;
   }

   sub_cmd->type = type;

   switch (type) {
   case PVR_SUB_CMD_TYPE_GRAPHICS:
      sub_cmd->gfx.depth_usage = PVR_DEPTH_STENCIL_USAGE_UNDEFINED;
      sub_cmd->gfx.stencil_usage = PVR_DEPTH_STENCIL_USAGE_UNDEFINED;
      sub_cmd->gfx.modifies_depth = false;
      sub_cmd->gfx.modifies_stencil = false;
      sub_cmd->gfx.max_tiles_in_flight =
         PVR_GET_FEATURE_VALUE(&device->pdevice->dev_info,
                               isp_max_tiles_in_flight,
                               1U);
      sub_cmd->gfx.hw_render_idx = state->render_pass_info.current_hw_subpass;
      sub_cmd->gfx.framebuffer = state->render_pass_info.framebuffer;
      sub_cmd->gfx.empty_cmd = true;

      pvr_reset_graphics_dirty_state(state, true);
      pvr_csb_init(device,
                   PVR_CMD_STREAM_TYPE_GRAPHICS,
                   &sub_cmd->gfx.control_stream);
      break;

   case PVR_SUB_CMD_TYPE_COMPUTE:
      pvr_csb_init(device,
                   PVR_CMD_STREAM_TYPE_COMPUTE,
                   &sub_cmd->compute.control_stream);
      break;

   case PVR_SUB_CMD_TYPE_TRANSFER:
      list_inithead(&sub_cmd->transfer.transfer_cmds);
      break;

   default:
      pvr_finishme("Unsupported sub-command type %d", type);
      break;
   }

   list_addtail(&sub_cmd->link, &cmd_buffer->sub_cmds);
   state->current_sub_cmd = sub_cmd;

   return VK_SUCCESS;
}

VkResult pvr_cmd_buffer_alloc_mem(struct pvr_cmd_buffer *cmd_buffer,
                                  struct pvr_winsys_heap *heap,
                                  uint64_t size,
                                  uint32_t flags,
                                  struct pvr_bo **const pvr_bo_out)
{
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&cmd_buffer->device->pdevice->dev_info);
   struct pvr_bo *pvr_bo;
   VkResult result;

   result = pvr_bo_alloc(cmd_buffer->device,
                         heap,
                         size,
                         cache_line_size,
                         flags,
                         &pvr_bo);
   if (result != VK_SUCCESS) {
      cmd_buffer->state.status = result;
      return result;
   }

   list_add(&pvr_bo->link, &cmd_buffer->bo_list);

   *pvr_bo_out = pvr_bo;

   return VK_SUCCESS;
}

VkResult pvr_ResetCommandBuffer(VkCommandBuffer commandBuffer,
                                VkCommandBufferResetFlags flags)
{
   assert(!"Unimplemented");
   return VK_SUCCESS;
}

static void pvr_cmd_bind_compute_pipeline(
   const struct pvr_compute_pipeline *const compute_pipeline,
   struct pvr_cmd_buffer *const cmd_buffer)
{
   cmd_buffer->state.compute_pipeline = compute_pipeline;
   cmd_buffer->state.dirty.compute_pipeline_binding = true;
}

static void pvr_cmd_bind_graphics_pipeline(
   const struct pvr_graphics_pipeline *const gfx_pipeline,
   struct pvr_cmd_buffer *const cmd_buffer)
{
   struct pvr_dynamic_state *const dest_state =
      &cmd_buffer->state.dynamic.common;
   const struct pvr_dynamic_state *const src_state =
      &gfx_pipeline->dynamic_state;
   struct pvr_cmd_buffer_state *const cmd_buffer_state = &cmd_buffer->state;
   const uint32_t state_mask = src_state->mask;

   cmd_buffer_state->gfx_pipeline = gfx_pipeline;
   cmd_buffer_state->dirty.gfx_pipeline_binding = true;

   /* FIXME: Handle PVR_DYNAMIC_STATE_BIT_VIEWPORT. */
   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_VIEWPORT)) {
      assert(!"Unimplemented");
   }

   /* FIXME: Handle PVR_DYNAMIC_STATE_BIT_SCISSOR. */
   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_SCISSOR)) {
      assert(!"Unimplemented");
   }

   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_LINE_WIDTH)) {
      dest_state->line_width = src_state->line_width;

      cmd_buffer_state->dirty.line_width = true;
   }

   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_DEPTH_BIAS)) {
      memcpy(&dest_state->depth_bias,
             &src_state->depth_bias,
             sizeof(src_state->depth_bias));

      cmd_buffer_state->dirty.depth_bias = true;
   }

   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_BLEND_CONSTANTS)) {
      STATIC_ASSERT(
         __same_type(dest_state->blend_constants, src_state->blend_constants));

      typed_memcpy(dest_state->blend_constants,
                   src_state->blend_constants,
                   ARRAY_SIZE(dest_state->blend_constants));

      cmd_buffer_state->dirty.blend_constants = true;
   }

   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_STENCIL_COMPARE_MASK)) {
      dest_state->compare_mask.front = src_state->compare_mask.front;
      dest_state->compare_mask.back = src_state->compare_mask.back;

      cmd_buffer_state->dirty.compare_mask = true;
   }

   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_STENCIL_WRITE_MASK)) {
      dest_state->write_mask.front = src_state->write_mask.front;
      dest_state->write_mask.back = src_state->write_mask.back;

      cmd_buffer_state->dirty.write_mask = true;
   }

   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_STENCIL_REFERENCE)) {
      dest_state->reference.front = src_state->reference.front;
      dest_state->reference.back = src_state->reference.back;

      cmd_buffer_state->dirty.reference = true;
   }
}

void pvr_CmdBindPipeline(VkCommandBuffer commandBuffer,
                         VkPipelineBindPoint pipelineBindPoint,
                         VkPipeline _pipeline)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_pipeline, pipeline, _pipeline);

   switch (pipelineBindPoint) {
   case VK_PIPELINE_BIND_POINT_COMPUTE:
      pvr_cmd_bind_compute_pipeline(to_pvr_compute_pipeline(pipeline),
                                    cmd_buffer);
      break;

   case VK_PIPELINE_BIND_POINT_GRAPHICS:
      pvr_cmd_bind_graphics_pipeline(to_pvr_graphics_pipeline(pipeline),
                                     cmd_buffer);
      break;

   default:
      unreachable("Invalid bind point.");
      break;
   }
}

static void check_viewport_quirk_70165(const struct pvr_device *device,
                                       const VkViewport *pViewport)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   float min_vertex_x, max_vertex_x, min_vertex_y, max_vertex_y;
   float min_screen_space_value, max_screen_space_value;
   float sign_to_unsigned_offset, fixed_point_max;
   float guardband_width, guardband_height;

   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
      /* Max representable value in 13.4 fixed point format.
       * Round-down to avoid precision issues.
       * Calculated as (2 ** 13) - 2 * (2 ** -4)
       */
      fixed_point_max = 8192.0f - 2.0f / 16.0f;

      if (PVR_HAS_FEATURE(dev_info, screen_size8K)) {
         if (pViewport->width <= 4096 && pViewport->height <= 4096) {
            guardband_width = pViewport->width / 4.0f;
            guardband_height = pViewport->height / 4.0f;

            /* 2k of the range is negative */
            sign_to_unsigned_offset = 2048.0f;
         } else {
            guardband_width = 0.0f;
            guardband_height = 0.0f;

            /* For > 4k renders, the entire range is positive */
            sign_to_unsigned_offset = 0.0f;
         }
      } else {
         guardband_width = pViewport->width / 4.0f;
         guardband_height = pViewport->height / 4.0f;

         /* 2k of the range is negative */
         sign_to_unsigned_offset = 2048.0f;
      }
   } else {
      /* Max representable value in 16.8 fixed point format
       * Calculated as (2 ** 16) - (2 ** -8)
       */
      fixed_point_max = 65535.99609375f;
      guardband_width = pViewport->width / 4.0f;
      guardband_height = pViewport->height / 4.0f;

      /* 4k/20k of the range is negative */
      sign_to_unsigned_offset = (float)PVR_MAX_NEG_OFFSCREEN_OFFSET;
   }

   min_screen_space_value = -sign_to_unsigned_offset;
   max_screen_space_value = fixed_point_max - sign_to_unsigned_offset;

   min_vertex_x = pViewport->x - guardband_width;
   max_vertex_x = pViewport->x + pViewport->width + guardband_width;
   min_vertex_y = pViewport->y - guardband_height;
   max_vertex_y = pViewport->y + pViewport->height + guardband_height;

   if (min_vertex_x < min_screen_space_value ||
       max_vertex_x > max_screen_space_value ||
       min_vertex_y < min_screen_space_value ||
       max_vertex_y > max_screen_space_value) {
      mesa_logw("Viewport is affected by BRN70165, geometry outside "
                "the viewport could be corrupted");
   }
}

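/* Worked example (not from the original source): without
 * simple_internal_parameter_format, a 1920x1080 viewport at (0, 0) gets a
 * 480x270 guardband, so vertices are checked against the range
 * [-480, 2400] x [-270, 1350]; with fixed_point_max = 65535.99609375
 * (2^16 - 2^-8) this fits comfortably and no warning is logged.
 */
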
void pvr_CmdSetViewport(VkCommandBuffer commandBuffer,
                        uint32_t firstViewport,
                        uint32_t viewportCount,
                        const VkViewport *pViewports)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   const uint32_t total_count = firstViewport + viewportCount;
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;

   assert(firstViewport < PVR_MAX_VIEWPORTS && viewportCount > 0);
   assert(total_count >= 1 && total_count <= PVR_MAX_VIEWPORTS);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   if (PVR_HAS_QUIRK(&cmd_buffer->device->pdevice->dev_info, 70165)) {
      for (uint32_t viewport = 0; viewport < viewportCount; viewport++) {
         check_viewport_quirk_70165(cmd_buffer->device, &pViewports[viewport]);
      }
   }

   if (state->dynamic.common.viewport.count < total_count)
      state->dynamic.common.viewport.count = total_count;

   memcpy(&state->dynamic.common.viewport.viewports[firstViewport],
          pViewports,
          viewportCount * sizeof(*pViewports));

   state->dirty.viewport = true;
}

void pvr_CmdSetScissor(VkCommandBuffer commandBuffer,
                       uint32_t firstScissor,
                       uint32_t scissorCount,
                       const VkRect2D *pScissors)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   const uint32_t total_count = firstScissor + scissorCount;
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;

   assert(firstScissor < PVR_MAX_VIEWPORTS && scissorCount > 0);
   assert(total_count >= 1 && total_count <= PVR_MAX_VIEWPORTS);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   if (state->dynamic.common.scissor.count < total_count)
      state->dynamic.common.scissor.count = total_count;

   memcpy(&state->dynamic.common.scissor.scissors[firstScissor],
          pScissors,
          scissorCount * sizeof(*pScissors));

   state->dirty.scissor = true;
}

void pvr_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;

   state->dynamic.common.line_width = lineWidth;
   state->dirty.line_width = true;
}

void pvr_CmdSetDepthBias(VkCommandBuffer commandBuffer,
                         float depthBiasConstantFactor,
                         float depthBiasClamp,
                         float depthBiasSlopeFactor)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;

   state->dynamic.common.depth_bias.constant_factor = depthBiasConstantFactor;
   state->dynamic.common.depth_bias.clamp = depthBiasClamp;
   state->dynamic.common.depth_bias.slope_factor = depthBiasSlopeFactor;
   state->dirty.depth_bias = true;
}

void pvr_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
                              const float blendConstants[4])
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;

   STATIC_ASSERT(ARRAY_SIZE(state->dynamic.common.blend_constants) == 4);
   memcpy(state->dynamic.common.blend_constants,
          blendConstants,
          sizeof(state->dynamic.common.blend_constants));

   state->dirty.blend_constants = true;
}

void pvr_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
                           float minDepthBounds,
                           float maxDepthBounds)
{
   mesa_logd("No support for depth bounds testing.");
}

void pvr_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
                                  VkStencilFaceFlags faceMask,
                                  uint32_t compareMask)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;

   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
      state->dynamic.common.compare_mask.front = compareMask;

   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
      state->dynamic.common.compare_mask.back = compareMask;

   state->dirty.compare_mask = true;
}

void pvr_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
                                VkStencilFaceFlags faceMask,
                                uint32_t writeMask)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;

   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
      state->dynamic.common.write_mask.front = writeMask;

   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
      state->dynamic.common.write_mask.back = writeMask;

   state->dirty.write_mask = true;
}

void pvr_CmdSetStencilReference(VkCommandBuffer commandBuffer,
                                VkStencilFaceFlags faceMask,
                                uint32_t reference)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;

   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
      state->dynamic.common.reference.front = reference;

   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
      state->dynamic.common.reference.back = reference;

   state->dirty.reference = true;
}

void pvr_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
                               VkPipelineBindPoint pipelineBindPoint,
                               VkPipelineLayout _layout,
                               uint32_t firstSet,
                               uint32_t descriptorSetCount,
                               const VkDescriptorSet *pDescriptorSets,
                               uint32_t dynamicOffsetCount,
                               const uint32_t *pDynamicOffsets)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_descriptor_state *descriptor_state;

   assert(firstSet + descriptorSetCount <= PVR_MAX_DESCRIPTOR_SETS);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   switch (pipelineBindPoint) {
   case VK_PIPELINE_BIND_POINT_GRAPHICS:
   case VK_PIPELINE_BIND_POINT_COMPUTE:
      break;

   default:
      unreachable("Unsupported bind point.");
      break;
   }

   if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
      descriptor_state = &cmd_buffer->state.gfx_desc_state;
      cmd_buffer->state.dirty.gfx_desc_dirty = true;
   } else {
      descriptor_state = &cmd_buffer->state.compute_desc_state;
      cmd_buffer->state.dirty.compute_desc_dirty = true;
   }

   for (uint32_t i = 0; i < descriptorSetCount; i++) {
      PVR_FROM_HANDLE(pvr_descriptor_set, set, pDescriptorSets[i]);
      uint32_t index = firstSet + i;

      if (descriptor_state->descriptor_sets[index] != set) {
         descriptor_state->descriptor_sets[index] = set;
         descriptor_state->valid_mask |= (1u << index);
      }
   }
}

void pvr_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
1985
uint32_t firstBinding,
1986
uint32_t bindingCount,
1987
const VkBuffer *pBuffers,
1988
const VkDeviceSize *pOffsets)
1990
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
1991
struct pvr_vertex_binding *const vb = cmd_buffer->state.vertex_bindings;
1993
/* We have to defer setting up vertex buffer since we need the buffer
1994
* stride from the pipeline.
1997
assert(firstBinding < PVR_MAX_VERTEX_INPUT_BINDINGS &&
1998
bindingCount <= PVR_MAX_VERTEX_INPUT_BINDINGS);
2000
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
2002
for (uint32_t i = 0; i < bindingCount; i++) {
2003
vb[firstBinding + i].buffer = pvr_buffer_from_handle(pBuffers[i]);
2004
vb[firstBinding + i].offset = pOffsets[i];
2007
cmd_buffer->state.dirty.vertex_bindings = true;

void pvr_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
                            VkBuffer buffer,
                            VkDeviceSize offset,
                            VkIndexType indexType)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_buffer, index_buffer, buffer);
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;

   assert(offset < index_buffer->size);
   assert(indexType == VK_INDEX_TYPE_UINT32 ||
          indexType == VK_INDEX_TYPE_UINT16);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   state->index_buffer_binding.buffer = index_buffer;
   state->index_buffer_binding.offset = offset;
   state->index_buffer_binding.type = indexType;
   state->dirty.index_buffer_binding = true;
}

void pvr_CmdPushConstants(VkCommandBuffer commandBuffer,
                          VkPipelineLayout layout,
                          VkShaderStageFlags stageFlags,
                          uint32_t offset,
                          uint32_t size,
                          const void *pValues)
{
#if defined(DEBUG)
   const uint64_t ending = (uint64_t)offset + (uint64_t)size;
#endif

   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   pvr_assert(ending <= PVR_MAX_PUSH_CONSTANTS_SIZE);

   memcpy(&state->push_constants.data[offset], pValues, size);

   state->push_constants.dirty_stages |= stageFlags;
}

static VkResult
pvr_cmd_buffer_setup_attachments(struct pvr_cmd_buffer *cmd_buffer,
                                 const struct pvr_render_pass *pass,
                                 const struct pvr_framebuffer *framebuffer)
{
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   struct pvr_render_pass_info *info = &state->render_pass_info;

   assert(pass->attachment_count == framebuffer->attachment_count);

   /* Free any previously allocated attachments. */
   vk_free(&cmd_buffer->vk.pool->alloc, state->render_pass_info.attachments);

   if (pass->attachment_count == 0) {
      info->attachments = NULL;
      return VK_SUCCESS;
   }

   info->attachments =
      vk_zalloc(&cmd_buffer->vk.pool->alloc,
                pass->attachment_count * sizeof(*info->attachments),
                8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!info->attachments) {
      /* Propagate VK_ERROR_OUT_OF_HOST_MEMORY to vkEndCommandBuffer */
      state->status = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
      return state->status;
   }

   for (uint32_t i = 0; i < pass->attachment_count; i++)
      info->attachments[i] = framebuffer->attachments[i];

   return VK_SUCCESS;
}

static VkResult pvr_init_render_targets(struct pvr_device *device,
                                        struct pvr_render_pass *pass,
                                        struct pvr_framebuffer *framebuffer)
{
   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
      struct pvr_render_target *render_target =
         pvr_get_render_target(pass, framebuffer, i);

      pthread_mutex_lock(&render_target->mutex);

      if (!render_target->valid) {
         const struct pvr_renderpass_hwsetup_render *hw_render =
            &pass->hw_setup->renders[i];
         VkResult result;

         result = pvr_render_target_dataset_create(device,
                                                   framebuffer->width,
                                                   framebuffer->height,
                                                   hw_render->sample_count,
                                                   framebuffer->layers,
                                                   &render_target->rt_dataset);
         if (result != VK_SUCCESS) {
            pthread_mutex_unlock(&render_target->mutex);
            return result;
         }

         render_target->valid = true;
      }

      pthread_mutex_unlock(&render_target->mutex);
   }

   return VK_SUCCESS;
}

static const struct pvr_renderpass_hwsetup_subpass *
pvr_get_hw_subpass(const struct pvr_render_pass *pass, const uint32_t subpass)
{
   const struct pvr_renderpass_hw_map *map =
      &pass->hw_setup->subpass_map[subpass];

   return &pass->hw_setup->renders[map->render].subpasses[map->subpass];
}

static void pvr_perform_start_of_render_attachment_clear(
   struct pvr_cmd_buffer *cmd_buffer,
   const struct pvr_framebuffer *framebuffer,
   const uint32_t index,
   bool is_depth_stencil,
   uint32_t *index_list_clear_mask)
{
   struct pvr_render_pass_info *info = &cmd_buffer->state.render_pass_info;
   const struct pvr_render_pass *pass = info->pass;
   const struct pvr_renderpass_hwsetup_render *hw_render;
   const struct pvr_renderpass_hwsetup *hw_setup;
   struct pvr_image_view *iview;
   uint32_t view_idx;
   uint32_t height;
   uint32_t width;

   hw_setup = pass->hw_setup;
   hw_render =
      &hw_setup->renders[hw_setup->subpass_map[info->subpass_idx].render];

   if (is_depth_stencil) {
      bool stencil_clear;
      bool depth_clear;
      bool is_stencil;
      bool is_depth;

      assert(hw_render->ds_surface_id != -1);

      view_idx = hw_render->ds_surface_id;

      is_depth = vk_format_has_depth(pass->attachments[view_idx].vk_format);
      is_stencil = vk_format_has_stencil(pass->attachments[view_idx].vk_format);
      depth_clear = hw_render->depth_init == RENDERPASS_SURFACE_INITOP_CLEAR;
      stencil_clear = hw_render->stencil_init ==
                      RENDERPASS_SURFACE_INITOP_CLEAR;

      /* Attempt to clear the ds attachment. Do not erroneously discard an
       * attachment that has no depth clear but has a stencil attachment.
       */
      /* if not (a ∧ c) ∨ (b ∧ d) */
      if (!((is_depth && depth_clear) || (is_stencil && stencil_clear)))
         return;
   } else if (hw_render->color_init[index].op !=
              RENDERPASS_SURFACE_INITOP_CLEAR) {
      return;
   } else {
      view_idx = hw_render->color_init[index].driver_id;
   }

   iview = info->attachments[view_idx];
   width = iview->vk.extent.width;
   height = iview->vk.extent.height;

   /* FIXME: It would be nice if this function and pvr_sub_cmd_gfx_job_init()
    * were doing the same check (even if it's just an assert) to determine if a
    * clear is needed.
    */
   /* If this is single-layer fullscreen, we already do the clears in
    * pvr_sub_cmd_gfx_job_init().
    */
   if (info->render_area.offset.x == 0 && info->render_area.offset.y == 0 &&
       info->render_area.extent.width == width &&
       info->render_area.extent.height == height && framebuffer->layers == 1) {
      return;
   }

   pvr_finishme("Unimplemented path!");
}

static void
pvr_perform_start_of_render_clears(struct pvr_cmd_buffer *cmd_buffer)
{
   struct pvr_render_pass_info *info = &cmd_buffer->state.render_pass_info;
   const struct pvr_framebuffer *framebuffer = info->framebuffer;
   const struct pvr_render_pass *pass = info->pass;
   const struct pvr_renderpass_hwsetup *hw_setup = pass->hw_setup;
   const struct pvr_renderpass_hwsetup_render *hw_render;

   /* Mask of attachment clears using index lists instead of background object
    * to clear.
    */
   uint32_t index_list_clear_mask = 0;

   hw_render =
      &hw_setup->renders[hw_setup->subpass_map[info->subpass_idx].render];
   if (!hw_render) {
      info->process_empty_tiles = false;
      info->enable_bg_tag = false;
      return;
   }

   for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
      pvr_perform_start_of_render_attachment_clear(cmd_buffer,
                                                   framebuffer,
                                                   i,
                                                   false,
                                                   &index_list_clear_mask);
   }

   info->enable_bg_tag = !!hw_render->color_init_count;

   /* If we're not using index list for all clears/loads then we need to run
    * the background object on empty tiles.
    */
   if (hw_render->color_init_count &&
       index_list_clear_mask != ((1u << hw_render->color_init_count) - 1u)) {
      info->process_empty_tiles = true;
   } else {
      info->process_empty_tiles = false;
   }

   if (hw_render->ds_surface_id != -1) {
      uint32_t ds_index_list = 0;

      pvr_perform_start_of_render_attachment_clear(cmd_buffer,
                                                   framebuffer,
                                                   0,
                                                   true,
                                                   &ds_index_list);
   }

   if (index_list_clear_mask)
      pvr_finishme("Add support for generating loadops shaders!");
}

static void pvr_stash_depth_format(struct pvr_cmd_buffer_state *state)
{
   const struct pvr_render_pass *pass = state->render_pass_info.pass;
   const struct pvr_renderpass_hwsetup_render *hw_render =
      &pass->hw_setup->renders[state->current_sub_cmd->gfx.hw_render_idx];

   if (hw_render->ds_surface_id != -1) {
      struct pvr_image_view **iviews = state->render_pass_info.attachments;

      state->depth_format = iviews[hw_render->ds_surface_id]->vk.format;
   }
}

static bool pvr_loadops_contain_clear(struct pvr_renderpass_hwsetup *hw_setup)
{
   for (uint32_t i = 0; i < hw_setup->render_count; i++) {
      struct pvr_renderpass_hwsetup_render *hw_render = &hw_setup->renders[i];
      uint32_t render_targets_count =
         hw_render->init_setup.render_targets_count;

      for (uint32_t j = 0;
           j < (hw_render->color_init_count * render_targets_count);
           j += render_targets_count) {
         for (uint32_t k = 0; k < hw_render->init_setup.render_targets_count;
              k++) {
            if (hw_render->color_init[j + k].op ==
                RENDERPASS_SURFACE_INITOP_CLEAR) {
               return true;
            }
         }
      }

      if (hw_render->depth_init == RENDERPASS_SURFACE_INITOP_CLEAR ||
          hw_render->stencil_init == RENDERPASS_SURFACE_INITOP_CLEAR) {
         return true;
      }
   }

   return false;
}
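
/* Illustrative walk of the nested loops above: with color_init_count == 2
 * and render_targets_count == 3, j iterates over {0, 3} and k over
 * {0, 1, 2}, i.e. color_init[0..2] belong to the first init and
 * color_init[3..5] to the second.
 */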

static VkResult
pvr_cmd_buffer_set_clear_values(struct pvr_cmd_buffer *cmd_buffer,
                                const VkRenderPassBeginInfo *pRenderPassBegin)
{
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;

   /* Free any previously allocated clear values. */
   vk_free(&cmd_buffer->vk.pool->alloc, state->render_pass_info.clear_values);

   if (pRenderPassBegin->clearValueCount) {
      const size_t size = pRenderPassBegin->clearValueCount *
                          sizeof(*state->render_pass_info.clear_values);

      state->render_pass_info.clear_values =
         vk_zalloc(&cmd_buffer->vk.pool->alloc,
                   size,
                   8,
                   VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!state->render_pass_info.clear_values) {
         state->status = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
         return state->status;
      }

      memcpy(state->render_pass_info.clear_values,
             pRenderPassBegin->pClearValues,
             size);
   } else {
      state->render_pass_info.clear_values = NULL;
   }

   state->render_pass_info.clear_value_count =
      pRenderPassBegin->clearValueCount;

   return VK_SUCCESS;
}

void pvr_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
                             const VkRenderPassBeginInfo *pRenderPassBeginInfo,
                             const VkSubpassBeginInfoKHR *pSubpassBeginInfo)
{
   PVR_FROM_HANDLE(pvr_framebuffer,
                   framebuffer,
                   pRenderPassBeginInfo->framebuffer);
   PVR_FROM_HANDLE(pvr_render_pass, pass, pRenderPassBeginInfo->renderPass);
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   const struct pvr_renderpass_hwsetup_subpass *hw_subpass;
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   VkResult result;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   assert(!state->render_pass_info.pass);
   assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);

   /* FIXME: Create a separate function for everything using pass->subpasses,
    * look at cmd_buffer_begin_subpass() for example. */
   state->render_pass_info.pass = pass;
   state->render_pass_info.framebuffer = framebuffer;
   state->render_pass_info.subpass_idx = 0;
   state->render_pass_info.render_area = pRenderPassBeginInfo->renderArea;
   state->render_pass_info.current_hw_subpass = 0;
   state->render_pass_info.pipeline_bind_point =
      pass->subpasses[0].pipeline_bind_point;
   state->render_pass_info.userpass_spawn = pass->subpasses[0].userpass_spawn;
   state->dirty.userpass_spawn = true;

   result = pvr_cmd_buffer_setup_attachments(cmd_buffer, pass, framebuffer);
   if (result != VK_SUCCESS)
      return;

   state->status =
      pvr_init_render_targets(cmd_buffer->device, pass, framebuffer);
   if (state->status != VK_SUCCESS)
      return;

   result = pvr_cmd_buffer_set_clear_values(cmd_buffer, pRenderPassBeginInfo);
   if (result != VK_SUCCESS)
      return;

   assert(pass->subpasses[0].pipeline_bind_point ==
          VK_PIPELINE_BIND_POINT_GRAPHICS);

   result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS);
   if (result != VK_SUCCESS)
      return;

   /* Run subpass 0 "soft" background object after the actual background
    * object.
    */
   hw_subpass = pvr_get_hw_subpass(pass, 0);
   if (hw_subpass->client_data)
      pvr_finishme("Unimplemented path!");

   pvr_perform_start_of_render_clears(cmd_buffer);
   pvr_stash_depth_format(&cmd_buffer->state);

   if (!pvr_loadops_contain_clear(pass->hw_setup)) {
      state->dynamic.scissor_accum_state = PVR_SCISSOR_ACCUM_CHECK_FOR_CLEAR;
      state->dynamic.scissor_accum_bounds.offset.x = 0;
      state->dynamic.scissor_accum_bounds.offset.y = 0;
      state->dynamic.scissor_accum_bounds.extent.width = 0;
      state->dynamic.scissor_accum_bounds.extent.height = 0;
   } else {
      state->dynamic.scissor_accum_state = PVR_SCISSOR_ACCUM_DISABLED;
   }
}

static void pvr_cmd_buffer_reset(struct pvr_cmd_buffer *cmd_buffer)
{
   if (cmd_buffer->status != PVR_CMD_BUFFER_STATUS_INITIAL) {
      /* FIXME: For now we always free all resources as if
       * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT was set.
       */
      pvr_cmd_buffer_free_sub_cmds(cmd_buffer);

      list_for_each_entry_safe (struct pvr_bo, bo, &cmd_buffer->bo_list, link) {
         list_del(&bo->link);
         pvr_bo_free(cmd_buffer->device, bo);
      }

      util_dynarray_clear(&cmd_buffer->scissor_array);
      util_dynarray_clear(&cmd_buffer->depth_bias_array);

      cmd_buffer->state.status = VK_SUCCESS;
      cmd_buffer->status = PVR_CMD_BUFFER_STATUS_INITIAL;
   }
}

VkResult pvr_BeginCommandBuffer(VkCommandBuffer commandBuffer,
                                const VkCommandBufferBeginInfo *pBeginInfo)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *state;
   VkResult result;

   pvr_cmd_buffer_reset(cmd_buffer);

   cmd_buffer->usage_flags = pBeginInfo->flags;
   state = &cmd_buffer->state;

   /* VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT must be ignored for
    * primary level command buffers.
    *
    * From the Vulkan 1.0 spec:
    *
    *    VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT specifies that a
    *    secondary command buffer is considered to be entirely inside a render
    *    pass. If this is a primary command buffer, then this bit is ignored.
    */
   if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
      cmd_buffer->usage_flags &=
         ~VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
   }

   if (cmd_buffer->usage_flags &
       VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
      const VkCommandBufferInheritanceInfo *inheritance_info =
         pBeginInfo->pInheritanceInfo;
      struct pvr_render_pass *pass;

      pass = pvr_render_pass_from_handle(inheritance_info->renderPass);
      state->render_pass_info.pass = pass;
      state->render_pass_info.framebuffer =
         pvr_framebuffer_from_handle(inheritance_info->framebuffer);
      state->render_pass_info.subpass_idx = inheritance_info->subpass;
      state->render_pass_info.userpass_spawn =
         pass->subpasses[inheritance_info->subpass].userpass_spawn;

      result =
         pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS);
      if (result != VK_SUCCESS)
         return result;
   }

   memset(state->barriers_needed,
          0xFF,
          sizeof(*state->barriers_needed) * ARRAY_SIZE(state->barriers_needed));

   cmd_buffer->status = PVR_CMD_BUFFER_STATUS_RECORDING;

   return VK_SUCCESS;
}

VkResult pvr_cmd_buffer_add_transfer_cmd(struct pvr_cmd_buffer *cmd_buffer,
                                         struct pvr_transfer_cmd *transfer_cmd)
{
   VkResult result;

   result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_TRANSFER);
   if (result != VK_SUCCESS)
      return result;

   list_addtail(&transfer_cmd->link,
                &cmd_buffer->state.current_sub_cmd->transfer.transfer_cmds);

   return VK_SUCCESS;
}

void pvr_CmdDispatch(VkCommandBuffer commandBuffer,
                     uint32_t groupCountX,
                     uint32_t groupCountY,
                     uint32_t groupCountZ)
{
   assert(!"Unimplemented");
}

void pvr_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
                             VkBuffer buffer,
                             VkDeviceSize offset)
{
   assert(!"Unimplemented");
}

void pvr_CmdDraw(VkCommandBuffer commandBuffer,
                 uint32_t vertexCount,
                 uint32_t instanceCount,
                 uint32_t firstVertex,
                 uint32_t firstInstance)
{
   assert(!"Unimplemented");
}

static void
pvr_update_draw_state(struct pvr_cmd_buffer_state *const state,
                      const struct pvr_cmd_buffer_draw_state *const draw_state)
{
   /* We don't have a state to tell us that base_instance is being used so it
    * gets used as a boolean - 0 means we'll use a pds program that skips the
    * base instance addition. If the base_instance gets used (and the last
    * draw's base_instance was 0) then we switch to the BASE_INSTANCE attrib
    * program.
    *
    * If base_instance changes then we only need to update the data section.
    *
    * The only draw call state that doesn't really matter is the start vertex
    * as that is handled properly in the VDM state in all cases.
    */
   if ((state->draw_state.draw_indexed != draw_state->draw_indexed) ||
       (state->draw_state.draw_indirect != draw_state->draw_indirect) ||
       (state->draw_state.base_instance == 0 &&
        draw_state->base_instance != 0)) {
      state->dirty.draw_variant = true;
   } else if (state->draw_state.base_instance != draw_state->base_instance) {
      state->dirty.draw_base_instance = true;
   }

   state->draw_state = *draw_state;
}

static uint32_t pvr_calc_shared_regs_count(
   const struct pvr_graphics_pipeline *const gfx_pipeline)
{
   const struct pvr_pipeline_stage_state *const vertex_state =
      &gfx_pipeline->vertex_shader_state.stage_state;
   uint32_t shared_regs = vertex_state->const_shared_reg_count +
                          vertex_state->const_shared_reg_offset;

   if (gfx_pipeline->fragment_shader_state.bo) {
      const struct pvr_pipeline_stage_state *const fragment_state =
         &gfx_pipeline->fragment_shader_state.stage_state;
      uint32_t fragment_regs = fragment_state->const_shared_reg_count +
                               fragment_state->const_shared_reg_offset;

      shared_regs = MAX2(shared_regs, fragment_regs);
   }

   return shared_regs;
}

#define PVR_WRITE(_buffer, _value, _offset, _max)                \
   do {                                                          \
      __typeof__(_value) __value = _value;                       \
      uint64_t __offset = _offset;                               \
      uint32_t __nr_dwords = sizeof(__value) / sizeof(uint32_t); \
      static_assert(__same_type(*_buffer, __value),              \
                    "Buffer and value type mismatch");           \
      assert((__offset + __nr_dwords) <= (_max));                \
      assert((__offset % __nr_dwords) == 0U);                    \
      _buffer[__offset / __nr_dwords] = __value;                 \
   } while (0)
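
/* Illustrative expansion (values made up): for a 64-bit write such as
 *
 *    PVR_WRITE(qword_buffer, addr, 2, pds_info->data_size_in_dwords);
 *
 * __nr_dwords is 2, so the macro asserts that dword offset 2 is in range
 * and qword-aligned, then stores to qword_buffer[1].
 */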

static VkResult
pvr_setup_vertex_buffers(struct pvr_cmd_buffer *cmd_buffer,
                         const struct pvr_graphics_pipeline *const gfx_pipeline)
{
   const struct pvr_vertex_shader_state *const vertex_state =
      &gfx_pipeline->vertex_shader_state;
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   const struct pvr_pds_info *const pds_info = state->pds_shader.info;
   const uint8_t *entries;
   uint32_t *dword_buffer;
   uint64_t *qword_buffer;
   struct pvr_bo *pvr_bo;
   VkResult result;

   result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
                                     cmd_buffer->device->heaps.pds_heap,
                                     pds_info->data_size_in_dwords,
                                     PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                                     &pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   dword_buffer = (uint32_t *)pvr_bo->bo->map;
   qword_buffer = (uint64_t *)pvr_bo->bo->map;

   entries = (uint8_t *)pds_info->entries;

   for (uint32_t i = 0; i < pds_info->entry_count; i++) {
      const struct pvr_const_map_entry *const entry_header =
         (struct pvr_const_map_entry *)entries;

      switch (entry_header->type) {
      case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32: {
         const struct pvr_const_map_entry_literal32 *const literal =
            (struct pvr_const_map_entry_literal32 *)entries;

         PVR_WRITE(dword_buffer,
                   literal->literal_value,
                   literal->const_offset,
                   pds_info->data_size_in_dwords);

         entries += sizeof(*literal);
         break;
      }

      case PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS: {
         const struct pvr_const_map_entry_doutu_address *const doutu_addr =
            (struct pvr_const_map_entry_doutu_address *)entries;
         pvr_dev_addr_t exec_addr = vertex_state->bo->vma->dev_addr;
         uint64_t addr = 0ULL;

         exec_addr.addr += vertex_state->entry_offset;
         pvr_set_usc_execution_address64(&addr, exec_addr.addr);

         PVR_WRITE(qword_buffer,
                   addr | doutu_addr->doutu_control,
                   doutu_addr->const_offset,
                   pds_info->data_size_in_dwords);

         entries += sizeof(*doutu_addr);
         break;
      }

      case PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE: {
         const struct pvr_const_map_entry_base_instance *const base_instance =
            (struct pvr_const_map_entry_base_instance *)entries;

         PVR_WRITE(dword_buffer,
                   state->draw_state.base_instance,
                   base_instance->const_offset,
                   pds_info->data_size_in_dwords);

         entries += sizeof(*base_instance);
         break;
      }

      case PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_ADDRESS: {
         const struct pvr_const_map_entry_vertex_attribute_address
            *const attribute =
               (struct pvr_const_map_entry_vertex_attribute_address *)entries;
         const struct pvr_vertex_binding *const binding =
            &state->vertex_bindings[attribute->binding_index];
         uint64_t addr = binding->buffer->dev_addr.addr;

         addr += binding->offset;
         addr += attribute->offset;

         PVR_WRITE(qword_buffer,
                   addr,
                   attribute->const_offset,
                   pds_info->data_size_in_dwords);

         entries += sizeof(*attribute);
         break;
      }

      default:
         unreachable("Unsupported data section map");
         break;
      }
   }

   state->pds_vertex_attrib_offset =
      pvr_bo->vma->dev_addr.addr -
      cmd_buffer->device->heaps.pds_heap->base_addr.addr;

   pvr_bo_cpu_unmap(cmd_buffer->device, pvr_bo);

   return VK_SUCCESS;
}

static VkResult pvr_setup_descriptor_mappings(
   struct pvr_cmd_buffer *const cmd_buffer,
   enum pvr_stage_allocation stage,
   const struct pvr_stage_allocation_uniform_state *uniform_state,
   uint32_t *const uniform_data_offset_out)
{
   const struct pvr_pds_info *const pds_info = &uniform_state->pds_info;
   const struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   const struct pvr_descriptor_state *desc_state;
   const uint8_t *entries;
   uint32_t *dword_buffer;
   uint64_t *qword_buffer;
   struct pvr_bo *pvr_bo;
   VkResult result;

   if (!pds_info->data_size_in_dwords)
      return VK_SUCCESS;

   result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
                                     cmd_buffer->device->heaps.pds_heap,
                                     pds_info->data_size_in_dwords,
                                     PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                                     &pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   dword_buffer = (uint32_t *)pvr_bo->bo->map;
   qword_buffer = (uint64_t *)pvr_bo->bo->map;

   entries = (uint8_t *)pds_info->entries;

   switch (stage) {
   case PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY:
   case PVR_STAGE_ALLOCATION_FRAGMENT:
      desc_state = &cmd_buffer->state.gfx_desc_state;
      break;

   case PVR_STAGE_ALLOCATION_COMPUTE:
      desc_state = &cmd_buffer->state.compute_desc_state;
      break;

   default:
      unreachable("Unsupported stage.");
      break;
   }

   for (uint32_t i = 0; i < pds_info->entry_count; i++) {
      const struct pvr_const_map_entry *const entry_header =
         (struct pvr_const_map_entry *)entries;

      switch (entry_header->type) {
      case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32: {
         const struct pvr_const_map_entry_literal32 *const literal =
            (struct pvr_const_map_entry_literal32 *)entries;

         PVR_WRITE(dword_buffer,
                   literal->literal_value,
                   literal->const_offset,
                   pds_info->data_size_in_dwords);

         entries += sizeof(*literal);
         break;
      }

      case PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER: {
         const struct pvr_const_map_entry_constant_buffer *const_buffer_entry =
            (struct pvr_const_map_entry_constant_buffer *)entries;
         const uint32_t desc_set = const_buffer_entry->desc_set;
         const uint32_t binding = const_buffer_entry->binding;
         const struct pvr_descriptor_set *descriptor_set;
         const struct pvr_descriptor *descriptor;
         pvr_dev_addr_t buffer_addr;

         /* TODO: Handle push descriptors. */

         assert(desc_set < PVR_MAX_DESCRIPTOR_SETS);
         descriptor_set = state->gfx_desc_state.descriptor_sets[desc_set];

         /* TODO: Handle dynamic buffers. */
         descriptor = &descriptor_set->descriptors[binding];
         assert(descriptor->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);

         assert(descriptor->buffer_desc_range ==
                const_buffer_entry->size_in_dwords * sizeof(uint32_t));
         assert(descriptor->buffer_create_info_size ==
                const_buffer_entry->size_in_dwords * sizeof(uint32_t));

         buffer_addr = descriptor->buffer_dev_addr;
         buffer_addr.addr += const_buffer_entry->offset * sizeof(uint32_t);

         PVR_WRITE(qword_buffer,
                   buffer_addr.addr,
                   const_buffer_entry->const_offset,
                   pds_info->data_size_in_dwords);

         entries += sizeof(*const_buffer_entry);
         break;
      }

      case PVR_PDS_CONST_MAP_ENTRY_TYPE_DESCRIPTOR_SET: {
         const struct pvr_const_map_entry_descriptor_set *desc_set_entry =
            (struct pvr_const_map_entry_descriptor_set *)entries;
         const uint32_t desc_set_num = desc_set_entry->descriptor_set;
         const struct pvr_descriptor_set *descriptor_set;
         pvr_dev_addr_t desc_set_addr;

         assert(desc_set_num < PVR_MAX_DESCRIPTOR_SETS);

         /* TODO: Remove this when the compiler provides us with usage info?
          */
         /* We skip DMAing unbound descriptor sets. */
         if (!(desc_state->valid_mask & BITFIELD_BIT(desc_set_num))) {
            const struct pvr_const_map_entry_literal32 *literal;
            uint32_t zero_literal_value;

            entries += sizeof(*desc_set_entry);
            literal = (struct pvr_const_map_entry_literal32 *)entries;

            /* TODO: Is there any guarantee that a literal will follow the
             * descriptor set entry?
             */
            assert(literal->type == PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32);

            /* We zero out the DMA size so the DMA isn't performed. */
            zero_literal_value =
               literal->literal_value &
               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_CLRMSK;

            PVR_WRITE(qword_buffer,
                      0UL,
                      desc_set_entry->const_offset,
                      pds_info->data_size_in_dwords);

            PVR_WRITE(dword_buffer,
                      zero_literal_value,
                      desc_set_entry->const_offset,
                      pds_info->data_size_in_dwords);

            entries += sizeof(*literal);
            continue;
         }

         descriptor_set = desc_state->descriptor_sets[desc_set_num];

         pvr_finishme("Handle push descriptor entry.");

         desc_set_addr = descriptor_set->pvr_bo->vma->dev_addr;

         if (desc_set_entry->primary) {
            desc_set_addr.addr +=
               descriptor_set->layout->memory_layout_in_dwords_per_stage[stage]
                  .primary_offset
               << 2U;
         } else {
            desc_set_addr.addr +=
               descriptor_set->layout->memory_layout_in_dwords_per_stage[stage]
                  .secondary_offset
               << 2U;
         }

         desc_set_addr.addr += (uint64_t)desc_set_entry->offset_in_dwords << 2U;

         PVR_WRITE(qword_buffer,
                   desc_set_addr.addr,
                   desc_set_entry->const_offset,
                   pds_info->data_size_in_dwords);

         entries += sizeof(*desc_set_entry);
         break;
      }

      default:
         unreachable("Unsupported map entry type.");
      }
   }

   pvr_bo_cpu_unmap(cmd_buffer->device, pvr_bo);

   *uniform_data_offset_out =
      pvr_bo->vma->dev_addr.addr -
      cmd_buffer->device->heaps.pds_heap->base_addr.addr;

   return VK_SUCCESS;
}
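
/* The offset returned above is heap-relative: e.g. a data section whose BO
 * was placed 0x1000 bytes past the PDS heap base is reported as 0x1000
 * (illustrative value), which is what the PDS data address field consumes
 * in pvr_emit_dirty_pds_state() below.
 */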

static void
pvr_emit_dirty_pds_state(const struct pvr_cmd_buffer *const cmd_buffer,
                         const uint32_t pds_vertex_uniform_data_offset)
{
   const struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   const struct pvr_stage_allocation_uniform_state *const vertex_uniform_state =
      &state->gfx_pipeline->vertex_shader_state.uniform_state;
   const struct pvr_pipeline_stage_state *const vertex_stage_state =
      &state->gfx_pipeline->vertex_shader_state.stage_state;
   struct pvr_csb *const csb = &state->current_sub_cmd->gfx.control_stream;

   if (!vertex_uniform_state->pds_info.code_size_in_dwords)
      return;

   pvr_csb_emit (csb, VDMCTRL_PDS_STATE0, state0) {
      state0.usc_target = PVRX(VDMCTRL_USC_TARGET_ALL);

      state0.usc_common_size =
         DIV_ROUND_UP(vertex_stage_state->const_shared_reg_count << 2,
                      PVRX(VDMCTRL_PDS_STATE0_USC_COMMON_SIZE_UNIT_SIZE));

      state0.pds_data_size =
         DIV_ROUND_UP(vertex_uniform_state->pds_info.data_size_in_dwords << 2,
                      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE));
   }

   pvr_csb_emit (csb, VDMCTRL_PDS_STATE1, state1) {
      state1.pds_data_addr.addr = pds_vertex_uniform_data_offset;
      state1.sd_type = PVRX(VDMCTRL_SD_TYPE_NONE);
   }

   pvr_csb_emit (csb, VDMCTRL_PDS_STATE2, state2) {
      state2.pds_code_addr.addr = vertex_uniform_state->pds_code.code_offset;
   }
}

static void pvr_setup_output_select(struct pvr_cmd_buffer *const cmd_buffer)
{
   struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state;
   const struct pvr_graphics_pipeline *const gfx_pipeline =
      cmd_buffer->state.gfx_pipeline;
   struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state;
   const struct pvr_vertex_shader_state *const vertex_state =
      &gfx_pipeline->vertex_shader_state;
   uint32_t output_selects;

   /* TODO: Handle vertex and fragment shader state flags. */

   pvr_csb_pack (&output_selects, TA_OUTPUT_SEL, state) {
      const VkPrimitiveTopology topology =
         gfx_pipeline->input_asm_state.topology;

      state.rhw_pres = true;
      state.vtxsize = DIV_ROUND_UP(vertex_state->vertex_output_size, 4U);
      state.psprite_size_pres = (topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST);
   }

   if (ppp_state->output_selects != output_selects) {
      ppp_state->output_selects = output_selects;
      emit_state->output_selects = true;
   }

   if (ppp_state->varying_word[0] != vertex_state->varying[0]) {
      ppp_state->varying_word[0] = vertex_state->varying[0];
      emit_state->varying_word0 = true;
   }

   if (ppp_state->varying_word[1] != vertex_state->varying[1]) {
      ppp_state->varying_word[1] = vertex_state->varying[1];
      emit_state->varying_word1 = true;
   }
}

/* clang-format off */
static enum PVRX(TA_OBJTYPE)
pvr_ppp_state_get_ispa_objtype_from_vk(const VkPrimitiveTopology topology)
/* clang-format on */
{
   switch (topology) {
   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
      return PVRX(TA_OBJTYPE_SPRITE_01UV);

   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
      return PVRX(TA_OBJTYPE_LINE);

   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
      return PVRX(TA_OBJTYPE_TRIANGLE);

   default:
      unreachable("Invalid topology.");
      return 0;
   }
}

static inline enum PVRX(TA_CMPMODE) pvr_cmpmode(VkCompareOp op)
{
   /* enum values are identical, so we can just cast the input directly. */
   return (enum PVRX(TA_CMPMODE))op;
}

static inline enum PVRX(TA_ISPB_STENCILOP) pvr_stencilop(VkStencilOp op)
{
   /* enum values are identical, so we can just cast the input directly. */
   return (enum PVRX(TA_ISPB_STENCILOP))op;
}

static void pvr_setup_isp_faces_and_control(
   struct pvr_cmd_buffer *const cmd_buffer,
   struct pvr_cmd_struct(TA_STATE_ISPA) *const ispa_out)
{
   struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state;
   const struct pvr_graphics_pipeline *const gfx_pipeline =
      cmd_buffer->state.gfx_pipeline;
   struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state;
   const struct pvr_dynamic_state *const dynamic_state =
      &cmd_buffer->state.dynamic.common;
   const struct pvr_render_pass_info *const pass_info =
      &cmd_buffer->state.render_pass_info;
   const uint32_t subpass_idx = pass_info->subpass_idx;
   const uint32_t *depth_stencil_attachment_idx =
      pass_info->pass->subpasses[subpass_idx].depth_stencil_attachment;
   const struct pvr_image_view *const attachment =
      (!depth_stencil_attachment_idx)
         ? NULL
         : pass_info->attachments[*depth_stencil_attachment_idx];

   const VkCullModeFlags cull_mode = gfx_pipeline->raster_state.cull_mode;
   const bool raster_discard_enabled =
      gfx_pipeline->raster_state.discard_enable;
   const bool disable_all = raster_discard_enabled || !attachment;

   const VkPrimitiveTopology topology = gfx_pipeline->input_asm_state.topology;
   const enum PVRX(TA_OBJTYPE)
      obj_type = pvr_ppp_state_get_ispa_objtype_from_vk(topology);

   const bool disable_stencil_write = disable_all;
   const bool disable_stencil_test =
      disable_all || !vk_format_has_stencil(attachment->vk.format);

   const bool disable_depth_write = disable_all;
   const bool disable_depth_test = disable_all ||
                                   !vk_format_has_depth(attachment->vk.format);

   uint32_t ispb_stencil_off;
   bool is_two_sided = false;
   uint32_t isp_control;

   uint32_t line_width;
   uint32_t common_a;
   uint32_t front_a;
   uint32_t front_b;
   uint32_t back_a;
   uint32_t back_b;

   /* Convert to 4.4 fixed point format. */
   line_width = util_unsigned_fixed(dynamic_state->line_width, 4);

   /* Subtract 1 to shift values from range [0=0,256=16] to [0=1/16,255=16].
    * If 0 it stays at 0, otherwise we subtract 1.
    */
   line_width = (!!line_width) * (line_width - 1);

   line_width = MIN2(line_width, PVRX(TA_STATE_ISPA_POINTLINEWIDTH_SIZE_MAX));
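
   /* Worked example (illustrative): a line width of 1.0f becomes 16 in 4.4
    * fixed point, then 15 after the subtract, i.e. the encoding where 15
    * means 16/16 = 1 pixel.
    */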

   /* TODO: Part of the logic in this function is duplicated in another part
    * of the code. E.g. the dcmpmode, and sop1/2/3. Could we do this earlier?
    */

   pvr_csb_pack (&common_a, TA_STATE_ISPA, ispa) {
      ispa.pointlinewidth = line_width;

      if (disable_depth_test)
         ispa.dcmpmode = PVRX(TA_CMPMODE_ALWAYS);
      else
         ispa.dcmpmode = pvr_cmpmode(gfx_pipeline->depth_compare_op);

      /* FIXME: Can we just have this and remove the assignment above?
       * The user provides a depthTestEnable at vkCreateGraphicsPipelines()
       * should we be using that?
       */
      ispa.dcmpmode |= gfx_pipeline->depth_compare_op;

      ispa.dwritedisable = disable_depth_test || disable_depth_write;
      /* FIXME: Can we just have this and remove the assignment above? */
      ispa.dwritedisable = ispa.dwritedisable ||
                           gfx_pipeline->depth_write_disable;

      ispa.passtype = gfx_pipeline->fragment_shader_state.pass_type;

      ispa.objtype = obj_type;

      /* Return unpacked ispa structure. dcmpmode, dwritedisable, passtype and
       * objtype are needed by pvr_setup_triangle_merging_flag.
       */
      if (ispa_out)
         *ispa_out = ispa;
   }

   /* FIXME: This logic should be redone and improved. Can we also get rid of
    * the front and back variants?
    */

   pvr_csb_pack (&front_a, TA_STATE_ISPA, ispa) {
      ispa.sref = (!disable_stencil_test) * dynamic_state->reference.front;
   }
   front_a |= common_a;

   pvr_csb_pack (&back_a, TA_STATE_ISPA, ispa) {
      ispa.sref = (!disable_stencil_test) * dynamic_state->reference.back;
   }
   back_a |= common_a;

   /* TODO: Does this actually represent the ispb control word on stencil off?
    * If not, rename the variable.
    */
   pvr_csb_pack (&ispb_stencil_off, TA_STATE_ISPB, ispb) {
      ispb.sop3 = PVRX(TA_ISPB_STENCILOP_KEEP);
      ispb.sop2 = PVRX(TA_ISPB_STENCILOP_KEEP);
      ispb.sop1 = PVRX(TA_ISPB_STENCILOP_KEEP);
      ispb.scmpmode = PVRX(TA_CMPMODE_ALWAYS);
   }

   if (disable_stencil_test) {
      back_b = front_b = ispb_stencil_off;
   } else {
      pvr_csb_pack (&front_b, TA_STATE_ISPB, ispb) {
         ispb.swmask =
            (!disable_stencil_write) * dynamic_state->write_mask.front;
         ispb.scmpmask = dynamic_state->compare_mask.front;

         ispb.sop3 = pvr_stencilop(gfx_pipeline->stencil_front.pass_op);
         ispb.sop2 = pvr_stencilop(gfx_pipeline->stencil_front.depth_fail_op);
         ispb.sop1 = pvr_stencilop(gfx_pipeline->stencil_front.fail_op);

         ispb.scmpmode = pvr_cmpmode(gfx_pipeline->stencil_front.compare_op);
      }

      pvr_csb_pack (&back_b, TA_STATE_ISPB, ispb) {
         ispb.swmask =
            (!disable_stencil_write) * dynamic_state->write_mask.back;
         ispb.scmpmask = dynamic_state->compare_mask.back;

         ispb.sop3 = pvr_stencilop(gfx_pipeline->stencil_back.pass_op);
         ispb.sop2 = pvr_stencilop(gfx_pipeline->stencil_back.depth_fail_op);
         ispb.sop1 = pvr_stencilop(gfx_pipeline->stencil_back.fail_op);

         ispb.scmpmode = pvr_cmpmode(gfx_pipeline->stencil_back.compare_op);
      }
   }

   if (front_a != back_a || front_b != back_b) {
      if (cull_mode & VK_CULL_MODE_BACK_BIT) {
         /* Single face, using front state. */
      } else if (cull_mode & VK_CULL_MODE_FRONT_BIT) {
         /* Single face, using back state. */
         front_a = back_a;
         front_b = back_b;
      } else {
         /* Both faces. */
         emit_state->isp_ba = is_two_sided = true;

         if (gfx_pipeline->raster_state.front_face ==
             VK_FRONT_FACE_COUNTER_CLOCKWISE) {
            uint32_t tmp = front_a;

            front_a = back_a;
            back_a = tmp;

            tmp = front_b;
            front_b = back_b;
            back_b = tmp;
         }

         /* HW defaults to stencil off. */
         if (back_b != ispb_stencil_off)
            emit_state->isp_fb = emit_state->isp_bb = true;
      }
   }

   if (!disable_stencil_test && front_b != ispb_stencil_off)
      emit_state->isp_fb = true;

   pvr_csb_pack (&isp_control, TA_STATE_ISPCTL, ispctl) {
      ispctl.upass = pass_info->userpass_spawn;

      /* TODO: is bo ever NULL? Figure out what to do. */
      ispctl.tagwritedisable = raster_discard_enabled ||
                               !gfx_pipeline->fragment_shader_state.bo;

      ispctl.two_sided = is_two_sided;
      ispctl.bpres = emit_state->isp_fb || emit_state->isp_bb;

      ispctl.dbenable = !raster_discard_enabled &&
                        gfx_pipeline->raster_state.depth_bias_enable &&
                        obj_type == PVRX(TA_OBJTYPE_TRIANGLE);
      ispctl.scenable = !raster_discard_enabled;

      ppp_state->isp.control_struct = ispctl;
   }

   emit_state->isp = true;

   ppp_state->isp.control = isp_control;
   ppp_state->isp.front_a = front_a;
   ppp_state->isp.front_b = front_b;
   ppp_state->isp.back_a = back_a;
   ppp_state->isp.back_b = back_b;
}

static void pvr_get_viewport_scissor_overlap(const VkViewport *const viewport,
                                             const VkRect2D *const scissor,
                                             VkRect2D *const rect_out)
{
   /* TODO: See if we can remove this struct. */
   struct pvr_rect {
      int32_t x0, y0;
      int32_t x1, y1;
   };

   /* TODO: Worry about overflow? */
   const struct pvr_rect scissor_rect = {
      .x0 = scissor->offset.x,
      .y0 = scissor->offset.y,
      .x1 = scissor->offset.x + scissor->extent.width,
      .y1 = scissor->offset.y + scissor->extent.height
   };
   struct pvr_rect viewport_rect = { 0 };

   assert(viewport->width >= 0.0f);
   assert(scissor_rect.x0 >= 0);
   assert(scissor_rect.y0 >= 0);

   if (scissor->extent.width == 0 || scissor->extent.height == 0) {
      *rect_out = (VkRect2D){ 0 };
      return;
   }

   viewport_rect.x0 = (int32_t)viewport->x;
   viewport_rect.x1 = (int32_t)viewport->x + (int32_t)viewport->width;

   /* TODO: Is there a mathematical way of doing all this and then clamp at
    * the end?
    */
   /* We flip the y0 and y1 when height is negative. */
   viewport_rect.y0 = (int32_t)viewport->y + MIN2(0, (int32_t)viewport->height);
   viewport_rect.y1 = (int32_t)viewport->y + MAX2(0, (int32_t)viewport->height);

   if (scissor_rect.x1 <= viewport_rect.x0 ||
       scissor_rect.y1 <= viewport_rect.y0 ||
       scissor_rect.x0 >= viewport_rect.x1 ||
       scissor_rect.y0 >= viewport_rect.y1) {
      *rect_out = (VkRect2D){ 0 };
      return;
   }

   /* Determine the overlapping rectangle. */
   viewport_rect.x0 = MAX2(viewport_rect.x0, scissor_rect.x0);
   viewport_rect.y0 = MAX2(viewport_rect.y0, scissor_rect.y0);
   viewport_rect.x1 = MIN2(viewport_rect.x1, scissor_rect.x1);
   viewport_rect.y1 = MIN2(viewport_rect.y1, scissor_rect.y1);

   /* TODO: Is this conversion safe? Is this logic right? */
   rect_out->offset.x = (uint32_t)viewport_rect.x0;
   rect_out->offset.y = (uint32_t)viewport_rect.y0;
   rect_out->extent.height = (uint32_t)(viewport_rect.y1 - viewport_rect.y0);
   rect_out->extent.width = (uint32_t)(viewport_rect.x1 - viewport_rect.x0);
}
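
/* Worked example (illustrative): a viewport of (x, y, w, h) =
 * (10, 10, 100, 100) against a scissor of offset (0, 0) and extent 50x50
 * overlaps in offset (10, 10), extent 40x40.
 */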

static inline uint32_t
pvr_get_geom_region_clip_align_size(struct pvr_device_info *const dev_info)
{
   /* TODO: This should come from rogue_ppp.xml. */
   return 16U + 16U * (!PVR_HAS_FEATURE(dev_info, tile_size_16x16));
}

/* FIXME: Remove device param when PVR_HAS_FEATURE() accepts const dev_info */
static void
pvr_setup_isp_depth_bias_scissor_state(struct pvr_cmd_buffer *const cmd_buffer)
{
   struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state;
   struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state;
   const struct pvr_dynamic_state *const dynamic_state =
      &cmd_buffer->state.dynamic.common;
   const struct pvr_cmd_struct(TA_STATE_ISPCTL) *const ispctl =
      &ppp_state->isp.control_struct;
   struct pvr_device_info *const dev_info =
      &cmd_buffer->device->pdevice->dev_info;

   if (ispctl->dbenable)
      assert(!"Unimplemented");

   if (ispctl->scenable) {
      const uint32_t region_clip_align_size =
         pvr_get_geom_region_clip_align_size(dev_info);
      const VkViewport *const viewport = &dynamic_state->viewport.viewports[0];
      const VkRect2D *const scissor = &dynamic_state->scissor.scissors[0];
      VkRect2D overlap_rect;
      uint32_t scissor_words[2];
      uint32_t height;
      uint32_t width;
      uint32_t x;
      uint32_t y;

      /* For region clip. */
      uint32_t bottom;
      uint32_t right;
      uint32_t left;
      uint32_t top;

      /* We don't support multiple viewport calculations. */
      assert(dynamic_state->viewport.count == 1);
      /* We don't support multiple scissor calculations. */
      assert(dynamic_state->scissor.count == 1);

      pvr_get_viewport_scissor_overlap(viewport, scissor, &overlap_rect);

      x = overlap_rect.offset.x;
      y = overlap_rect.offset.y;
      width = overlap_rect.extent.width;
      height = overlap_rect.extent.height;

      pvr_csb_pack (&scissor_words[0], IPF_SCISSOR_WORD_0, word0) {
         word0.scw0_xmax = x + width;
         word0.scw0_xmin = x;
      }

      pvr_csb_pack (&scissor_words[1], IPF_SCISSOR_WORD_1, word1) {
         word1.scw1_ymax = y + height;
         word1.scw1_ymin = y;
      }

      if (cmd_buffer->scissor_array.size &&
          cmd_buffer->scissor_words[0] == scissor_words[0] &&
          cmd_buffer->scissor_words[1] == scissor_words[1]) {
         return;
      }

      cmd_buffer->scissor_words[0] = scissor_words[0];
      cmd_buffer->scissor_words[1] = scissor_words[1];

      /* Calculate region clip. */

      left = x / region_clip_align_size;
      top = y / region_clip_align_size;

      /* We prevent right=-1 with the multiplication. */
      /* TODO: Is there a better way of doing this? */
      if ((x + width) != 0U)
         right = DIV_ROUND_UP(x + width, region_clip_align_size) - 1;
      else
         right = 0;

      if ((y + height) != 0U)
         bottom = DIV_ROUND_UP(y + height, region_clip_align_size) - 1;
      else
         bottom = 0;

      /* Setup region clip to clip everything outside what was calculated. */

      /* FIXME: Should we mask to prevent writing over other words? */
      pvr_csb_pack (&ppp_state->region_clipping.word0, TA_REGION_CLIP0, word0) {
         word0.right = right;
         word0.left = left;
         word0.mode = PVRX(TA_REGION_CLIP_MODE_OUTSIDE);
      }

      pvr_csb_pack (&ppp_state->region_clipping.word1, TA_REGION_CLIP1, word1) {
         word1.bottom = bottom;
         word1.top = top;
      }

      ppp_state->depthbias_scissor_indices.scissor_index =
         util_dynarray_num_elements(&cmd_buffer->scissor_array,
                                    __typeof__(cmd_buffer->scissor_words));

      memcpy(util_dynarray_grow_bytes(&cmd_buffer->scissor_array,
                                      1,
                                      sizeof(cmd_buffer->scissor_words)),
             cmd_buffer->scissor_words,
             sizeof(cmd_buffer->scissor_words));

      emit_state->isp_dbsc = true;
      emit_state->region_clip = true;
   }
}

static void
pvr_setup_triangle_merging_flag(struct pvr_cmd_buffer *const cmd_buffer,
                                struct pvr_cmd_struct(TA_STATE_ISPA) * ispa)
{
   struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state;
   struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state;
   uint32_t merge_word;
   uint32_t mask;

   pvr_csb_pack (&merge_word, TA_STATE_PDS_SIZEINFO2, size_info) {
      /* Disable for lines or punch-through or for DWD and depth compare
       * always.
       */
      if (ispa->objtype == PVRX(TA_OBJTYPE_LINE) ||
          ispa->passtype == PVRX(TA_PASSTYPE_PUNCH_THROUGH) ||
          (ispa->dwritedisable && ispa->dcmpmode == PVRX(TA_CMPMODE_ALWAYS))) {
         size_info.pds_tri_merge_disable = true;
      }
   }

   pvr_csb_pack (&mask, TA_STATE_PDS_SIZEINFO2, size_info) {
      size_info.pds_tri_merge_disable = true;
   }

   merge_word |= ppp_state->pds.size_info2 & ~mask;

   if (merge_word != ppp_state->pds.size_info2) {
      ppp_state->pds.size_info2 = merge_word;
      emit_state->pds_fragment_stateptr0 = true;
   }
}

/* TODO: See if this function can be improved once fully implemented. */
static uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
   const struct pvr_device_info *dev_info,
   uint32_t fs_common_size,
   uint32_t min_tiles_in_flight)
{
   uint32_t max_tiles_in_flight;
   uint32_t num_allocs;

   if (PVR_HAS_FEATURE(dev_info, s8xe)) {
      num_allocs = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0U);
   } else {
      uint32_t num_phantoms = rogue_get_num_phantoms(dev_info);
      uint32_t min_cluster_per_phantom = 0;

      if (num_phantoms > 1) {
         pvr_finishme("Unimplemented path!!");
      } else {
         min_cluster_per_phantom =
            PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U);
      }

      if (num_phantoms > 1)
         pvr_finishme("Unimplemented path!!");

      if (num_phantoms > 2)
         pvr_finishme("Unimplemented path!!");

      if (num_phantoms > 3)
         pvr_finishme("Unimplemented path!!");

      if (min_cluster_per_phantom >= 4)
         num_allocs = 1;
      else if (min_cluster_per_phantom == 2)
         num_allocs = 2;
      else
         num_allocs = 4;
   }

   max_tiles_in_flight =
      PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U);

   if (fs_common_size == UINT_MAX) {
      uint32_t max_common_size;

      num_allocs *= MIN2(min_tiles_in_flight, max_tiles_in_flight);

      if (!PVR_HAS_ERN(dev_info, 38748)) {
         /* Hardware needs space for one extra shared allocation. */
         num_allocs += 1U;
      }

      max_common_size = rogue_get_reserved_shared_size(dev_info) -
                        rogue_get_max_coeffs(dev_info);

      /* Double resource requirements to deal with fragmentation. */
      max_common_size /= num_allocs * 2;
      max_common_size =
         ROUND_DOWN_TO(max_common_size,
                       PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));

      return max_common_size;
   } else if (fs_common_size == 0) {
      return max_tiles_in_flight;
   }

   pvr_finishme("Unimplemented path!!");

   return 0;
}

static void
pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer)
{
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   const struct pvr_stage_allocation_uniform_state *uniform_shader_state =
      &state->gfx_pipeline->fragment_shader_state.uniform_state;
   const struct pvr_pds_upload *pds_coeff_program =
      &state->gfx_pipeline->fragment_shader_state.pds_coeff_program;
   const struct pvr_pipeline_stage_state *fragment_state =
      &state->gfx_pipeline->fragment_shader_state.stage_state;
   struct pvr_device_info *const dev_info =
      &cmd_buffer->device->pdevice->dev_info;
   struct pvr_emit_state *const emit_state = &state->emit_state;
   struct pvr_ppp_state *const ppp_state = &state->ppp_state;
   struct pvr_sub_cmd *sub_cmd = state->current_sub_cmd;

   const uint32_t pds_uniform_size =
      DIV_ROUND_UP(uniform_shader_state->pds_info.data_size_in_dwords,
                   PVRX(TA_STATE_PDS_SIZEINFO1_PDS_UNIFORMSIZE_UNIT_SIZE));

   const uint32_t pds_varying_state_size =
      DIV_ROUND_UP(pds_coeff_program->data_size,
                   PVRX(TA_STATE_PDS_SIZEINFO1_PDS_VARYINGSIZE_UNIT_SIZE));

   const uint32_t usc_varying_size =
      DIV_ROUND_UP(fragment_state->coefficient_size,
                   PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE));

   const uint32_t pds_temp_size =
      DIV_ROUND_UP(fragment_state->temps_count,
                   PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE));

   const uint32_t usc_shared_size =
      DIV_ROUND_UP(fragment_state->const_shared_reg_count,
                   PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));

   const uint32_t max_tiles_in_flight =
      pvr_calc_fscommon_size_and_tiles_in_flight(
         dev_info,
         usc_shared_size *
            PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE),
         1);
   uint32_t size_info_mask;
   uint32_t size_info2;

   if (max_tiles_in_flight < sub_cmd->gfx.max_tiles_in_flight)
      sub_cmd->gfx.max_tiles_in_flight = max_tiles_in_flight;

   pvr_csb_pack (&ppp_state->pds.pixel_shader_base,
                 TA_STATE_PDS_SHADERBASE,
                 shader_base) {
      const struct pvr_pds_upload *const pds_upload =
         &state->gfx_pipeline->fragment_shader_state.pds_fragment_program;

      shader_base.addr.addr = pds_upload->data_offset;
   }

   if (uniform_shader_state->pds_code.pvr_bo) {
      pvr_csb_pack (&ppp_state->pds.texture_uniform_code_base,
                    TA_STATE_PDS_TEXUNICODEBASE,
                    tex_base) {
         tex_base.addr.addr = uniform_shader_state->pds_code.code_offset;
      }
   } else {
      ppp_state->pds.texture_uniform_code_base = 0U;
   }

   pvr_csb_pack (&ppp_state->pds.size_info1, TA_STATE_PDS_SIZEINFO1, info1) {
      info1.pds_uniformsize = pds_uniform_size;
      info1.pds_texturestatesize = 0U;
      info1.pds_varyingsize = pds_varying_state_size;
      info1.usc_varyingsize = usc_varying_size;
      info1.pds_tempsize = pds_temp_size;
   }

   pvr_csb_pack (&size_info_mask, TA_STATE_PDS_SIZEINFO2, mask) {
      mask.pds_tri_merge_disable = true;
   }

   ppp_state->pds.size_info2 &= size_info_mask;

   pvr_csb_pack (&size_info2, TA_STATE_PDS_SIZEINFO2, info2) {
      info2.usc_sharedsize = usc_shared_size;
   }

   ppp_state->pds.size_info2 |= size_info2;

   if (pds_coeff_program->pvr_bo) {
      state->emit_state.pds_fragment_stateptr1 = true;

      pvr_csb_pack (&ppp_state->pds.varying_base,
                    TA_STATE_PDS_VARYINGBASE,
                    base) {
         base.addr.addr = pds_coeff_program->data_offset;
      }
   } else {
      ppp_state->pds.varying_base = 0U;
   }

   pvr_csb_pack (&ppp_state->pds.uniform_state_data_base,
                 TA_STATE_PDS_UNIFORMDATABASE,
                 base) {
      base.addr.addr = state->pds_fragment_uniform_data_offset;
   }

   emit_state->pds_fragment_stateptr0 = true;
   emit_state->pds_fragment_stateptr3 = true;
}

static void pvr_setup_viewport(struct pvr_cmd_buffer *const cmd_buffer)
{
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   struct pvr_emit_state *const emit_state = &state->emit_state;
   struct pvr_ppp_state *const ppp_state = &state->ppp_state;

   if (ppp_state->viewport_count != state->dynamic.common.viewport.count) {
      ppp_state->viewport_count = state->dynamic.common.viewport.count;
      emit_state->viewport = true;
   }

   if (state->gfx_pipeline->raster_state.discard_enable) {
      /* We don't want to emit any viewport data as it'll just get thrown
       * away. It's after the previous condition because we still want to
       * stash the viewport_count as it's our trigger for when
       * rasterizer discard gets disabled.
       */
      emit_state->viewport = false;
      return;
   }

   for (uint32_t i = 0; i < ppp_state->viewport_count; i++) {
      VkViewport *viewport = &state->dynamic.common.viewport.viewports[i];
      uint32_t x_scale = fui(viewport->width * 0.5f);
      uint32_t y_scale = fui(viewport->height * 0.5f);
      uint32_t z_scale = fui(viewport->maxDepth - viewport->minDepth);
      uint32_t x_center = fui(viewport->x + viewport->width * 0.5f);
      uint32_t y_center = fui(viewport->y + viewport->height * 0.5f);
      uint32_t z_center = fui(viewport->minDepth);
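
      /* Worked example (illustrative): an 800x600 viewport at (0, 0) with
       * depth range [0, 1] gives m0 = a0 = 400.0f, m1 = a1 = 300.0f,
       * m2 = 1.0f and a2 = 0.0f, mapping NDC x,y in [-1, 1] to
       * [0, 800] x [0, 600].
       */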

      if (ppp_state->viewports[i].a0 != x_center ||
          ppp_state->viewports[i].m0 != x_scale ||
          ppp_state->viewports[i].a1 != y_center ||
          ppp_state->viewports[i].m1 != y_scale ||
          ppp_state->viewports[i].a2 != z_center ||
          ppp_state->viewports[i].m2 != z_scale) {
         ppp_state->viewports[i].a0 = x_center;
         ppp_state->viewports[i].m0 = x_scale;
         ppp_state->viewports[i].a1 = y_center;
         ppp_state->viewports[i].m1 = y_scale;
         ppp_state->viewports[i].a2 = z_center;
         ppp_state->viewports[i].m2 = z_scale;

         emit_state->viewport = true;
      }
   }
}

static void pvr_setup_ppp_control(struct pvr_cmd_buffer *const cmd_buffer)
{
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline;
   struct pvr_emit_state *const emit_state = &state->emit_state;
   struct pvr_ppp_state *const ppp_state = &state->ppp_state;
   uint32_t ppp_control;

   pvr_csb_pack (&ppp_control, TA_STATE_PPP_CTRL, control) {
      const struct pvr_raster_state *raster_state = &gfx_pipeline->raster_state;
      VkPrimitiveTopology topology = gfx_pipeline->input_asm_state.topology;
      control.drawclippededges = true;
      control.wclampen = true;

      if (topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN)
         control.flatshade_vtx = PVRX(TA_FLATSHADE_VTX_VERTEX_1);
      else
         control.flatshade_vtx = PVRX(TA_FLATSHADE_VTX_VERTEX_0);

      if (raster_state->depth_clamp_enable)
         control.clip_mode = PVRX(TA_CLIP_MODE_NO_FRONT_OR_REAR);
      else
         control.clip_mode = PVRX(TA_CLIP_MODE_FRONT_REAR);

      /* +--- FrontIsCCW?
       * | +--- Cull Front?
       * v v
       * 0|0 CULLMODE_CULL_CCW,
       * 0|1 CULLMODE_CULL_CW,
       * 1|0 CULLMODE_CULL_CW,
       * 1|1 CULLMODE_CULL_CCW,
       */
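
      /* e.g. VK_FRONT_FACE_COUNTER_CLOCKWISE with VK_CULL_MODE_BACK_BIT is
       * row 1|0 above, so the XOR below selects TA_CULLMODE_CULL_CW.
       */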
      switch (raster_state->cull_mode) {
      case VK_CULL_MODE_BACK_BIT:
      case VK_CULL_MODE_FRONT_BIT:
         if ((raster_state->front_face == VK_FRONT_FACE_COUNTER_CLOCKWISE) ^
             (raster_state->cull_mode == VK_CULL_MODE_FRONT_BIT)) {
            control.cullmode = PVRX(TA_CULLMODE_CULL_CW);
         } else {
            control.cullmode = PVRX(TA_CULLMODE_CULL_CCW);
         }

         break;

      case VK_CULL_MODE_NONE:
         control.cullmode = PVRX(TA_CULLMODE_NO_CULLING);
         break;

      default:
         unreachable("Unsupported cull mode!");
      }
   }

   if (ppp_control != ppp_state->ppp_control) {
      ppp_state->ppp_control = ppp_control;
      emit_state->ppp_control = true;
   }
}

/* Largest valid PPP State update in words = 31
 * 1 - Header
 * 3 - Stream Out Config words 0, 1 and 2
 * 1 - PPP Control word
 * 3 - Varying Config words 0, 1 and 2
 * 1 - Output Select
 * 1 - WClamp
 * 6 - Viewport Transform words
 * 2 - Region Clip words
 * 3 - PDS State for fragment phase (PDSSTATEPTR 1-3)
 * 4 - PDS State for fragment phase (PDSSTATEPTR0)
 * 6 - ISP Control Words
 */
#define PVR_MAX_PPP_STATE_DWORDS 31
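
/* For reference, the itemised words above sum to
 * 1 + 3 + 1 + 3 + 1 + 1 + 6 + 2 + 3 + 4 + 6 = 31, matching the define.
 */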

static VkResult pvr_emit_ppp_state(struct pvr_cmd_buffer *const cmd_buffer)
{
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   struct pvr_emit_state *const emit_state = &state->emit_state;
   struct pvr_ppp_state *const ppp_state = &state->ppp_state;
   struct pvr_csb *const control_stream =
      &state->current_sub_cmd->gfx.control_stream;
   uint32_t ppp_state_words[PVR_MAX_PPP_STATE_DWORDS];
   uint32_t ppp_state_words_count;
   uint32_t ppp_state_header;
   bool deferred_secondary;
   struct pvr_bo *pvr_bo;
   uint32_t *buffer_ptr;
   VkResult result;

   buffer_ptr = ppp_state_words;

   pvr_csb_pack (&ppp_state_header, TA_STATE_HEADER, header) {
      header.view_port_count = (ppp_state->viewport_count == 0)
                                  ? 0U
                                  : (ppp_state->viewport_count - 1);

      /* Skip over header. */
      buffer_ptr++;

      /* Set ISP state. */
      if (emit_state->isp) {
         header.pres_ispctl = true;
         *buffer_ptr++ = ppp_state->isp.control;
         header.pres_ispctl_fa = true;
         *buffer_ptr++ = ppp_state->isp.front_a;

         if (emit_state->isp_fb) {
            header.pres_ispctl_fb = true;
            *buffer_ptr++ = ppp_state->isp.front_b;
         }

         if (emit_state->isp_ba) {
            header.pres_ispctl_ba = true;
            *buffer_ptr++ = ppp_state->isp.back_a;
         }

         if (emit_state->isp_bb) {
            header.pres_ispctl_bb = true;
            *buffer_ptr++ = ppp_state->isp.back_b;
         }
      }

      /* Depth bias / scissor
       * If deferred_secondary is true then we do a separate state update
       * which gets patched in ExecuteDeferredCommandBuffer.
       */
      /* TODO: Update above comment when we port ExecuteDeferredCommandBuffer.
       */
      deferred_secondary =
         cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
         cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;

      if (emit_state->isp_dbsc && !deferred_secondary) {
         header.pres_ispctl_dbsc = true;

         pvr_csb_pack (buffer_ptr++, TA_STATE_ISPDBSC, ispdbsc) {
            ispdbsc.dbindex =
               ppp_state->depthbias_scissor_indices.depthbias_index;
            ispdbsc.scindex =
               ppp_state->depthbias_scissor_indices.scissor_index;
         }
      }

      if (emit_state->pds_fragment_stateptr0) {
         header.pres_pds_state_ptr0 = true;

         *buffer_ptr++ = ppp_state->pds.pixel_shader_base;
         *buffer_ptr++ = ppp_state->pds.texture_uniform_code_base;
         *buffer_ptr++ = ppp_state->pds.size_info1;
         *buffer_ptr++ = ppp_state->pds.size_info2;
      }

      if (emit_state->pds_fragment_stateptr1) {
         header.pres_pds_state_ptr1 = true;
         *buffer_ptr++ = ppp_state->pds.varying_base;
      }

      /* We don't use the pds_fragment_stateptr2 (texture state programs)
       * control word, but this doesn't mean we need to set it to 0. This is
       * because the hardware runs the texture state program only when the
       * pds_texture state field of PDS_SIZEINFO1 is non-zero.
       */

      if (emit_state->pds_fragment_stateptr3) {
         header.pres_pds_state_ptr3 = true;
         *buffer_ptr++ = ppp_state->pds.uniform_state_data_base;
      }

      if (emit_state->region_clip) {
         header.pres_region_clip = true;
         *buffer_ptr++ = ppp_state->region_clipping.word0;
         *buffer_ptr++ = ppp_state->region_clipping.word1;
      }

      if (emit_state->viewport) {
         const uint32_t viewports = MAX2(1, ppp_state->viewport_count);

         header.pres_viewport = true;
         for (uint32_t i = 0; i < viewports; i++) {
            *buffer_ptr++ = ppp_state->viewports[i].a0;
            *buffer_ptr++ = ppp_state->viewports[i].m0;
            *buffer_ptr++ = ppp_state->viewports[i].a1;
            *buffer_ptr++ = ppp_state->viewports[i].m1;
            *buffer_ptr++ = ppp_state->viewports[i].a2;
            *buffer_ptr++ = ppp_state->viewports[i].m2;
         }
      }

      if (emit_state->wclamp) {
         const float wclamp = 0.00001f;

         header.pres_wclamp = true;
         *buffer_ptr++ = fui(wclamp);
      }

      /* Output selects. */
      if (emit_state->output_selects) {
         header.pres_outselects = true;
         *buffer_ptr++ = ppp_state->output_selects;
      }

      /* Varying words. */
      if (emit_state->varying_word0) {
         header.pres_varying_word0 = true;
         *buffer_ptr++ = ppp_state->varying_word[0];
      }

      if (emit_state->varying_word1) {
         header.pres_varying_word1 = true;
         *buffer_ptr++ = ppp_state->varying_word[1];
      }

      if (emit_state->varying_word2) {
         /* We only emit this on the first draw of a render job to prevent us
          * from inheriting a non-zero value set elsewhere.
          */
         header.pres_varying_word2 = true;
         *buffer_ptr++ = 0;
      }

      if (emit_state->ppp_control) {
         header.pres_ppp_ctrl = true;
         *buffer_ptr++ = ppp_state->ppp_control;
      }

      if (emit_state->stream_out) {
         /* We only emit this on the first draw of a render job to prevent us
          * from inheriting a non-zero value set elsewhere.
          */
         header.pres_stream_out_size = true;
         *buffer_ptr++ = 0;
      }
   }

   if (!ppp_state_header)
      return VK_SUCCESS;

   ppp_state_words_count = buffer_ptr - ppp_state_words;
   ppp_state_words[0] = ppp_state_header;
3881
result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
3882
cmd_buffer->device->heaps.general_heap,
3883
ppp_state_words_count * sizeof(uint32_t),
3884
PVR_BO_ALLOC_FLAG_CPU_MAPPED,
3886
if (result != VK_SUCCESS)
3889
memcpy(pvr_bo->bo->map,
3891
ppp_state_words_count * sizeof(uint32_t));
3893
/* Write the VDM state update into the VDM control stream. */
3894
pvr_csb_emit (control_stream, VDMCTRL_PPP_STATE0, state0) {
3895
state0.word_count = ppp_state_words_count;
3896
state0.addrmsb = pvr_bo->vma->dev_addr;
3899
pvr_csb_emit (control_stream, VDMCTRL_PPP_STATE1, state1) {
3900
state1.addrlsb = pvr_bo->vma->dev_addr;
3903
if (emit_state->isp_dbsc &&
3904
cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
3905
pvr_finishme("Unimplemented path!!");
3908
state->emit_state_bits = 0;
3914
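/* Validate and emit any dirty PPP state for the current draw. Rebuilds the
 * ISP control and output-select words when the pipeline binding (or the
 * stencil, line width or userpass state) is dirty, refreshes the fragment
 * state pointers, depth bias/scissor, viewport and PPP control words, and
 * finally writes everything out via pvr_emit_ppp_state().
 */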
static VkResult
pvr_emit_dirty_ppp_state(struct pvr_cmd_buffer *const cmd_buffer)
{
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline;
   const bool dirty_stencil = state->dirty.compare_mask ||
                              state->dirty.write_mask || state->dirty.reference;
   VkResult result;

   if (!(dirty_stencil || state->dirty.depth_bias ||
         state->dirty.fragment_descriptors || state->dirty.line_width ||
         state->dirty.gfx_pipeline_binding || state->dirty.scissor ||
         state->dirty.userpass_spawn || state->dirty.viewport ||
         state->emit_state_bits)) {
      return VK_SUCCESS;
   }

   if (state->dirty.gfx_pipeline_binding) {
      struct pvr_cmd_struct(TA_STATE_ISPA) ispa;

      pvr_setup_output_select(cmd_buffer);
      pvr_setup_isp_faces_and_control(cmd_buffer, &ispa);
      pvr_setup_triangle_merging_flag(cmd_buffer, &ispa);
   } else if (dirty_stencil || state->dirty.line_width ||
              state->dirty.userpass_spawn) {
      pvr_setup_isp_faces_and_control(cmd_buffer, NULL);
   }

   if (!gfx_pipeline->raster_state.discard_enable &&
       state->dirty.fragment_descriptors &&
       gfx_pipeline->fragment_shader_state.bo) {
      pvr_setup_fragment_state_pointers(cmd_buffer);
   }

   pvr_setup_isp_depth_bias_scissor_state(cmd_buffer);

   if (state->dirty.viewport)
      pvr_setup_viewport(cmd_buffer);

   pvr_setup_ppp_control(cmd_buffer);

   if (gfx_pipeline->raster_state.cull_mode == VK_CULL_MODE_FRONT_AND_BACK) {
      /* FIXME: Port SetNegativeViewport(). */
   }

   result = pvr_emit_ppp_state(cmd_buffer);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}

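/* Push descriptors are not supported yet; always report them as not dirty,
 * so callers only re-validate descriptor state on a pipeline binding change.
 */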
static void
pvr_validate_push_descriptors(struct pvr_cmd_buffer *cmd_buffer,
                              bool *const push_descriptors_dirty_out)
{
   /* TODO: Implement this function, based on ValidatePushDescriptors. */
   pvr_finishme("Add support for push descriptors!");
   *push_descriptors_dirty_out = false;
}

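/* Work out the VDM CAM size and maximum vertex shader instance count for a
 * given vertex output size. All comparisons are in dwords per vertex vector:
 * (vs_output_size + 1 + raster_enable * 4) * 4. For example, a
 * vs_output_size of 4 with rasterization enabled gives (4 + 1 + 4) * 4 =
 * 36 dwords, which lands in the first (<= 32 * 4) bucket on the non-8XE
 * path.
 */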
static void
pvr_calculate_vertex_cam_size(const struct pvr_device_info *dev_info,
                              const uint32_t vs_output_size,
                              const bool raster_enable,
                              uint32_t *const cam_size_out,
                              uint32_t *const vs_max_instances_out)
{
   /* First work out the size of a vertex in the UVS and multiply by 4 for
    * the per-vertex-vector size in dwords.
    */
   const uint32_t uvs_vertex_vector_size_in_dwords =
      (vs_output_size + 1U + raster_enable * 4U) * 4U;
   const uint32_t vdm_cam_size =
      PVR_GET_FEATURE_VALUE(dev_info, vdm_cam_size, 32U);

   /* This is a proxy for 8XE. */
   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) &&
       vdm_cam_size < 96U) {
      /* Comparisons are based on size including scratch per vertex vector. */
      if (uvs_vertex_vector_size_in_dwords < (14U * 4U)) {
         *cam_size_out = MIN2(31U, vdm_cam_size - 1U);
         *vs_max_instances_out = 16U;
      } else if (uvs_vertex_vector_size_in_dwords < (20U * 4U)) {
         *cam_size_out = 15U;
         *vs_max_instances_out = 16U;
      } else if (uvs_vertex_vector_size_in_dwords < (28U * 4U)) {
         *cam_size_out = 11U;
         *vs_max_instances_out = 12U;
      } else if (uvs_vertex_vector_size_in_dwords < (44U * 4U)) {
         *cam_size_out = 7U;
         *vs_max_instances_out = 8U;
      } else if (PVR_HAS_FEATURE(dev_info,
                                 simple_internal_parameter_format_v2) ||
                 uvs_vertex_vector_size_in_dwords < (64U * 4U)) {
         *cam_size_out = 7U;
         *vs_max_instances_out = 4U;
      } else {
         *cam_size_out = 3U;
         *vs_max_instances_out = 2U;
      }
   } else {
      /* Comparisons are based on size including scratch per vertex vector. */
      if (uvs_vertex_vector_size_in_dwords <= (32U * 4U)) {
         /* output size <= 27 + 5 scratch. */
         *cam_size_out = MIN2(95U, vdm_cam_size - 1U);
         *vs_max_instances_out = 0U;
      } else if (uvs_vertex_vector_size_in_dwords <= 48U * 4U) {
         /* output size <= 43 + 5 scratch. */
         *cam_size_out = 63U;
         if (PVR_GET_FEATURE_VALUE(dev_info, uvs_vtx_entries, 144U) < 288U)
            *vs_max_instances_out = 16U;
         else
            *vs_max_instances_out = 0U;
      } else if (uvs_vertex_vector_size_in_dwords <= 64U * 4U) {
         /* output size <= 59 + 5 scratch. */
         *cam_size_out = 31U;
         if (PVR_GET_FEATURE_VALUE(dev_info, uvs_vtx_entries, 144U) < 288U)
            *vs_max_instances_out = 16U;
         else
            *vs_max_instances_out = 0U;
      } else {
         *cam_size_out = 15U;
         *vs_max_instances_out = 16U;
      }
   }
}

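/* Emit VDM_STATE0 through VDM_STATE5 for the currently bound graphics
 * pipeline. VDM_STATE0 is always written and carries presence bits for the
 * rest: the cut index (STATE1), the PDS attribute data address (STATE2) and
 * the PDS code/size words (STATE3-5) are only emitted when flagged.
 */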
static void
pvr_emit_dirty_vdm_state(const struct pvr_cmd_buffer *const cmd_buffer)
{
   /* FIXME: Assume all state is dirty for the moment. */
   struct pvr_device_info *const dev_info =
      &cmd_buffer->device->pdevice->dev_info;
   ASSERTED const uint32_t max_user_vertex_output_components =
      pvr_get_max_user_vertex_output_components(dev_info);
   struct pvr_cmd_struct(VDMCTRL_VDM_STATE0)
      header = { pvr_cmd_header(VDMCTRL_VDM_STATE0) };
   const struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline;
   struct pvr_csb *const csb = &state->current_sub_cmd->gfx.control_stream;
   uint32_t vs_output_size;
   uint32_t max_instances;
   uint32_t cam_size;

   assert(gfx_pipeline);

   /* CAM Calculations and HW state take vertex size aligned to DWORDS. */
   vs_output_size =
      DIV_ROUND_UP(gfx_pipeline->vertex_shader_state.vertex_output_size,
                   PVRX(VDMCTRL_VDM_STATE4_VS_OUTPUT_SIZE_UNIT_SIZE));

   assert(vs_output_size <= max_user_vertex_output_components);

   pvr_calculate_vertex_cam_size(dev_info,
                                 vs_output_size,
                                 true,
                                 &cam_size,
                                 &max_instances);

   pvr_csb_emit (csb, VDMCTRL_VDM_STATE0, state0) {
      state0.cam_size = cam_size;

      if (gfx_pipeline->input_asm_state.primitive_restart) {
         state0.cut_index_enable = true;
         state0.cut_index_present = true;
      }

      switch (gfx_pipeline->input_asm_state.topology) {
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
         state0.flatshade_control = PVRX(VDMCTRL_FLATSHADE_CONTROL_VERTEX_1);
         break;

      default:
         state0.flatshade_control = PVRX(VDMCTRL_FLATSHADE_CONTROL_VERTEX_0);
         break;
      }

      /* If we've bound a different vertex buffer, or this draw-call requires
       * a different PDS attrib data-section from the last draw call (changed
       * base_instance) then we need to specify a new data section. This is
       * also the case if we've switched pipeline or attrib program as the
       * data-section layout will be different.
       */
      state0.vs_data_addr_present =
         state->dirty.gfx_pipeline_binding || state->dirty.vertex_bindings ||
         state->dirty.draw_base_instance || state->dirty.draw_variant;

      /* Need to specify new PDS Attrib program if we've bound a different
       * pipeline or we needed a different PDS Attrib variant for this
       * draw-call.
       */
      state0.vs_other_present = state->dirty.gfx_pipeline_binding ||
                                state->dirty.draw_variant;

      /* UVB_SCRATCH_SELECT_ONE with no rasterization is only valid when
       * stream output is enabled. We use UVB_SCRATCH_SELECT_FIVE because
       * Vulkan doesn't support stream output and the vertex position is
       * always emitted to the UVB.
       */
      state0.uvs_scratch_size_select =
         PVRX(VDMCTRL_UVS_SCRATCH_SIZE_SELECT_FIVE);

      header = state0;
   }

   if (header.cut_index_present) {
      pvr_csb_emit (csb, VDMCTRL_VDM_STATE1, state1) {
         switch (state->index_buffer_binding.type) {
         case VK_INDEX_TYPE_UINT32:
            /* FIXME: Defines for these? These seem to come from the Vulkan
             * spec. for VkPipelineInputAssemblyStateCreateInfo
             * primitiveRestartEnable.
             */
            state1.cut_index = 0xFFFFFFFF;
            break;

         case VK_INDEX_TYPE_UINT16:
            state1.cut_index = 0xFFFF;
            break;

         default:
            unreachable("Invalid index type");
         }
      }
   }

   if (header.vs_data_addr_present) {
      pvr_csb_emit (csb, VDMCTRL_VDM_STATE2, state2) {
         state2.vs_pds_data_base_addr.addr = state->pds_vertex_attrib_offset;
      }
   }

   if (header.vs_other_present) {
      const uint32_t usc_unified_store_size_in_bytes =
         gfx_pipeline->vertex_shader_state.vertex_input_size << 2;

      pvr_csb_emit (csb, VDMCTRL_VDM_STATE3, state3) {
         state3.vs_pds_code_base_addr.addr = state->pds_shader.code_offset;
      }

      pvr_csb_emit (csb, VDMCTRL_VDM_STATE4, state4) {
         state4.vs_output_size = vs_output_size;
      }

      pvr_csb_emit (csb, VDMCTRL_VDM_STATE5, state5) {
         state5.vs_max_instances = max_instances;
         state5.vs_usc_common_size = 0U;
         state5.vs_usc_unified_size = DIV_ROUND_UP(
            usc_unified_store_size_in_bytes,
            PVRX(VDMCTRL_VDM_STATE5_VS_USC_UNIFIED_SIZE_UNIT_SIZE));
         state5.vs_pds_temp_size =
            DIV_ROUND_UP(state->pds_shader.info->temps_required << 2,
                         PVRX(VDMCTRL_VDM_STATE5_VS_PDS_TEMP_SIZE_UNIT_SIZE));
         state5.vs_pds_data_size =
            DIV_ROUND_UP(state->pds_shader.info->data_size_in_dwords << 2,
                         PVRX(VDMCTRL_VDM_STATE5_VS_PDS_DATA_SIZE_UNIT_SIZE));
      }
   }
}

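/* Per-draw validation: starts (or continues) a graphics sub-command, works
 * out the depth/stencil usage and modification flags for the render job,
 * (re)selects the PDS vertex attribute program, re-validates descriptor
 * state as needed, and finally emits the dirty PDS, PPP and VDM state.
 */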
static VkResult pvr_validate_draw_state(struct pvr_cmd_buffer *cmd_buffer)
{
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline;
   const struct pvr_pipeline_stage_state *const fragment_state =
      &gfx_pipeline->fragment_shader_state.stage_state;
   struct pvr_sub_cmd *sub_cmd;
   bool fstencil_writemask_zero;
   bool bstencil_writemask_zero;
   bool push_descriptors_dirty;
   bool fstencil_keep;
   bool bstencil_keep;
   VkResult result;

   pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS);

   sub_cmd = state->current_sub_cmd;
   sub_cmd->gfx.empty_cmd = false;

   /* Determine pipeline depth/stencil usage. If a pipeline uses depth or
    * stencil testing, those attachments are using their loaded values, and
    * the loadOps cannot be optimized out.
    */
   /* Pipeline uses depth testing. */
   if (sub_cmd->gfx.depth_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED &&
       gfx_pipeline->depth_compare_op != VK_COMPARE_OP_ALWAYS) {
      sub_cmd->gfx.depth_usage = PVR_DEPTH_STENCIL_USAGE_NEEDED;
   }

   /* Pipeline uses stencil testing. */
   if (sub_cmd->gfx.stencil_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED &&
       (gfx_pipeline->stencil_front.compare_op != VK_COMPARE_OP_ALWAYS ||
        gfx_pipeline->stencil_back.compare_op != VK_COMPARE_OP_ALWAYS)) {
      sub_cmd->gfx.stencil_usage = PVR_DEPTH_STENCIL_USAGE_NEEDED;
   }

   if (PVR_HAS_FEATURE(&cmd_buffer->device->pdevice->dev_info,
                       compute_overlap)) {
      uint32_t coefficient_size =
         DIV_ROUND_UP(fragment_state->coefficient_size,
                      PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE));

      if (coefficient_size >
          PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_MAX_SIZE))
         sub_cmd->gfx.disable_compute_overlap = true;
   }

   sub_cmd->gfx.frag_uses_atomic_ops |= fragment_state->uses_atomic_ops;
   sub_cmd->gfx.frag_has_side_effects |= fragment_state->has_side_effects;
   sub_cmd->gfx.frag_uses_texture_rw |= fragment_state->uses_texture_rw;
   sub_cmd->gfx.vertex_uses_texture_rw |=
      gfx_pipeline->vertex_shader_state.stage_state.uses_texture_rw;

   fstencil_keep =
      (gfx_pipeline->stencil_front.fail_op == VK_STENCIL_OP_KEEP) &&
      (gfx_pipeline->stencil_front.pass_op == VK_STENCIL_OP_KEEP);
   bstencil_keep = (gfx_pipeline->stencil_back.fail_op == VK_STENCIL_OP_KEEP) &&
                   (gfx_pipeline->stencil_back.pass_op == VK_STENCIL_OP_KEEP);
   fstencil_writemask_zero = (state->dynamic.common.write_mask.front == 0);
   bstencil_writemask_zero = (state->dynamic.common.write_mask.back == 0);

   /* Set stencil modified flag if:
    * - Neither front nor back-facing stencil has a fail_op/pass_op of KEEP.
    * - Neither front nor back-facing stencil has a write_mask of zero.
    */
   if (!(fstencil_keep && bstencil_keep) &&
       !(fstencil_writemask_zero && bstencil_writemask_zero)) {
      sub_cmd->gfx.modifies_stencil = true;
   }

   /* Set depth modified flag if depth write is enabled. */
   if (!gfx_pipeline->depth_write_disable)
      sub_cmd->gfx.modifies_depth = true;

   /* If either the data or code changes for pds vertex attribs, regenerate the
    * data section.
    */
   if (state->dirty.vertex_bindings || state->dirty.gfx_pipeline_binding ||
       state->dirty.draw_variant || state->dirty.draw_base_instance) {
      enum pvr_pds_vertex_attrib_program_type prog_type;
      const struct pvr_pds_attrib_program *program;

      if (state->draw_state.draw_indirect)
         prog_type = PVR_PDS_VERTEX_ATTRIB_PROGRAM_DRAW_INDIRECT;
      else if (state->draw_state.base_instance)
         prog_type = PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASE_INSTANCE;
      else
         prog_type = PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASIC;

      program =
         &gfx_pipeline->vertex_shader_state.pds_attrib_programs[prog_type];
      state->pds_shader.info = &program->info;
      state->pds_shader.code_offset = program->program.code_offset;

      state->max_shared_regs =
         MAX2(state->max_shared_regs, pvr_calc_shared_regs_count(gfx_pipeline));

      pvr_setup_vertex_buffers(cmd_buffer, gfx_pipeline);
   }

   /* TODO: Check for dirty push constants */

   pvr_validate_push_descriptors(cmd_buffer, &push_descriptors_dirty);

   state->dirty.vertex_descriptors = push_descriptors_dirty ||
                                     state->dirty.gfx_pipeline_binding;
   state->dirty.fragment_descriptors = state->dirty.vertex_descriptors;

   if (state->dirty.fragment_descriptors) {
      result = pvr_setup_descriptor_mappings(
         cmd_buffer,
         PVR_STAGE_ALLOCATION_FRAGMENT,
         &state->gfx_pipeline->fragment_shader_state.uniform_state,
         &state->pds_fragment_uniform_data_offset);
      if (result != VK_SUCCESS) {
         mesa_loge("Could not setup fragment descriptor mappings.");
         return result;
      }
   }

   if (state->dirty.vertex_descriptors) {
      uint32_t pds_vertex_uniform_data_offset;

      result = pvr_setup_descriptor_mappings(
         cmd_buffer,
         PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY,
         &state->gfx_pipeline->vertex_shader_state.uniform_state,
         &pds_vertex_uniform_data_offset);
      if (result != VK_SUCCESS) {
         mesa_loge("Could not setup vertex descriptor mappings.");
         return result;
      }

      pvr_emit_dirty_pds_state(cmd_buffer, pds_vertex_uniform_data_offset);
   }

   pvr_emit_dirty_ppp_state(cmd_buffer);
   pvr_emit_dirty_vdm_state(cmd_buffer);

   state->dirty.gfx_desc_dirty = false;
   state->dirty.blend_constants = false;
   state->dirty.compare_mask = false;
   state->dirty.depth_bias = false;
   state->dirty.draw_base_instance = false;
   state->dirty.draw_variant = false;
   state->dirty.fragment_descriptors = false;
   state->dirty.line_width = false;
   state->dirty.gfx_pipeline_binding = false;
   state->dirty.reference = false;
   state->dirty.scissor = false;
   state->dirty.userpass_spawn = false;
   state->dirty.vertex_bindings = false;
   state->dirty.viewport = false;
   state->dirty.write_mask = false;

   return VK_SUCCESS;
}

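/* One-to-one mapping from VkPrimitiveTopology onto the equivalent VDM
 * control stream value; an undefined topology is a driver bug.
 */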
static uint32_t pvr_get_hw_primitive_topology(VkPrimitiveTopology topology)
{
   switch (topology) {
   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_POINT_LIST);
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_LIST);
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_STRIP);
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_LIST);
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_STRIP);
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_FAN);
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ);
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ);
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_LIST_ADJ);
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_STRIP_ADJ);
   case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_PATCH_LIST);
   default:
      unreachable("Undefined primitive topology");
   }
}

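/* Emit the VDM index list block for a draw. INDEX_LIST0 is always written
 * and carries presence bits for the optional words that follow: index base
 * address (LIST1), index/vertex count (LIST2), instance count minus one
 * (LIST3) and the vertex offset (LIST4).
 */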
static void pvr_emit_vdm_index_list(struct pvr_cmd_buffer *cmd_buffer,
                                    VkPrimitiveTopology topology,
                                    uint32_t first_vertex,
                                    uint32_t vertex_count,
                                    uint32_t first_index,
                                    uint32_t index_count,
                                    uint32_t instance_count)
{
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   struct pvr_csb *const csb = &state->current_sub_cmd->gfx.control_stream;
   struct pvr_cmd_struct(VDMCTRL_INDEX_LIST0)
      list_hdr = { pvr_cmd_header(VDMCTRL_INDEX_LIST0) };
   pvr_dev_addr_t index_buffer_addr = { 0 };
   unsigned int index_stride = 0;

   pvr_csb_emit (csb, VDMCTRL_INDEX_LIST0, list0) {
      list0.primitive_topology = pvr_get_hw_primitive_topology(topology);

      /* First instance is not handled in the VDM state, it's implemented as
       * an addition in the PDS vertex fetch.
       */
      list0.index_count_present = true;

      if (instance_count > 1)
         list0.index_instance_count_present = true;

      if (first_vertex != 0)
         list0.index_offset_present = true;

      if (state->draw_state.draw_indexed) {
         struct pvr_buffer *buffer = state->index_buffer_binding.buffer;

         switch (state->index_buffer_binding.type) {
         case VK_INDEX_TYPE_UINT32:
            list0.index_size = PVRX(VDMCTRL_INDEX_SIZE_B32);
            index_stride = 4;
            break;

         case VK_INDEX_TYPE_UINT16:
            list0.index_size = PVRX(VDMCTRL_INDEX_SIZE_B16);
            index_stride = 2;
            break;

         default:
            unreachable("Invalid index type");
         }

         list0.index_addr_present = true;
         index_buffer_addr.addr = buffer->dev_addr.addr;
         index_buffer_addr.addr += state->index_buffer_binding.offset;
         index_buffer_addr.addr += first_index * index_stride;
         list0.index_base_addrmsb = index_buffer_addr;
      }

      list_hdr = list0;
   }

   if (list_hdr.index_addr_present) {
      pvr_csb_emit (csb, VDMCTRL_INDEX_LIST1, list1) {
         list1.index_base_addrlsb = index_buffer_addr;
      }
   }

   if (list_hdr.index_count_present) {
      pvr_csb_emit (csb, VDMCTRL_INDEX_LIST2, list2) {
         list2.index_count = vertex_count | index_count;
      }
   }

   if (list_hdr.index_instance_count_present) {
      pvr_csb_emit (csb, VDMCTRL_INDEX_LIST3, list3) {
         list3.instance_count = instance_count - 1;
      }
   }

   if (list_hdr.index_offset_present) {
      pvr_csb_emit (csb, VDMCTRL_INDEX_LIST4, list4) {
         list4.index_offset = first_vertex;
      }
   }

   /* TODO: See if we need list_words[5-9]. */
}

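/* Note that firstInstance is deliberately not baked into the VDM state; it
 * is applied as an addition in the PDS vertex fetch instead (see
 * pvr_emit_vdm_index_list()).
 */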
void pvr_CmdDrawIndexed(VkCommandBuffer commandBuffer,
                        uint32_t indexCount,
                        uint32_t instanceCount,
                        uint32_t firstIndex,
                        int32_t vertexOffset,
                        uint32_t firstInstance)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   struct pvr_cmd_buffer_draw_state draw_state;
   VkResult result;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   draw_state.base_vertex = vertexOffset;
   draw_state.base_instance = firstInstance;
   draw_state.draw_indirect = false;
   draw_state.draw_indexed = true;
   pvr_update_draw_state(&cmd_buffer->state, &draw_state);

   result = pvr_validate_draw_state(cmd_buffer);
   if (result != VK_SUCCESS)
      return;

   /* Write the VDM control stream for the primitive. */
   pvr_emit_vdm_index_list(cmd_buffer,
                           state->gfx_pipeline->input_asm_state.topology,
                           vertexOffset,
                           0U,
                           firstIndex,
                           indexCount,
                           instanceCount);
}

void pvr_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
                                VkBuffer _buffer,
                                VkDeviceSize offset,
                                uint32_t drawCount,
                                uint32_t stride)
{
   assert(!"Unimplemented");
}

void pvr_CmdDrawIndirect(VkCommandBuffer commandBuffer,
                         VkBuffer _buffer,
                         VkDeviceSize offset,
                         uint32_t drawCount,
                         uint32_t stride)
{
   assert(!"Unimplemented");
}

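/* Attachment resolves are not implemented yet; for now this only makes sure
 * the in-flight sub-command is ended.
 */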
static VkResult
pvr_resolve_unemitted_resolve_attachments(struct pvr_cmd_buffer *cmd_buffer)
{
   pvr_finishme("Add attachment resolve support!");
   return pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
}

void pvr_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
                           const VkSubpassEndInfoKHR *pSubpassEndInfo)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   struct pvr_image_view **attachments;
   VkClearValue *clear_values;
   VkResult result;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   assert(state->render_pass_info.pass);
   assert(state->render_pass_info.framebuffer);

   /* TODO: Investigate why pvr_cmd_buffer_end_sub_cmd/EndSubCommand is called
    * twice in this path, one here and one from
    * pvr_resolve_unemitted_resolve_attachments.
    */
   result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
   if (result != VK_SUCCESS)
      return;

   result = pvr_resolve_unemitted_resolve_attachments(cmd_buffer);
   if (result != VK_SUCCESS)
      return;

   /* Save the required fields before clearing render_pass_info struct. */
   attachments = state->render_pass_info.attachments;
   clear_values = state->render_pass_info.clear_values;

   memset(&state->render_pass_info, 0, sizeof(state->render_pass_info));

   state->render_pass_info.attachments = attachments;
   state->render_pass_info.clear_values = clear_values;
}

void pvr_CmdExecuteCommands(VkCommandBuffer commandBuffer,
                            uint32_t commandBufferCount,
                            const VkCommandBuffer *pCommandBuffers)
{
   assert(!"Unimplemented");
}

void pvr_CmdNextSubpass2(VkCommandBuffer commandBuffer,
                         const VkSubpassBeginInfo *pSubpassBeginInfo,
                         const VkSubpassEndInfo *pSubpassEndInfo)
{
   assert(!"Unimplemented");
}

void pvr_CmdPipelineBarrier2KHR(VkCommandBuffer commandBuffer,
                                const VkDependencyInfoKHR *pDependencyInfo)
{
   assert(!"Unimplemented");
}

void pvr_CmdResetEvent2KHR(VkCommandBuffer commandBuffer,
                           VkEvent _event,
                           VkPipelineStageFlags2KHR stageMask)
{
   assert(!"Unimplemented");
}

void pvr_CmdSetEvent2KHR(VkCommandBuffer commandBuffer,
                         VkEvent _event,
                         const VkDependencyInfoKHR *pDependencyInfo)
{
   assert(!"Unimplemented");
}

void pvr_CmdWaitEvents2KHR(VkCommandBuffer commandBuffer,
                           uint32_t eventCount,
                           const VkEvent *pEvents,
                           const VkDependencyInfoKHR *pDependencyInfos)
{
   assert(!"Unimplemented");
}

void pvr_CmdWriteTimestamp2KHR(VkCommandBuffer commandBuffer,
                               VkPipelineStageFlags2KHR stage,
                               VkQueryPool queryPool,
                               uint32_t query)
{
   unreachable("Timestamp queries are not supported.");
}

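/* vkEndCommandBuffer: ends any in-flight sub-command and, if recording
 * succeeded, transitions the command buffer to the executable state.
 */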
VkResult pvr_EndCommandBuffer(VkCommandBuffer commandBuffer)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   VkResult result;

   /* From the Vulkan 1.0 spec:
    *
    *    CommandBuffer must be in the recording state.
    */
   assert(cmd_buffer->status == PVR_CMD_BUFFER_STATUS_RECORDING);

   if (state->status != VK_SUCCESS)
      return state->status;

   result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
   if (result != VK_SUCCESS)
      return result;

   cmd_buffer->status = PVR_CMD_BUFFER_STATUS_EXECUTABLE;

   return VK_SUCCESS;
}