~mmach/netext73/mesa-haswell

« back to all changes in this revision

Viewing changes to src/imagination/vulkan/pvr_cmd_buffer.c

  • Committer: mmach
  • Date: 2022-09-22 19:56:13 UTC
  • Revision ID: netbit73@gmail.com-20220922195613-wtik9mmy20tmor0i
2022-09-22 21:17:09

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*
2
 
 * Copyright © 2022 Imagination Technologies Ltd.
3
 
 *
4
 
 * Permission is hereby granted, free of charge, to any person obtaining a copy
5
 
 * of this software and associated documentation files (the "Software"), to deal
6
 
 * in the Software without restriction, including without limitation the rights
7
 
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
 
 * copies of the Software, and to permit persons to whom the Software is
9
 
 * furnished to do so, subject to the following conditions:
10
 
 *
11
 
 * The above copyright notice and this permission notice (including the next
12
 
 * paragraph) shall be included in all copies or substantial portions of the
13
 
 * Software.
14
 
 *
15
 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
 
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
 
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
 
 * SOFTWARE.
22
 
 */
23
 
 
24
 
#include <assert.h>
25
 
#include <limits.h>
26
 
#include <stdbool.h>
27
 
#include <stddef.h>
28
 
#include <stdint.h>
29
 
#include <string.h>
30
 
#include <vulkan/vulkan.h>
31
 
 
32
 
#include "c11_compat.h"
33
 
#include "hwdef/rogue_hw_defs.h"
34
 
#include "hwdef/rogue_hw_utils.h"
35
 
#include "pvr_bo.h"
36
 
#include "pvr_csb.h"
37
 
#include "pvr_device_info.h"
38
 
#include "pvr_end_of_tile.h"
39
 
#include "pvr_formats.h"
40
 
#include "pvr_hw_pass.h"
41
 
#include "pvr_job_common.h"
42
 
#include "pvr_job_render.h"
43
 
#include "pvr_limits.h"
44
 
#include "pvr_pds.h"
45
 
#include "pvr_private.h"
46
 
#include "pvr_winsys.h"
47
 
#include "util/compiler.h"
48
 
#include "util/list.h"
49
 
#include "util/macros.h"
50
 
#include "util/u_dynarray.h"
51
 
#include "util/u_pack_color.h"
52
 
#include "vk_alloc.h"
53
 
#include "vk_command_buffer.h"
54
 
#include "vk_command_pool.h"
55
 
#include "vk_format.h"
56
 
#include "vk_log.h"
57
 
#include "vk_object.h"
58
 
#include "vk_util.h"
59
 
 
60
 
/* Structure used to pass data into pvr_compute_generate_control_stream()
61
 
 * function.
62
 
 */
63
 
struct pvr_compute_kernel_info {
64
 
   pvr_dev_addr_t indirect_buffer_addr;
65
 
   bool global_offsets_present;
66
 
   uint32_t usc_common_size;
67
 
   uint32_t usc_unified_size;
68
 
   uint32_t pds_temp_size;
69
 
   uint32_t pds_data_size;
70
 
   bool usc_target_any;
71
 
   bool is_fence;
72
 
   uint32_t pds_data_offset;
73
 
   uint32_t pds_code_offset;
74
 
   enum PVRX(CDMCTRL_SD_TYPE) sd_type;
75
 
   bool usc_common_shared;
76
 
   uint32_t local_size[3];
77
 
   uint32_t global_size[3];
78
 
   uint32_t max_instances;
79
 
};
80
 
 
81
 
static void pvr_cmd_buffer_free_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
82
 
                                        struct pvr_sub_cmd *sub_cmd)
83
 
{
84
 
   switch (sub_cmd->type) {
85
 
   case PVR_SUB_CMD_TYPE_GRAPHICS:
86
 
      pvr_csb_finish(&sub_cmd->gfx.control_stream);
87
 
      pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.depth_bias_bo);
88
 
      pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.scissor_bo);
89
 
      break;
90
 
 
91
 
   case PVR_SUB_CMD_TYPE_COMPUTE:
92
 
      pvr_csb_finish(&sub_cmd->compute.control_stream);
93
 
      break;
94
 
 
95
 
   case PVR_SUB_CMD_TYPE_TRANSFER:
96
 
      list_for_each_entry_safe (struct pvr_transfer_cmd,
97
 
                                transfer_cmd,
98
 
                                &sub_cmd->transfer.transfer_cmds,
99
 
                                link) {
100
 
         list_del(&transfer_cmd->link);
101
 
         vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
102
 
      }
103
 
      break;
104
 
 
105
 
   default:
106
 
      pvr_finishme("Unsupported sub-command type %d", sub_cmd->type);
107
 
      break;
108
 
   }
109
 
 
110
 
   list_del(&sub_cmd->link);
111
 
   vk_free(&cmd_buffer->vk.pool->alloc, sub_cmd);
112
 
}
113
 
 
114
 
static void pvr_cmd_buffer_free_sub_cmds(struct pvr_cmd_buffer *cmd_buffer)
115
 
{
116
 
   list_for_each_entry_safe (struct pvr_sub_cmd,
117
 
                             sub_cmd,
118
 
                             &cmd_buffer->sub_cmds,
119
 
                             link) {
120
 
      pvr_cmd_buffer_free_sub_cmd(cmd_buffer, sub_cmd);
121
 
   }
122
 
}
123
 
 
124
 
static void pvr_cmd_buffer_destroy(struct vk_command_buffer *vk_cmd_buffer)
125
 
{
126
 
   struct pvr_cmd_buffer *cmd_buffer =
127
 
      container_of(vk_cmd_buffer, struct pvr_cmd_buffer, vk);
128
 
 
129
 
   vk_free(&cmd_buffer->vk.pool->alloc,
130
 
           cmd_buffer->state.render_pass_info.attachments);
131
 
   vk_free(&cmd_buffer->vk.pool->alloc,
132
 
           cmd_buffer->state.render_pass_info.clear_values);
133
 
 
134
 
   pvr_cmd_buffer_free_sub_cmds(cmd_buffer);
135
 
 
136
 
   list_for_each_entry_safe (struct pvr_bo, bo, &cmd_buffer->bo_list, link) {
137
 
      list_del(&bo->link);
138
 
      pvr_bo_free(cmd_buffer->device, bo);
139
 
   }
140
 
 
141
 
   util_dynarray_fini(&cmd_buffer->scissor_array);
142
 
   util_dynarray_fini(&cmd_buffer->depth_bias_array);
143
 
 
144
 
   vk_command_buffer_finish(&cmd_buffer->vk);
145
 
   vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer);
146
 
}
147
 
 
148
 
static VkResult pvr_cmd_buffer_create(struct pvr_device *device,
149
 
                                      struct vk_command_pool *pool,
150
 
                                      VkCommandBufferLevel level,
151
 
                                      VkCommandBuffer *pCommandBuffer)
152
 
{
153
 
   struct pvr_cmd_buffer *cmd_buffer;
154
 
   VkResult result;
155
 
 
156
 
   cmd_buffer = vk_zalloc(&pool->alloc,
157
 
                          sizeof(*cmd_buffer),
158
 
                          8U,
159
 
                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
160
 
   if (!cmd_buffer)
161
 
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
162
 
 
163
 
   result = vk_command_buffer_init(&cmd_buffer->vk, pool, level);
164
 
   if (result != VK_SUCCESS) {
165
 
      vk_free(&pool->alloc, cmd_buffer);
166
 
      return result;
167
 
   }
168
 
 
169
 
   cmd_buffer->vk.destroy = pvr_cmd_buffer_destroy;
170
 
   cmd_buffer->device = device;
171
 
 
172
 
   util_dynarray_init(&cmd_buffer->depth_bias_array, NULL);
173
 
   util_dynarray_init(&cmd_buffer->scissor_array, NULL);
174
 
 
175
 
   cmd_buffer->state.status = VK_SUCCESS;
176
 
   cmd_buffer->status = PVR_CMD_BUFFER_STATUS_INITIAL;
177
 
 
178
 
   list_inithead(&cmd_buffer->sub_cmds);
179
 
   list_inithead(&cmd_buffer->bo_list);
180
 
 
181
 
   *pCommandBuffer = pvr_cmd_buffer_to_handle(cmd_buffer);
182
 
 
183
 
   return VK_SUCCESS;
184
 
}
185
 
 
186
 
VkResult
187
 
pvr_AllocateCommandBuffers(VkDevice _device,
188
 
                           const VkCommandBufferAllocateInfo *pAllocateInfo,
189
 
                           VkCommandBuffer *pCommandBuffers)
190
 
{
191
 
   VK_FROM_HANDLE(vk_command_pool, pool, pAllocateInfo->commandPool);
192
 
   PVR_FROM_HANDLE(pvr_device, device, _device);
193
 
   VkResult result = VK_SUCCESS;
194
 
   uint32_t i;
195
 
 
196
 
   for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
197
 
      result = pvr_cmd_buffer_create(device,
198
 
                                     pool,
199
 
                                     pAllocateInfo->level,
200
 
                                     &pCommandBuffers[i]);
201
 
      if (result != VK_SUCCESS)
202
 
         break;
203
 
   }
204
 
 
205
 
   if (result != VK_SUCCESS) {
206
 
      while (i--) {
207
 
         VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, pCommandBuffers[i]);
208
 
         pvr_cmd_buffer_destroy(cmd_buffer);
209
 
      }
210
 
 
211
 
      for (i = 0; i < pAllocateInfo->commandBufferCount; i++)
212
 
         pCommandBuffers[i] = VK_NULL_HANDLE;
213
 
   }
214
 
 
215
 
   return result;
216
 
}
217
 
 
218
 
static void pvr_cmd_buffer_update_barriers(struct pvr_cmd_buffer *cmd_buffer,
219
 
                                           enum pvr_sub_cmd_type type)
220
 
{
221
 
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
222
 
   uint32_t barriers;
223
 
 
224
 
   switch (type) {
225
 
   case PVR_SUB_CMD_TYPE_GRAPHICS:
226
 
      barriers = PVR_PIPELINE_STAGE_GEOM_BIT | PVR_PIPELINE_STAGE_FRAG_BIT;
227
 
      break;
228
 
 
229
 
   case PVR_SUB_CMD_TYPE_COMPUTE:
230
 
      barriers = PVR_PIPELINE_STAGE_COMPUTE_BIT;
231
 
      break;
232
 
 
233
 
   case PVR_SUB_CMD_TYPE_TRANSFER:
234
 
      barriers = PVR_PIPELINE_STAGE_TRANSFER_BIT;
235
 
      break;
236
 
 
237
 
   default:
238
 
      barriers = 0;
239
 
      pvr_finishme("Unsupported sub-command type %d", type);
240
 
      break;
241
 
   }
242
 
 
243
 
   for (uint32_t i = 0; i < ARRAY_SIZE(state->barriers_needed); i++)
244
 
      state->barriers_needed[i] |= barriers;
245
 
}
246
 
 
247
 
static VkResult pvr_cmd_buffer_upload_tables(struct pvr_device *device,
248
 
                                             struct pvr_cmd_buffer *cmd_buffer)
249
 
{
250
 
   struct pvr_sub_cmd *sub_cmd = cmd_buffer->state.current_sub_cmd;
251
 
   const uint32_t cache_line_size =
252
 
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
253
 
   VkResult result;
254
 
 
255
 
   assert(!sub_cmd->gfx.depth_bias_bo && !sub_cmd->gfx.scissor_bo);
256
 
 
257
 
   if (cmd_buffer->depth_bias_array.size > 0) {
258
 
      result =
259
 
         pvr_gpu_upload(device,
260
 
                        device->heaps.general_heap,
261
 
                        util_dynarray_begin(&cmd_buffer->depth_bias_array),
262
 
                        cmd_buffer->depth_bias_array.size,
263
 
                        cache_line_size,
264
 
                        &sub_cmd->gfx.depth_bias_bo);
265
 
      if (result != VK_SUCCESS)
266
 
         return result;
267
 
   }
268
 
 
269
 
   if (cmd_buffer->scissor_array.size > 0) {
270
 
      result = pvr_gpu_upload(device,
271
 
                              device->heaps.general_heap,
272
 
                              util_dynarray_begin(&cmd_buffer->scissor_array),
273
 
                              cmd_buffer->scissor_array.size,
274
 
                              cache_line_size,
275
 
                              &sub_cmd->gfx.scissor_bo);
276
 
      if (result != VK_SUCCESS)
277
 
         goto err_free_depth_bias_bo;
278
 
   }
279
 
 
280
 
   util_dynarray_clear(&cmd_buffer->depth_bias_array);
281
 
   util_dynarray_clear(&cmd_buffer->scissor_array);
282
 
 
283
 
   return VK_SUCCESS;
284
 
 
285
 
err_free_depth_bias_bo:
286
 
   pvr_bo_free(device, sub_cmd->gfx.depth_bias_bo);
287
 
   sub_cmd->gfx.depth_bias_bo = NULL;
288
 
 
289
 
   return result;
290
 
}
291
 
 
292
 
static VkResult pvr_cmd_buffer_emit_ppp_state(struct pvr_cmd_buffer *cmd_buffer)
293
 
{
294
 
   struct pvr_sub_cmd *sub_cmd = cmd_buffer->state.current_sub_cmd;
295
 
   struct pvr_framebuffer *framebuffer =
296
 
      cmd_buffer->state.render_pass_info.framebuffer;
297
 
 
298
 
   pvr_csb_emit (&sub_cmd->gfx.control_stream, VDMCTRL_PPP_STATE0, state0) {
299
 
      state0.addrmsb = framebuffer->ppp_state_bo->vma->dev_addr;
300
 
      state0.word_count = framebuffer->ppp_state_size;
301
 
   }
302
 
 
303
 
   pvr_csb_emit (&sub_cmd->gfx.control_stream, VDMCTRL_PPP_STATE1, state1) {
304
 
      state1.addrlsb = framebuffer->ppp_state_bo->vma->dev_addr;
305
 
   }
306
 
 
307
 
   return VK_SUCCESS;
308
 
}
309
 
 
310
 
static VkResult
311
 
pvr_cmd_buffer_upload_general(struct pvr_cmd_buffer *const cmd_buffer,
312
 
                              const void *const data,
313
 
                              const size_t size,
314
 
                              struct pvr_bo **const pvr_bo_out)
315
 
{
316
 
   struct pvr_device *const device = cmd_buffer->device;
317
 
   const uint32_t cache_line_size =
318
 
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
319
 
   struct pvr_bo *pvr_bo;
320
 
   VkResult result;
321
 
 
322
 
   result = pvr_gpu_upload(device,
323
 
                           device->heaps.general_heap,
324
 
                           data,
325
 
                           size,
326
 
                           cache_line_size,
327
 
                           &pvr_bo);
328
 
   if (result != VK_SUCCESS)
329
 
      return result;
330
 
 
331
 
   list_add(&pvr_bo->link, &cmd_buffer->bo_list);
332
 
 
333
 
   *pvr_bo_out = pvr_bo;
334
 
 
335
 
   return VK_SUCCESS;
336
 
}
337
 
 
338
 
static VkResult
339
 
pvr_cmd_buffer_upload_usc(struct pvr_cmd_buffer *const cmd_buffer,
340
 
                          const void *const code,
341
 
                          const size_t code_size,
342
 
                          uint64_t code_alignment,
343
 
                          struct pvr_bo **const pvr_bo_out)
344
 
{
345
 
   struct pvr_device *const device = cmd_buffer->device;
346
 
   const uint32_t cache_line_size =
347
 
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
348
 
   struct pvr_bo *pvr_bo;
349
 
   VkResult result;
350
 
 
351
 
   code_alignment = MAX2(code_alignment, cache_line_size);
352
 
 
353
 
   result =
354
 
      pvr_gpu_upload_usc(device, code, code_size, code_alignment, &pvr_bo);
355
 
   if (result != VK_SUCCESS)
356
 
      return result;
357
 
 
358
 
   list_add(&pvr_bo->link, &cmd_buffer->bo_list);
359
 
 
360
 
   *pvr_bo_out = pvr_bo;
361
 
 
362
 
   return VK_SUCCESS;
363
 
}
364
 
 
365
 
static VkResult
366
 
pvr_cmd_buffer_upload_pds(struct pvr_cmd_buffer *const cmd_buffer,
367
 
                          const uint32_t *data,
368
 
                          uint32_t data_size_dwords,
369
 
                          uint32_t data_alignment,
370
 
                          const uint32_t *code,
371
 
                          uint32_t code_size_dwords,
372
 
                          uint32_t code_alignment,
373
 
                          uint64_t min_alignment,
374
 
                          struct pvr_pds_upload *const pds_upload_out)
375
 
{
376
 
   struct pvr_device *const device = cmd_buffer->device;
377
 
   VkResult result;
378
 
 
379
 
   result = pvr_gpu_upload_pds(device,
380
 
                               data,
381
 
                               data_size_dwords,
382
 
                               data_alignment,
383
 
                               code,
384
 
                               code_size_dwords,
385
 
                               code_alignment,
386
 
                               min_alignment,
387
 
                               pds_upload_out);
388
 
   if (result != VK_SUCCESS)
389
 
      return result;
390
 
 
391
 
   list_add(&pds_upload_out->pvr_bo->link, &cmd_buffer->bo_list);
392
 
 
393
 
   return VK_SUCCESS;
394
 
}
395
 
 
396
 
static inline VkResult
397
 
pvr_cmd_buffer_upload_pds_data(struct pvr_cmd_buffer *const cmd_buffer,
398
 
                               const uint32_t *data,
399
 
                               uint32_t data_size_dwords,
400
 
                               uint32_t data_alignment,
401
 
                               struct pvr_pds_upload *const pds_upload_out)
402
 
{
403
 
   return pvr_cmd_buffer_upload_pds(cmd_buffer,
404
 
                                    data,
405
 
                                    data_size_dwords,
406
 
                                    data_alignment,
407
 
                                    NULL,
408
 
                                    0,
409
 
                                    0,
410
 
                                    data_alignment,
411
 
                                    pds_upload_out);
412
 
}
413
 
 
414
 
static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
415
 
   struct pvr_cmd_buffer *const cmd_buffer,
416
 
   const uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS],
417
 
   struct pvr_pds_upload *const pds_upload_out)
418
 
{
419
 
   struct pvr_pds_event_program pixel_event_program = {
420
 
      /* No data to DMA, just a DOUTU needed. */
421
 
      .num_emit_word_pairs = 0,
422
 
   };
423
 
   const uint32_t staging_buffer_size =
424
 
      cmd_buffer->device->pixel_event_data_size_in_dwords * sizeof(uint32_t);
425
 
   const VkAllocationCallbacks *const allocator = &cmd_buffer->vk.pool->alloc;
426
 
   struct pvr_device *const device = cmd_buffer->device;
427
 
   /* FIXME: This should come from the compiler for the USC pixel program. */
428
 
   const uint32_t usc_temp_count = 0;
429
 
   struct pvr_bo *usc_eot_program;
430
 
   uint8_t *usc_eot_program_ptr;
431
 
   uint32_t *staging_buffer;
432
 
   VkResult result;
433
 
 
434
 
   result = pvr_cmd_buffer_upload_usc(cmd_buffer,
435
 
                                      pvr_end_of_tile_program,
436
 
                                      sizeof(pvr_end_of_tile_program),
437
 
                                      4,
438
 
                                      &usc_eot_program);
439
 
   if (result != VK_SUCCESS)
440
 
      return result;
441
 
 
442
 
   assert((pbe_cs_words[1] & 0x3F) == 0x20);
443
 
 
444
 
   /* FIXME: Stop patching the framebuffer address (this will require the
445
 
    * end-of-tile program to be generated at run-time).
446
 
    */
447
 
   pvr_bo_cpu_map(device, usc_eot_program);
448
 
   usc_eot_program_ptr = usc_eot_program->bo->map;
449
 
   usc_eot_program_ptr[6] = (pbe_cs_words[0] >> 0) & 0xFF;
450
 
   usc_eot_program_ptr[7] = (pbe_cs_words[0] >> 8) & 0xFF;
451
 
   usc_eot_program_ptr[8] = (pbe_cs_words[0] >> 16) & 0xFF;
452
 
   usc_eot_program_ptr[9] = (pbe_cs_words[0] >> 24) & 0xFF;
453
 
   pvr_bo_cpu_unmap(device, usc_eot_program);
454
 
 
455
 
   pvr_pds_setup_doutu(&pixel_event_program.task_control,
456
 
                       usc_eot_program->vma->dev_addr.addr,
457
 
                       usc_temp_count,
458
 
                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
459
 
                       false);
460
 
 
461
 
   /* TODO: We could skip allocating this and generate directly into the device
462
 
    * buffer thus removing one allocation and memcpy() per job. Would this
463
 
    * speed up things in a noticeable way?
464
 
    */
465
 
   staging_buffer = vk_alloc(allocator,
466
 
                             staging_buffer_size,
467
 
                             8,
468
 
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
469
 
   if (!staging_buffer) {
470
 
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
471
 
      goto err_free_usc_pixel_program;
472
 
   }
473
 
 
474
 
   /* Generate the data segment. The code segment was uploaded earlier when
475
 
    * setting up the PDS static heap data.
476
 
    */
477
 
   pvr_pds_generate_pixel_event_data_segment(&pixel_event_program,
478
 
                                             staging_buffer,
479
 
                                             &device->pdevice->dev_info);
480
 
 
481
 
   result = pvr_cmd_buffer_upload_pds_data(
482
 
      cmd_buffer,
483
 
      staging_buffer,
484
 
      cmd_buffer->device->pixel_event_data_size_in_dwords,
485
 
      4,
486
 
      pds_upload_out);
487
 
   if (result != VK_SUCCESS)
488
 
      goto err_free_pixel_event_staging_buffer;
489
 
 
490
 
   vk_free(allocator, staging_buffer);
491
 
 
492
 
   return VK_SUCCESS;
493
 
 
494
 
err_free_pixel_event_staging_buffer:
495
 
   vk_free(allocator, staging_buffer);
496
 
 
497
 
err_free_usc_pixel_program:
498
 
   list_del(&usc_eot_program->link);
499
 
   pvr_bo_free(device, usc_eot_program);
500
 
 
501
 
   return result;
502
 
}
503
 
 
504
 
static uint32_t pvr_get_hw_clear_color(VkFormat vk_format,
505
 
                                       const VkClearValue *clear_value)
506
 
{
507
 
   union util_color uc = { .ui = 0 };
508
 
 
509
 
   switch (vk_format) {
510
 
   case VK_FORMAT_B8G8R8A8_UNORM:
511
 
      util_pack_color(clear_value->color.float32,
512
 
                      PIPE_FORMAT_R8G8B8A8_UNORM,
513
 
                      &uc);
514
 
      break;
515
 
 
516
 
   default:
517
 
      assert(!"Unsupported format");
518
 
      uc.ui[0] = 0;
519
 
      break;
520
 
   }
521
 
 
522
 
   return uc.ui[0];
523
 
}
524
 
 
525
 
static VkResult
526
 
pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
527
 
                                        uint32_t idx,
528
 
                                        pvr_dev_addr_t *const addr_out)
529
 
{
530
 
   const struct pvr_render_pass_info *render_pass_info =
531
 
      &cmd_buffer->state.render_pass_info;
532
 
   const struct pvr_render_pass *pass = render_pass_info->pass;
533
 
   const struct pvr_renderpass_hwsetup_render *hw_render =
534
 
      &pass->hw_setup->renders[idx];
535
 
   ASSERTED const struct pvr_load_op *load_op = hw_render->client_data;
536
 
   const struct pvr_renderpass_colorinit *color_init =
537
 
      &hw_render->color_init[0];
538
 
   const struct pvr_render_pass_attachment *attachment =
539
 
      &pass->attachments[color_init->driver_id];
540
 
   const VkClearValue *clear_value =
541
 
      &render_pass_info->clear_values[color_init->driver_id];
542
 
   uint32_t hw_clear_value;
543
 
   struct pvr_bo *clear_bo;
544
 
   VkResult result;
545
 
 
546
 
   pvr_finishme("Add missing load op data support");
547
 
 
548
 
   assert(load_op->is_hw_object);
549
 
   assert(hw_render->color_init_count == 1);
550
 
 
551
 
   /* FIXME: add support for RENDERPASS_SURFACE_INITOP_LOAD. */
552
 
   assert(color_init->op == RENDERPASS_SURFACE_INITOP_CLEAR);
553
 
 
554
 
   /* FIXME: do this at the point we store the clear values? */
555
 
   hw_clear_value = pvr_get_hw_clear_color(attachment->vk_format, clear_value);
556
 
 
557
 
   result = pvr_cmd_buffer_upload_general(cmd_buffer,
558
 
                                          &hw_clear_value,
559
 
                                          sizeof(hw_clear_value),
560
 
                                          &clear_bo);
561
 
   if (result != VK_SUCCESS)
562
 
      return result;
563
 
 
564
 
   *addr_out = clear_bo->vma->dev_addr;
565
 
 
566
 
   return VK_SUCCESS;
567
 
}
568
 
 
569
 
static VkResult pvr_load_op_pds_data_create_and_upload(
570
 
   struct pvr_cmd_buffer *cmd_buffer,
571
 
   uint32_t idx,
572
 
   pvr_dev_addr_t constants_addr,
573
 
   struct pvr_pds_upload *const pds_upload_out)
574
 
{
575
 
   const struct pvr_render_pass_info *render_pass_info =
576
 
      &cmd_buffer->state.render_pass_info;
577
 
   const struct pvr_load_op *load_op =
578
 
      render_pass_info->pass->hw_setup->renders[idx].client_data;
579
 
   struct pvr_device *device = cmd_buffer->device;
580
 
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
581
 
   struct pvr_pds_pixel_shader_sa_program program = { 0 };
582
 
   uint32_t staging_buffer_size;
583
 
   uint32_t *staging_buffer;
584
 
   VkResult result;
585
 
 
586
 
   program.num_texture_dma_kicks = 1;
587
 
 
588
 
   pvr_csb_pack (&program.texture_dma_address[0],
589
 
                 PDSINST_DOUT_FIELDS_DOUTD_SRC0,
590
 
                 value) {
591
 
      value.sbase = constants_addr;
592
 
   }
593
 
 
594
 
   pvr_csb_pack (&program.texture_dma_control[0],
595
 
                 PDSINST_DOUT_FIELDS_DOUTD_SRC1,
596
 
                 value) {
597
 
      value.dest = PVRX(PDSINST_DOUTD_DEST_COMMON_STORE);
598
 
      value.a0 = load_op->shareds_dest_offset;
599
 
      value.bsize = load_op->shareds_count;
600
 
   }
601
 
 
602
 
   pvr_pds_set_sizes_pixel_shader_sa_texture_data(&program, dev_info);
603
 
 
604
 
   staging_buffer_size = program.data_size * sizeof(*staging_buffer);
605
 
 
606
 
   staging_buffer = vk_alloc(&cmd_buffer->vk.pool->alloc,
607
 
                             staging_buffer_size,
608
 
                             8,
609
 
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
610
 
   if (!staging_buffer)
611
 
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
612
 
 
613
 
   pvr_pds_generate_pixel_shader_sa_texture_state_data(&program,
614
 
                                                       staging_buffer,
615
 
                                                       dev_info);
616
 
 
617
 
   result = pvr_cmd_buffer_upload_pds_data(cmd_buffer,
618
 
                                           staging_buffer,
619
 
                                           program.data_size,
620
 
                                           1,
621
 
                                           pds_upload_out);
622
 
   if (result != VK_SUCCESS) {
623
 
      vk_free(&cmd_buffer->vk.pool->alloc, staging_buffer);
624
 
      return result;
625
 
   }
626
 
 
627
 
   vk_free(&cmd_buffer->vk.pool->alloc, staging_buffer);
628
 
 
629
 
   return VK_SUCCESS;
630
 
}
631
 
 
632
 
/* FIXME: Should this function be specific to the HW background object, in
633
 
 * which case its name should be changed, or should it have the load op
634
 
 * structure passed in?
635
 
 */
636
 
static VkResult
637
 
pvr_load_op_data_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
638
 
                                   uint32_t idx,
639
 
                                   struct pvr_pds_upload *const pds_upload_out)
640
 
{
641
 
   pvr_dev_addr_t constants_addr;
642
 
   VkResult result;
643
 
 
644
 
   result =
645
 
      pvr_load_op_constants_create_and_upload(cmd_buffer, idx, &constants_addr);
646
 
   if (result != VK_SUCCESS)
647
 
      return result;
648
 
 
649
 
   return pvr_load_op_pds_data_create_and_upload(cmd_buffer,
650
 
                                                 idx,
651
 
                                                 constants_addr,
652
 
                                                 pds_upload_out);
653
 
}
654
 
 
655
 
static void pvr_pds_bgnd_pack_state(
656
 
   const struct pvr_load_op *load_op,
657
 
   const struct pvr_pds_upload *load_op_program,
658
 
   uint64_t pds_reg_values[static const ROGUE_NUM_CR_PDS_BGRND_WORDS])
659
 
{
660
 
   pvr_csb_pack (&pds_reg_values[0], CR_PDS_BGRND0_BASE, value) {
661
 
      value.shader_addr.addr = load_op->pds_frag_prog.data_offset;
662
 
      value.texunicode_addr.addr = load_op->pds_tex_state_prog.code_offset;
663
 
   }
664
 
 
665
 
   pvr_csb_pack (&pds_reg_values[1], CR_PDS_BGRND1_BASE, value) {
666
 
      value.texturedata_addr.addr = load_op_program->data_offset;
667
 
   }
668
 
 
669
 
   pvr_csb_pack (&pds_reg_values[2], CR_PDS_BGRND3_SIZEINFO, value) {
670
 
      value.usc_sharedsize =
671
 
         DIV_ROUND_UP(load_op->const_shareds_count,
672
 
                      PVRX(CR_PDS_BGRND3_SIZEINFO_USC_SHAREDSIZE_UNIT_SIZE));
673
 
      value.pds_texturestatesize = DIV_ROUND_UP(
674
 
         load_op_program->data_size,
675
 
         PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE));
676
 
      value.pds_tempsize =
677
 
         DIV_ROUND_UP(load_op->temps_count,
678
 
                      PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEMPSIZE_UNIT_SIZE));
679
 
   }
680
 
}
681
 
 
682
 
/**
683
 
 * \brief Calculates the stride in pixels based on the pitch in bytes and pixel
684
 
 * format.
685
 
 *
686
 
 * \param[in] pitch     Width pitch in bytes.
687
 
 * \param[in] vk_format Vulkan image format.
688
 
 * \return Stride in pixels.
689
 
 */
690
 
static inline uint32_t pvr_stride_from_pitch(uint32_t pitch, VkFormat vk_format)
691
 
{
692
 
   const unsigned int cpp = vk_format_get_blocksize(vk_format);
693
 
 
694
 
   assert(pitch % cpp == 0);
695
 
 
696
 
   return pitch / cpp;
697
 
}
698
 
 
699
 
static void pvr_setup_pbe_state(
700
 
   struct pvr_device *const device,
701
 
   struct pvr_framebuffer *framebuffer,
702
 
   uint32_t mrt_index,
703
 
   const struct usc_mrt_resource *mrt_resource,
704
 
   const struct pvr_image_view *const iview,
705
 
   const VkRect2D *render_area,
706
 
   const bool down_scale,
707
 
   const uint32_t samples,
708
 
   uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS],
709
 
   uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS])
710
 
{
711
 
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
712
 
   const struct pvr_image *image = iview->image;
713
 
   uint32_t level_pitch = image->mip_levels[iview->vk.base_mip_level].pitch;
714
 
 
715
 
   struct pvr_pbe_surf_params surface_params;
716
 
   struct pvr_pbe_render_params render_params;
717
 
   bool with_packed_usc_channel;
718
 
   const uint8_t *swizzle;
719
 
   uint32_t position;
720
 
 
721
 
   /* down_scale should be true when performing a resolve, in which case there
722
 
    * should be more than one sample.
723
 
    */
724
 
   assert((down_scale && samples > 1U) || (!down_scale && samples == 1U));
725
 
 
726
 
   /* Setup surface parameters. */
727
 
 
728
 
   if (PVR_HAS_FEATURE(dev_info, usc_f16sop_u8)) {
729
 
      switch (iview->vk.format) {
730
 
      case VK_FORMAT_B8G8R8A8_UNORM:
731
 
         with_packed_usc_channel = true;
732
 
         break;
733
 
      case VK_FORMAT_D32_SFLOAT:
734
 
         with_packed_usc_channel = false;
735
 
         break;
736
 
      default:
737
 
         unreachable("Unsupported Vulkan image format");
738
 
      }
739
 
   } else {
740
 
      with_packed_usc_channel = false;
741
 
   }
742
 
 
743
 
   swizzle = pvr_get_format_swizzle(iview->vk.format);
744
 
   memcpy(surface_params.swizzle, swizzle, sizeof(surface_params.swizzle));
745
 
 
746
 
   pvr_pbe_get_src_format_and_gamma(iview->vk.format,
747
 
                                    PVR_PBE_GAMMA_NONE,
748
 
                                    with_packed_usc_channel,
749
 
                                    &surface_params.source_format,
750
 
                                    &surface_params.gamma);
751
 
 
752
 
   surface_params.is_normalized = vk_format_is_normalized(iview->vk.format);
753
 
   surface_params.pbe_packmode = pvr_get_pbe_packmode(iview->vk.format);
754
 
   surface_params.nr_components = vk_format_get_nr_components(iview->vk.format);
755
 
 
756
 
   /* FIXME: Should we have an inline function to return the address of a mip
757
 
    * level?
758
 
    */
759
 
   surface_params.addr.addr =
760
 
      image->vma->dev_addr.addr +
761
 
      image->mip_levels[iview->vk.base_mip_level].offset;
762
 
 
763
 
   surface_params.mem_layout = image->memlayout;
764
 
   surface_params.stride = pvr_stride_from_pitch(level_pitch, iview->vk.format);
765
 
   surface_params.depth = iview->vk.extent.depth;
766
 
   surface_params.width = iview->vk.extent.width;
767
 
   surface_params.height = iview->vk.extent.height;
768
 
   surface_params.z_only_render = false;
769
 
   surface_params.down_scale = down_scale;
770
 
   surface_params.msaa_mode = samples;
771
 
 
772
 
   /* Setup render parameters. */
773
 
 
774
 
   if (mrt_resource->type == USC_MRT_RESOURCE_TYPE_MEMORY) {
775
 
      position = mrt_resource->u.mem.offset_in_dwords;
776
 
   } else {
777
 
      assert(mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER);
778
 
      assert(mrt_resource->u.reg.offset == 0);
779
 
 
780
 
      position = mrt_resource->u.reg.out_reg;
781
 
   }
782
 
 
783
 
   assert(position <= 3 || PVR_HAS_FEATURE(dev_info, eight_output_registers));
784
 
 
785
 
   switch (position) {
786
 
   case 0:
787
 
   case 4:
788
 
      render_params.source_start = PVR_PBE_STARTPOS_BIT0;
789
 
      break;
790
 
   case 1:
791
 
   case 5:
792
 
      render_params.source_start = PVR_PBE_STARTPOS_BIT32;
793
 
      break;
794
 
   case 2:
795
 
   case 6:
796
 
      render_params.source_start = PVR_PBE_STARTPOS_BIT64;
797
 
      break;
798
 
   case 3:
799
 
   case 7:
800
 
      render_params.source_start = PVR_PBE_STARTPOS_BIT96;
801
 
      break;
802
 
   default:
803
 
      assert(!"Invalid output register");
804
 
      break;
805
 
   }
806
 
 
807
 
   render_params.min_x_clip = MAX2(0, render_area->offset.x);
808
 
   render_params.min_y_clip = MAX2(0, render_area->offset.y);
809
 
   render_params.max_x_clip =
810
 
      MIN2(framebuffer->width,
811
 
           render_area->offset.x + render_area->extent.width) -
812
 
      1;
813
 
   render_params.max_y_clip =
814
 
      MIN2(framebuffer->height,
815
 
           render_area->offset.y + render_area->extent.height) -
816
 
      1;
817
 
 
818
 
   render_params.slice = 0;
819
 
   render_params.mrt_index = mrt_index;
820
 
 
821
 
   pvr_pbe_pack_state(device,
822
 
                      &surface_params,
823
 
                      &render_params,
824
 
                      pbe_cs_words,
825
 
                      pbe_reg_words);
826
 
}
827
 
 
828
 
static struct pvr_render_target *
829
 
pvr_get_render_target(const struct pvr_render_pass *pass,
830
 
                      const struct pvr_framebuffer *framebuffer,
831
 
                      uint32_t idx)
832
 
{
833
 
   const struct pvr_renderpass_hwsetup_render *hw_render =
834
 
      &pass->hw_setup->renders[idx];
835
 
   uint32_t rt_idx = 0;
836
 
 
837
 
   switch (hw_render->sample_count) {
838
 
   case 1:
839
 
   case 2:
840
 
   case 4:
841
 
   case 8:
842
 
      rt_idx = util_logbase2(hw_render->sample_count);
843
 
      break;
844
 
 
845
 
   default:
846
 
      unreachable("Unsupported sample count");
847
 
      break;
848
 
   }
849
 
 
850
 
   return &framebuffer->render_targets[rt_idx];
851
 
}
852
 
 
853
 
static uint32_t
854
 
pvr_pass_get_pixel_output_width(const struct pvr_render_pass *pass,
855
 
                                uint32_t idx,
856
 
                                const struct pvr_device_info *dev_info)
857
 
{
858
 
   const struct pvr_renderpass_hwsetup_render *hw_render =
859
 
      &pass->hw_setup->renders[idx];
860
 
   /* Default value based on the maximum value found in all existing cores. The
861
 
    * maximum is used as this is being treated as a lower bound, making it a
862
 
    * "safer" choice than the minimum value found in all existing cores.
863
 
    */
864
 
   const uint32_t min_output_regs =
865
 
      PVR_GET_FEATURE_VALUE(dev_info, usc_min_output_registers_per_pix, 2U);
866
 
   const uint32_t width = MAX2(hw_render->output_regs_count, min_output_regs);
867
 
 
868
 
   return util_next_power_of_two(width);
869
 
}
870
 
 
871
 
static VkResult pvr_sub_cmd_gfx_job_init(struct pvr_device *device,
872
 
                                         struct pvr_cmd_buffer *cmd_buffer,
873
 
                                         struct pvr_sub_cmd *sub_cmd)
874
 
{
875
 
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
876
 
   struct pvr_render_pass_info *render_pass_info =
877
 
      &cmd_buffer->state.render_pass_info;
878
 
   const struct pvr_renderpass_hwsetup_render *hw_render =
879
 
      &render_pass_info->pass->hw_setup->renders[sub_cmd->gfx.hw_render_idx];
880
 
   struct pvr_render_job *job = &sub_cmd->gfx.job;
881
 
   struct pvr_pds_upload pds_pixel_event_program;
882
 
 
883
 
   uint32_t pbe_cs_words[PVR_MAX_COLOR_ATTACHMENTS]
884
 
                        [ROGUE_NUM_PBESTATE_STATE_WORDS];
885
 
   struct pvr_render_target *render_target;
886
 
   VkResult result;
887
 
 
888
 
   assert(hw_render->eot_surface_count < ARRAY_SIZE(pbe_cs_words));
889
 
 
890
 
   for (uint32_t i = 0; i < hw_render->eot_surface_count; i++) {
891
 
      const struct pvr_renderpass_hwsetup_eot_surface *surface =
892
 
         &hw_render->eot_surfaces[i];
893
 
      const struct pvr_image_view *iview =
894
 
         render_pass_info->attachments[surface->attachment_index];
895
 
      const struct usc_mrt_resource *mrt_resource =
896
 
         &hw_render->eot_setup.mrt_resources[surface->mrt_index];
897
 
      uint32_t samples = 1;
898
 
 
899
 
      if (surface->need_resolve)
900
 
         pvr_finishme("Set up job resolve information.");
901
 
 
902
 
      pvr_setup_pbe_state(device,
903
 
                          render_pass_info->framebuffer,
904
 
                          surface->mrt_index,
905
 
                          mrt_resource,
906
 
                          iview,
907
 
                          &render_pass_info->render_area,
908
 
                          surface->need_resolve,
909
 
                          samples,
910
 
                          pbe_cs_words[i],
911
 
                          job->pbe_reg_words[i]);
912
 
   }
913
 
 
914
 
   /* FIXME: The fragment program only supports a single surface at present. */
915
 
   assert(hw_render->eot_surface_count == 1);
916
 
   result = pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
917
 
      cmd_buffer,
918
 
      pbe_cs_words[0],
919
 
      &pds_pixel_event_program);
920
 
   if (result != VK_SUCCESS)
921
 
      return result;
922
 
 
923
 
   job->pds_pixel_event_data_offset = pds_pixel_event_program.data_offset;
924
 
 
925
 
   /* FIXME: Don't do this if there is a barrier load. */
926
 
   if (render_pass_info->enable_bg_tag) {
927
 
      const struct pvr_load_op *load_op = hw_render->client_data;
928
 
      struct pvr_pds_upload load_op_program;
929
 
 
930
 
      /* FIXME: Should we free the PDS pixel event data or let it be freed
931
 
       * when the pool gets emptied?
932
 
       */
933
 
      result = pvr_load_op_data_create_and_upload(cmd_buffer,
934
 
                                                  sub_cmd->gfx.hw_render_idx,
935
 
                                                  &load_op_program);
936
 
      if (result != VK_SUCCESS)
937
 
         return result;
938
 
 
939
 
      pvr_pds_bgnd_pack_state(load_op,
940
 
                              &load_op_program,
941
 
                              job->pds_bgnd_reg_values);
942
 
   }
943
 
 
944
 
   job->enable_bg_tag = render_pass_info->enable_bg_tag;
945
 
   job->process_empty_tiles = render_pass_info->process_empty_tiles;
946
 
 
947
 
   render_target = pvr_get_render_target(render_pass_info->pass,
948
 
                                         render_pass_info->framebuffer,
949
 
                                         sub_cmd->gfx.hw_render_idx);
950
 
   job->rt_dataset = render_target->rt_dataset;
951
 
 
952
 
   job->ctrl_stream_addr =
953
 
      pvr_csb_get_start_address(&sub_cmd->gfx.control_stream);
954
 
 
955
 
   /* FIXME: Need to set up the border color table at device creation
956
 
    * time. Set to invalid for the time being.
957
 
    */
958
 
   job->border_colour_table_addr = PVR_DEV_ADDR_INVALID;
959
 
 
960
 
   if (sub_cmd->gfx.depth_bias_bo)
961
 
      job->depth_bias_table_addr = sub_cmd->gfx.depth_bias_bo->vma->dev_addr;
962
 
   else
963
 
      job->depth_bias_table_addr = PVR_DEV_ADDR_INVALID;
964
 
 
965
 
   if (sub_cmd->gfx.scissor_bo)
966
 
      job->scissor_table_addr = sub_cmd->gfx.scissor_bo->vma->dev_addr;
967
 
   else
968
 
      job->scissor_table_addr = PVR_DEV_ADDR_INVALID;
969
 
 
970
 
   job->pixel_output_width =
971
 
      pvr_pass_get_pixel_output_width(render_pass_info->pass,
972
 
                                      sub_cmd->gfx.hw_render_idx,
973
 
                                      dev_info);
974
 
 
975
 
   if (hw_render->ds_surface_id != -1) {
976
 
      struct pvr_image_view *iview =
977
 
         render_pass_info->attachments[hw_render->ds_surface_id];
978
 
      const struct pvr_image *image = iview->image;
979
 
 
980
 
      if (vk_format_has_depth(image->vk.format)) {
981
 
         uint32_t level_pitch =
982
 
            image->mip_levels[iview->vk.base_mip_level].pitch;
983
 
 
984
 
         /* FIXME: Is this sufficient for depth buffers? */
985
 
         job->depth_addr = image->dev_addr;
986
 
 
987
 
         job->depth_stride =
988
 
            pvr_stride_from_pitch(level_pitch, iview->vk.format);
989
 
         job->depth_height = iview->vk.extent.height;
990
 
         job->depth_physical_width =
991
 
            u_minify(image->physical_extent.width, iview->vk.base_mip_level);
992
 
         job->depth_physical_height =
993
 
            u_minify(image->physical_extent.height, iview->vk.base_mip_level);
994
 
         job->depth_layer_size = image->layer_size;
995
 
 
996
 
         if (hw_render->ds_surface_id < render_pass_info->clear_value_count) {
997
 
            VkClearValue *clear_values =
998
 
               &render_pass_info->clear_values[hw_render->ds_surface_id];
999
 
 
1000
 
            job->depth_clear_value = clear_values->depthStencil.depth;
1001
 
         } else {
1002
 
            job->depth_clear_value = 1.0f;
1003
 
         }
1004
 
 
1005
 
         job->depth_vk_format = iview->vk.format;
1006
 
 
1007
 
         job->depth_memlayout = image->memlayout;
1008
 
      } else {
1009
 
         job->depth_addr = PVR_DEV_ADDR_INVALID;
1010
 
         job->depth_stride = 0;
1011
 
         job->depth_height = 0;
1012
 
         job->depth_physical_width = 0;
1013
 
         job->depth_physical_height = 0;
1014
 
         job->depth_layer_size = 0;
1015
 
         job->depth_clear_value = 1.0f;
1016
 
         job->depth_vk_format = VK_FORMAT_UNDEFINED;
1017
 
         job->depth_memlayout = PVR_MEMLAYOUT_LINEAR;
1018
 
      }
1019
 
 
1020
 
      if (vk_format_has_stencil(image->vk.format)) {
1021
 
         /* FIXME: Is this sufficient for stencil buffers? */
1022
 
         job->stencil_addr = image->dev_addr;
1023
 
      } else {
1024
 
         job->stencil_addr = PVR_DEV_ADDR_INVALID;
1025
 
      }
1026
 
 
1027
 
      job->samples = image->vk.samples;
1028
 
   } else {
1029
 
      pvr_finishme("Set up correct number of samples for render job");
1030
 
 
1031
 
      job->depth_addr = PVR_DEV_ADDR_INVALID;
1032
 
      job->depth_stride = 0;
1033
 
      job->depth_height = 0;
1034
 
      job->depth_physical_width = 0;
1035
 
      job->depth_physical_height = 0;
1036
 
      job->depth_layer_size = 0;
1037
 
      job->depth_clear_value = 1.0f;
1038
 
      job->depth_vk_format = VK_FORMAT_UNDEFINED;
1039
 
      job->depth_memlayout = PVR_MEMLAYOUT_LINEAR;
1040
 
 
1041
 
      job->stencil_addr = PVR_DEV_ADDR_INVALID;
1042
 
 
1043
 
      job->samples = 1;
1044
 
   }
1045
 
 
1046
 
   if (sub_cmd->gfx.max_tiles_in_flight ==
1047
 
       PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U)) {
1048
 
      /* Use the default limit based on the partition store. */
1049
 
      job->max_tiles_in_flight = 0U;
1050
 
   } else {
1051
 
      job->max_tiles_in_flight = sub_cmd->gfx.max_tiles_in_flight;
1052
 
   }
1053
 
 
1054
 
   job->frag_uses_atomic_ops = sub_cmd->gfx.frag_uses_atomic_ops;
1055
 
   job->disable_compute_overlap = false;
1056
 
   job->max_shared_registers = cmd_buffer->state.max_shared_regs;
1057
 
   job->run_frag = true;
1058
 
   job->geometry_terminate = true;
1059
 
 
1060
 
   return VK_SUCCESS;
1061
 
}
1062
 
 
1063
 
/* Number of shareds used in the Issue Data Fence(IDF)/Wait Data Fence(WDF)
1064
 
 * kernel.
1065
 
 */
1066
 
#define PVR_IDF_WDF_IN_REGISTER_CONST_COUNT 12U
1067
 
 
1068
 
static void pvr_sub_cmd_compute_job_init(struct pvr_device *device,
1069
 
                                         struct pvr_cmd_buffer *cmd_buffer,
1070
 
                                         struct pvr_sub_cmd *sub_cmd)
1071
 
{
1072
 
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
1073
 
 
1074
 
   if (sub_cmd->compute.uses_barrier) {
1075
 
      sub_cmd->compute.submit_info.flags |=
1076
 
         PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP;
1077
 
   }
1078
 
 
1079
 
   pvr_csb_pack (&sub_cmd->compute.submit_info.regs.cdm_ctrl_stream_base,
1080
 
                 CR_CDM_CTRL_STREAM_BASE,
1081
 
                 value) {
1082
 
      value.addr = pvr_csb_get_start_address(&sub_cmd->compute.control_stream);
1083
 
   }
1084
 
 
1085
 
   /* FIXME: Need to set up the border color table at device creation
1086
 
    * time. Set to invalid for the time being.
1087
 
    */
1088
 
   pvr_csb_pack (&sub_cmd->compute.submit_info.regs.tpu_border_colour_table,
1089
 
                 CR_TPU_BORDER_COLOUR_TABLE_CDM,
1090
 
                 value) {
1091
 
      value.border_colour_table_address = PVR_DEV_ADDR_INVALID;
1092
 
   }
1093
 
 
1094
 
   sub_cmd->compute.num_shared_regs = MAX2(PVR_IDF_WDF_IN_REGISTER_CONST_COUNT,
1095
 
                                           cmd_buffer->state.max_shared_regs);
1096
 
 
1097
 
   cmd_buffer->state.max_shared_regs = 0U;
1098
 
 
1099
 
   if (PVR_HAS_FEATURE(dev_info, compute_morton_capable))
1100
 
      sub_cmd->compute.submit_info.regs.cdm_item = 0;
1101
 
 
1102
 
   pvr_csb_pack (&sub_cmd->compute.submit_info.regs.tpu, CR_TPU, value) {
1103
 
      value.tag_cem_4k_face_packing = true;
1104
 
   }
1105
 
 
1106
 
   if (PVR_HAS_FEATURE(dev_info, cluster_grouping) &&
1107
 
       PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
1108
 
       rogue_get_num_phantoms(dev_info) > 1 &&
1109
 
       sub_cmd->compute.uses_atomic_ops) {
1110
 
      /* Each phantom has its own MCU, so atomicity can only be guaranteed
1111
 
       * when all work items are processed on the same phantom. This means we
1112
 
       * need to disable all USCs other than those of the first phantom, which
1113
 
       * has 4 clusters.
1114
 
       */
1115
 
      pvr_csb_pack (&sub_cmd->compute.submit_info.regs.compute_cluster,
1116
 
                    CR_COMPUTE_CLUSTER,
1117
 
                    value) {
1118
 
         value.mask = 0xFU;
1119
 
      }
1120
 
   } else {
1121
 
      pvr_csb_pack (&sub_cmd->compute.submit_info.regs.compute_cluster,
1122
 
                    CR_COMPUTE_CLUSTER,
1123
 
                    value) {
1124
 
         value.mask = 0U;
1125
 
      }
1126
 
   }
1127
 
 
1128
 
   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support) &&
1129
 
       sub_cmd->compute.uses_atomic_ops) {
1130
 
      sub_cmd->compute.submit_info.flags |= PVR_WINSYS_COMPUTE_FLAG_SINGLE_CORE;
1131
 
   }
1132
 
}
1133
 
 
1134
 
#define PIXEL_ALLOCATION_SIZE_MAX_IN_BLOCKS \
1135
 
   (1024 / PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE))
1136
 
 
1137
 
static uint32_t pvr_compute_slot_size(const struct pvr_device_info *dev_info,
1138
 
                                      uint32_t coeff_regs_count,
1139
 
                                      bool use_barrier,
1140
 
                                      const uint32_t local_size[static 3U])
1141
 
{
1142
 
   uint32_t max_workgroups_per_task = ROGUE_CDM_MAX_PACKED_WORKGROUPS_PER_TASK;
1143
 
   uint32_t max_avail_coeff_regs =
1144
 
      rogue_get_cdm_max_local_mem_size_regs(dev_info);
1145
 
   uint32_t localstore_chunks_count =
1146
 
      DIV_ROUND_UP(coeff_regs_count << 2,
1147
 
                   PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE));
1148
 
   uint32_t total_workitems = local_size[0U] * local_size[1U] * local_size[2U];
1149
 
 
1150
 
   /* Ensure that we cannot have more workgroups in a slot than the available
1151
 
    * number of coefficients allow us to have.
1152
 
    */
1153
 
   if (coeff_regs_count > 0U) {
1154
 
      /* If TA or 3D can overlap with CDM, or if the TA is running a geometry
1155
 
       * shader then we need to consider this in calculating max allowed
1156
 
       * work-groups.
1157
 
       */
1158
 
      if (PVR_HAS_QUIRK(dev_info, 52354) &&
1159
 
          (PVR_HAS_FEATURE(dev_info, compute_overlap) ||
1160
 
           PVR_HAS_FEATURE(dev_info, gs_rta_support))) {
1161
 
         /* Solve for n (number of work-groups per task). All values are in
1162
 
          * size of common store alloc blocks:
1163
 
          *
1164
 
          * n + (2n + 7) * (local_memory_size_max - 1) =
1165
 
          *     (coefficient_memory_pool_size) - (7 * pixel_allocation_size_max)
1166
 
          * ==>
1167
 
          * n + 2n * (local_memory_size_max - 1) =
1168
 
          *     (coefficient_memory_pool_size) - (7 * pixel_allocation_size_max)
1169
 
          *     - (7 * (local_memory_size_max - 1))
1170
 
          * ==>
1171
 
          * n * (1 + 2 * (local_memory_size_max - 1)) =
1172
 
          *     (coefficient_memory_pool_size) - (7 * pixel_allocation_size_max)
1173
 
          *     - (7 * (local_memory_size_max - 1))
1174
 
          * ==>
1175
 
          * n = ((coefficient_memory_pool_size) -
1176
 
          *     (7 * pixel_allocation_size_max) -
1177
 
          *     (7 * (local_memory_size_max - 1)) / (1 +
1178
 
          * 2 * (local_memory_size_max - 1)))
1179
 
          */
1180
 
         uint32_t max_common_store_blocks =
1181
 
            DIV_ROUND_UP(max_avail_coeff_regs * 4U,
1182
 
                         PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE));
1183
 
 
1184
 
         /* (coefficient_memory_pool_size) - (7 * pixel_allocation_size_max)
1185
 
          */
1186
 
         max_common_store_blocks -= ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES *
1187
 
                                    PIXEL_ALLOCATION_SIZE_MAX_IN_BLOCKS;
1188
 
 
1189
 
         /* - (7 * (local_memory_size_max - 1)) */
1190
 
         max_common_store_blocks -= (ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES *
1191
 
                                     (localstore_chunks_count - 1U));
1192
 
 
1193
 
         /* Divide by (1 + 2 * (local_memory_size_max - 1)) */
1194
 
         max_workgroups_per_task = max_common_store_blocks /
1195
 
                                   (1U + 2U * (localstore_chunks_count - 1U));
1196
 
 
1197
 
         max_workgroups_per_task =
1198
 
            MIN2(max_workgroups_per_task,
1199
 
                 ROGUE_CDM_MAX_PACKED_WORKGROUPS_PER_TASK);
1200
 
 
1201
 
      } else {
1202
 
         max_workgroups_per_task =
1203
 
            MIN2((max_avail_coeff_regs / coeff_regs_count),
1204
 
                 max_workgroups_per_task);
1205
 
      }
1206
 
   }
1207
 
 
1208
 
   /* max_workgroups_per_task should at least be one. */
1209
 
   assert(max_workgroups_per_task >= 1U);
1210
 
 
1211
 
   if (total_workitems >= ROGUE_MAX_INSTANCES_PER_TASK) {
1212
 
      /* In this case, the work group size will have been padded up to the
1213
 
       * next ROGUE_MAX_INSTANCES_PER_TASK so we just set max instances to be
1214
 
       * ROGUE_MAX_INSTANCES_PER_TASK.
1215
 
       */
1216
 
      return ROGUE_MAX_INSTANCES_PER_TASK;
1217
 
   }
1218
 
 
1219
 
   /* In this case, the number of instances in the slot must be clamped to
1220
 
    * accommodate whole work-groups only.
1221
 
    */
1222
 
   if (PVR_HAS_QUIRK(dev_info, 49032) || use_barrier) {
1223
 
      max_workgroups_per_task =
1224
 
         MIN2(max_workgroups_per_task,
1225
 
              ROGUE_MAX_INSTANCES_PER_TASK / total_workitems);
1226
 
      return total_workitems * max_workgroups_per_task;
1227
 
   }
1228
 
 
1229
 
   return MIN2(total_workitems * max_workgroups_per_task,
1230
 
               ROGUE_MAX_INSTANCES_PER_TASK);
1231
 
}
1232
 
 
1233
 
static void
1234
 
pvr_compute_generate_control_stream(struct pvr_csb *csb,
1235
 
                                    const struct pvr_compute_kernel_info *info)
1236
 
{
1237
 
   /* Compute kernel 0. */
1238
 
   pvr_csb_emit (csb, CDMCTRL_KERNEL0, kernel0) {
1239
 
      kernel0.indirect_present = !!info->indirect_buffer_addr.addr;
1240
 
      kernel0.global_offsets_present = info->global_offsets_present;
1241
 
      kernel0.usc_common_size = info->usc_common_size;
1242
 
      kernel0.usc_unified_size = info->usc_unified_size;
1243
 
      kernel0.pds_temp_size = info->pds_temp_size;
1244
 
      kernel0.pds_data_size = info->pds_data_size;
1245
 
 
1246
 
      if (info->usc_target_any)
1247
 
         kernel0.usc_target = PVRX(CDMCTRL_USC_TARGET_ANY);
1248
 
      else
1249
 
         kernel0.usc_target = PVRX(CDMCTRL_USC_TARGET_ALL);
1250
 
 
1251
 
      kernel0.fence = info->is_fence;
1252
 
   }
1253
 
 
1254
 
   /* Compute kernel 1. */
1255
 
   pvr_csb_emit (csb, CDMCTRL_KERNEL1, kernel1) {
1256
 
      kernel1.data_addr.addr = info->pds_data_offset;
1257
 
      kernel1.sd_type = info->sd_type;
1258
 
 
1259
 
      if (!info->is_fence)
1260
 
         kernel1.usc_common_shared = info->usc_common_shared;
1261
 
   }
1262
 
 
1263
 
   /* Compute kernel 2. */
1264
 
   pvr_csb_emit (csb, CDMCTRL_KERNEL2, kernel2) {
1265
 
      kernel2.code_addr.addr = info->pds_code_offset;
1266
 
   }
1267
 
 
1268
 
   if (info->indirect_buffer_addr.addr) {
1269
 
      /* Compute kernel 6. */
1270
 
      pvr_csb_emit (csb, CDMCTRL_KERNEL6, kernel6) {
1271
 
         kernel6.indirect_addrmsb = info->indirect_buffer_addr;
1272
 
      }
1273
 
 
1274
 
      /* Compute kernel 7. */
1275
 
      pvr_csb_emit (csb, CDMCTRL_KERNEL7, kernel7) {
1276
 
         kernel7.indirect_addrlsb = info->indirect_buffer_addr;
1277
 
      }
1278
 
   } else {
1279
 
      /* Compute kernel 3. */
1280
 
      pvr_csb_emit (csb, CDMCTRL_KERNEL3, kernel3) {
1281
 
         assert(info->global_size[0U] > 0U);
1282
 
         kernel3.workgroup_x = info->global_size[0U] - 1U;
1283
 
      }
1284
 
 
1285
 
      /* Compute kernel 4. */
1286
 
      pvr_csb_emit (csb, CDMCTRL_KERNEL4, kernel4) {
1287
 
         assert(info->global_size[1U] > 0U);
1288
 
         kernel4.workgroup_y = info->global_size[1U] - 1U;
1289
 
      }
1290
 
 
1291
 
      /* Compute kernel 5. */
1292
 
      pvr_csb_emit (csb, CDMCTRL_KERNEL5, kernel5) {
1293
 
         assert(info->global_size[2U] > 0U);
1294
 
         kernel5.workgroup_z = info->global_size[2U] - 1U;
1295
 
      }
1296
 
   }
1297
 
 
1298
 
   /* Compute kernel 8. */
1299
 
   pvr_csb_emit (csb, CDMCTRL_KERNEL8, kernel8) {
1300
 
      if (info->max_instances == ROGUE_MAX_INSTANCES_PER_TASK)
1301
 
         kernel8.max_instances = 0U;
1302
 
      else
1303
 
         kernel8.max_instances = info->max_instances;
1304
 
 
1305
 
      assert(info->local_size[0U] > 0U);
1306
 
      kernel8.workgroup_size_x = info->local_size[0U] - 1U;
1307
 
      assert(info->local_size[1U] > 0U);
1308
 
      kernel8.workgroup_size_y = info->local_size[1U] - 1U;
1309
 
      assert(info->local_size[2U] > 0U);
1310
 
      kernel8.workgroup_size_z = info->local_size[2U] - 1U;
1311
 
   }
1312
 
}
1313
 
 
1314
 
static void pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer,
1315
 
                                       bool deallocate_shareds)
1316
 
{
1317
 
   const struct pvr_pds_upload *program =
1318
 
      &cmd_buffer->device->pds_compute_fence_program;
1319
 
   const struct pvr_device_info *dev_info =
1320
 
      &cmd_buffer->device->pdevice->dev_info;
1321
 
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
1322
 
   struct pvr_csb *csb = &state->current_sub_cmd->compute.control_stream;
1323
 
 
1324
 
   struct pvr_compute_kernel_info info = {
1325
 
      .indirect_buffer_addr.addr = 0ULL,
1326
 
      .global_offsets_present = false,
1327
 
      .usc_common_size = 0U,
1328
 
      .usc_unified_size = 0U,
1329
 
      .pds_temp_size = 0U,
1330
 
      .pds_data_size =
1331
 
         DIV_ROUND_UP(program->data_size << 2,
1332
 
                      PVRX(CDMCTRL_KERNEL0_PDS_DATA_SIZE_UNIT_SIZE)),
1333
 
      .usc_target_any = true,
1334
 
      .is_fence = true,
1335
 
      .pds_data_offset = program->data_offset,
1336
 
      .sd_type = PVRX(CDMCTRL_SD_TYPE_PDS),
1337
 
      .usc_common_shared = deallocate_shareds,
1338
 
      .pds_code_offset = program->code_offset,
1339
 
      .global_size = { 1U, 1U, 1U },
1340
 
      .local_size = { 1U, 1U, 1U },
1341
 
   };
1342
 
 
1343
 
   /* We don't need to pad work-group size for this case. */
1344
 
   /* Here we calculate the slot size. This can depend on the use of barriers,
1345
 
    * local memory, BRN's or other factors.
1346
 
    */
1347
 
   info.max_instances =
1348
 
      pvr_compute_slot_size(dev_info, 0U, false, info.local_size);
1349
 
 
1350
 
   pvr_compute_generate_control_stream(csb, &info);
1351
 
}
1352
 
 
1353
 
static VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
1354
 
{
1355
 
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
1356
 
   struct pvr_sub_cmd *sub_cmd = state->current_sub_cmd;
1357
 
   struct pvr_device *device = cmd_buffer->device;
1358
 
   VkResult result;
1359
 
 
1360
 
   /* FIXME: Is this NULL check required because this function is called from
1361
 
    * pvr_resolve_unemitted_resolve_attachments()? See comment about this
1362
 
    * function being called twice in a row in pvr_CmdEndRenderPass().
1363
 
    */
1364
 
   if (!sub_cmd)
1365
 
      return VK_SUCCESS;
1366
 
 
1367
 
   switch (sub_cmd->type) {
1368
 
   case PVR_SUB_CMD_TYPE_GRAPHICS:
1369
 
      if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
1370
 
         result = pvr_csb_emit_return(&sub_cmd->gfx.control_stream);
1371
 
         if (result != VK_SUCCESS) {
1372
 
            state->status = result;
1373
 
            return result;
1374
 
         }
1375
 
 
1376
 
         break;
1377
 
      }
1378
 
 
1379
 
      /* TODO: Check if the sub_cmd can be skipped based on
1380
 
       * sub_cmd->gfx.empty_cmd flag.
1381
 
       */
1382
 
 
1383
 
      result = pvr_cmd_buffer_upload_tables(device, cmd_buffer);
1384
 
      if (result != VK_SUCCESS) {
1385
 
         state->status = result;
1386
 
         return result;
1387
 
      }
1388
 
 
1389
 
      result = pvr_cmd_buffer_emit_ppp_state(cmd_buffer);
1390
 
      if (result != VK_SUCCESS) {
1391
 
         state->status = result;
1392
 
         return result;
1393
 
      }
1394
 
 
1395
 
      result = pvr_csb_emit_terminate(&sub_cmd->gfx.control_stream);
1396
 
      if (result != VK_SUCCESS) {
1397
 
         state->status = result;
1398
 
         return result;
1399
 
      }
1400
 
 
1401
 
      result = pvr_sub_cmd_gfx_job_init(device, cmd_buffer, sub_cmd);
1402
 
      if (result != VK_SUCCESS) {
1403
 
         state->status = result;
1404
 
         return result;
1405
 
      }
1406
 
 
1407
 
      break;
1408
 
 
1409
 
   case PVR_SUB_CMD_TYPE_COMPUTE:
1410
 
      pvr_compute_generate_fence(cmd_buffer, true);
1411
 
 
1412
 
      result = pvr_csb_emit_terminate(&sub_cmd->compute.control_stream);
1413
 
      if (result != VK_SUCCESS) {
1414
 
         state->status = result;
1415
 
         return result;
1416
 
      }
1417
 
 
1418
 
      pvr_sub_cmd_compute_job_init(device, cmd_buffer, sub_cmd);
1419
 
      break;
1420
 
 
1421
 
   case PVR_SUB_CMD_TYPE_TRANSFER:
1422
 
      break;
1423
 
 
1424
 
   default:
1425
 
      pvr_finishme("Unsupported sub-command type %d", sub_cmd->type);
1426
 
      break;
1427
 
   }
1428
 
 
1429
 
   state->current_sub_cmd = NULL;
1430
 
 
1431
 
   return VK_SUCCESS;
1432
 
}
1433
 
 
1434
 
static void pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer_state *state,
1435
 
                                           bool start_geom)
1436
 
{
1437
 
   if (start_geom) {
1438
 
      /*
1439
 
       * Initial geometry phase State.
1440
 
       * It's the driver's responsibility to ensure that the state of the
1441
 
       * hardware is correctly initialized at the start of every geometry
1442
 
       * phase. This is required to prevent stale state from a previous
1443
 
       * geometry phase erroneously affecting the next geometry phase. The
1444
 
       * following fields in PPP State Header, and their corresponding state
1445
 
       * words, must be supplied in the first PPP State Update of a geometry
1446
 
       * phase that contains any geometry (draw calls). Any field not listed
1447
 
       * below is safe to ignore.
1448
 
       *
1449
 
       *        TA_PRES_STREAM_OUT_SIZE
1450
 
       *        TA_PRES_PPPCTRL
1451
 
       *        TA_PRES_VARYING_WORD2
1452
 
       *        TA_PRES_VARYING_WORD1
1453
 
       *        TA_PRES_VARYING_WORD0
1454
 
       *        TA_PRES_OUTSELECTS
1455
 
       *        TA_PRES_WCLAMP
1456
 
       *        TA_VIEWPORT_COUNT
1457
 
       *        TA_PRES_VIEWPORT
1458
 
       *        TA_PRES_REGION_CLIP
1459
 
       *        TA_PRES_PDSSTATEPTR0
1460
 
       *        TA_PRES_ISPCTLFB
1461
 
       *        TA_PRES_ISPCTLFA
1462
 
       *        TA_PRES_ISPCTL
1463
 
       *
1464
 
       * If a geometry phase does not contain any geometry, this restriction
1465
 
       * can be ignored. If the first draw call in a geometry phase will only
1466
 
       * update the depth or stencil buffers i.e. ISP_TAGWRITEDISABLE is set
1467
 
       * in the ISP State Control Word, the PDS State Pointers
1468
 
       * (TA_PRES_PDSSTATEPTR*) in the first PPP State Update do not need to
1469
 
       * be supplied, since they will never reach the PDS in the fragment
1470
 
       * phase.
1471
 
       */
1472
 
 
1473
 
      state->emit_state_bits = 0;
1474
 
 
1475
 
      state->emit_state.stream_out = true;
1476
 
      state->emit_state.ppp_control = true;
1477
 
      state->emit_state.varying_word2 = true;
1478
 
      state->emit_state.varying_word1 = true;
1479
 
      state->emit_state.varying_word0 = true;
1480
 
      state->emit_state.output_selects = true;
1481
 
      state->emit_state.wclamp = true;
1482
 
      state->emit_state.viewport = true;
1483
 
      state->emit_state.region_clip = true;
1484
 
      state->emit_state.pds_fragment_stateptr0 = true;
1485
 
      state->emit_state.isp_fb = true;
1486
 
      state->emit_state.isp = true;
1487
 
   } else {
1488
 
      state->emit_state.ppp_control = true;
1489
 
      state->emit_state.varying_word1 = true;
1490
 
      state->emit_state.varying_word0 = true;
1491
 
      state->emit_state.output_selects = true;
1492
 
      state->emit_state.viewport = true;
1493
 
      state->emit_state.region_clip = true;
1494
 
      state->emit_state.pds_fragment_stateptr0 = true;
1495
 
      state->emit_state.isp_fb = true;
1496
 
      state->emit_state.isp = true;
1497
 
   }
1498
 
 
1499
 
   memset(&state->ppp_state, 0U, sizeof(state->ppp_state));
1500
 
 
1501
 
   state->dirty.vertex_bindings = true;
1502
 
   state->dirty.gfx_pipeline_binding = true;
1503
 
   state->dirty.viewport = true;
1504
 
}
1505
 
 
1506
 
static VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
1507
 
                                             enum pvr_sub_cmd_type type)
1508
 
{
1509
 
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
1510
 
   struct pvr_device *device = cmd_buffer->device;
1511
 
   struct pvr_sub_cmd *sub_cmd;
1512
 
   VkResult result;
1513
 
 
1514
 
   /* Check the current status of the buffer. */
1515
 
   if (state->status != VK_SUCCESS)
1516
 
      return state->status;
1517
 
 
1518
 
   pvr_cmd_buffer_update_barriers(cmd_buffer, type);
1519
 
 
1520
 
   if (state->current_sub_cmd) {
1521
 
      if (state->current_sub_cmd->type == type) {
1522
 
         /* Continue adding to the current sub command. */
1523
 
         return VK_SUCCESS;
1524
 
      }
1525
 
 
1526
 
      /* End the current sub command. */
1527
 
      result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
1528
 
      if (result != VK_SUCCESS)
1529
 
         return result;
1530
 
   }
1531
 
 
1532
 
   sub_cmd = vk_zalloc(&cmd_buffer->vk.pool->alloc,
1533
 
                       sizeof(*sub_cmd),
1534
 
                       8,
1535
 
                       VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1536
 
   if (!sub_cmd) {
1537
 
      state->status = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
1538
 
      return state->status;
1539
 
   }
1540
 
 
1541
 
   sub_cmd->type = type;
1542
 
 
1543
 
   switch (type) {
1544
 
   case PVR_SUB_CMD_TYPE_GRAPHICS:
1545
 
 
1546
 
      sub_cmd->gfx.depth_usage = PVR_DEPTH_STENCIL_USAGE_UNDEFINED;
1547
 
      sub_cmd->gfx.stencil_usage = PVR_DEPTH_STENCIL_USAGE_UNDEFINED;
1548
 
      sub_cmd->gfx.modifies_depth = false;
1549
 
      sub_cmd->gfx.modifies_stencil = false;
1550
 
      sub_cmd->gfx.max_tiles_in_flight =
1551
 
         PVR_GET_FEATURE_VALUE(&device->pdevice->dev_info,
1552
 
                               isp_max_tiles_in_flight,
1553
 
                               1);
1554
 
      sub_cmd->gfx.hw_render_idx = state->render_pass_info.current_hw_subpass;
1555
 
      sub_cmd->gfx.framebuffer = state->render_pass_info.framebuffer;
1556
 
      sub_cmd->gfx.empty_cmd = true;
1557
 
 
1558
 
      pvr_reset_graphics_dirty_state(state, true);
1559
 
      pvr_csb_init(device,
1560
 
                   PVR_CMD_STREAM_TYPE_GRAPHICS,
1561
 
                   &sub_cmd->gfx.control_stream);
1562
 
      break;
1563
 
 
1564
 
   case PVR_SUB_CMD_TYPE_COMPUTE:
1565
 
      pvr_csb_init(device,
1566
 
                   PVR_CMD_STREAM_TYPE_COMPUTE,
1567
 
                   &sub_cmd->compute.control_stream);
1568
 
      break;
1569
 
 
1570
 
   case PVR_SUB_CMD_TYPE_TRANSFER:
1571
 
      list_inithead(&sub_cmd->transfer.transfer_cmds);
1572
 
      break;
1573
 
 
1574
 
   default:
1575
 
      pvr_finishme("Unsupported sub-command type %d", type);
1576
 
      break;
1577
 
   }
1578
 
 
1579
 
   list_addtail(&sub_cmd->link, &cmd_buffer->sub_cmds);
1580
 
   state->current_sub_cmd = sub_cmd;
1581
 
 
1582
 
   return VK_SUCCESS;
1583
 
}
1584
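/* Illustrative sketch, not driver code: the sub-command state machine that
 * pvr_cmd_buffer_start_sub_cmd()/pvr_cmd_buffer_end_sub_cmd() implement,
 * reduced to plain C. A command buffer has at most one current sub-command;
 * a request for the same type keeps appending to it, while a request for a
 * different type closes the current one and opens a new one. All names below
 * are made up for the example.
 */
#if 0
enum example_sub_cmd_type { EX_NONE, EX_GRAPHICS, EX_COMPUTE, EX_TRANSFER };

struct example_cmd_buffer {
   enum example_sub_cmd_type current;
   unsigned sub_cmd_count;
};

static void example_end_sub_cmd(struct example_cmd_buffer *cb)
{
   /* The real driver emits terminate words and initialises the job here. */
   cb->current = EX_NONE;
}

static void example_start_sub_cmd(struct example_cmd_buffer *cb,
                                  enum example_sub_cmd_type type)
{
   if (cb->current == type)
      return; /* Keep recording into the current sub-command. */

   if (cb->current != EX_NONE)
      example_end_sub_cmd(cb);

   cb->current = type;
   cb->sub_cmd_count++;
}
#endif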
 
 
1585
 
VkResult pvr_cmd_buffer_alloc_mem(struct pvr_cmd_buffer *cmd_buffer,
1586
 
                                  struct pvr_winsys_heap *heap,
1587
 
                                  uint64_t size,
1588
 
                                  uint32_t flags,
1589
 
                                  struct pvr_bo **const pvr_bo_out)
1590
 
{
1591
 
   const uint32_t cache_line_size =
1592
 
      rogue_get_slc_cache_line_size(&cmd_buffer->device->pdevice->dev_info);
1593
 
   struct pvr_bo *pvr_bo;
1594
 
   VkResult result;
1595
 
 
1596
 
   result = pvr_bo_alloc(cmd_buffer->device,
1597
 
                         heap,
1598
 
                         size,
1599
 
                         cache_line_size,
1600
 
                         flags,
1601
 
                         &pvr_bo);
1602
 
   if (result != VK_SUCCESS) {
1603
 
      cmd_buffer->state.status = result;
1604
 
      return result;
1605
 
   }
1606
 
 
1607
 
   list_add(&pvr_bo->link, &cmd_buffer->bo_list);
1608
 
 
1609
 
   *pvr_bo_out = pvr_bo;
1610
 
 
1611
 
   return VK_SUCCESS;
1612
 
}
1613
 
 
1614
 
VkResult pvr_ResetCommandBuffer(VkCommandBuffer commandBuffer,
1615
 
                                VkCommandBufferResetFlags flags)
1616
 
{
1617
 
   assert(!"Unimplemented");
1618
 
   return VK_SUCCESS;
1619
 
}
1620
 
 
1621
 
static void pvr_cmd_bind_compute_pipeline(
1622
 
   const struct pvr_compute_pipeline *const compute_pipeline,
1623
 
   struct pvr_cmd_buffer *const cmd_buffer)
1624
 
{
1625
 
   cmd_buffer->state.compute_pipeline = compute_pipeline;
1626
 
   cmd_buffer->state.dirty.compute_pipeline_binding = true;
1627
 
}
1628
 
 
1629
 
static void pvr_cmd_bind_graphics_pipeline(
1630
 
   const struct pvr_graphics_pipeline *const gfx_pipeline,
1631
 
   struct pvr_cmd_buffer *const cmd_buffer)
1632
 
{
1633
 
   struct pvr_dynamic_state *const dest_state =
1634
 
      &cmd_buffer->state.dynamic.common;
1635
 
   const struct pvr_dynamic_state *const src_state =
1636
 
      &gfx_pipeline->dynamic_state;
1637
 
   struct pvr_cmd_buffer_state *const cmd_buffer_state = &cmd_buffer->state;
1638
 
   const uint32_t state_mask = src_state->mask;
1639
 
 
1640
 
   cmd_buffer_state->gfx_pipeline = gfx_pipeline;
1641
 
   cmd_buffer_state->dirty.gfx_pipeline_binding = true;
1642
 
 
1643
 
   /* FIXME: Handle PVR_DYNAMIC_STATE_BIT_VIEWPORT. */
1644
 
   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_VIEWPORT)) {
1645
 
      assert(!"Unimplemented");
1646
 
   }
1647
 
 
1648
 
   /* FIXME: Handle PVR_DYNAMIC_STATE_BIT_SCISSOR. */
1649
 
   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_SCISSOR)) {
1650
 
      assert(!"Unimplemented");
1651
 
   }
1652
 
 
1653
 
   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_LINE_WIDTH)) {
1654
 
      dest_state->line_width = src_state->line_width;
1655
 
 
1656
 
      cmd_buffer_state->dirty.line_width = true;
1657
 
   }
1658
 
 
1659
 
   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_DEPTH_BIAS)) {
1660
 
      memcpy(&dest_state->depth_bias,
1661
 
             &src_state->depth_bias,
1662
 
             sizeof(src_state->depth_bias));
1663
 
 
1664
 
      cmd_buffer_state->dirty.depth_bias = true;
1665
 
   }
1666
 
 
1667
 
   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_BLEND_CONSTANTS)) {
1668
 
      STATIC_ASSERT(
1669
 
         __same_type(dest_state->blend_constants, src_state->blend_constants));
1670
 
 
1671
 
      typed_memcpy(dest_state->blend_constants,
1672
 
                   src_state->blend_constants,
1673
 
                   ARRAY_SIZE(dest_state->blend_constants));
1674
 
 
1675
 
      cmd_buffer_state->dirty.blend_constants = true;
1676
 
   }
1677
 
 
1678
 
   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_STENCIL_COMPARE_MASK)) {
1679
 
      dest_state->compare_mask.front = src_state->compare_mask.front;
1680
 
      dest_state->compare_mask.back = src_state->compare_mask.back;
1681
 
 
1682
 
      cmd_buffer_state->dirty.compare_mask = true;
1683
 
   }
1684
 
 
1685
 
   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_STENCIL_WRITE_MASK)) {
1686
 
      dest_state->write_mask.front = src_state->write_mask.front;
1687
 
      dest_state->write_mask.back = src_state->write_mask.back;
1688
 
 
1689
 
      cmd_buffer_state->dirty.write_mask = true;
1690
 
   }
1691
 
 
1692
 
   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_STENCIL_REFERENCE)) {
1693
 
      dest_state->reference.front = src_state->reference.front;
1694
 
      dest_state->reference.back = src_state->reference.back;
1695
 
 
1696
 
      cmd_buffer_state->dirty.reference = true;
1697
 
   }
1698
 
}
1699
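/* Illustrative sketch, not driver code: the pattern used by
 * pvr_cmd_bind_graphics_pipeline() above. Any state whose
 * PVR_DYNAMIC_STATE_BIT_* bit is clear in the pipeline's mask is copied from
 * the pipeline into the command buffer and marked dirty; state whose bit is
 * set is left for the vkCmdSet*() entry points to provide. The bit value and
 * struct below are assumptions made only for this example.
 */
#if 0
#include <stdbool.h>
#include <stdint.h>

#define EXAMPLE_DYNAMIC_LINE_WIDTH_BIT (1u << 0)

struct example_dynamic_state {
   float line_width;
};

static void example_bind_pipeline(uint32_t pipeline_dynamic_mask,
                                  const struct example_dynamic_state *pipeline,
                                  struct example_dynamic_state *cmd_buffer,
                                  bool *line_width_dirty)
{
   if (!(pipeline_dynamic_mask & EXAMPLE_DYNAMIC_LINE_WIDTH_BIT)) {
      cmd_buffer->line_width = pipeline->line_width;
      *line_width_dirty = true;
   }
}
#endif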
 
 
1700
 
void pvr_CmdBindPipeline(VkCommandBuffer commandBuffer,
1701
 
                         VkPipelineBindPoint pipelineBindPoint,
1702
 
                         VkPipeline _pipeline)
1703
 
{
1704
 
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
1705
 
   PVR_FROM_HANDLE(pvr_pipeline, pipeline, _pipeline);
1706
 
 
1707
 
   switch (pipelineBindPoint) {
1708
 
   case VK_PIPELINE_BIND_POINT_COMPUTE:
1709
 
      pvr_cmd_bind_compute_pipeline(to_pvr_compute_pipeline(pipeline),
1710
 
                                    cmd_buffer);
1711
 
      break;
1712
 
 
1713
 
   case VK_PIPELINE_BIND_POINT_GRAPHICS:
1714
 
      pvr_cmd_bind_graphics_pipeline(to_pvr_graphics_pipeline(pipeline),
1715
 
                                     cmd_buffer);
1716
 
      break;
1717
 
 
1718
 
   default:
1719
 
      unreachable("Invalid bind point.");
1720
 
      break;
1721
 
   }
1722
 
}
1723
 
 
1724
 
#if defined(DEBUG)
1725
 
static void check_viewport_quirk_70165(const struct pvr_device *device,
1726
 
                                       const VkViewport *pViewport)
1727
 
{
1728
 
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
1729
 
   float min_vertex_x, max_vertex_x, min_vertex_y, max_vertex_y;
1730
 
   float min_screen_space_value, max_screen_space_value;
1731
 
   float sign_to_unsigned_offset, fixed_point_max;
1732
 
   float guardband_width, guardband_height;
1733
 
 
1734
 
   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
1735
 
      /* Max representable value in 13.4 fixed point format.
1736
 
       * Rounded down to avoid precision issues.
1737
 
       * Calculated as (2 ** 13) - 2*(2 ** -4)
1738
 
       */
1739
 
      fixed_point_max = 8192.0f - 2.0f / 16.0f;
1740
 
 
1741
 
      if (PVR_HAS_FEATURE(dev_info, screen_size8K)) {
1742
 
         if (pViewport->width <= 4096 && pViewport->height <= 4096) {
1743
 
            guardband_width = pViewport->width / 4.0f;
1744
 
            guardband_height = pViewport->height / 4.0f;
1745
 
 
1746
 
            /* 2k of the range is negative */
1747
 
            sign_to_unsigned_offset = 2048.0f;
1748
 
         } else {
1749
 
            guardband_width = 0.0f;
1750
 
            guardband_height = 0.0f;
1751
 
 
1752
 
            /* For > 4k renders, the entire range is positive */
1753
 
            sign_to_unsigned_offset = 0.0f;
1754
 
         }
1755
 
      } else {
1756
 
         guardband_width = pViewport->width / 4.0f;
1757
 
         guardband_height = pViewport->height / 4.0f;
1758
 
 
1759
 
         /* 2k of the range is negative */
1760
 
         sign_to_unsigned_offset = 2048.0f;
1761
 
      }
1762
 
   } else {
1763
 
      /* Max representable value in 16.8 fixed point format
1764
 
       * Calculated as (2 ** 16) - (2 ** -8)
1765
 
       */
1766
 
      fixed_point_max = 65535.99609375f;
1767
 
      guardband_width = pViewport->width / 4.0f;
1768
 
      guardband_height = pViewport->height / 4.0f;
1769
 
 
1770
 
      /* 4k/20k of the range is negative */
1771
 
      sign_to_unsigned_offset = (float)PVR_MAX_NEG_OFFSCREEN_OFFSET;
1772
 
   }
1773
 
 
1774
 
   min_screen_space_value = -sign_to_unsigned_offset;
1775
 
   max_screen_space_value = fixed_point_max - sign_to_unsigned_offset;
1776
 
 
1777
 
   min_vertex_x = pViewport->x - guardband_width;
1778
 
   max_vertex_x = pViewport->x + pViewport->width + guardband_width;
1779
 
   min_vertex_y = pViewport->y - guardband_height;
1780
 
   max_vertex_y = pViewport->y + pViewport->height + guardband_height;
1781
 
   if (min_vertex_x < min_screen_space_value ||
1782
 
       max_vertex_x > max_screen_space_value ||
1783
 
       min_vertex_y < min_screen_space_value ||
1784
 
       max_vertex_y > max_screen_space_value) {
1785
 
      mesa_logw("Viewport is affected by BRN70165, geometry outside "
1786
 
                "the viewport could be corrupted");
1787
 
   }
1788
 
}
1789
 
#endif
1790
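/* Illustrative sketch, not driver code: the screen-space range check done by
 * check_viewport_quirk_70165() above, reduced to a single worked example for
 * the 13.4 fixed-point path (viewport <= 4096, quarter-viewport guard band,
 * 2048 of the range reserved for negative coordinates). Returns true when the
 * guard-band-expanded viewport still fits in the representable range, i.e.
 * when the quirk does not apply.
 */
#if 0
#include <stdbool.h>

static bool example_viewport_fits_13_4(float x, float y,
                                       float width, float height)
{
   const float fixed_point_max = 8192.0f - 2.0f / 16.0f; /* 8191.875 */
   const float sign_to_unsigned_offset = 2048.0f;
   const float min_allowed = -sign_to_unsigned_offset;
   const float max_allowed = fixed_point_max - sign_to_unsigned_offset;

   const float guard_w = width / 4.0f;
   const float guard_h = height / 4.0f;

   /* e.g. a 1920x1080 viewport at (0, 0): min_x = -480, max_x = 2400, both
    * inside [-2048, 6143.875], so BRN70165 is not a concern for it.
    */
   return (x - guard_w) >= min_allowed &&
          (x + width + guard_w) <= max_allowed &&
          (y - guard_h) >= min_allowed &&
          (y + height + guard_h) <= max_allowed;
}
#endif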
 
 
1791
 
void pvr_CmdSetViewport(VkCommandBuffer commandBuffer,
1792
 
                        uint32_t firstViewport,
1793
 
                        uint32_t viewportCount,
1794
 
                        const VkViewport *pViewports)
1795
 
{
1796
 
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
1797
 
   const uint32_t total_count = firstViewport + viewportCount;
1798
 
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
1799
 
 
1800
 
   assert(firstViewport < PVR_MAX_VIEWPORTS && viewportCount > 0);
1801
 
   assert(total_count >= 1 && total_count <= PVR_MAX_VIEWPORTS);
1802
 
 
1803
 
   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
1804
 
 
1805
 
#if defined(DEBUG)
1806
 
   if (PVR_HAS_QUIRK(&cmd_buffer->device->pdevice->dev_info, 70165)) {
1807
 
      for (uint32_t viewport = 0; viewport < viewportCount; viewport++) {
1808
 
         check_viewport_quirk_70165(cmd_buffer->device, &pViewports[viewport]);
1809
 
      }
1810
 
   }
1811
 
#endif
1812
 
 
1813
 
   if (state->dynamic.common.viewport.count < total_count)
1814
 
      state->dynamic.common.viewport.count = total_count;
1815
 
 
1816
 
   memcpy(&state->dynamic.common.viewport.viewports[firstViewport],
1817
 
          pViewports,
1818
 
          viewportCount * sizeof(*pViewports));
1819
 
 
1820
 
   state->dirty.viewport = true;
1821
 
}
1822
 
 
1823
 
void pvr_CmdSetScissor(VkCommandBuffer commandBuffer,
1824
 
                       uint32_t firstScissor,
1825
 
                       uint32_t scissorCount,
1826
 
                       const VkRect2D *pScissors)
1827
 
{
1828
 
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
1829
 
   const uint32_t total_count = firstScissor + scissorCount;
1830
 
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
1831
 
 
1832
 
   assert(firstScissor < PVR_MAX_VIEWPORTS && scissorCount > 0);
1833
 
   assert(total_count >= 1 && total_count <= PVR_MAX_VIEWPORTS);
1834
 
 
1835
 
   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
1836
 
 
1837
 
   if (state->dynamic.common.scissor.count < total_count)
1838
 
      state->dynamic.common.scissor.count = total_count;
1839
 
 
1840
 
   memcpy(&state->dynamic.common.scissor.scissors[firstScissor],
1841
 
          pScissors,
1842
 
          scissorCount * sizeof(*pScissors));
1843
 
 
1844
 
   state->dirty.scissor = true;
1845
 
}
1846
 
 
1847
 
void pvr_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
1848
 
{
1849
 
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
1850
 
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
1851
 
 
1852
 
   state->dynamic.common.line_width = lineWidth;
1853
 
   state->dirty.line_width = true;
1854
 
}
1855
 
 
1856
 
void pvr_CmdSetDepthBias(VkCommandBuffer commandBuffer,
1857
 
                         float depthBiasConstantFactor,
1858
 
                         float depthBiasClamp,
1859
 
                         float depthBiasSlopeFactor)
1860
 
{
1861
 
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
1862
 
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
1863
 
 
1864
 
   state->dynamic.common.depth_bias.constant_factor = depthBiasConstantFactor;
1865
 
   state->dynamic.common.depth_bias.clamp = depthBiasClamp;
1866
 
   state->dynamic.common.depth_bias.slope_factor = depthBiasSlopeFactor;
1867
 
   state->dirty.depth_bias = true;
1868
 
}
1869
 
 
1870
 
void pvr_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
1871
 
                              const float blendConstants[4])
1872
 
{
1873
 
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
1874
 
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
1875
 
 
1876
 
   STATIC_ASSERT(ARRAY_SIZE(state->dynamic.common.blend_constants) == 4);
1877
 
   memcpy(state->dynamic.common.blend_constants,
1878
 
          blendConstants,
1879
 
          sizeof(state->dynamic.common.blend_constants));
1880
 
 
1881
 
   state->dirty.blend_constants = true;
1882
 
}
1883
 
 
1884
 
void pvr_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
1885
 
                           float minDepthBounds,
1886
 
                           float maxDepthBounds)
1887
 
{
1888
 
   mesa_logd("No support for depth bounds testing.");
1889
 
}
1890
 
 
1891
 
void pvr_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
1892
 
                                  VkStencilFaceFlags faceMask,
1893
 
                                  uint32_t compareMask)
1894
 
{
1895
 
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
1896
 
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
1897
 
 
1898
 
   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
1899
 
      state->dynamic.common.compare_mask.front = compareMask;
1900
 
 
1901
 
   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
1902
 
      state->dynamic.common.compare_mask.back = compareMask;
1903
 
 
1904
 
   state->dirty.compare_mask = true;
1905
 
}
1906
 
 
1907
 
void pvr_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
1908
 
                                VkStencilFaceFlags faceMask,
1909
 
                                uint32_t writeMask)
1910
 
{
1911
 
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
1912
 
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
1913
 
 
1914
 
   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
1915
 
      state->dynamic.common.write_mask.front = writeMask;
1916
 
 
1917
 
   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
1918
 
      state->dynamic.common.write_mask.back = writeMask;
1919
 
 
1920
 
   state->dirty.write_mask = true;
1921
 
}
1922
 
 
1923
 
void pvr_CmdSetStencilReference(VkCommandBuffer commandBuffer,
1924
 
                                VkStencilFaceFlags faceMask,
1925
 
                                uint32_t reference)
1926
 
{
1927
 
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
1928
 
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
1929
 
 
1930
 
   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
1931
 
      state->dynamic.common.reference.front = reference;
1932
 
 
1933
 
   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
1934
 
      state->dynamic.common.reference.back = reference;
1935
 
 
1936
 
   state->dirty.reference = true;
1937
 
}
1938
 
 
1939
 
void pvr_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
1940
 
                               VkPipelineBindPoint pipelineBindPoint,
1941
 
                               VkPipelineLayout _layout,
1942
 
                               uint32_t firstSet,
1943
 
                               uint32_t descriptorSetCount,
1944
 
                               const VkDescriptorSet *pDescriptorSets,
1945
 
                               uint32_t dynamicOffsetCount,
1946
 
                               const uint32_t *pDynamicOffsets)
1947
 
{
1948
 
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
1949
 
   struct pvr_descriptor_state *descriptor_state;
1950
 
 
1951
 
   assert(firstSet + descriptorSetCount <= PVR_MAX_DESCRIPTOR_SETS);
1952
 
 
1953
 
   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
1954
 
 
1955
 
   switch (pipelineBindPoint) {
1956
 
   case VK_PIPELINE_BIND_POINT_GRAPHICS:
1957
 
   case VK_PIPELINE_BIND_POINT_COMPUTE:
1958
 
      break;
1959
 
 
1960
 
   default:
1961
 
      unreachable("Unsupported bind point.");
1962
 
      break;
1963
 
   }
1964
 
 
1965
 
   if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
1966
 
      descriptor_state = &cmd_buffer->state.gfx_desc_state;
1967
 
      cmd_buffer->state.dirty.gfx_desc_dirty = true;
1968
 
   } else {
1969
 
      descriptor_state = &cmd_buffer->state.compute_desc_state;
1970
 
      cmd_buffer->state.dirty.compute_desc_dirty = true;
1971
 
   }
1972
 
 
1973
 
   for (uint32_t i = 0; i < descriptorSetCount; i++) {
1974
 
      PVR_FROM_HANDLE(pvr_descriptor_set, set, pDescriptorSets[i]);
1975
 
      uint32_t index = firstSet + i;
1976
 
 
1977
 
      if (descriptor_state->descriptor_sets[index] != set) {
1978
 
         descriptor_state->descriptor_sets[index] = set;
1979
 
         descriptor_state->valid_mask |= (1u << index);
1980
 
      }
1981
 
   }
1982
 
}
1983
 
 
1984
 
void pvr_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
1985
 
                              uint32_t firstBinding,
1986
 
                              uint32_t bindingCount,
1987
 
                              const VkBuffer *pBuffers,
1988
 
                              const VkDeviceSize *pOffsets)
1989
 
{
1990
 
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
1991
 
   struct pvr_vertex_binding *const vb = cmd_buffer->state.vertex_bindings;
1992
 
 
1993
 
   /* We have to defer setting up the vertex buffers since we need the buffer
1994
 
    * stride from the pipeline.
1995
 
    */
1996
 
 
1997
 
   assert(firstBinding < PVR_MAX_VERTEX_INPUT_BINDINGS &&
1998
 
          bindingCount <= PVR_MAX_VERTEX_INPUT_BINDINGS);
1999
 
 
2000
 
   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
2001
 
 
2002
 
   for (uint32_t i = 0; i < bindingCount; i++) {
2003
 
      vb[firstBinding + i].buffer = pvr_buffer_from_handle(pBuffers[i]);
2004
 
      vb[firstBinding + i].offset = pOffsets[i];
2005
 
   }
2006
 
 
2007
 
   cmd_buffer->state.dirty.vertex_bindings = true;
2008
 
}
2009
 
 
2010
 
void pvr_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
2011
 
                            VkBuffer buffer,
2012
 
                            VkDeviceSize offset,
2013
 
                            VkIndexType indexType)
2014
 
{
2015
 
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
2016
 
   PVR_FROM_HANDLE(pvr_buffer, index_buffer, buffer);
2017
 
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
2018
 
 
2019
 
   assert(offset < index_buffer->size);
2020
 
   assert(indexType == VK_INDEX_TYPE_UINT32 ||
2021
 
          indexType == VK_INDEX_TYPE_UINT16);
2022
 
 
2023
 
   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
2024
 
 
2025
 
   state->index_buffer_binding.buffer = index_buffer;
2026
 
   state->index_buffer_binding.offset = offset;
2027
 
   state->index_buffer_binding.type = indexType;
2028
 
   state->dirty.index_buffer_binding = true;
2029
 
}
2030
 
 
2031
 
void pvr_CmdPushConstants(VkCommandBuffer commandBuffer,
2032
 
                          VkPipelineLayout layout,
2033
 
                          VkShaderStageFlags stageFlags,
2034
 
                          uint32_t offset,
2035
 
                          uint32_t size,
2036
 
                          const void *pValues)
2037
 
{
2038
 
#if defined(DEBUG)
2039
 
   const uint64_t ending = (uint64_t)offset + (uint64_t)size;
2040
 
#endif
2041
 
 
2042
 
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
2043
 
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
2044
 
 
2045
 
   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
2046
 
 
2047
 
   pvr_assert(ending <= PVR_MAX_PUSH_CONSTANTS_SIZE);
2048
 
 
2049
 
   memcpy(&state->push_constants.data[offset], pValues, size);
2050
 
 
2051
 
   state->push_constants.dirty_stages |= stageFlags;
2052
 
}
2053
 
 
2054
 
static VkResult
2055
 
pvr_cmd_buffer_setup_attachments(struct pvr_cmd_buffer *cmd_buffer,
2056
 
                                 const struct pvr_render_pass *pass,
2057
 
                                 const struct pvr_framebuffer *framebuffer)
2058
 
{
2059
 
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
2060
 
   struct pvr_render_pass_info *info = &state->render_pass_info;
2061
 
 
2062
 
   assert(pass->attachment_count == framebuffer->attachment_count);
2063
 
 
2064
 
   /* Free any previously allocated attachments. */
2065
 
   vk_free(&cmd_buffer->vk.pool->alloc, state->render_pass_info.attachments);
2066
 
 
2067
 
   if (pass->attachment_count == 0) {
2068
 
      info->attachments = NULL;
2069
 
      return VK_SUCCESS;
2070
 
   }
2071
 
 
2072
 
   info->attachments =
2073
 
      vk_zalloc(&cmd_buffer->vk.pool->alloc,
2074
 
                pass->attachment_count * sizeof(*info->attachments),
2075
 
                8,
2076
 
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2077
 
   if (!info->attachments) {
2078
 
      /* Propagate VK_ERROR_OUT_OF_HOST_MEMORY to vkEndCommandBuffer */
2079
 
      state->status = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
2080
 
      return state->status;
2081
 
   }
2082
 
 
2083
 
   if (framebuffer) {
2084
 
      for (uint32_t i = 0; i < pass->attachment_count; i++)
2085
 
         info->attachments[i] = framebuffer->attachments[i];
2086
 
   }
2087
 
 
2088
 
   return VK_SUCCESS;
2089
 
}
2090
 
 
2091
 
static VkResult pvr_init_render_targets(struct pvr_device *device,
2092
 
                                        struct pvr_render_pass *pass,
2093
 
                                        struct pvr_framebuffer *framebuffer)
2094
 
{
2095
 
   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
2096
 
      struct pvr_render_target *render_target =
2097
 
         pvr_get_render_target(pass, framebuffer, i);
2098
 
 
2099
 
      pthread_mutex_lock(&render_target->mutex);
2100
 
 
2101
 
      if (!render_target->valid) {
2102
 
         const struct pvr_renderpass_hwsetup_render *hw_render =
2103
 
            &pass->hw_setup->renders[i];
2104
 
         VkResult result;
2105
 
 
2106
 
         result = pvr_render_target_dataset_create(device,
2107
 
                                                   framebuffer->width,
2108
 
                                                   framebuffer->height,
2109
 
                                                   hw_render->sample_count,
2110
 
                                                   framebuffer->layers,
2111
 
                                                   &render_target->rt_dataset);
2112
 
         if (result != VK_SUCCESS) {
2113
 
            pthread_mutex_unlock(&render_target->mutex);
2114
 
            return result;
2115
 
         }
2116
 
 
2117
 
         render_target->valid = true;
2118
 
      }
2119
 
 
2120
 
      pthread_mutex_unlock(&render_target->mutex);
2121
 
   }
2122
 
 
2123
 
   return VK_SUCCESS;
2124
 
}
2125
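/* Illustrative sketch, not driver code: the lazy, mutex-guarded one-time
 * initialisation pattern used by pvr_init_render_targets() above. The dataset
 * is created at most once per render target even if several command buffers
 * begin the same render pass concurrently; names and types are made up for
 * the example.
 */
#if 0
#include <pthread.h>
#include <stdbool.h>

struct example_render_target {
   pthread_mutex_t mutex;
   bool valid;
   void *dataset;
};

static int example_init_render_target(struct example_render_target *rt,
                                      void *(*create_dataset)(void))
{
   int ret = 0;

   pthread_mutex_lock(&rt->mutex);

   if (!rt->valid) {
      rt->dataset = create_dataset();
      if (rt->dataset)
         rt->valid = true;
      else
         ret = -1;
   }

   pthread_mutex_unlock(&rt->mutex);

   return ret;
}
#endif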
 
 
2126
 
static const struct pvr_renderpass_hwsetup_subpass *
2127
 
pvr_get_hw_subpass(const struct pvr_render_pass *pass, const uint32_t subpass)
2128
 
{
2129
 
   const struct pvr_renderpass_hw_map *map =
2130
 
      &pass->hw_setup->subpass_map[subpass];
2131
 
 
2132
 
   return &pass->hw_setup->renders[map->render].subpasses[map->subpass];
2133
 
}
2134
 
 
2135
 
static void pvr_perform_start_of_render_attachment_clear(
2136
 
   struct pvr_cmd_buffer *cmd_buffer,
2137
 
   const struct pvr_framebuffer *framebuffer,
2138
 
   uint32_t index,
2139
 
   bool is_depth_stencil,
2140
 
   uint32_t *index_list_clear_mask)
2141
 
{
2142
 
   struct pvr_render_pass_info *info = &cmd_buffer->state.render_pass_info;
2143
 
   const struct pvr_render_pass *pass = info->pass;
2144
 
   const struct pvr_renderpass_hwsetup_render *hw_render;
2145
 
   const struct pvr_renderpass_hwsetup *hw_setup;
2146
 
   struct pvr_image_view *iview;
2147
 
   uint32_t view_idx;
2148
 
   uint32_t height;
2149
 
   uint32_t width;
2150
 
 
2151
 
   hw_setup = pass->hw_setup;
2152
 
   hw_render =
2153
 
      &hw_setup->renders[hw_setup->subpass_map[info->subpass_idx].render];
2154
 
 
2155
 
   if (is_depth_stencil) {
2156
 
      bool stencil_clear;
2157
 
      bool depth_clear;
2158
 
      bool is_stencil;
2159
 
      bool is_depth;
2160
 
 
2161
 
      assert(hw_render->ds_surface_id != -1);
2162
 
      assert(index == 0);
2163
 
 
2164
 
      view_idx = hw_render->ds_surface_id;
2165
 
 
2166
 
      is_depth = vk_format_has_depth(pass->attachments[view_idx].vk_format);
2167
 
      is_stencil = vk_format_has_stencil(pass->attachments[view_idx].vk_format);
2168
 
      depth_clear = hw_render->depth_init == RENDERPASS_SURFACE_INITOP_CLEAR;
2169
 
      stencil_clear = hw_render->stencil_init ==
2170
 
                      RENDERPASS_SURFACE_INITOP_CLEAR;
2171
 
 
2172
 
      /* Attempt to clear the ds attachment. Do not erroneously discard an
2173
 
       * attachment that has no depth clear but does have a stencil clear.
2174
 
       */
2175
 
      /* i.e. return early unless (is_depth && depth_clear) ||
       * (is_stencil && stencil_clear).
       */
2176
 
      if (!((is_depth && depth_clear) || (is_stencil && stencil_clear)))
2177
 
         return;
2178
 
   } else if (hw_render->color_init[index].op !=
2179
 
              RENDERPASS_SURFACE_INITOP_CLEAR) {
2180
 
      return;
2181
 
   } else {
2182
 
      view_idx = hw_render->color_init[index].driver_id;
2183
 
   }
2184
 
 
2185
 
   iview = info->attachments[view_idx];
2186
 
   width = iview->vk.extent.width;
2187
 
   height = iview->vk.extent.height;
2188
 
 
2189
 
   /* FIXME: It would be nice if this function and pvr_sub_cmd_gfx_job_init()
2190
 
    * were doing the same check (even if it's just an assert) to determine if a
2191
 
    * clear is needed.
2192
 
    */
2193
 
   /* If this is single-layer fullscreen, we already do the clears in
2194
 
    * pvr_sub_cmd_gfx_job_init().
2195
 
    */
2196
 
   if (info->render_area.offset.x == 0 && info->render_area.offset.y == 0 &&
2197
 
       info->render_area.extent.width == width &&
2198
 
       info->render_area.extent.height == height && framebuffer->layers == 1) {
2199
 
      return;
2200
 
   }
2201
 
 
2202
 
   pvr_finishme("Unimplemented path!");
2203
 
}
2204
 
 
2205
 
static void
2206
 
pvr_perform_start_of_render_clears(struct pvr_cmd_buffer *cmd_buffer)
2207
 
{
2208
 
   struct pvr_render_pass_info *info = &cmd_buffer->state.render_pass_info;
2209
 
   const struct pvr_framebuffer *framebuffer = info->framebuffer;
2210
 
   const struct pvr_render_pass *pass = info->pass;
2211
 
   const struct pvr_renderpass_hwsetup *hw_setup = pass->hw_setup;
2212
 
   const struct pvr_renderpass_hwsetup_render *hw_render;
2213
 
 
2214
 
   /* Mask of attachment clears using index lists instead of background object
2215
 
    * to clear.
2216
 
    */
2217
 
   uint32_t index_list_clear_mask = 0;
2218
 
 
2219
 
   hw_render =
2220
 
      &hw_setup->renders[hw_setup->subpass_map[info->subpass_idx].render];
2221
 
   if (!hw_render) {
2222
 
      info->process_empty_tiles = false;
2223
 
      info->enable_bg_tag = false;
2224
 
      return;
2225
 
   }
2226
 
 
2227
 
   for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
2228
 
      pvr_perform_start_of_render_attachment_clear(cmd_buffer,
2229
 
                                                   framebuffer,
2230
 
                                                   i,
2231
 
                                                   false,
2232
 
                                                   &index_list_clear_mask);
2233
 
   }
2234
 
 
2235
 
   info->enable_bg_tag = !!hw_render->color_init_count;
2236
 
 
2237
 
   /* If we're not using an index list for all clears/loads then we need to
2238
 
    * run the background object on empty tiles.
2239
 
    */
2240
 
   if (hw_render->color_init_count &&
2241
 
       index_list_clear_mask != ((1u << hw_render->color_init_count) - 1u)) {
2242
 
      info->process_empty_tiles = true;
2243
 
   } else {
2244
 
      info->process_empty_tiles = false;
2245
 
   }
2246
 
 
2247
 
   if (hw_render->ds_surface_id != -1) {
2248
 
      uint32_t ds_index_list = 0;
2249
 
 
2250
 
      pvr_perform_start_of_render_attachment_clear(cmd_buffer,
2251
 
                                                   framebuffer,
2252
 
                                                   0,
2253
 
                                                   true,
2254
 
                                                   &ds_index_list);
2255
 
   }
2256
 
 
2257
 
   if (index_list_clear_mask)
2258
 
      pvr_finishme("Add support for generating loadops shaders!");
2259
 
}
2260
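/* Illustrative sketch, not driver code: the "does every colour init use an
 * index list" decision above is the usual full-mask idiom. With N colour
 * inits the full mask is (1u << N) - 1u; anything less means at least one
 * attachment still relies on the background object, so empty tiles must be
 * processed.
 */
#if 0
#include <stdbool.h>
#include <stdint.h>

static bool
example_needs_empty_tile_processing(uint32_t index_list_clear_mask,
                                    uint32_t color_init_count)
{
   const uint32_t full_mask = (1u << color_init_count) - 1u;

   /* e.g. 3 colour inits -> full mask 0b111; a mask of 0b101 means
    * attachment 1 still needs the background object.
    */
   return color_init_count != 0 && index_list_clear_mask != full_mask;
}
#endif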
 
 
2261
 
static void pvr_stash_depth_format(struct pvr_cmd_buffer_state *state)
2262
 
{
2263
 
   const struct pvr_render_pass *pass = state->render_pass_info.pass;
2264
 
   const struct pvr_renderpass_hwsetup_render *hw_render =
2265
 
      &pass->hw_setup->renders[state->current_sub_cmd->gfx.hw_render_idx];
2266
 
 
2267
 
   if (hw_render->ds_surface_id != -1) {
2268
 
      struct pvr_image_view **iviews = state->render_pass_info.attachments;
2269
 
 
2270
 
      state->depth_format = iviews[hw_render->ds_surface_id]->vk.format;
2271
 
   }
2272
 
}
2273
 
 
2274
 
static bool pvr_loadops_contain_clear(struct pvr_renderpass_hwsetup *hw_setup)
2275
 
{
2276
 
   for (uint32_t i = 0; i < hw_setup->render_count; i++) {
2277
 
      struct pvr_renderpass_hwsetup_render *hw_render = &hw_setup->renders[i];
2278
 
      uint32_t render_targets_count =
2279
 
         hw_render->init_setup.render_targets_count;
2280
 
 
2281
 
      for (uint32_t j = 0;
2282
 
           j < (hw_render->color_init_count * render_targets_count);
2283
 
           j += render_targets_count) {
2284
 
         for (uint32_t k = 0; k < hw_render->init_setup.render_targets_count;
2285
 
              k++) {
2286
 
            if (hw_render->color_init[j + k].op ==
2287
 
                RENDERPASS_SURFACE_INITOP_CLEAR) {
2288
 
               return true;
2289
 
            }
2290
 
         }
2291
 
      }
2292
 
      if (hw_render->depth_init == RENDERPASS_SURFACE_INITOP_CLEAR ||
2293
 
          hw_render->stencil_init == RENDERPASS_SURFACE_INITOP_CLEAR) {
2294
 
         return true;
2295
 
      }
2296
 
   }
2297
 
 
2298
 
   return false;
2299
 
}
2300
 
 
2301
 
static VkResult
2302
 
pvr_cmd_buffer_set_clear_values(struct pvr_cmd_buffer *cmd_buffer,
2303
 
                                const VkRenderPassBeginInfo *pRenderPassBegin)
2304
 
{
2305
 
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
2306
 
 
2307
 
   /* Free any previously allocated clear values. */
2308
 
   vk_free(&cmd_buffer->vk.pool->alloc, state->render_pass_info.clear_values);
2309
 
 
2310
 
   if (pRenderPassBegin->clearValueCount) {
2311
 
      const size_t size = pRenderPassBegin->clearValueCount *
2312
 
                          sizeof(*state->render_pass_info.clear_values);
2313
 
 
2314
 
      state->render_pass_info.clear_values =
2315
 
         vk_zalloc(&cmd_buffer->vk.pool->alloc,
2316
 
                   size,
2317
 
                   8,
2318
 
                   VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
2319
 
      if (!state->render_pass_info.clear_values) {
2320
 
         state->status = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
2321
 
         return state->status;
2322
 
      }
2323
 
 
2324
 
      memcpy(state->render_pass_info.clear_values,
2325
 
             pRenderPassBegin->pClearValues,
2326
 
             size);
2327
 
   } else {
2328
 
      state->render_pass_info.clear_values = NULL;
2329
 
   }
2330
 
 
2331
 
   state->render_pass_info.clear_value_count =
2332
 
      pRenderPassBegin->clearValueCount;
2333
 
 
2334
 
   return VK_SUCCESS;
2335
 
}
2336
 
 
2337
 
void pvr_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
2338
 
                             const VkRenderPassBeginInfo *pRenderPassBeginInfo,
2339
 
                             const VkSubpassBeginInfoKHR *pSubpassBeginInfo)
2340
 
{
2341
 
   PVR_FROM_HANDLE(pvr_framebuffer,
2342
 
                   framebuffer,
2343
 
                   pRenderPassBeginInfo->framebuffer);
2344
 
   PVR_FROM_HANDLE(pvr_render_pass, pass, pRenderPassBeginInfo->renderPass);
2345
 
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
2346
 
   const struct pvr_renderpass_hwsetup_subpass *hw_subpass;
2347
 
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
2348
 
   VkResult result;
2349
 
 
2350
 
   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
2351
 
 
2352
 
   assert(!state->render_pass_info.pass);
2353
 
   assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2354
 
 
2355
 
   /* FIXME: Create a separate function for everything using pass->subpasses,
2356
 
    * look at cmd_buffer_begin_subpass() for example.
    */
2357
 
   state->render_pass_info.pass = pass;
2358
 
   state->render_pass_info.framebuffer = framebuffer;
2359
 
   state->render_pass_info.subpass_idx = 0;
2360
 
   state->render_pass_info.render_area = pRenderPassBeginInfo->renderArea;
2361
 
   state->render_pass_info.current_hw_subpass = 0;
2362
 
   state->render_pass_info.pipeline_bind_point =
2363
 
      pass->subpasses[0].pipeline_bind_point;
2364
 
   state->render_pass_info.userpass_spawn = pass->subpasses[0].userpass_spawn;
2365
 
   state->dirty.userpass_spawn = true;
2366
 
 
2367
 
   result = pvr_cmd_buffer_setup_attachments(cmd_buffer, pass, framebuffer);
2368
 
   if (result != VK_SUCCESS)
2369
 
      return;
2370
 
 
2371
 
   state->status =
2372
 
      pvr_init_render_targets(cmd_buffer->device, pass, framebuffer);
2373
 
   if (state->status != VK_SUCCESS)
2374
 
      return;
2375
 
 
2376
 
   result = pvr_cmd_buffer_set_clear_values(cmd_buffer, pRenderPassBeginInfo);
2377
 
   if (result != VK_SUCCESS)
2378
 
      return;
2379
 
 
2380
 
   assert(pass->subpasses[0].pipeline_bind_point ==
2381
 
          VK_PIPELINE_BIND_POINT_GRAPHICS);
2382
 
 
2383
 
   result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS);
2384
 
   if (result != VK_SUCCESS)
2385
 
      return;
2386
 
 
2387
 
   /* Run subpass 0 "soft" background object after the actual background
2388
 
    * object.
2389
 
    */
2390
 
   hw_subpass = pvr_get_hw_subpass(pass, 0);
2391
 
   if (hw_subpass->client_data)
2392
 
      pvr_finishme("Unimplemented path!");
2393
 
 
2394
 
   pvr_perform_start_of_render_clears(cmd_buffer);
2395
 
   pvr_stash_depth_format(&cmd_buffer->state);
2396
 
 
2397
 
   if (!pvr_loadops_contain_clear(pass->hw_setup)) {
2398
 
      state->dynamic.scissor_accum_state = PVR_SCISSOR_ACCUM_CHECK_FOR_CLEAR;
2399
 
      state->dynamic.scissor_accum_bounds.offset.x = 0;
2400
 
      state->dynamic.scissor_accum_bounds.offset.y = 0;
2401
 
      state->dynamic.scissor_accum_bounds.extent.width = 0;
2402
 
      state->dynamic.scissor_accum_bounds.extent.height = 0;
2403
 
   } else {
2404
 
      state->dynamic.scissor_accum_state = PVR_SCISSOR_ACCUM_DISABLED;
2405
 
   }
2406
 
}
2407
 
 
2408
 
static void pvr_cmd_buffer_reset(struct pvr_cmd_buffer *cmd_buffer)
2409
 
{
2410
 
   if (cmd_buffer->status != PVR_CMD_BUFFER_STATUS_INITIAL) {
2411
 
      /* FIXME: For now we always free all resources as if
2412
 
       * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT was set.
2413
 
       */
2414
 
      pvr_cmd_buffer_free_sub_cmds(cmd_buffer);
2415
 
 
2416
 
      list_for_each_entry_safe (struct pvr_bo, bo, &cmd_buffer->bo_list, link) {
2417
 
         list_del(&bo->link);
2418
 
         pvr_bo_free(cmd_buffer->device, bo);
2419
 
      }
2420
 
 
2421
 
      util_dynarray_clear(&cmd_buffer->scissor_array);
2422
 
      util_dynarray_clear(&cmd_buffer->depth_bias_array);
2423
 
 
2424
 
      cmd_buffer->state.status = VK_SUCCESS;
2425
 
      cmd_buffer->status = PVR_CMD_BUFFER_STATUS_INITIAL;
2426
 
   }
2427
 
}
2428
 
 
2429
 
VkResult pvr_BeginCommandBuffer(VkCommandBuffer commandBuffer,
2430
 
                                const VkCommandBufferBeginInfo *pBeginInfo)
2431
 
{
2432
 
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
2433
 
   struct pvr_cmd_buffer_state *state;
2434
 
   VkResult result;
2435
 
 
2436
 
   pvr_cmd_buffer_reset(cmd_buffer);
2437
 
 
2438
 
   cmd_buffer->usage_flags = pBeginInfo->flags;
2439
 
   state = &cmd_buffer->state;
2440
 
 
2441
 
   /* VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT must be ignored for
2442
 
    * primary level command buffers.
2443
 
    *
2444
 
    * From the Vulkan 1.0 spec:
2445
 
    *
2446
 
    *    VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT specifies that a
2447
 
    *    secondary command buffer is considered to be entirely inside a render
2448
 
    *    pass. If this is a primary command buffer, then this bit is ignored.
2449
 
    */
2450
 
   if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
2451
 
      cmd_buffer->usage_flags &=
2452
 
         ~VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
2453
 
   }
2454
 
 
2455
 
   if (cmd_buffer->usage_flags &
2456
 
       VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
2457
 
      const VkCommandBufferInheritanceInfo *inheritance_info =
2458
 
         pBeginInfo->pInheritanceInfo;
2459
 
      struct pvr_render_pass *pass;
2460
 
 
2461
 
      pass = pvr_render_pass_from_handle(inheritance_info->renderPass);
2462
 
      state->render_pass_info.pass = pass;
2463
 
      state->render_pass_info.framebuffer =
2464
 
         pvr_framebuffer_from_handle(inheritance_info->framebuffer);
2465
 
      state->render_pass_info.subpass_idx = inheritance_info->subpass;
2466
 
      state->render_pass_info.userpass_spawn =
2467
 
         pass->subpasses[inheritance_info->subpass].userpass_spawn;
2468
 
 
2469
 
      result =
2470
 
         pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS);
2471
 
      if (result != VK_SUCCESS)
2472
 
         return result;
2473
 
   }
2474
 
 
2475
 
   memset(state->barriers_needed,
2476
 
          0xFF,
2477
 
          sizeof(*state->barriers_needed) * ARRAY_SIZE(state->barriers_needed));
2478
 
 
2479
 
   cmd_buffer->status = PVR_CMD_BUFFER_STATUS_RECORDING;
2480
 
 
2481
 
   return VK_SUCCESS;
2482
 
}
2483
 
 
2484
 
VkResult pvr_cmd_buffer_add_transfer_cmd(struct pvr_cmd_buffer *cmd_buffer,
2485
 
                                         struct pvr_transfer_cmd *transfer_cmd)
2486
 
{
2487
 
   VkResult result;
2488
 
 
2489
 
   result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_TRANSFER);
2490
 
   if (result != VK_SUCCESS)
2491
 
      return result;
2492
 
 
2493
 
   list_addtail(&transfer_cmd->link,
2494
 
                &cmd_buffer->state.current_sub_cmd->transfer.transfer_cmds);
2495
 
 
2496
 
   return VK_SUCCESS;
2497
 
}
2498
 
 
2499
 
void pvr_CmdDispatch(VkCommandBuffer commandBuffer,
2500
 
                     uint32_t groupCountX,
2501
 
                     uint32_t groupCountY,
2502
 
                     uint32_t groupCountZ)
2503
 
{
2504
 
   assert(!"Unimplemented");
2505
 
}
2506
 
 
2507
 
void pvr_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
2508
 
                             VkBuffer _buffer,
2509
 
                             VkDeviceSize offset)
2510
 
{
2511
 
   assert(!"Unimplemented");
2512
 
}
2513
 
 
2514
 
void pvr_CmdDraw(VkCommandBuffer commandBuffer,
2515
 
                 uint32_t vertexCount,
2516
 
                 uint32_t instanceCount,
2517
 
                 uint32_t firstVertex,
2518
 
                 uint32_t firstInstance)
2519
 
{
2520
 
   assert(!"Unimplemented");
2521
 
}
2522
 
 
2523
 
static void
2524
 
pvr_update_draw_state(struct pvr_cmd_buffer_state *const state,
2525
 
                      const struct pvr_cmd_buffer_draw_state *const draw_state)
2526
 
{
2527
 
   /* We don't have a flag to tell us that base_instance is being used, so it
2528
 
    * gets used as a boolean - 0 means we'll use a pds program that skips the
2529
 
    * base instance addition. If the base_instance gets used (and the last
2530
 
    * draw's base_instance was 0) then we switch to the BASE_INSTANCE attrib
2531
 
    * program.
2532
 
    *
2533
 
    * If base_instance changes then we only need to update the data section.
2534
 
    *
2535
 
    * The only draw call state that doesn't really matter is the start vertex
2536
 
    * as that is handled properly in the VDM state in all cases.
2537
 
    */
2538
 
   if ((state->draw_state.draw_indexed != draw_state->draw_indexed) ||
2539
 
       (state->draw_state.draw_indirect != draw_state->draw_indirect) ||
2540
 
       (state->draw_state.base_instance == 0 &&
2541
 
        draw_state->base_instance != 0)) {
2542
 
      state->dirty.draw_variant = true;
2543
 
   } else if (state->draw_state.base_instance != draw_state->base_instance) {
2544
 
      state->dirty.draw_base_instance = true;
2545
 
   }
2546
 
 
2547
 
   state->draw_state = *draw_state;
2548
 
}
2549
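/* Illustrative sketch, not driver code: the transition rules implemented by
 * pvr_update_draw_state() above. Switching between indexed/non-indexed,
 * direct/indirect, or from base_instance == 0 to != 0 requires a different
 * PDS attribute program (variant dirty); merely changing a non-zero
 * base_instance only needs the data section refreshed. Names below are made
 * up for the example.
 */
#if 0
#include <stdbool.h>
#include <stdint.h>

struct example_draw_state {
   bool indexed;
   bool indirect;
   uint32_t base_instance;
};

static void example_update_draw_state(struct example_draw_state *cur,
                                      const struct example_draw_state *next,
                                      bool *variant_dirty,
                                      bool *base_instance_dirty)
{
   if (cur->indexed != next->indexed || cur->indirect != next->indirect ||
       (cur->base_instance == 0 && next->base_instance != 0))
      *variant_dirty = true;
   else if (cur->base_instance != next->base_instance)
      *base_instance_dirty = true;

   *cur = *next;
}
#endif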
 
 
2550
 
static uint32_t pvr_calc_shared_regs_count(
2551
 
   const struct pvr_graphics_pipeline *const gfx_pipeline)
2552
 
{
2553
 
   const struct pvr_pipeline_stage_state *const vertex_state =
2554
 
      &gfx_pipeline->vertex_shader_state.stage_state;
2555
 
   uint32_t shared_regs = vertex_state->const_shared_reg_count +
2556
 
                          vertex_state->const_shared_reg_offset;
2557
 
 
2558
 
   if (gfx_pipeline->fragment_shader_state.bo) {
2559
 
      const struct pvr_pipeline_stage_state *const fragment_state =
2560
 
         &gfx_pipeline->fragment_shader_state.stage_state;
2561
 
      uint32_t fragment_regs = fragment_state->const_shared_reg_count +
2562
 
                               fragment_state->const_shared_reg_offset;
2563
 
 
2564
 
      shared_regs = MAX2(shared_regs, fragment_regs);
2565
 
   }
2566
 
 
2567
 
   return shared_regs;
2568
 
}
2569
 
 
2570
 
#define PVR_WRITE(_buffer, _value, _offset, _max)                \
2571
 
   do {                                                          \
2572
 
      __typeof__(_value) __value = _value;                       \
2573
 
      uint64_t __offset = _offset;                               \
2574
 
      uint32_t __nr_dwords = sizeof(__value) / sizeof(uint32_t); \
2575
 
      static_assert(__same_type(*_buffer, __value),              \
2576
 
                    "Buffer and value type mismatch");           \
2577
 
      assert((__offset + __nr_dwords) <= (_max));                \
2578
 
      assert((__offset % __nr_dwords) == 0U);                    \
2579
 
      _buffer[__offset / __nr_dwords] = __value;                 \
2580
 
   } while (0)
2581
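/* Illustrative sketch, not driver code: what PVR_WRITE() above does with the
 * const-map offsets. Offsets into the PDS data section are expressed in
 * dwords, so a 64-bit write through the qword-aliased view of the same
 * mapping lands at dword_offset / 2, and the macro asserts that such writes
 * are 2-dword aligned and stay within the data section.
 */
#if 0
#include <assert.h>
#include <stdint.h>

static void example_write_qword(void *map, uint64_t value,
                                uint32_t dword_offset,
                                uint32_t data_size_in_dwords)
{
   uint64_t *qword_buffer = (uint64_t *)map;

   assert(dword_offset % 2 == 0);
   assert(dword_offset + 2 <= data_size_in_dwords);

   /* e.g. dword_offset 6 -> qword index 3 -> bytes 24..31 of the mapping. */
   qword_buffer[dword_offset / 2] = value;
}
#endif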
 
 
2582
 
static VkResult
2583
 
pvr_setup_vertex_buffers(struct pvr_cmd_buffer *cmd_buffer,
2584
 
                         const struct pvr_graphics_pipeline *const gfx_pipeline)
2585
 
{
2586
 
   const struct pvr_vertex_shader_state *const vertex_state =
2587
 
      &gfx_pipeline->vertex_shader_state;
2588
 
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
2589
 
   const struct pvr_pds_info *const pds_info = state->pds_shader.info;
2590
 
   const uint8_t *entries;
2591
 
   uint32_t *dword_buffer;
2592
 
   uint64_t *qword_buffer;
2593
 
   struct pvr_bo *pvr_bo;
2594
 
   VkResult result;
2595
 
 
2596
 
   result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
2597
 
                                     cmd_buffer->device->heaps.pds_heap,
2598
 
                                     pds_info->data_size_in_dwords,
2599
 
                                     PVR_BO_ALLOC_FLAG_CPU_MAPPED,
2600
 
                                     &pvr_bo);
2601
 
   if (result != VK_SUCCESS)
2602
 
      return result;
2603
 
 
2604
 
   dword_buffer = (uint32_t *)pvr_bo->bo->map;
2605
 
   qword_buffer = (uint64_t *)pvr_bo->bo->map;
2606
 
 
2607
 
   entries = (uint8_t *)pds_info->entries;
2608
 
 
2609
 
   for (uint32_t i = 0; i < pds_info->entry_count; i++) {
2610
 
      const struct pvr_const_map_entry *const entry_header =
2611
 
         (struct pvr_const_map_entry *)entries;
2612
 
 
2613
 
      switch (entry_header->type) {
2614
 
      case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32: {
2615
 
         const struct pvr_const_map_entry_literal32 *const literal =
2616
 
            (struct pvr_const_map_entry_literal32 *)entries;
2617
 
 
2618
 
         PVR_WRITE(dword_buffer,
2619
 
                   literal->literal_value,
2620
 
                   literal->const_offset,
2621
 
                   pds_info->data_size_in_dwords);
2622
 
 
2623
 
         entries += sizeof(*literal);
2624
 
         break;
2625
 
      }
2626
 
 
2627
 
      case PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS: {
2628
 
         const struct pvr_const_map_entry_doutu_address *const doutu_addr =
2629
 
            (struct pvr_const_map_entry_doutu_address *)entries;
2630
 
         pvr_dev_addr_t exec_addr = vertex_state->bo->vma->dev_addr;
2631
 
         uint64_t addr = 0ULL;
2632
 
 
2633
 
         exec_addr.addr += vertex_state->entry_offset;
2634
 
         pvr_set_usc_execution_address64(&addr, exec_addr.addr);
2635
 
 
2636
 
         PVR_WRITE(qword_buffer,
2637
 
                   addr | doutu_addr->doutu_control,
2638
 
                   doutu_addr->const_offset,
2639
 
                   pds_info->data_size_in_dwords);
2640
 
 
2641
 
         entries += sizeof(*doutu_addr);
2642
 
         break;
2643
 
      }
2644
 
 
2645
 
      case PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE: {
2646
 
         const struct pvr_const_map_entry_base_instance *const base_instance =
2647
 
            (struct pvr_const_map_entry_base_instance *)entries;
2648
 
 
2649
 
         PVR_WRITE(dword_buffer,
2650
 
                   state->draw_state.base_instance,
2651
 
                   base_instance->const_offset,
2652
 
                   pds_info->data_size_in_dwords);
2653
 
 
2654
 
         entries += sizeof(*base_instance);
2655
 
         break;
2656
 
      }
2657
 
 
2658
 
      case PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_ADDRESS: {
2659
 
         const struct pvr_const_map_entry_vertex_attribute_address
2660
 
            *const attribute =
2661
 
               (struct pvr_const_map_entry_vertex_attribute_address *)entries;
2662
 
         const struct pvr_vertex_binding *const binding =
2663
 
            &state->vertex_bindings[attribute->binding_index];
2664
 
         uint64_t addr = binding->buffer->dev_addr.addr;
2665
 
 
2666
 
         addr += binding->offset;
2667
 
         addr += attribute->offset;
2668
 
 
2669
 
         PVR_WRITE(qword_buffer,
2670
 
                   addr,
2671
 
                   attribute->const_offset,
2672
 
                   pds_info->data_size_in_dwords);
2673
 
 
2674
 
         entries += sizeof(*attribute);
2675
 
         break;
2676
 
      }
2677
 
 
2678
 
      default:
2679
 
         unreachable("Unsupported data section map");
2680
 
         break;
2681
 
      }
2682
 
   }
2683
 
 
2684
 
   state->pds_vertex_attrib_offset =
2685
 
      pvr_bo->vma->dev_addr.addr -
2686
 
      cmd_buffer->device->heaps.pds_heap->base_addr.addr;
2687
 
 
2688
 
   pvr_bo_cpu_unmap(cmd_buffer->device, pvr_bo);
2689
 
 
2690
 
   return VK_SUCCESS;
2691
 
}
2692
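/* Illustrative sketch, not driver code: the variable-length entry walk used
 * by pvr_setup_vertex_buffers() above and pvr_setup_descriptor_mappings()
 * below. Every record in the PDS const map starts with a common header giving
 * its type; the loop casts to the concrete record, consumes it, and advances
 * by that record's size. Types and names here are made up for the example.
 */
#if 0
#include <stdint.h>

enum example_entry_type { EX_ENTRY_LITERAL32, EX_ENTRY_ADDRESS };

struct example_entry_header { uint8_t type; };

struct example_entry_literal32 {
   struct example_entry_header header;
   uint32_t literal_value;
};

struct example_entry_address {
   struct example_entry_header header;
   uint64_t addr;
};

static void example_walk_entries(const uint8_t *entries, uint32_t entry_count)
{
   for (uint32_t i = 0; i < entry_count; i++) {
      const struct example_entry_header *header =
         (const struct example_entry_header *)entries;

      switch (header->type) {
      case EX_ENTRY_LITERAL32:
         /* Read ((const struct example_entry_literal32 *)entries) here. */
         entries += sizeof(struct example_entry_literal32);
         break;

      case EX_ENTRY_ADDRESS:
         /* Read ((const struct example_entry_address *)entries) here. */
         entries += sizeof(struct example_entry_address);
         break;
      }
   }
}
#endif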
 
 
2693
 
static VkResult pvr_setup_descriptor_mappings(
2694
 
   struct pvr_cmd_buffer *const cmd_buffer,
2695
 
   enum pvr_stage_allocation stage,
2696
 
   const struct pvr_stage_allocation_uniform_state *uniform_state,
2697
 
   uint32_t *const uniform_data_offset_out)
2698
 
{
2699
 
   const struct pvr_pds_info *const pds_info = &uniform_state->pds_info;
2700
 
   const struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
2701
 
   const struct pvr_descriptor_state *desc_state;
2702
 
   const uint8_t *entries;
2703
 
   uint32_t *dword_buffer;
2704
 
   uint64_t *qword_buffer;
2705
 
   struct pvr_bo *pvr_bo;
2706
 
   VkResult result;
2707
 
 
2708
 
   if (!pds_info->data_size_in_dwords)
2709
 
      return VK_SUCCESS;
2710
 
 
2711
 
   result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
2712
 
                                     cmd_buffer->device->heaps.pds_heap,
2713
 
                                     pds_info->data_size_in_dwords,
2714
 
                                     PVR_BO_ALLOC_FLAG_CPU_MAPPED,
2715
 
                                     &pvr_bo);
2716
 
   if (result != VK_SUCCESS)
2717
 
      return result;
2718
 
 
2719
 
   dword_buffer = (uint32_t *)pvr_bo->bo->map;
2720
 
   qword_buffer = (uint64_t *)pvr_bo->bo->map;
2721
 
 
2722
 
   entries = (uint8_t *)pds_info->entries;
2723
 
 
2724
 
   switch (stage) {
2725
 
   case PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY:
2726
 
   case PVR_STAGE_ALLOCATION_FRAGMENT:
2727
 
      desc_state = &cmd_buffer->state.gfx_desc_state;
2728
 
      break;
2729
 
 
2730
 
   case PVR_STAGE_ALLOCATION_COMPUTE:
2731
 
      desc_state = &cmd_buffer->state.compute_desc_state;
2732
 
      break;
2733
 
 
2734
 
   default:
2735
 
      unreachable("Unsupported stage.");
2736
 
      break;
2737
 
   }
2738
 
 
2739
 
   for (uint32_t i = 0; i < pds_info->entry_count; i++) {
2740
 
      const struct pvr_const_map_entry *const entry_header =
2741
 
         (struct pvr_const_map_entry *)entries;
2742
 
 
2743
 
      switch (entry_header->type) {
2744
 
      case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32: {
2745
 
         const struct pvr_const_map_entry_literal32 *const literal =
2746
 
            (struct pvr_const_map_entry_literal32 *)entries;
2747
 
 
2748
 
         PVR_WRITE(dword_buffer,
2749
 
                   literal->literal_value,
2750
 
                   literal->const_offset,
2751
 
                   pds_info->data_size_in_dwords);
2752
 
 
2753
 
         entries += sizeof(*literal);
2754
 
         break;
2755
 
      }
2756
 
 
2757
 
      case PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER: {
2758
 
         const struct pvr_const_map_entry_constant_buffer *const_buffer_entry =
2759
 
            (struct pvr_const_map_entry_constant_buffer *)entries;
2760
 
         const uint32_t desc_set = const_buffer_entry->desc_set;
2761
 
         const uint32_t binding = const_buffer_entry->binding;
2762
 
         const struct pvr_descriptor_set *descriptor_set;
2763
 
         const struct pvr_descriptor *descriptor;
2764
 
         pvr_dev_addr_t buffer_addr;
2765
 
 
2766
 
         /* TODO: Handle push descriptors. */
2767
 
 
2768
 
         assert(desc_set < PVR_MAX_DESCRIPTOR_SETS);
2769
 
         descriptor_set = state->gfx_desc_state.descriptor_sets[desc_set];
2770
 
 
2771
 
         /* TODO: Handle dynamic buffers. */
2772
 
         descriptor = &descriptor_set->descriptors[binding];
2773
 
         assert(descriptor->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
2774
 
 
2775
 
         assert(descriptor->buffer_desc_range ==
2776
 
                const_buffer_entry->size_in_dwords * sizeof(uint32_t));
2777
 
         assert(descriptor->buffer_create_info_size ==
2778
 
                const_buffer_entry->size_in_dwords * sizeof(uint32_t));
2779
 
 
2780
 
         buffer_addr = descriptor->buffer_dev_addr;
2781
 
         buffer_addr.addr += const_buffer_entry->offset * sizeof(uint32_t);
2782
 
 
2783
 
         PVR_WRITE(qword_buffer,
2784
 
                   buffer_addr.addr,
2785
 
                   const_buffer_entry->const_offset,
2786
 
                   pds_info->data_size_in_dwords);
2787
 
 
2788
 
         entries += sizeof(*const_buffer_entry);
2789
 
         break;
2790
 
      }
2791
 
 
2792
 
      case PVR_PDS_CONST_MAP_ENTRY_TYPE_DESCRIPTOR_SET: {
2793
 
         const struct pvr_const_map_entry_descriptor_set *desc_set_entry =
2794
 
            (struct pvr_const_map_entry_descriptor_set *)entries;
2795
 
         const uint32_t desc_set_num = desc_set_entry->descriptor_set;
2796
 
         const struct pvr_descriptor_set *descriptor_set;
2797
 
         pvr_dev_addr_t desc_set_addr;
2798
 
 
2799
 
         assert(desc_set_num < PVR_MAX_DESCRIPTOR_SETS);
2800
 
 
2801
 
         /* TODO: Remove this when the compiler provides us with usage info?
2802
 
          */
2803
 
         /* We skip DMAing unbound descriptor sets. */
2804
 
         if (!(desc_state->valid_mask & BITFIELD_BIT(desc_set_num))) {
2805
 
            const struct pvr_const_map_entry_literal32 *literal;
2806
 
            uint32_t zero_literal_value;
2807
 
 
2808
 
            entries += sizeof(*desc_set_entry);
2809
 
            literal = (struct pvr_const_map_entry_literal32 *)entries;
2810
 
 
2811
 
            /* TODO: Is there any guarantee that a literal will follow the
2812
 
             * descriptor set entry?
2813
 
             */
2814
 
            assert(literal->type == PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32);
2815
 
 
2816
 
            /* We zero out the DMA size so the DMA isn't performed. */
2817
 
            zero_literal_value =
2818
 
               literal->literal_value &
2819
 
               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_CLRMSK;
2820
 
 
2821
 
            PVR_WRITE(qword_buffer,
2822
 
                      0UL,
2823
 
                      desc_set_entry->const_offset,
2824
 
                      pds_info->data_size_in_dwords);
2825
 
 
2826
 
            PVR_WRITE(dword_buffer,
2827
 
                      zero_literal_value,
2828
 
                      desc_set_entry->const_offset,
2829
 
                      pds_info->data_size_in_dwords);
2830
 
 
2831
 
            entries += sizeof(*literal);
2832
 
            i++;
2833
 
            continue;
2834
 
         }
2835
 
 
2836
 
         descriptor_set = desc_state->descriptor_sets[desc_set_num];
2837
 
 
2838
 
         pvr_finishme("Handle push descriptor entry.");
2839
 
 
2840
 
         desc_set_addr = descriptor_set->pvr_bo->vma->dev_addr;
2841
 
 
2842
 
         if (desc_set_entry->primary) {
2843
 
            desc_set_addr.addr +=
2844
 
               descriptor_set->layout->memory_layout_in_dwords_per_stage[stage]
2845
 
                  .primary_offset
2846
 
               << 2U;
2847
 
         } else {
2848
 
            desc_set_addr.addr +=
2849
 
               descriptor_set->layout->memory_layout_in_dwords_per_stage[stage]
2850
 
                  .secondary_offset
2851
 
               << 2U;
2852
 
         }
2853
 
 
2854
 
         desc_set_addr.addr += (uint64_t)desc_set_entry->offset_in_dwords << 2U;
2855
 
 
2856
 
         PVR_WRITE(qword_buffer,
2857
 
                   desc_set_addr.addr,
2858
 
                   desc_set_entry->const_offset,
2859
 
                   pds_info->data_size_in_dwords);
2860
 
 
2861
 
         entries += sizeof(*desc_set_entry);
2862
 
         break;
2863
 
      }
2864
 
 
2865
 
      default:
2866
 
         unreachable("Unsupported map entry type.");
2867
 
      }
2868
 
   }
2869
 
 
2870
 
   pvr_bo_cpu_unmap(cmd_buffer->device, pvr_bo);
2871
 
 
2872
 
   *uniform_data_offset_out =
2873
 
      pvr_bo->vma->dev_addr.addr -
2874
 
      cmd_buffer->device->heaps.pds_heap->base_addr.addr;
2875
 
 
2876
 
   return VK_SUCCESS;
2877
 
}
2878
 
 
2879
 
#undef PVR_WRITE
2880
 
 
2881
 
static void
2882
 
pvr_emit_dirty_pds_state(const struct pvr_cmd_buffer *const cmd_buffer,
2883
 
                         const uint32_t pds_vertex_uniform_data_offset)
2884
 
{
2885
 
   const struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
2886
 
   const struct pvr_stage_allocation_uniform_state *const vertex_uniform_state =
2887
 
      &state->gfx_pipeline->vertex_shader_state.uniform_state;
2888
 
   const struct pvr_pipeline_stage_state *const vertex_stage_state =
2889
 
      &state->gfx_pipeline->vertex_shader_state.stage_state;
2890
 
   struct pvr_csb *const csb = &state->current_sub_cmd->gfx.control_stream;
2891
 
 
2892
 
   if (!vertex_uniform_state->pds_info.code_size_in_dwords)
2893
 
      return;
2894
 
 
2895
 
   pvr_csb_emit (csb, VDMCTRL_PDS_STATE0, state0) {
2896
 
      state0.usc_target = PVRX(VDMCTRL_USC_TARGET_ALL);
2897
 
 
2898
 
      state0.usc_common_size =
2899
 
         DIV_ROUND_UP(vertex_stage_state->const_shared_reg_count << 2,
2900
 
                      PVRX(VDMCTRL_PDS_STATE0_USC_COMMON_SIZE_UNIT_SIZE));
2901
 
 
2902
 
      state0.pds_data_size =
2903
 
         DIV_ROUND_UP(vertex_uniform_state->pds_info.data_size_in_dwords << 2,
2904
 
                      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE));
2905
 
   }
2906
 
 
2907
 
   pvr_csb_emit (csb, VDMCTRL_PDS_STATE1, state1) {
2908
 
      state1.pds_data_addr.addr = pds_vertex_uniform_data_offset;
2909
 
      state1.sd_type = PVRX(VDMCTRL_SD_TYPE_NONE);
2910
 
   }
2911
 
 
2912
 
   pvr_csb_emit (csb, VDMCTRL_PDS_STATE2, state2) {
2913
 
      state2.pds_code_addr.addr = vertex_uniform_state->pds_code.code_offset;
2914
 
   }
2915
 
}
2916
 
 
2917
 
static void pvr_setup_output_select(struct pvr_cmd_buffer *const cmd_buffer)
2918
 
{
2919
 
   struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state;
2920
 
   const struct pvr_graphics_pipeline *const gfx_pipeline =
2921
 
      cmd_buffer->state.gfx_pipeline;
2922
 
   struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state;
2923
 
   const struct pvr_vertex_shader_state *const vertex_state =
2924
 
      &gfx_pipeline->vertex_shader_state;
2925
 
   uint32_t output_selects;
2926
 
 
2927
 
   /* TODO: Handle vertex and fragment shader state flags. */
2928
 
 
2929
 
   pvr_csb_pack (&output_selects, TA_OUTPUT_SEL, state) {
2930
 
      const VkPrimitiveTopology topology =
2931
 
         gfx_pipeline->input_asm_state.topology;
2932
 
 
2933
 
      state.rhw_pres = true;
2934
 
      state.vtxsize = DIV_ROUND_UP(vertex_state->vertex_output_size, 4U);
2935
 
      state.psprite_size_pres = (topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST);
2936
 
   }
2937
 
 
2938
 
   if (ppp_state->output_selects != output_selects) {
2939
 
      ppp_state->output_selects = output_selects;
2940
 
      emit_state->output_selects = true;
2941
 
   }
2942
 
 
2943
 
   if (ppp_state->varying_word[0] != vertex_state->varying[0]) {
2944
 
      ppp_state->varying_word[0] = vertex_state->varying[0];
2945
 
      emit_state->varying_word0 = true;
2946
 
   }
2947
 
 
2948
 
   if (ppp_state->varying_word[1] != vertex_state->varying[1]) {
2949
 
      ppp_state->varying_word[1] = vertex_state->varying[1];
2950
 
      emit_state->varying_word1 = true;
2951
 
   }
2952
 
}
2953
 
 
2954
 
/* clang-format off */
2955
 
static enum PVRX(TA_OBJTYPE)
2956
 
pvr_ppp_state_get_ispa_objtype_from_vk(const VkPrimitiveTopology topology)
2957
 
/* clang-format on */
2958
 
{
2959
 
   switch (topology) {
2960
 
   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
2961
 
      return PVRX(TA_OBJTYPE_SPRITE_01UV);
2962
 
 
2963
 
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
2964
 
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
2965
 
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
2966
 
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
2967
 
      return PVRX(TA_OBJTYPE_LINE);
2968
 
 
2969
 
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
2970
 
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
2971
 
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
2972
 
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
2973
 
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
2974
 
      return PVRX(TA_OBJTYPE_TRIANGLE);
2975
 
 
2976
 
   default:
2977
 
      unreachable("Invalid topology.");
2978
 
      return 0;
2979
 
   }
2980
 
}
2981
 
 
2982
 
static inline enum PVRX(TA_CMPMODE) pvr_cmpmode(VkCompareOp op)
2983
 
{
2984
 
   /* enum values are identical, so we can just cast the input directly. */
2985
 
   return (enum PVRX(TA_CMPMODE))op;
2986
 
}
2987
 
 
2988
 
static inline enum PVRX(TA_ISPB_STENCILOP) pvr_stencilop(VkStencilOp op)
2989
 
{
2990
 
   /* enum values are identical, so we can just cast the input directly. */
2991
 
   return (enum PVRX(TA_ISPB_STENCILOP))op;
2992
 
}
2993
 
 
2994
 
static void pvr_setup_isp_faces_and_control(
2995
 
   struct pvr_cmd_buffer *const cmd_buffer,
2996
 
   struct pvr_cmd_struct(TA_STATE_ISPA) *const ispa_out)
2997
 
{
2998
 
   struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state;
2999
 
   const struct pvr_graphics_pipeline *const gfx_pipeline =
3000
 
      cmd_buffer->state.gfx_pipeline;
3001
 
   struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state;
3002
 
   const struct pvr_dynamic_state *const dynamic_state =
3003
 
      &cmd_buffer->state.dynamic.common;
3004
 
   const struct pvr_render_pass_info *const pass_info =
3005
 
      &cmd_buffer->state.render_pass_info;
3006
 
   const uint32_t subpass_idx = pass_info->subpass_idx;
3007
 
   const uint32_t *depth_stencil_attachment_idx =
3008
 
      pass_info->pass->subpasses[subpass_idx].depth_stencil_attachment;
3009
 
   const struct pvr_image_view *const attachment =
3010
 
      (!depth_stencil_attachment_idx)
3011
 
         ? NULL
3012
 
         : pass_info->attachments[*depth_stencil_attachment_idx];
3013
 
 
3014
 
   const VkCullModeFlags cull_mode = gfx_pipeline->raster_state.cull_mode;
3015
 
   const bool raster_discard_enabled =
3016
 
      gfx_pipeline->raster_state.discard_enable;
3017
 
   const bool disable_all = raster_discard_enabled || !attachment;
3018
 
 
3019
 
   const VkPrimitiveTopology topology = gfx_pipeline->input_asm_state.topology;
3020
 
   const enum PVRX(TA_OBJTYPE)
3021
 
      obj_type = pvr_ppp_state_get_ispa_objtype_from_vk(topology);
3022
 
 
3023
 
   const bool disable_stencil_write = disable_all;
3024
 
   const bool disable_stencil_test =
3025
 
      disable_all || !vk_format_has_stencil(attachment->vk.format);
3026
 
 
3027
 
   const bool disable_depth_write = disable_all;
3028
 
   const bool disable_depth_test = disable_all ||
3029
 
                                   !vk_format_has_depth(attachment->vk.format);
3030
 
 
3031
 
   uint32_t ispb_stencil_off;
3032
 
   bool is_two_sided = false;
3033
 
   uint32_t isp_control;
3034
 
 
3035
 
   uint32_t line_width;
3036
 
   uint32_t common_a;
3037
 
   uint32_t front_a;
3038
 
   uint32_t front_b;
3039
 
   uint32_t back_a;
3040
 
   uint32_t back_b;
3041
 
 
3042
 
   /* Convert to 4.4 fixed point format. */
3043
 
   line_width = util_unsigned_fixed(dynamic_state->line_width, 4);
3044
 
 
3045
 
   /* Subtract 1 to shift values from range [0=0,256=16] to [0=1/16,255=16].
3046
 
    * If 0 it stays at 0, otherwise we subtract 1.
3047
 
    */
3048
 
   line_width = (!!line_width) * (line_width - 1);
3049
 
 
3050
 
   line_width = MIN2(line_width, PVRX(TA_STATE_ISPA_POINTLINEWIDTH_SIZE_MAX));
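   /* E.g. a line width of 1.0 is 16 in 4.4 fixed point; the shift above turns
    * it into 15, which encodes 16/16 = 1.0 in the [0=1/16,255=16] range.
    */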

   /* TODO: Part of the logic in this function is duplicated in another part
    * of the code. E.g. the dcmpmode, and sop1/2/3. Could we do this earlier?
    */

   pvr_csb_pack (&common_a, TA_STATE_ISPA, ispa) {
      ispa.pointlinewidth = line_width;

      if (disable_depth_test)
         ispa.dcmpmode = PVRX(TA_CMPMODE_ALWAYS);
      else
         ispa.dcmpmode = pvr_cmpmode(gfx_pipeline->depth_compare_op);

      /* FIXME: Can we just have this and remove the assignment above?
       * The user provides a depthTestEnable at vkCreateGraphicsPipelines()
       * should we be using that?
       */
      ispa.dcmpmode |= gfx_pipeline->depth_compare_op;

      ispa.dwritedisable = disable_depth_test || disable_depth_write;
      /* FIXME: Can we just have this and remove the assignment above? */
      ispa.dwritedisable = ispa.dwritedisable ||
                           gfx_pipeline->depth_write_disable;

      ispa.passtype = gfx_pipeline->fragment_shader_state.pass_type;

      ispa.objtype = obj_type;

      /* Return unpacked ispa structure. dcmpmode, dwritedisable, passtype and
       * objtype are needed by pvr_setup_triangle_merging_flag.
       */
      if (ispa_out)
         *ispa_out = ispa;
   }

   /* FIXME: This logic should be redone and improved. Can we also get rid of
    * the front and back variants?
    */

   pvr_csb_pack (&front_a, TA_STATE_ISPA, ispa) {
      ispa.sref = (!disable_stencil_test) * dynamic_state->reference.front;
   }
   front_a |= common_a;

   pvr_csb_pack (&back_a, TA_STATE_ISPA, ispa) {
      ispa.sref = (!disable_stencil_test) * dynamic_state->reference.back;
   }
   back_a |= common_a;

   /* TODO: Does this actually represent the ispb control word on stencil off?
    * If not, rename the variable.
    */
   pvr_csb_pack (&ispb_stencil_off, TA_STATE_ISPB, ispb) {
      ispb.sop3 = PVRX(TA_ISPB_STENCILOP_KEEP);
      ispb.sop2 = PVRX(TA_ISPB_STENCILOP_KEEP);
      ispb.sop1 = PVRX(TA_ISPB_STENCILOP_KEEP);
      ispb.scmpmode = PVRX(TA_CMPMODE_ALWAYS);
   }

   if (disable_stencil_test) {
      back_b = front_b = ispb_stencil_off;
   } else {
      pvr_csb_pack (&front_b, TA_STATE_ISPB, ispb) {
         ispb.swmask =
            (!disable_stencil_write) * dynamic_state->write_mask.front;
         ispb.scmpmask = dynamic_state->compare_mask.front;

         ispb.sop3 = pvr_stencilop(gfx_pipeline->stencil_front.pass_op);
         ispb.sop2 = pvr_stencilop(gfx_pipeline->stencil_front.depth_fail_op);
         ispb.sop1 = pvr_stencilop(gfx_pipeline->stencil_front.fail_op);

         ispb.scmpmode = pvr_cmpmode(gfx_pipeline->stencil_front.compare_op);
      }

      pvr_csb_pack (&back_b, TA_STATE_ISPB, ispb) {
         ispb.swmask =
            (!disable_stencil_write) * dynamic_state->write_mask.back;
         ispb.scmpmask = dynamic_state->compare_mask.back;

         ispb.sop3 = pvr_stencilop(gfx_pipeline->stencil_back.pass_op);
         ispb.sop2 = pvr_stencilop(gfx_pipeline->stencil_back.depth_fail_op);
         ispb.sop1 = pvr_stencilop(gfx_pipeline->stencil_back.fail_op);

         ispb.scmpmode = pvr_cmpmode(gfx_pipeline->stencil_back.compare_op);
      }
   }

   if (front_a != back_a || front_b != back_b) {
      if (cull_mode & VK_CULL_MODE_BACK_BIT) {
         /* Single face, using front state. */
      } else if (cull_mode & VK_CULL_MODE_FRONT_BIT) {
         /* Single face, using back state. */

         front_a = back_a;
         front_b = back_b;
      } else {
         /* Both faces. */

         emit_state->isp_ba = is_two_sided = true;

         if (gfx_pipeline->raster_state.front_face ==
             VK_FRONT_FACE_COUNTER_CLOCKWISE) {
            uint32_t tmp = front_a;

            front_a = back_a;
            back_a = tmp;

            tmp = front_b;
            front_b = back_b;
            back_b = tmp;
         }

         /* HW defaults to stencil off. */
         if (back_b != ispb_stencil_off)
            emit_state->isp_fb = emit_state->isp_bb = true;
      }
   }

   if (!disable_stencil_test && front_b != ispb_stencil_off)
      emit_state->isp_fb = true;

   pvr_csb_pack (&isp_control, TA_STATE_ISPCTL, ispctl) {
      ispctl.upass = pass_info->userpass_spawn;

      /* TODO: is bo ever NULL? Figure out what to do. */
      ispctl.tagwritedisable = raster_discard_enabled ||
                               !gfx_pipeline->fragment_shader_state.bo;

      ispctl.two_sided = is_two_sided;
      ispctl.bpres = emit_state->isp_fb || emit_state->isp_bb;

      ispctl.dbenable = !raster_discard_enabled &&
                        gfx_pipeline->raster_state.depth_bias_enable &&
                        obj_type == PVRX(TA_OBJTYPE_TRIANGLE);
      ispctl.scenable = !raster_discard_enabled;

      ppp_state->isp.control_struct = ispctl;
   }

   emit_state->isp = true;

   ppp_state->isp.control = isp_control;
   ppp_state->isp.front_a = front_a;
   ppp_state->isp.front_b = front_b;
   ppp_state->isp.back_a = back_a;
   ppp_state->isp.back_b = back_b;
}

static void pvr_get_viewport_scissor_overlap(const VkViewport *const viewport,
                                             const VkRect2D *const scissor,
                                             VkRect2D *const rect_out)
{
   /* TODO: See if we can remove this struct. */
   struct pvr_rect {
      int32_t x0, y0;
      int32_t x1, y1;
   };

   /* TODO: Worry about overflow? */
   const struct pvr_rect scissor_rect = {
      .x0 = scissor->offset.x,
      .y0 = scissor->offset.y,
      .x1 = scissor->offset.x + scissor->extent.width,
      .y1 = scissor->offset.y + scissor->extent.height
   };
   struct pvr_rect viewport_rect = { 0 };

   assert(viewport->width >= 0.0f);
   assert(scissor_rect.x0 >= 0);
   assert(scissor_rect.y0 >= 0);

   if (scissor->extent.width == 0 || scissor->extent.height == 0) {
      *rect_out = (VkRect2D){ 0 };
      return;
   }

   viewport_rect.x0 = (int32_t)viewport->x;
   viewport_rect.x1 = (int32_t)viewport->x + (int32_t)viewport->width;

   /* TODO: Is there a mathematical way of doing all this and then clamp at
    * the end?
    */
   /* We flip the y0 and y1 when height is negative. */
   viewport_rect.y0 = (int32_t)viewport->y + MIN2(0, (int32_t)viewport->height);
   viewport_rect.y1 = (int32_t)viewport->y + MAX2(0, (int32_t)viewport->height);

   if (scissor_rect.x1 <= viewport_rect.x0 ||
       scissor_rect.y1 <= viewport_rect.y0 ||
       scissor_rect.x0 >= viewport_rect.x1 ||
       scissor_rect.y0 >= viewport_rect.y1) {
      *rect_out = (VkRect2D){ 0 };
      return;
   }

   /* Determine the overlapping rectangle. */
   viewport_rect.x0 = MAX2(viewport_rect.x0, scissor_rect.x0);
   viewport_rect.y0 = MAX2(viewport_rect.y0, scissor_rect.y0);
   viewport_rect.x1 = MIN2(viewport_rect.x1, scissor_rect.x1);
   viewport_rect.y1 = MIN2(viewport_rect.y1, scissor_rect.y1);

   /* TODO: Is this conversion safe? Is this logic right? */
   rect_out->offset.x = (uint32_t)viewport_rect.x0;
   rect_out->offset.y = (uint32_t)viewport_rect.y0;
   rect_out->extent.height = (uint32_t)(viewport_rect.y1 - viewport_rect.y0);
   rect_out->extent.width = (uint32_t)(viewport_rect.x1 - viewport_rect.x0);
}

static inline uint32_t
pvr_get_geom_region_clip_align_size(struct pvr_device_info *const dev_info)
{
   /* TODO: This should come from rogue_ppp.xml. */
   return 16U + 16U * (!PVR_HAS_FEATURE(dev_info, tile_size_16x16));
}

/* FIXME: Remove device param when PVR_HAS_FEATURE() accepts const dev_info */
static void
pvr_setup_isp_depth_bias_scissor_state(struct pvr_cmd_buffer *const cmd_buffer)
{
   struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state;
   struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state;
   const struct pvr_dynamic_state *const dynamic_state =
      &cmd_buffer->state.dynamic.common;
   const struct pvr_cmd_struct(TA_STATE_ISPCTL) *const ispctl =
      &ppp_state->isp.control_struct;
   struct pvr_device_info *const dev_info =
      &cmd_buffer->device->pdevice->dev_info;

   if (ispctl->dbenable)
      assert(!"Unimplemented");

   if (ispctl->scenable) {
      const uint32_t region_clip_align_size =
         pvr_get_geom_region_clip_align_size(dev_info);
      const VkViewport *const viewport = &dynamic_state->viewport.viewports[0];
      const VkRect2D *const scissor = &dynamic_state->scissor.scissors[0];
      VkRect2D overlap_rect;
      uint32_t scissor_words[2];
      uint32_t height;
      uint32_t width;
      uint32_t x;
      uint32_t y;

      /* For region clip. */
      uint32_t bottom;
      uint32_t right;
      uint32_t left;
      uint32_t top;

      /* We don't support multiple viewport calculations. */
      assert(dynamic_state->viewport.count == 1);
      /* We don't support multiple scissor calculations. */
      assert(dynamic_state->scissor.count == 1);

      pvr_get_viewport_scissor_overlap(viewport, scissor, &overlap_rect);

      x = overlap_rect.offset.x;
      y = overlap_rect.offset.y;
      width = overlap_rect.extent.width;
      height = overlap_rect.extent.height;

      pvr_csb_pack (&scissor_words[0], IPF_SCISSOR_WORD_0, word0) {
         word0.scw0_xmax = x + width;
         word0.scw0_xmin = x;
      }

      pvr_csb_pack (&scissor_words[1], IPF_SCISSOR_WORD_1, word1) {
         word1.scw1_ymax = y + height;
         word1.scw1_ymin = y;
      }

      if (cmd_buffer->scissor_array.size &&
          cmd_buffer->scissor_words[0] == scissor_words[0] &&
          cmd_buffer->scissor_words[1] == scissor_words[1]) {
         return;
      }

      cmd_buffer->scissor_words[0] = scissor_words[0];
      cmd_buffer->scissor_words[1] = scissor_words[1];

      /* Calculate region clip. */

      left = x / region_clip_align_size;
      top = y / region_clip_align_size;

      /* We prevent right=-1 with the multiplication. */
      /* TODO: Is there a better way of doing this? */
      if ((x + width) != 0U)
         right = DIV_ROUND_UP(x + width, region_clip_align_size) - 1;
      else
         right = 0;

      if ((y + height) != 0U)
         bottom = DIV_ROUND_UP(y + height, region_clip_align_size) - 1;
      else
         bottom = 0U;

      /* Setup region clip to clip everything outside what was calculated. */

      /* FIXME: Should we mask to prevent writing over other words? */
      pvr_csb_pack (&ppp_state->region_clipping.word0, TA_REGION_CLIP0, word0) {
         word0.right = right;
         word0.left = left;
         word0.mode = PVRX(TA_REGION_CLIP_MODE_OUTSIDE);
      }

      pvr_csb_pack (&ppp_state->region_clipping.word1, TA_REGION_CLIP1, word1) {
         word1.bottom = bottom;
         word1.top = top;
      }

      ppp_state->depthbias_scissor_indices.scissor_index =
         util_dynarray_num_elements(&cmd_buffer->scissor_array,
                                    __typeof__(cmd_buffer->scissor_words));

      memcpy(util_dynarray_grow_bytes(&cmd_buffer->scissor_array,
                                      1,
                                      sizeof(cmd_buffer->scissor_words)),
             cmd_buffer->scissor_words,
             sizeof(cmd_buffer->scissor_words));

      emit_state->isp_dbsc = true;
      emit_state->region_clip = true;
   }
}

static void
pvr_setup_triangle_merging_flag(struct pvr_cmd_buffer *const cmd_buffer,
                                struct pvr_cmd_struct(TA_STATE_ISPA) * ispa)
{
   struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state;
   struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state;
   uint32_t merge_word;
   uint32_t mask;

   pvr_csb_pack (&merge_word, TA_STATE_PDS_SIZEINFO2, size_info) {
      /* Disable for lines or punch-through or for DWD and depth compare
       * always.
       */
      if (ispa->objtype == PVRX(TA_OBJTYPE_LINE) ||
          ispa->passtype == PVRX(TA_PASSTYPE_PUNCH_THROUGH) ||
          (ispa->dwritedisable && ispa->dcmpmode == PVRX(TA_CMPMODE_ALWAYS))) {
         size_info.pds_tri_merge_disable = true;
      }
   }

   pvr_csb_pack (&mask, TA_STATE_PDS_SIZEINFO2, size_info) {
      size_info.pds_tri_merge_disable = true;
   }

   merge_word |= ppp_state->pds.size_info2 & ~mask;

   if (merge_word != ppp_state->pds.size_info2) {
      ppp_state->pds.size_info2 = merge_word;
      emit_state->pds_fragment_stateptr0 = true;
   }
}

/* TODO: See if this function can be improved once fully implemented. */
static uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
   const struct pvr_device_info *dev_info,
   uint32_t fs_common_size,
   uint32_t min_tiles_in_flight)
{
   uint32_t max_tiles_in_flight;
   uint32_t num_allocs;

   if (PVR_HAS_FEATURE(dev_info, s8xe)) {
      num_allocs = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0U);
   } else {
      uint32_t num_phantoms = rogue_get_num_phantoms(dev_info);
      uint32_t min_cluster_per_phantom = 0;

      if (num_phantoms > 1) {
         pvr_finishme("Unimplemented path!!");
      } else {
         min_cluster_per_phantom =
            PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U);
      }

      if (num_phantoms > 1)
         pvr_finishme("Unimplemented path!!");

      if (num_phantoms > 2)
         pvr_finishme("Unimplemented path!!");

      if (num_phantoms > 3)
         pvr_finishme("Unimplemented path!!");

      if (min_cluster_per_phantom >= 4)
         num_allocs = 1;
      else if (min_cluster_per_phantom == 2)
         num_allocs = 2;
      else
         num_allocs = 4;
   }

   max_tiles_in_flight =
      PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U);

   if (fs_common_size == UINT_MAX) {
      uint32_t max_common_size;

      num_allocs *= MIN2(min_tiles_in_flight, max_tiles_in_flight);

      if (!PVR_HAS_ERN(dev_info, 38748)) {
         /* Hardware needs space for one extra shared allocation. */
         num_allocs += 1;
      }

      max_common_size = rogue_get_reserved_shared_size(dev_info) -
                        rogue_get_max_coeffs(dev_info);

      /* Double resource requirements to deal with fragmentation. */
      max_common_size /= num_allocs * 2;
      max_common_size =
         ROUND_DOWN_TO(max_common_size,
                       PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));

      return max_common_size;
   } else if (fs_common_size == 0) {
      return max_tiles_in_flight;
   }

   pvr_finishme("Unimplemented path!!");

   return 0;
}

static void
pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer)
{
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   const struct pvr_stage_allocation_uniform_state *uniform_shader_state =
      &state->gfx_pipeline->fragment_shader_state.uniform_state;
   const struct pvr_pds_upload *pds_coeff_program =
      &state->gfx_pipeline->fragment_shader_state.pds_coeff_program;
   const struct pvr_pipeline_stage_state *fragment_state =
      &state->gfx_pipeline->fragment_shader_state.stage_state;
   struct pvr_device_info *const dev_info =
      &cmd_buffer->device->pdevice->dev_info;
   struct pvr_emit_state *const emit_state = &state->emit_state;
   struct pvr_ppp_state *const ppp_state = &state->ppp_state;
   struct pvr_sub_cmd *sub_cmd = state->current_sub_cmd;

   const uint32_t pds_uniform_size =
      DIV_ROUND_UP(uniform_shader_state->pds_info.data_size_in_dwords,
                   PVRX(TA_STATE_PDS_SIZEINFO1_PDS_UNIFORMSIZE_UNIT_SIZE));

   const uint32_t pds_varying_state_size =
      DIV_ROUND_UP(pds_coeff_program->data_size,
                   PVRX(TA_STATE_PDS_SIZEINFO1_PDS_VARYINGSIZE_UNIT_SIZE));

   const uint32_t usc_varying_size =
      DIV_ROUND_UP(fragment_state->coefficient_size,
                   PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE));

   const uint32_t pds_temp_size =
      DIV_ROUND_UP(fragment_state->temps_count,
                   PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE));

   const uint32_t usc_shared_size =
      DIV_ROUND_UP(fragment_state->const_shared_reg_count,
                   PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));

   const uint32_t max_tiles_in_flight =
      pvr_calc_fscommon_size_and_tiles_in_flight(
         dev_info,
         usc_shared_size *
            PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE),
         1);
   uint32_t size_info_mask;
   uint32_t size_info2;

   if (max_tiles_in_flight < sub_cmd->gfx.max_tiles_in_flight)
      sub_cmd->gfx.max_tiles_in_flight = max_tiles_in_flight;

   pvr_csb_pack (&ppp_state->pds.pixel_shader_base,
                 TA_STATE_PDS_SHADERBASE,
                 shader_base) {
      const struct pvr_pds_upload *const pds_upload =
         &state->gfx_pipeline->fragment_shader_state.pds_fragment_program;

      shader_base.addr.addr = pds_upload->data_offset;
   }

   if (uniform_shader_state->pds_code.pvr_bo) {
      pvr_csb_pack (&ppp_state->pds.texture_uniform_code_base,
                    TA_STATE_PDS_TEXUNICODEBASE,
                    tex_base) {
         tex_base.addr.addr = uniform_shader_state->pds_code.code_offset;
      }
   } else {
      ppp_state->pds.texture_uniform_code_base = 0U;
   }

   pvr_csb_pack (&ppp_state->pds.size_info1, TA_STATE_PDS_SIZEINFO1, info1) {
      info1.pds_uniformsize = pds_uniform_size;
      info1.pds_texturestatesize = 0U;
      info1.pds_varyingsize = pds_varying_state_size;
      info1.usc_varyingsize = usc_varying_size;
      info1.pds_tempsize = pds_temp_size;
   }

   pvr_csb_pack (&size_info_mask, TA_STATE_PDS_SIZEINFO2, mask) {
      mask.pds_tri_merge_disable = true;
   }

   ppp_state->pds.size_info2 &= size_info_mask;

   pvr_csb_pack (&size_info2, TA_STATE_PDS_SIZEINFO2, info2) {
      info2.usc_sharedsize = usc_shared_size;
   }

   ppp_state->pds.size_info2 |= size_info2;

   if (pds_coeff_program->pvr_bo) {
      state->emit_state.pds_fragment_stateptr1 = true;

      pvr_csb_pack (&ppp_state->pds.varying_base,
                    TA_STATE_PDS_VARYINGBASE,
                    base) {
         base.addr.addr = pds_coeff_program->data_offset;
      }
   } else {
      ppp_state->pds.varying_base = 0U;
   }

   pvr_csb_pack (&ppp_state->pds.uniform_state_data_base,
                 TA_STATE_PDS_UNIFORMDATABASE,
                 base) {
      base.addr.addr = state->pds_fragment_uniform_data_offset;
   }

   emit_state->pds_fragment_stateptr0 = true;
   emit_state->pds_fragment_stateptr3 = true;
}

static void pvr_setup_viewport(struct pvr_cmd_buffer *const cmd_buffer)
{
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   struct pvr_emit_state *const emit_state = &state->emit_state;
   struct pvr_ppp_state *const ppp_state = &state->ppp_state;

   if (ppp_state->viewport_count != state->dynamic.common.viewport.count) {
      ppp_state->viewport_count = state->dynamic.common.viewport.count;
      emit_state->viewport = true;
   }

   if (state->gfx_pipeline->raster_state.discard_enable) {
      /* We don't want to emit any viewport data as it'll just get thrown
       * away. It's after the previous condition because we still want to
       * stash the viewport_count as it's our trigger for when
       * rasterizer discard gets disabled.
       */
      emit_state->viewport = false;
      return;
   }

   for (uint32_t i = 0; i < ppp_state->viewport_count; i++) {
      VkViewport *viewport = &state->dynamic.common.viewport.viewports[i];
      uint32_t x_scale = fui(viewport->width * 0.5f);
      uint32_t y_scale = fui(viewport->height * 0.5f);
      uint32_t z_scale = fui(viewport->maxDepth - viewport->minDepth);
      uint32_t x_center = fui(viewport->x + viewport->width * 0.5f);
      uint32_t y_center = fui(viewport->y + viewport->height * 0.5f);
      uint32_t z_center = fui(viewport->minDepth);

      if (ppp_state->viewports[i].a0 != x_center ||
          ppp_state->viewports[i].m0 != x_scale ||
          ppp_state->viewports[i].a1 != y_center ||
          ppp_state->viewports[i].m1 != y_scale ||
          ppp_state->viewports[i].a2 != z_center ||
          ppp_state->viewports[i].m2 != z_scale) {
         ppp_state->viewports[i].a0 = x_center;
         ppp_state->viewports[i].m0 = x_scale;
         ppp_state->viewports[i].a1 = y_center;
         ppp_state->viewports[i].m1 = y_scale;
         ppp_state->viewports[i].a2 = z_center;
         ppp_state->viewports[i].m2 = z_scale;

         emit_state->viewport = true;
      }
   }
}

static void pvr_setup_ppp_control(struct pvr_cmd_buffer *const cmd_buffer)
{
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline;
   struct pvr_emit_state *const emit_state = &state->emit_state;
   struct pvr_ppp_state *const ppp_state = &state->ppp_state;
   uint32_t ppp_control;

   pvr_csb_pack (&ppp_control, TA_STATE_PPP_CTRL, control) {
      const struct pvr_raster_state *raster_state = &gfx_pipeline->raster_state;
      VkPrimitiveTopology topology = gfx_pipeline->input_asm_state.topology;
      control.drawclippededges = true;
      control.wclampen = true;

      if (topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN)
         control.flatshade_vtx = PVRX(TA_FLATSHADE_VTX_VERTEX_1);
      else
         control.flatshade_vtx = PVRX(TA_FLATSHADE_VTX_VERTEX_0);

      if (raster_state->depth_clamp_enable)
         control.clip_mode = PVRX(TA_CLIP_MODE_NO_FRONT_OR_REAR);
      else
         control.clip_mode = PVRX(TA_CLIP_MODE_FRONT_REAR);

      /* +--- FrontIsCCW?
       * | +--- Cull Front?
       * v v
       * 0|0 CULLMODE_CULL_CCW,
       * 0|1 CULLMODE_CULL_CW,
       * 1|0 CULLMODE_CULL_CW,
       * 1|1 CULLMODE_CULL_CCW,
       */
      switch (raster_state->cull_mode) {
      case VK_CULL_MODE_BACK_BIT:
      case VK_CULL_MODE_FRONT_BIT:
         if ((raster_state->front_face == VK_FRONT_FACE_COUNTER_CLOCKWISE) ^
             (raster_state->cull_mode == VK_CULL_MODE_FRONT_BIT)) {
            control.cullmode = PVRX(TA_CULLMODE_CULL_CW);
         } else {
            control.cullmode = PVRX(TA_CULLMODE_CULL_CCW);
         }

         break;

      case VK_CULL_MODE_NONE:
         control.cullmode = PVRX(TA_CULLMODE_NO_CULLING);
         break;

      default:
         unreachable("Unsupported cull mode!");
      }
   }

   if (ppp_control != ppp_state->ppp_control) {
      ppp_state->ppp_control = ppp_control;
      emit_state->ppp_control = true;
   }
}

/* Largest valid PPP State update in words = 31
 * 1 - Header
 * 3 - Stream Out Config words 0, 1 and 2
 * 1 - PPP Control word
 * 3 - Varying Config words 0, 1 and 2
 * 1 - Output Select
 * 1 - WClamp
 * 6 - Viewport Transform words
 * 2 - Region Clip words
 * 3 - PDS State for fragment phase (PDSSTATEPTR 1-3)
 * 4 - PDS State for fragment phase (PDSSTATEPTR0)
 * 6 - ISP Control Words
 */
#define PVR_MAX_PPP_STATE_DWORDS 31
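/* Sanity check: the word counts listed above sum to
 * 1 + 3 + 1 + 3 + 1 + 1 + 6 + 2 + 3 + 4 + 6 = 31.
 */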

static VkResult pvr_emit_ppp_state(struct pvr_cmd_buffer *const cmd_buffer)
{
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   struct pvr_emit_state *const emit_state = &state->emit_state;
   struct pvr_ppp_state *const ppp_state = &state->ppp_state;
   struct pvr_csb *const control_stream =
      &state->current_sub_cmd->gfx.control_stream;
   uint32_t ppp_state_words[PVR_MAX_PPP_STATE_DWORDS];
   uint32_t ppp_state_words_count;
   uint32_t ppp_state_header;
   bool deferred_secondary;
   struct pvr_bo *pvr_bo;
   uint32_t *buffer_ptr;
   VkResult result;

   buffer_ptr = ppp_state_words;

   pvr_csb_pack (&ppp_state_header, TA_STATE_HEADER, header) {
      header.view_port_count = (ppp_state->viewport_count == 0)
                                  ? 0U
                                  : (ppp_state->viewport_count - 1);

      /* Skip over header. */
      buffer_ptr++;

      /* Set ISP state. */
      if (emit_state->isp) {
         header.pres_ispctl = true;
         *buffer_ptr++ = ppp_state->isp.control;
         header.pres_ispctl_fa = true;
         *buffer_ptr++ = ppp_state->isp.front_a;

         if (emit_state->isp_fb) {
            header.pres_ispctl_fb = true;
            *buffer_ptr++ = ppp_state->isp.front_b;
         }

         if (emit_state->isp_ba) {
            header.pres_ispctl_ba = true;
            *buffer_ptr++ = ppp_state->isp.back_a;
         }

         if (emit_state->isp_bb) {
            header.pres_ispctl_bb = true;
            *buffer_ptr++ = ppp_state->isp.back_b;
         }
      }

      /* Depth bias / scissor
       * If deferred_secondary is true then we do a separate state update
       * which gets patched in ExecuteDeferredCommandBuffer.
       */
      /* TODO: Update above comment when we port ExecuteDeferredCommandBuffer.
       */
      deferred_secondary =
         cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
         cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;

      if (emit_state->isp_dbsc && !deferred_secondary) {
         header.pres_ispctl_dbsc = true;

         pvr_csb_pack (buffer_ptr++, TA_STATE_ISPDBSC, ispdbsc) {
            ispdbsc.dbindex =
               ppp_state->depthbias_scissor_indices.depthbias_index;
            ispdbsc.scindex =
               ppp_state->depthbias_scissor_indices.scissor_index;
         }
      }

      /* PDS state. */
      if (emit_state->pds_fragment_stateptr0) {
         header.pres_pds_state_ptr0 = true;

         *buffer_ptr++ = ppp_state->pds.pixel_shader_base;
         *buffer_ptr++ = ppp_state->pds.texture_uniform_code_base;
         *buffer_ptr++ = ppp_state->pds.size_info1;
         *buffer_ptr++ = ppp_state->pds.size_info2;
      }

      if (emit_state->pds_fragment_stateptr1) {
         header.pres_pds_state_ptr1 = true;
         *buffer_ptr++ = ppp_state->pds.varying_base;
      }

      /* We don't use the pds_fragment_stateptr2 (texture state programs)
       * control word, but this doesn't mean we need to set it to 0. This is
       * because the hardware runs the texture state program only when the
       * pds_texture state field of PDS_SIZEINFO1 is non-zero.
       */

      if (emit_state->pds_fragment_stateptr3) {
         header.pres_pds_state_ptr3 = true;
         *buffer_ptr++ = ppp_state->pds.uniform_state_data_base;
      }

      /* Region clip. */
      if (emit_state->region_clip) {
         header.pres_region_clip = true;
         *buffer_ptr++ = ppp_state->region_clipping.word0;
         *buffer_ptr++ = ppp_state->region_clipping.word1;
      }

      /* Viewport. */
      if (emit_state->viewport) {
         const uint32_t viewports = MAX2(1, ppp_state->viewport_count);

         header.pres_viewport = true;
         for (uint32_t i = 0; i < viewports; i++) {
            *buffer_ptr++ = ppp_state->viewports[i].a0;
            *buffer_ptr++ = ppp_state->viewports[i].m0;
            *buffer_ptr++ = ppp_state->viewports[i].a1;
            *buffer_ptr++ = ppp_state->viewports[i].m1;
            *buffer_ptr++ = ppp_state->viewports[i].a2;
            *buffer_ptr++ = ppp_state->viewports[i].m2;
         }
      }

      /* W clamp. */
      if (emit_state->wclamp) {
         const float wclamp = 0.00001f;

         header.pres_wclamp = true;
         *buffer_ptr++ = fui(wclamp);
      }

      /* Output selects. */
      if (emit_state->output_selects) {
         header.pres_outselects = true;
         *buffer_ptr++ = ppp_state->output_selects;
      }

      /* Varying words. */
      if (emit_state->varying_word0) {
         header.pres_varying_word0 = true;
         *buffer_ptr++ = ppp_state->varying_word[0];
      }

      if (emit_state->varying_word1) {
         header.pres_varying_word1 = true;
         *buffer_ptr++ = ppp_state->varying_word[1];
      }

      if (emit_state->varying_word2) {
         /* We only emit this on the first draw of a render job to prevent us
          * from inheriting a non-zero value set elsewhere.
          */
         header.pres_varying_word2 = true;
         *buffer_ptr++ = 0;
      }

      /* PPP control. */
      if (emit_state->ppp_control) {
         header.pres_ppp_ctrl = true;
         *buffer_ptr++ = ppp_state->ppp_control;
      }

      if (emit_state->stream_out) {
         /* We only emit this on the first draw of a render job to prevent us
          * from inheriting a non-zero value set elsewhere.
          */
         header.pres_stream_out_size = true;
         *buffer_ptr++ = 0;
      }
   }

   if (!ppp_state_header)
      return VK_SUCCESS;

   ppp_state_words_count = buffer_ptr - ppp_state_words;
   ppp_state_words[0] = ppp_state_header;

   result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
                                     cmd_buffer->device->heaps.general_heap,
                                     ppp_state_words_count * sizeof(uint32_t),
                                     PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                                     &pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   memcpy(pvr_bo->bo->map,
          ppp_state_words,
          ppp_state_words_count * sizeof(uint32_t));

   /* Write the VDM state update into the VDM control stream. */
   pvr_csb_emit (control_stream, VDMCTRL_PPP_STATE0, state0) {
      state0.word_count = ppp_state_words_count;
      state0.addrmsb = pvr_bo->vma->dev_addr;
   }

   pvr_csb_emit (control_stream, VDMCTRL_PPP_STATE1, state1) {
      state1.addrlsb = pvr_bo->vma->dev_addr;
   }

   if (emit_state->isp_dbsc &&
       cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
      pvr_finishme("Unimplemented path!!");
   }

   state->emit_state_bits = 0;

   return VK_SUCCESS;
}

static VkResult
pvr_emit_dirty_ppp_state(struct pvr_cmd_buffer *const cmd_buffer)
{
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline;
   const bool dirty_stencil = state->dirty.compare_mask ||
                              state->dirty.write_mask || state->dirty.reference;
   VkResult result;

   if (!(dirty_stencil || state->dirty.depth_bias ||
         state->dirty.fragment_descriptors || state->dirty.line_width ||
         state->dirty.gfx_pipeline_binding || state->dirty.scissor ||
         state->dirty.userpass_spawn || state->dirty.viewport ||
         state->emit_state_bits)) {
      return VK_SUCCESS;
   }

   if (state->dirty.gfx_pipeline_binding) {
      struct pvr_cmd_struct(TA_STATE_ISPA) ispa;

      pvr_setup_output_select(cmd_buffer);
      pvr_setup_isp_faces_and_control(cmd_buffer, &ispa);
      pvr_setup_triangle_merging_flag(cmd_buffer, &ispa);
   } else if (dirty_stencil || state->dirty.line_width ||
              state->dirty.userpass_spawn) {
      pvr_setup_isp_faces_and_control(cmd_buffer, NULL);
   }

   if (!gfx_pipeline->raster_state.discard_enable &&
       state->dirty.fragment_descriptors &&
       gfx_pipeline->fragment_shader_state.bo) {
      pvr_setup_fragment_state_pointers(cmd_buffer);
   }

   pvr_setup_isp_depth_bias_scissor_state(cmd_buffer);

   if (state->dirty.viewport)
      pvr_setup_viewport(cmd_buffer);

   pvr_setup_ppp_control(cmd_buffer);

   if (gfx_pipeline->raster_state.cull_mode == VK_CULL_MODE_FRONT_AND_BACK) {
      /* FIXME: Port SetNegativeViewport(). */
   }

   result = pvr_emit_ppp_state(cmd_buffer);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}

static void
pvr_validate_push_descriptors(struct pvr_cmd_buffer *cmd_buffer,
                              bool *const push_descriptors_dirty_out)
{
   /* TODO: Implement this function, based on ValidatePushDescriptors. */
   pvr_finishme("Add support for push descriptors!");
   *push_descriptors_dirty_out = false;
}

static void
pvr_calculate_vertex_cam_size(const struct pvr_device_info *dev_info,
                              const uint32_t vs_output_size,
                              const bool raster_enable,
                              uint32_t *const cam_size_out,
                              uint32_t *const vs_max_instances_out)
{
   /* First work out the size of a vertex in the UVS and multiply by 4 for
    * column ordering.
    */
   const uint32_t uvs_vertex_vector_size_in_dwords =
      (vs_output_size + 1U + raster_enable * 4U) * 4U;
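   /* E.g. vs_output_size = 4 with rasterization enabled gives
    * (4 + 1 + 4) * 4 = 36 dwords per vertex vector.
    */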
3986
 
   const uint32_t vdm_cam_size =
3987
 
      PVR_GET_FEATURE_VALUE(dev_info, vdm_cam_size, 32U);
3988
 
 
3989
 
   /* This is a proxy for 8XE. */
3990
 
   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) &&
3991
 
       vdm_cam_size < 96U) {
3992
 
      /* Comparisons are based on size including scratch per vertex vector. */
3993
 
      if (uvs_vertex_vector_size_in_dwords < (14U * 4U)) {
3994
 
         *cam_size_out = MIN2(31U, vdm_cam_size - 1U);
3995
 
         *vs_max_instances_out = 16U;
3996
 
      } else if (uvs_vertex_vector_size_in_dwords < (20U * 4U)) {
3997
 
         *cam_size_out = 15U;
3998
 
         *vs_max_instances_out = 16U;
3999
 
      } else if (uvs_vertex_vector_size_in_dwords < (28U * 4U)) {
4000
 
         *cam_size_out = 11U;
4001
 
         *vs_max_instances_out = 12U;
4002
 
      } else if (uvs_vertex_vector_size_in_dwords < (44U * 4U)) {
4003
 
         *cam_size_out = 7U;
4004
 
         *vs_max_instances_out = 8U;
4005
 
      } else if (PVR_HAS_FEATURE(dev_info,
4006
 
                                 simple_internal_parameter_format_v2) ||
4007
 
                 uvs_vertex_vector_size_in_dwords < (64U * 4U)) {
4008
 
         *cam_size_out = 7U;
4009
 
         *vs_max_instances_out = 4U;
4010
 
      } else {
4011
 
         *cam_size_out = 3U;
4012
 
         *vs_max_instances_out = 2U;
4013
 
      }
4014
 
   } else {
4015
 
      /* Comparisons are based on size including scratch per vertex vector. */
4016
 
      if (uvs_vertex_vector_size_in_dwords <= (32U * 4U)) {
4017
 
         /* output size <= 27 + 5 scratch. */
4018
 
         *cam_size_out = MIN2(95U, vdm_cam_size - 1U);
4019
 
         *vs_max_instances_out = 0U;
4020
 
      } else if (uvs_vertex_vector_size_in_dwords <= 48U * 4U) {
4021
 
         /* output size <= 43 + 5 scratch */
4022
 
         *cam_size_out = 63U;
4023
 
         if (PVR_GET_FEATURE_VALUE(dev_info, uvs_vtx_entries, 144U) < 288U)
4024
 
            *vs_max_instances_out = 16U;
4025
 
         else
4026
 
            *vs_max_instances_out = 0U;
4027
 
      } else if (uvs_vertex_vector_size_in_dwords <= 64U * 4U) {
4028
 
         /* output size <= 59 + 5 scratch. */
4029
 
         *cam_size_out = 31U;
4030
 
         if (PVR_GET_FEATURE_VALUE(dev_info, uvs_vtx_entries, 144U) < 288U)
4031
 
            *vs_max_instances_out = 16U;
4032
 
         else
4033
 
            *vs_max_instances_out = 0U;
4034
 
      } else {
4035
 
         *cam_size_out = 15U;
4036
 
         *vs_max_instances_out = 16U;
4037
 
      }
4038
 
   }
4039
 
}
4040
 
 
4041
 
static void
pvr_emit_dirty_vdm_state(const struct pvr_cmd_buffer *const cmd_buffer)
{
   /* FIXME: Assume all state is dirty for the moment. */
   struct pvr_device_info *const dev_info =
      &cmd_buffer->device->pdevice->dev_info;
   ASSERTED const uint32_t max_user_vertex_output_components =
      pvr_get_max_user_vertex_output_components(dev_info);
   struct pvr_cmd_struct(VDMCTRL_VDM_STATE0)
      header = { pvr_cmd_header(VDMCTRL_VDM_STATE0) };
   const struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline;
   struct pvr_csb *const csb = &state->current_sub_cmd->gfx.control_stream;
   uint32_t vs_output_size;
   uint32_t max_instances;
   uint32_t cam_size;

   assert(gfx_pipeline);

   /* CAM Calculations and HW state take vertex size aligned to DWORDS. */
   vs_output_size =
      DIV_ROUND_UP(gfx_pipeline->vertex_shader_state.vertex_output_size,
                   PVRX(VDMCTRL_VDM_STATE4_VS_OUTPUT_SIZE_UNIT_SIZE));

   assert(vs_output_size <= max_user_vertex_output_components);

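   /* Descriptive note: the CAM size and maximum vertex instance count are
    * derived from the aligned vertex output size; the resulting values are
    * programmed into VDM_STATE0 and VDM_STATE5 below.
    */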
   pvr_calculate_vertex_cam_size(dev_info,
                                 vs_output_size,
                                 true,
                                 &cam_size,
                                 &max_instances);

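   /* Descriptive note: VDM_STATE0 acts as the header word; the *_present
    * flags set here decide which of the optional VDM_STATE1-5 words are
    * emitted further down.
    */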
   pvr_csb_emit (csb, VDMCTRL_VDM_STATE0, state0) {
      state0.cam_size = cam_size;

      if (gfx_pipeline->input_asm_state.primitive_restart) {
         state0.cut_index_enable = true;
         state0.cut_index_present = true;
      }

      switch (gfx_pipeline->input_asm_state.topology) {
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
         state0.flatshade_control = PVRX(VDMCTRL_FLATSHADE_CONTROL_VERTEX_1);
         break;

      default:
         state0.flatshade_control = PVRX(VDMCTRL_FLATSHADE_CONTROL_VERTEX_0);
         break;
      }

      /* If we've bound a different vertex buffer, or this draw-call requires
       * a different PDS attrib data-section from the last draw call (changed
       * base_instance) then we need to specify a new data section. This is
       * also the case if we've switched pipeline or attrib program as the
       * data-section layout will be different.
       */
      state0.vs_data_addr_present =
         state->dirty.gfx_pipeline_binding || state->dirty.vertex_bindings ||
         state->dirty.draw_base_instance || state->dirty.draw_variant;

      /* Need to specify new PDS Attrib program if we've bound a different
       * pipeline or we needed a different PDS Attrib variant for this
       * draw-call.
       */
      state0.vs_other_present = state->dirty.gfx_pipeline_binding ||
                                state->dirty.draw_variant;

      /* UVB_SCRATCH_SELECT_ONE with no rasterization is only valid when
       * stream output is enabled. We use UVB_SCRATCH_SELECT_FIVE because
       * Vulkan doesn't support stream output and the vertex position is
       * always emitted to the UVB.
       */
      state0.uvs_scratch_size_select =
         PVRX(VDMCTRL_UVS_SCRATCH_SIZE_SELECT_FIVE);

      header = state0;
   }

   if (header.cut_index_present) {
      pvr_csb_emit (csb, VDMCTRL_VDM_STATE1, state1) {
         switch (state->index_buffer_binding.type) {
         case VK_INDEX_TYPE_UINT32:
            /* FIXME: Defines for these? These seem to come from the Vulkan
             * spec. for VkPipelineInputAssemblyStateCreateInfo
             * primitiveRestartEnable.
             */
            state1.cut_index = 0xFFFFFFFF;
            break;

         case VK_INDEX_TYPE_UINT16:
            state1.cut_index = 0xFFFF;
            break;

         default:
            unreachable("Invalid index type");
         }
      }
   }

   if (header.vs_data_addr_present) {
      pvr_csb_emit (csb, VDMCTRL_VDM_STATE2, state2) {
         state2.vs_pds_data_base_addr.addr = state->pds_vertex_attrib_offset;
      }
   }

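   /* Descriptive note: VDM_STATE3-5 describe the PDS vertex attribute program
    * itself (code address, vertex output size, USC and PDS temp/data sizes),
    * so they only need re-emitting when that program changes.
    */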
   if (header.vs_other_present) {
      const uint32_t usc_unified_store_size_in_bytes =
         gfx_pipeline->vertex_shader_state.vertex_input_size << 2;

      pvr_csb_emit (csb, VDMCTRL_VDM_STATE3, state3) {
         state3.vs_pds_code_base_addr.addr = state->pds_shader.code_offset;
      }

      pvr_csb_emit (csb, VDMCTRL_VDM_STATE4, state4) {
         state4.vs_output_size = vs_output_size;
      }

      pvr_csb_emit (csb, VDMCTRL_VDM_STATE5, state5) {
         state5.vs_max_instances = max_instances;
         state5.vs_usc_common_size = 0U;
         state5.vs_usc_unified_size = DIV_ROUND_UP(
            usc_unified_store_size_in_bytes,
            PVRX(VDMCTRL_VDM_STATE5_VS_USC_UNIFIED_SIZE_UNIT_SIZE));
         state5.vs_pds_temp_size =
            DIV_ROUND_UP(state->pds_shader.info->temps_required << 2,
                         PVRX(VDMCTRL_VDM_STATE5_VS_PDS_TEMP_SIZE_UNIT_SIZE));
         state5.vs_pds_data_size =
            DIV_ROUND_UP(state->pds_shader.info->data_size_in_dwords << 2,
                         PVRX(VDMCTRL_VDM_STATE5_VS_PDS_DATA_SIZE_UNIT_SIZE));
      }
   }
}

static VkResult pvr_validate_draw_state(struct pvr_cmd_buffer *cmd_buffer)
{
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline;
   const struct pvr_pipeline_stage_state *const fragment_state =
      &gfx_pipeline->fragment_shader_state.stage_state;
   struct pvr_sub_cmd *sub_cmd;
   bool fstencil_writemask_zero;
   bool bstencil_writemask_zero;
   bool push_descriptors_dirty;
   bool fstencil_keep;
   bool bstencil_keep;
   VkResult result;

   pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS);

   sub_cmd = state->current_sub_cmd;
   sub_cmd->gfx.empty_cmd = false;

   /* Determine pipeline depth/stencil usage. If a pipeline uses depth or
    * stencil testing, those attachments are using their loaded values, and
    * the loadOps cannot be optimized out.
    */
   /* Pipeline uses depth testing. */
   if (sub_cmd->gfx.depth_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED &&
       gfx_pipeline->depth_compare_op != VK_COMPARE_OP_ALWAYS) {
      sub_cmd->gfx.depth_usage = PVR_DEPTH_STENCIL_USAGE_NEEDED;
   }

   /* Pipeline uses stencil testing. */
   if (sub_cmd->gfx.stencil_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED &&
       (gfx_pipeline->stencil_front.compare_op != VK_COMPARE_OP_ALWAYS ||
        gfx_pipeline->stencil_back.compare_op != VK_COMPARE_OP_ALWAYS)) {
      sub_cmd->gfx.stencil_usage = PVR_DEPTH_STENCIL_USAGE_NEEDED;
   }

   if (PVR_HAS_FEATURE(&cmd_buffer->device->pdevice->dev_info,
                       compute_overlap)) {
      uint32_t coefficient_size =
         DIV_ROUND_UP(fragment_state->coefficient_size,
                      PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE));

      if (coefficient_size >
          PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_MAX_SIZE))
         sub_cmd->gfx.disable_compute_overlap = true;
   }

   sub_cmd->gfx.frag_uses_atomic_ops |= fragment_state->uses_atomic_ops;
   sub_cmd->gfx.frag_has_side_effects |= fragment_state->has_side_effects;
   sub_cmd->gfx.frag_uses_texture_rw |= fragment_state->uses_texture_rw;
   sub_cmd->gfx.vertex_uses_texture_rw |=
      gfx_pipeline->vertex_shader_state.stage_state.uses_texture_rw;

   fstencil_keep =
      (gfx_pipeline->stencil_front.fail_op == VK_STENCIL_OP_KEEP) &&
      (gfx_pipeline->stencil_front.pass_op == VK_STENCIL_OP_KEEP);
   bstencil_keep = (gfx_pipeline->stencil_back.fail_op == VK_STENCIL_OP_KEEP) &&
                   (gfx_pipeline->stencil_back.pass_op == VK_STENCIL_OP_KEEP);
   fstencil_writemask_zero = (state->dynamic.common.write_mask.front == 0);
   bstencil_writemask_zero = (state->dynamic.common.write_mask.back == 0);

   /* Set the stencil modified flag if:
    * - The front or back-facing stencil has a fail_op/pass_op other than KEEP,
    *   and
    * - The front or back-facing stencil has a non-zero write_mask.
    */
   if (!(fstencil_keep && bstencil_keep) &&
       !(fstencil_writemask_zero && bstencil_writemask_zero)) {
      sub_cmd->gfx.modifies_stencil = true;
   }

   /* Set depth modified flag if depth write is enabled. */
   if (!gfx_pipeline->depth_write_disable)
      sub_cmd->gfx.modifies_depth = true;

   /* If either the data or code changes for pds vertex attribs, regenerate the
    * data segment.
    */
   if (state->dirty.vertex_bindings || state->dirty.gfx_pipeline_binding ||
       state->dirty.draw_variant || state->dirty.draw_base_instance) {
      enum pvr_pds_vertex_attrib_program_type prog_type;
      const struct pvr_pds_attrib_program *program;

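      /* Descriptive note: pick the PDS vertex attribute program variant for
       * this draw; indirect draws and draws with a non-zero base instance
       * need dedicated variants, everything else uses the basic program.
       */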
      if (state->draw_state.draw_indirect)
         prog_type = PVR_PDS_VERTEX_ATTRIB_PROGRAM_DRAW_INDIRECT;
      else if (state->draw_state.base_instance)
         prog_type = PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASE_INSTANCE;
      else
         prog_type = PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASIC;

      program =
         &gfx_pipeline->vertex_shader_state.pds_attrib_programs[prog_type];
      state->pds_shader.info = &program->info;
      state->pds_shader.code_offset = program->program.code_offset;

      state->max_shared_regs =
         MAX2(state->max_shared_regs, pvr_calc_shared_regs_count(gfx_pipeline));

      pvr_setup_vertex_buffers(cmd_buffer, gfx_pipeline);
   }

   /* TODO: Check for dirty push constants */

   pvr_validate_push_descriptors(cmd_buffer, &push_descriptors_dirty);

   state->dirty.vertex_descriptors = push_descriptors_dirty ||
                                     state->dirty.gfx_pipeline_binding;
   state->dirty.fragment_descriptors = state->dirty.vertex_descriptors;

   if (state->dirty.fragment_descriptors) {
      result = pvr_setup_descriptor_mappings(
         cmd_buffer,
         PVR_STAGE_ALLOCATION_FRAGMENT,
         &state->gfx_pipeline->fragment_shader_state.uniform_state,
         &state->pds_fragment_uniform_data_offset);
      if (result != VK_SUCCESS) {
         mesa_loge("Could not setup fragment descriptor mappings.");
         return result;
      }
   }

   if (state->dirty.vertex_descriptors) {
      uint32_t pds_vertex_uniform_data_offset;

      result = pvr_setup_descriptor_mappings(
         cmd_buffer,
         PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY,
         &state->gfx_pipeline->vertex_shader_state.uniform_state,
         &pds_vertex_uniform_data_offset);
      if (result != VK_SUCCESS) {
         mesa_loge("Could not setup vertex descriptor mappings.");
         return result;
      }

      pvr_emit_dirty_pds_state(cmd_buffer, pds_vertex_uniform_data_offset);
   }

   pvr_emit_dirty_ppp_state(cmd_buffer);
   pvr_emit_dirty_vdm_state(cmd_buffer);

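   /* Descriptive note: all pipeline and dynamic state consumed by this draw
    * has now been emitted to the control streams, so the per-draw dirty flags
    * can be cleared.
    */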
   state->dirty.gfx_desc_dirty = false;
   state->dirty.blend_constants = false;
   state->dirty.compare_mask = false;
   state->dirty.depth_bias = false;
   state->dirty.draw_base_instance = false;
   state->dirty.draw_variant = false;
   state->dirty.fragment_descriptors = false;
   state->dirty.line_width = false;
   state->dirty.gfx_pipeline_binding = false;
   state->dirty.reference = false;
   state->dirty.scissor = false;
   state->dirty.userpass_spawn = false;
   state->dirty.vertex_bindings = false;
   state->dirty.viewport = false;
   state->dirty.write_mask = false;

   return VK_SUCCESS;
}

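/* Descriptive note: translate a Vulkan primitive topology into the
 * corresponding VDM control stream topology value.
 */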
static uint32_t pvr_get_hw_primitive_topology(VkPrimitiveTopology topology)
{
   switch (topology) {
   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_POINT_LIST);
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_LIST);
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_STRIP);
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_LIST);
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_STRIP);
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_FAN);
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ);
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ);
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_LIST_ADJ);
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_STRIP_ADJ);
   case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_PATCH_LIST);
   default:
      unreachable("Undefined primitive topology");
   }
}

static void pvr_emit_vdm_index_list(struct pvr_cmd_buffer *cmd_buffer,
                                    VkPrimitiveTopology topology,
                                    uint32_t first_vertex,
                                    uint32_t vertex_count,
                                    uint32_t first_index,
                                    uint32_t index_count,
                                    uint32_t instance_count)
{
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   struct pvr_csb *const csb = &state->current_sub_cmd->gfx.control_stream;
   struct pvr_cmd_struct(VDMCTRL_INDEX_LIST0)
      list_hdr = { pvr_cmd_header(VDMCTRL_INDEX_LIST0) };
   pvr_dev_addr_t index_buffer_addr = { 0 };
   unsigned int index_stride = 0;

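   /* Descriptive note: INDEX_LIST0 is the header word; the optional
    * INDEX_LIST1-4 words (index buffer address LSB, count, instance count and
    * vertex offset) are only emitted when the corresponding *_present flag is
    * set here.
    */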
   pvr_csb_emit (csb, VDMCTRL_INDEX_LIST0, list0) {
      list0.primitive_topology = pvr_get_hw_primitive_topology(topology);

      /* First instance is not handled in the VDM state, it's implemented as
       * an addition in the PDS vertex fetch.
       */
      list0.index_count_present = true;

      if (instance_count > 1)
         list0.index_instance_count_present = true;

      if (first_vertex != 0)
         list0.index_offset_present = true;

      if (state->draw_state.draw_indexed) {
         struct pvr_buffer *buffer = state->index_buffer_binding.buffer;

         switch (state->index_buffer_binding.type) {
         case VK_INDEX_TYPE_UINT32:
            list0.index_size = PVRX(VDMCTRL_INDEX_SIZE_B32);
            index_stride = 4;
            break;

         case VK_INDEX_TYPE_UINT16:
            list0.index_size = PVRX(VDMCTRL_INDEX_SIZE_B16);
            index_stride = 2;
            break;

         default:
            unreachable("Invalid index type");
         }

         list0.index_addr_present = true;
         index_buffer_addr.addr = buffer->dev_addr.addr;
         index_buffer_addr.addr += state->index_buffer_binding.offset;
         index_buffer_addr.addr += first_index * index_stride;
         list0.index_base_addrmsb = index_buffer_addr;
      }

      list_hdr = list0;
   }

   if (list_hdr.index_addr_present) {
      pvr_csb_emit (csb, VDMCTRL_INDEX_LIST1, list1) {
         list1.index_base_addrlsb = index_buffer_addr;
      }
   }

   if (list_hdr.index_count_present) {
      pvr_csb_emit (csb, VDMCTRL_INDEX_LIST2, list2) {
         list2.index_count = vertex_count | index_count;
      }
   }

   if (list_hdr.index_instance_count_present) {
      pvr_csb_emit (csb, VDMCTRL_INDEX_LIST3, list3) {
         list3.instance_count = instance_count - 1;
      }
   }

   if (list_hdr.index_offset_present) {
      pvr_csb_emit (csb, VDMCTRL_INDEX_LIST4, list4) {
         list4.index_offset = first_vertex;
      }
   }

   /* TODO: See if we need list_words[5-9]. */
}

void pvr_CmdDrawIndexed(VkCommandBuffer commandBuffer,
                        uint32_t indexCount,
                        uint32_t instanceCount,
                        uint32_t firstIndex,
                        int32_t vertexOffset,
                        uint32_t firstInstance)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   struct pvr_cmd_buffer_draw_state draw_state;
   VkResult result;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   draw_state.base_vertex = vertexOffset;
   draw_state.base_instance = firstInstance;
   draw_state.draw_indirect = false;
   draw_state.draw_indexed = true;
   pvr_update_draw_state(&cmd_buffer->state, &draw_state);

   result = pvr_validate_draw_state(cmd_buffer);
   if (result != VK_SUCCESS)
      return;

   /* Write the VDM control stream for the primitive. */
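   /* Descriptive note: a vertex_count of 0 is passed for indexed draws; the
    * count word of the index list is built by OR-ing vertex_count and
    * index_count together, so only indexCount contributes here.
    */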
   pvr_emit_vdm_index_list(cmd_buffer,
                           state->gfx_pipeline->input_asm_state.topology,
                           vertexOffset,
                           0,
                           firstIndex,
                           indexCount,
                           instanceCount);
}

void pvr_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
                                VkBuffer _buffer,
                                VkDeviceSize offset,
                                uint32_t drawCount,
                                uint32_t stride)
{
   assert(!"Unimplemented");
}

void pvr_CmdDrawIndirect(VkCommandBuffer commandBuffer,
                         VkBuffer _buffer,
                         VkDeviceSize offset,
                         uint32_t drawCount,
                         uint32_t stride)
{
   assert(!"Unimplemented");
}

static VkResult
pvr_resolve_unemitted_resolve_attachments(struct pvr_cmd_buffer *cmd_buffer)
{
   pvr_finishme("Add attachment resolve support!");
   return pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
}

void pvr_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
                           const VkSubpassEndInfoKHR *pSubpassEndInfo)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   struct pvr_image_view **attachments;
   VkClearValue *clear_values;
   VkResult result;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   assert(state->render_pass_info.pass);
   assert(state->render_pass_info.framebuffer);

   /* TODO: Investigate why pvr_cmd_buffer_end_sub_cmd/EndSubCommand is called
    * twice in this path, once here and once from
    * pvr_resolve_unemitted_resolve_attachments.
    */
   result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
   if (result != VK_SUCCESS)
      return;

   result = pvr_resolve_unemitted_resolve_attachments(cmd_buffer);
   if (result != VK_SUCCESS)
      return;

   /* Save the required fields before clearing render_pass_info struct. */
   attachments = state->render_pass_info.attachments;
   clear_values = state->render_pass_info.clear_values;

   memset(&state->render_pass_info, 0, sizeof(state->render_pass_info));

   state->render_pass_info.attachments = attachments;
   state->render_pass_info.clear_values = clear_values;
}

void pvr_CmdExecuteCommands(VkCommandBuffer commandBuffer,
                            uint32_t commandBufferCount,
                            const VkCommandBuffer *pCommandBuffers)
{
   assert(!"Unimplemented");
}

void pvr_CmdNextSubpass2(VkCommandBuffer commandBuffer,
                         const VkSubpassBeginInfo *pSubpassBeginInfo,
                         const VkSubpassEndInfo *pSubpassEndInfo)
{
   assert(!"Unimplemented");
}

void pvr_CmdPipelineBarrier2KHR(VkCommandBuffer commandBuffer,
                                const VkDependencyInfoKHR *pDependencyInfo)
{
   assert(!"Unimplemented");
}

void pvr_CmdResetEvent2KHR(VkCommandBuffer commandBuffer,
                           VkEvent _event,
                           VkPipelineStageFlags2KHR stageMask)
{
   assert(!"Unimplemented");
}

void pvr_CmdSetEvent2KHR(VkCommandBuffer commandBuffer,
                         VkEvent _event,
                         const VkDependencyInfoKHR *pDependencyInfo)
{
   assert(!"Unimplemented");
}

void pvr_CmdWaitEvents2KHR(VkCommandBuffer commandBuffer,
                           uint32_t eventCount,
                           const VkEvent *pEvents,
                           const VkDependencyInfoKHR *pDependencyInfos)
{
   assert(!"Unimplemented");
}

void pvr_CmdWriteTimestamp2KHR(VkCommandBuffer commandBuffer,
                               VkPipelineStageFlags2KHR stage,
                               VkQueryPool queryPool,
                               uint32_t query)
{
   unreachable("Timestamp queries are not supported.");
}

VkResult pvr_EndCommandBuffer(VkCommandBuffer commandBuffer)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   VkResult result;

   /* From the Vulkan 1.0 spec:
    *
    * CommandBuffer must be in the recording state.
    */
   assert(cmd_buffer->status == PVR_CMD_BUFFER_STATUS_RECORDING);

   if (state->status != VK_SUCCESS)
      return state->status;

   result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
   if (result != VK_SUCCESS)
      return result;

   cmd_buffer->status = PVR_CMD_BUFFER_STATUS_EXECUTABLE;

   return VK_SUCCESS;
}