2
* Copyright © 2022 Imagination Technologies Ltd.
4
* Permission is hereby granted, free of charge, to any person obtaining a copy
5
* of this software and associated documentation files (the "Software"), to deal
6
* in the Software without restriction, including without limitation the rights
7
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
* copies of the Software, and to permit persons to whom the Software is
9
* furnished to do so, subject to the following conditions:
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30
#include "pvr_device_info.h"
32
#include "pvr_rogue_pds_defs.h"
33
#include "pvr_rogue_pds_disasm.h"
34
#include "pvr_rogue_pds_encode.h"
36
#include "util/macros.h"
38
/* Operand-encoding helpers: each macro turns a zero-based register index into
 * the value expected by a PDS instruction field by adding the *_LOWER base of
 * the matching register class. The suffix selects the bank: C (const),
 * T (temp) or P (ptemp).
 */

/* 32-bit source registers (REGS32). */
#define R32_C(x) ((x) + PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER)
#define R32_T(x) ((x) + PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER)
#define R32_P(x) ((x) + PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER)

/* 32-bit temp/ptemp-only registers (REGS32TP). */
#define R32TP_T(x) ((x) + PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER)
#define R32TP_P(x) ((x) + PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER)

/* 64-bit source registers (REGS64). */
#define R64_C(x) ((x) + PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER)
#define R64_T(x) ((x) + PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER)
#define R64_P(x) ((x) + PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER)

/* 64-bit temp/ptemp-only registers (REGS64TP). */
#define R64TP_T(x) ((x) + PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER)
#define R64TP_P(x) ((x) + PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER)

/* 32-bit PTemp index for draw indirect base instance. */
#define PVR_INDIRECT_BASE_INSTANCE_PTEMP 1U

/* Number of constants to reserve per DDMAD instruction in the PDS Vertex. */
#define PVR_PDS_DDMAD_NUM_CONSTS 8
58
#if defined(TRACE_PDS)
/* Pretty-printing helpers; they only emit output when TRACE_PDS is defined. */

/* Log a constant register allocation: index, width in bits and a label. */
#   define pvr_debug_pds_const(reg, size, annotation) \
      mesa_logd("const[%d] @ (%dbits) %s", reg, size, annotation)

/* Log a temp register allocation: index, width in bits and a label. */
#   define pvr_debug_pds_temp(reg, size, annotation) \
      mesa_logd("temp[%d] @ (%dbits) %s", reg, size, annotation)

/* Log a printf-style note; the format string must be a literal. */
#   define pvr_debug_pds_note(...) mesa_logd(" // " __VA_ARGS__)

/* Log the name of `flag` when all of its bits are set in `flags`. Wrapped in
 * do/while so the macro behaves as a single statement.
 */
#   define pvr_debug_pds_flag(flags, flag)    \
      do {                                    \
         if (((flags) & (flag)) == (flag))    \
            mesa_logd(" > " #flag);           \
      } while (0)

/* Log a plain string. */
#   define pvr_debug(annotation) mesa_logd(annotation)

#else
/* Tracing disabled: every helper expands to nothing. */

#   define pvr_debug_pds_const(reg, size, annotation)
#   define pvr_debug_pds_temp(reg, size, annotation)
#   define pvr_debug_pds_note(...)
#   define pvr_debug_pds_flag(flags, flag)
#   define pvr_debug(annotation)

#endif
81
/* Cursor used while appending variable-sized entries to a PDS constant map. */
struct pvr_pds_const_map_entry_write_state {
   /* PDS info whose entries array is being filled in. */
   const struct pvr_pds_info *PDS_info;
   /* Entry most recently handed out to the caller. */
   struct pvr_const_map_entry *entry;
   /* Size of *entry; added to the cursor when the next entry is prepared. */
   size_t size_of_last_entry_in_bytes;
   /* Number of entries prepared so far. */
   uint32_t entry_count;
   /* Total bytes consumed by all prepared entries. */
   size_t entries_size_in_bytes;
};
89
static void pvr_init_pds_const_map_entry_write_state(
90
struct pvr_pds_info *PDS_info,
91
struct pvr_pds_const_map_entry_write_state *entry_write_state)
93
entry_write_state->PDS_info = PDS_info;
94
entry_write_state->entry = PDS_info->entries;
95
entry_write_state->size_of_last_entry_in_bytes = 0;
96
entry_write_state->entry_count = 0;
97
entry_write_state->entries_size_in_bytes = 0;
100
/* Returns a pointer to the next struct pvr_const_map_entry. */
101
static void *pvr_prepare_next_pds_const_map_entry(
102
struct pvr_pds_const_map_entry_write_state *entry_write_state,
103
size_t size_of_next_entry_in_bytes)
105
/* Move on to the next entry. */
106
uint8_t *next_entry = ((uint8_t *)entry_write_state->entry +
107
entry_write_state->size_of_last_entry_in_bytes);
108
entry_write_state->entry = (struct pvr_const_map_entry *)next_entry;
110
entry_write_state->size_of_last_entry_in_bytes = size_of_next_entry_in_bytes;
111
entry_write_state->entry_count++;
112
entry_write_state->entries_size_in_bytes += size_of_next_entry_in_bytes;
114
/* Check if we can write into the next entry. */
115
assert(entry_write_state->entries_size_in_bytes <=
116
entry_write_state->PDS_info->entries_size_in_bytes);
118
return entry_write_state->entry;
121
static void pvr_write_pds_const_map_entry_vertex_attribute_address(
122
struct pvr_pds_const_map_entry_write_state *entry_write_state,
123
const struct pvr_pds_vertex_dma *DMA,
125
bool use_robust_vertex_fetch)
127
pvr_debug_pds_note("DMA %d dwords, stride %d, offset %d, bindingIdx %d",
133
if (use_robust_vertex_fetch) {
134
struct pvr_const_map_entry_robust_vertex_attribute_address
135
*robust_attribute_entry;
137
robust_attribute_entry =
138
pvr_prepare_next_pds_const_map_entry(entry_write_state,
139
sizeof(*robust_attribute_entry));
140
robust_attribute_entry->type =
141
PVR_PDS_CONST_MAP_ENTRY_TYPE_ROBUST_VERTEX_ATTRIBUTE_ADDRESS;
142
robust_attribute_entry->const_offset = const_val;
143
robust_attribute_entry->binding_index = DMA->binding_index;
144
robust_attribute_entry->component_size_in_bytes =
145
DMA->component_size_in_bytes;
146
robust_attribute_entry->offset = DMA->offset;
147
robust_attribute_entry->stride = DMA->stride;
148
robust_attribute_entry->size_in_dwords = DMA->size_in_dwords;
149
robust_attribute_entry->robustness_buffer_offset =
150
DMA->robustness_buffer_offset;
152
struct pvr_const_map_entry_vertex_attribute_address *attribute_entry;
155
pvr_prepare_next_pds_const_map_entry(entry_write_state,
156
sizeof(*attribute_entry));
157
attribute_entry->type =
158
PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_ADDRESS;
159
attribute_entry->const_offset = const_val;
160
attribute_entry->binding_index = DMA->binding_index;
161
attribute_entry->offset = DMA->offset;
162
attribute_entry->stride = DMA->stride;
163
attribute_entry->size_in_dwords = DMA->size_in_dwords;
167
static ALWAYS_INLINE uint32_t pvr_pds_encode_doutu(uint32_t cc,
171
return pvr_pds_inst_encode_dout(cc,
175
PVR_ROGUE_PDSINST_DSTDOUT_DOUTU);
179
pvr_encode_burst(struct pvr_pds_const_map_entry_write_state *entry_write_state,
182
unsigned int const32,
183
unsigned int const64,
184
unsigned int dma_size_in_dwords,
185
unsigned int destination,
188
uint32_t literal_value;
190
/* Encode literal value. */
191
literal_value = dma_size_in_dwords
192
<< PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT;
193
literal_value |= destination
194
<< PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT;
195
literal_value |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED |
199
literal_value |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN;
201
/* Create const map entry. */
202
struct pvr_const_map_entry_literal32 *literal_entry;
204
literal_entry = pvr_prepare_next_pds_const_map_entry(entry_write_state,
205
sizeof(*literal_entry));
206
literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
207
literal_entry->const_offset = const32;
208
literal_entry->literal_value = literal_value;
211
return pvr_pds_inst_encode_dout(0,
215
PVR_ROGUE_PDSINST_DSTDOUT_DOUTD);
218
#define pvr_encode_burst_cs(psDataEntry, \
223
dma_size_in_dwords, \
231
dma_size_in_dwords, \
233
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE)
235
static uint32_t pvr_encode_direct_write(
236
struct pvr_pds_const_map_entry_write_state *entry_write_state,
239
unsigned int const32,
240
unsigned int const64,
242
unsigned int destination,
243
uint32_t destination_store,
244
const struct pvr_device_info *dev_info)
246
struct pvr_const_map_entry_literal32 *literal_entry;
248
uint32_t instruction =
249
pvr_pds_inst_encode_dout(0,
253
PVR_ROGUE_PDSINST_DSTDOUT_DOUTW);
255
literal_entry = pvr_prepare_next_pds_const_map_entry(entry_write_state,
256
sizeof(*literal_entry));
257
literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
258
literal_entry->const_offset = const32;
259
literal_entry->literal_value = destination_store;
261
if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
262
literal_entry->literal_value |=
263
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED;
266
literal_entry->literal_value |=
267
destination << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT;
269
if (data_mask == 0x1) {
270
literal_entry->literal_value |=
271
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER;
272
} else if (data_mask == 0x2) {
273
literal_entry->literal_value |=
274
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER;
276
literal_entry->literal_value |=
277
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64;
281
literal_entry->literal_value |=
282
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
288
/* Constant and Temporary register allocation
 * - reserve space for a 32-bit register or a 64-bit register
 * - returned indices are offsets to 32-bit register locations
 * - 64-bit registers need to be aligned to even indices.
 */
#define RESERVE_32BIT 1U
#define RESERVE_64BIT 2U

/* Allocation wrappers. The label argument is only consumed by the TRACE_PDS
 * logging helpers, so non-trace builds pass NULL instead of the string.
 */
#if defined(TRACE_PDS)
#   define pvr_find_constant(usage, words, name) \
      pvr_find_constant2(usage, words, name)
#   define pvr_get_temps(usage, words, name) pvr_get_temps2(usage, words, name)
#else
/* No trailing ';' here, so the wrappers remain usable inside expressions. */
#   define pvr_find_constant(usage, words, name) \
      pvr_find_constant2(usage, words, NULL)
#   define pvr_get_temps(usage, words, name) pvr_get_temps2(usage, words, NULL)
#endif
307
pvr_find_constant2(uint8_t *const_usage, uint8_t words, const char *const_name)
309
uint32_t const_index = ~0U;
310
uint32_t step = words;
311
uint8_t mask = (1 << words) - 1;
313
assert(words == 1 || words == 2);
315
/* Find a register at 'step' alignment that satisfies the mask. */
316
for (uint32_t i = 0; i < PVR_MAX_VERTEX_ATTRIB_DMAS; i++) {
317
for (uint32_t b = 0; b < PVR_PDS_DDMAD_NUM_CONSTS; b += step) {
318
if ((const_usage[i] & (mask << b)) != 0)
320
const_usage[i] |= (mask << b);
321
const_index = i * 8 + b;
322
pvr_debug_pds_const(const_index, words * 32, const_name);
327
unreachable("Unexpected: Space cannot be found for constant");
331
/* Number of temp slots scanned by the temp allocator. */
#define PVR_MAX_PDS_TEMPS 32

/* Bookkeeping for PDS temp register allocation. */
struct pvr_temp_usage {
   /* Bitmask of allocated temps; bit i set means temp i is taken. */
   uint32_t temp_usage;
   /* Running total of temps handed out. */
   uint8_t temp_used;
   /* Index of the highest allocated temp, plus one. */
   uint8_t temps_needed;
};

/* Sentinel meaning "no temp allocated".
 * NOTE(review): `~0` is an expression, not a plain integer constant, so what
 * UINT8_C() expands to here depends on the C library - confirm the intended
 * value before comparing it against a uint8_t.
 */
#define PVR_INVALID_TEMP UINT8_C(~0)
340
static uint8_t pvr_get_temps2(struct pvr_temp_usage *temps,
341
uint8_t temps_needed,
342
const char *temp_name)
344
uint8_t step = temps_needed;
345
uint8_t mask = (1 << temps_needed) - 1;
347
assert(temps_needed == 1 || temps_needed == 2);
348
assert(temps->temp_used + temps_needed <= PVR_MAX_PDS_TEMPS);
350
for (uint8_t i = 0; i < PVR_MAX_PDS_TEMPS; i += step) {
351
if ((temps->temp_usage & (mask << i)) != 0)
354
const size_t clzBits = 8 * sizeof(unsigned int);
356
temps->temp_usage |= (mask << i);
357
temps->temp_used += temps_needed;
358
temps->temps_needed =
359
clzBits - __builtin_clz((unsigned int)temps->temp_usage);
361
pvr_debug_pds_temp(i, temps_needed * 32, temp_name);
366
unreachable("Unexpected: Space cannot be found for temps");
367
return PVR_INVALID_TEMP;
371
* Wrapper macro to add a toggle for "data mode", allowing us to calculate the
372
* size of a PDS program without actually attempting to store it.
374
* \param dest The array/memory pointer where the PDS program should be stored.
375
* If the given code is NULL, automatically switch to count mode
376
* instead of attempting to fill in unallocated memory.
377
* \param counter The local counter that holds the total instruction count.
378
* \param statement What function call/value should be stored at dest[counter]
379
* when condition is false.
382
#define PVR_PDS_MODE_TOGGLE(dest, counter, statement) \
386
dest[counter++] = statement; \
387
PVR_PDS_PRINT_INST(statement); \
391
* Generates the PDS vertex primary program for the dma's listed in the input
392
* structure. Produces the constant map for the Vulkan driver based upon the
393
* requirements of the instructions added to the program.
398
* The PDS data is optimized for the DDMAD layout, with the data for those
399
* instructions laid out first. The data required for other instructions is laid
400
* out in the entries unused by the DDMADs.
404
* bank | index | usage
405
* 0 | 0:1 | temps (current index)[-]
406
* 2 | 2:3 | stride[32]
407
* 1 | 4:5 | base address[64]
411
* Each DMA whose stride > 0 requires one entry, laid out as above. We stride
412
* over the banks to ensure that each ddmad reads each of its operands from a
413
* different bank (i.e. remove bank clashes)
415
* Note: This is "wasting" const[0:1] and const[2], however these free
416
* registers will be used by other, non-ddmad instructions.
418
* The const register usage is maintained in the const_usage array, the
419
* DDMAD instructions, for example, will utilize the top 5 registers in each
420
* block of 8 hence a 'usage mask' of 0xF8 (0b11111000).
425
* The constant map is built up as we add PDS instructions and passed back
426
* for the driver to fill in the PDS data section with the correct parameters
427
* for each draw call.
429
* \param input_program PDS Program description.
430
* \param code Buffer to be filled in with the PDS program. If NULL is provided,
431
* automatically switch to count-mode, preventing writes to
432
* unallocated memory.
433
* \param info PDS info structure filled in for the driver, contains the
435
* \param use_robust_vertex_fetch Do vertex fetches apply range checking.
436
* \param dev_info pvr device information struct.
438
void pvr_pds_generate_vertex_primary_program(
439
struct pvr_pds_vertex_primary_program_input *input_program,
441
struct pvr_pds_info *info,
442
bool use_robust_vertex_fetch,
443
const struct pvr_device_info *dev_info)
445
struct pvr_pds_const_map_entry_write_state entry_write_state;
446
struct pvr_const_map_entry_doutu_address *doutu_address_entry;
448
uint32_t instruction = 0; /* index into code */
449
uint32_t index; /* index used for current attribute, either vertex or
453
uint32_t total_dma_count = 0;
454
uint32_t running_dma_count = 0;
456
uint32_t write_instance_control = ~0;
457
uint32_t write_vertex_control = ~0;
458
uint32_t write_base_instance_control = ~0;
459
uint32_t write_base_vertex_control = ~0;
460
uint32_t pvr_write_draw_index_control = ~0;
462
uint32_t ddmad_count = 0;
463
uint32_t doutw_count = 0;
465
uint32_t base_instance = 0;
466
uint32_t base_vertex = 0;
467
uint32_t draw_index = 0;
469
uint8_t const_usage[PVR_MAX_VERTEX_ATTRIB_DMAS] = { 0 };
471
struct pvr_temp_usage temp_usage = { 0 };
473
uint32_t zero_temp = PVR_INVALID_TEMP;
475
uint32_t max_index_temp = PVR_INVALID_TEMP;
476
uint32_t current_index_temp = PVR_INVALID_TEMP;
478
uint32_t index_id_temp = PVR_INVALID_TEMP;
479
uint32_t base_instance_ID_temp = PVR_INVALID_TEMP;
480
uint32_t instance_ID_temp = PVR_INVALID_TEMP;
482
/* Debug tracing of program flags. */
483
pvr_debug("pvr_pds_generate_vertex_primary_program");
484
pvr_debug("=================================================");
485
pvr_debug_pds_flag(input_program->flags,
486
PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED);
487
pvr_debug_pds_flag(input_program->flags,
488
PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED);
489
pvr_debug_pds_flag(input_program->flags,
490
PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT);
491
pvr_debug_pds_flag(input_program->flags,
492
PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT);
493
pvr_debug_pds_flag(input_program->flags,
494
PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED);
495
pvr_debug_pds_flag(input_program->flags,
496
PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED);
497
pvr_debug_pds_flag(input_program->flags,
498
PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED);
501
pvr_init_pds_const_map_entry_write_state(info, &entry_write_state);
503
/* At a minimum we need 2 dwords for the DOUTU, but since we allocate in
504
* blocks of 4 we can reserve dwords for the instance/vertex DOUTW.
506
info->data_size_in_dwords = 4;
508
/* Reserve 2 temps - these are automatically filled in by the VDM
510
* For instanced draw calls we manually increment the instance id by the
511
* base-instance offset which is either provided as a constant, or in a
512
* ptemp (for draw indirect)
516
* 0 - index id (pre-filled)
517
* 1 - base instance + instance id
519
index_id_temp = pvr_get_temps(&temp_usage, RESERVE_32BIT, "VDM Index id");
521
pvr_get_temps(&temp_usage, RESERVE_32BIT, "VDM Instance id");
523
/* Reserve the lowest 2 dwords for DOUTU.
526
const_usage[0] = 0x03;
528
/* Reserve consts for all the DDMAD's. */
529
for (uint32_t dma = 0; dma < input_program->dma_count; dma++) {
530
/* Mark the consts required by this ddmad "in-use".
533
const_usage[ddmad_count++] |= 0xf8;
536
/* Start off by assuming we can fit everything in the 8 dwords/ddmad
537
* footprint, if any DOUTD/DOUTW falls outside we will increase this
541
info->data_size_in_dwords = PVR_PDS_DDMAD_NUM_CONSTS * ddmad_count;
543
if (input_program->flags & PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED) {
545
write_vertex_control =
546
pvr_find_constant(const_usage, RESERVE_32BIT, "Vertex id DOUTW Ctrl");
549
if (input_program->flags & PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED) {
551
write_instance_control = pvr_find_constant(const_usage,
553
"Instance id DOUTW Ctrl");
556
if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
558
write_base_instance_control =
559
pvr_find_constant(const_usage,
561
"Base Instance DOUTW Ctrl");
564
if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED) {
566
write_base_vertex_control = pvr_find_constant(const_usage,
568
"Base Vertex DOUTW Ctrl");
570
/* Load base vertex from constant for non-indirect variants. */
571
if ((input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) ==
573
struct pvr_const_map_entry_base_vertex *psBaseVertexEntry =
574
(struct pvr_const_map_entry_base_vertex *)entry_write_state.entry;
577
pvr_find_constant(const_usage, RESERVE_32BIT, "base_vertex");
580
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
581
sizeof(*psBaseVertexEntry));
582
psBaseVertexEntry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_VERTEX;
583
psBaseVertexEntry->const_offset = base_vertex;
587
if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED) {
589
pvr_write_draw_index_control =
590
pvr_find_constant(const_usage, RESERVE_32BIT, "Draw Index DOUTW Ctrl");
592
/* Set draw index to 0 for non-indirect variants. */
593
if ((input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) ==
595
struct pvr_const_map_entry_literal32 *literal_entry;
598
pvr_find_constant(const_usage, RESERVE_32BIT, "draw_index");
601
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
602
sizeof(*literal_entry));
603
literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
604
literal_entry->const_offset = draw_index;
605
literal_entry->literal_value = 0;
609
if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
610
/* Load absolute instance id into instance_ID_temp. */
614
pvr_pds_inst_encode_add32(
618
/* src0 */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
619
/* src1 */ R32_T(instance_ID_temp),
620
/* dst */ R32TP_T(instance_ID_temp)));
621
} else if (input_program->flags &
622
PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
623
struct pvr_const_map_entry_base_instance *base_instance_entry =
624
(struct pvr_const_map_entry_base_instance *)entry_write_state.entry;
627
pvr_find_constant(const_usage, RESERVE_32BIT, "base_instance");
629
PVR_PDS_MODE_TOGGLE(code,
631
pvr_pds_inst_encode_add32(
635
/* src0 */ R32_C(base_instance),
636
/* src1 */ R32_T(instance_ID_temp),
637
/* dst */ R32TP_T(instance_ID_temp)));
639
base_instance_entry =
640
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
641
sizeof(*base_instance_entry));
642
base_instance_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE;
643
base_instance_entry->const_offset = base_instance;
644
} else if (input_program->flags &
645
PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
646
struct pvr_const_map_entry_base_instance *base_instance_entry =
647
(struct pvr_const_map_entry_base_instance *)entry_write_state.entry;
649
base_instance = pvr_find_constant(const_usage,
651
"base_instance (Driver Const)");
653
/* Base instance provided by the driver. */
654
base_instance_entry =
655
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
656
sizeof(*base_instance_entry));
657
base_instance_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE;
658
base_instance_entry->const_offset = base_instance;
661
total_dma_count = ddmad_count;
663
total_dma_count += doutw_count;
665
if (use_robust_vertex_fetch) {
666
pvr_debug_pds_note("RobustBufferVertexFetch Initialization");
668
if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
669
zero_temp = pvr_get_temps(&temp_usage, RESERVE_32BIT, "zero_temp");
671
/* Load 0 into zero_temp. */
672
PVR_PDS_MODE_TOGGLE(code,
674
pvr_pds_inst_encode_limm(0, /* cc */
675
zero_temp, /* SRC1 */
680
zero_temp = pvr_get_temps(&temp_usage, RESERVE_64BIT, "zero_temp");
683
pvr_get_temps(&temp_usage, RESERVE_64BIT, "uMaxIndex");
685
pvr_get_temps(&temp_usage, RESERVE_64BIT, "uCurrentIndex");
687
PVR_PDS_MODE_TOGGLE(code,
689
pvr_pds_inst_encode_stflp64(
691
PVR_ROGUE_PDSINST_LOP_XOR, /* LOP */
693
R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
695
R64TP_T(zero_temp >> 1), /* SRC1 (REGS64TP)
697
0, /* SRC2 (REGS32) */
698
R64TP_T(zero_temp >> 1) /* DST (REG64TP) */
700
PVR_PDS_MODE_TOGGLE(code,
702
pvr_pds_inst_encode_stflp64(
704
PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
706
R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
708
0, /* SRC1 (REGS64TP) */
709
0, /* SRC2 (REGS32) */
710
R64TP_T(current_index_temp >> 1) /* DST */
713
PVR_PDS_MODE_TOGGLE(code,
715
pvr_pds_inst_encode_stflp64(
717
PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
719
R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
721
0, /* SRC1 (REGS64TP) */
722
0, /* SRC2 (REGS32) */
723
R64TP_T(max_index_temp >> 1) /* DST */
729
if (input_program->dma_count && use_robust_vertex_fetch) {
733
pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCC */
735
PVR_HAS_FEATURE(dev_info, pds_ddmadt)
736
? PVR_ROGUE_PDSINST_PREDICATE_OOB
737
: PVR_ROGUE_PDSINST_PREDICATE_P0, /* SETC */
742
for (uint32_t dma = 0; dma < input_program->dma_count; dma++) {
743
uint32_t const_base = dma * PVR_PDS_DDMAD_NUM_CONSTS;
744
uint32_t control_word;
745
struct pvr_const_map_entry_literal32 *literal_entry;
747
struct pvr_pds_vertex_dma *vertex_dma = &input_program->dma_list[dma];
748
bool last_DMA = (++running_dma_count == total_dma_count);
750
pvr_debug_pds_note("Vertex Attribute DMA %d (last=%d)", dma, last_DMA);
752
/* The id we use to index into this dma. */
753
if (vertex_dma->flags & PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE) {
754
pvr_debug_pds_note("Instance Rate (divisor = %d)",
755
vertex_dma->divisor);
757
/* 4 - madd 0 - needs to be 64-bit aligned
760
if (vertex_dma->divisor > 1) {
761
const uint32_t adjusted_instance_ID_temp =
762
pvr_get_temps(&temp_usage,
764
"adjusted_instance_ID_temp");
765
const uint32_t MADD_temp =
766
pvr_get_temps(&temp_usage, RESERVE_64BIT, "MADD_temp");
768
/* 1. Remove base instance value from temp 1 to get instance id
769
* 2. Divide the instance id by the divisor - Iout = (Iin *
770
* Multiplier) >> (shift+31)
771
* 3. Add the base instance back on.
773
* Need two zero temps for the add part of the later MAD.
776
PVR_PDS_MODE_TOGGLE(code,
778
pvr_pds_inst_encode_add64(
782
/* src0 */ R64_T(MADD_temp >> 1),
783
/* src1 */ R64_T(MADD_temp >> 1),
784
/* dst */ R64TP_T(MADD_temp >> 1)));
786
if (input_program->flags &
787
PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
788
/* Subtract base instance from temp 1, put into
789
* adjusted_instance_ID_temp.
794
pvr_pds_inst_encode_add32(
798
/* src0 */ R32_T(instance_ID_temp),
799
/* src1 */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
800
/* dst */ R32TP_T(adjusted_instance_ID_temp)));
801
} else if (input_program->flags &
802
PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
803
/* Subtract base instance from temp 1, put into
804
* adjusted_instance_ID_temp.
809
pvr_pds_inst_encode_add32(
813
/* src0 */ R32_T(instance_ID_temp),
814
/* src1 */ R32_C(base_instance),
815
/* dst */ R32TP_T(adjusted_instance_ID_temp)));
817
/* Copy instance from temp 1 to adjusted_instance_ID_temp.
822
pvr_pds_inst_encode_add32(
826
/* src0 */ R32_T(instance_ID_temp),
827
/* src1 */ R32_T(MADD_temp), /* MADD_temp is set
828
* to 0 at this point.
830
/* dst */ R32TP_T(adjusted_instance_ID_temp)));
833
/* shift = the bit of the next highest power of two. */
834
uint32_t shift_unsigned =
835
(31 - __builtin_clz(vertex_dma->divisor - 1)) + 1;
836
int32_t shift = (int32_t)shift_unsigned;
837
uint32_t shift_2s_comp;
840
"Perform instance rate divide (as integer multiply and rshift)");
842
const uint32_t multipier_constant =
843
pvr_find_constant(const_usage,
845
"MultiplierConstant (for InstanceDivisor)");
847
/* multiplier = ( 2^(shift + 31) + (divisor - 1) ) / divisor,
848
note: the division above is integer division. */
849
uint64_t multipier64 =
850
(uint64_t)((((uint64_t)1 << ((uint64_t)shift_unsigned + 31)) +
851
((uint64_t)vertex_dma->divisor - (uint64_t)1)) /
852
(uint64_t)vertex_dma->divisor);
853
uint32_t multiplier = (uint32_t)multipier64;
855
pvr_debug_pds_note(" - Value of MultiplierConstant = %u",
857
pvr_debug_pds_note(" - Value of Shift = %d", shift);
860
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
861
sizeof(*literal_entry));
862
literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
863
literal_entry->const_offset = multipier_constant;
864
literal_entry->literal_value = multiplier;
866
/* (Iin * Multiplier) */
870
pvr_rogue_inst_encode_mad(0, /* Sign of add is positive */
871
0, /* Unsigned ALU mode */
872
0, /* Unconditional */
873
R32_C(multipier_constant),
874
R32_T(adjusted_instance_ID_temp),
875
R64_T(MADD_temp / 2),
876
R64TP_T(MADD_temp / 2)));
878
/* >> (shift + 31) */
884
shift_2s_comp = 0xFFFE1;
885
PVR_PDS_MODE_TOGGLE(code,
887
pvr_pds_inst_encode_stflp64(
889
/* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
890
/* IM */ 1, /* enable immediate */
891
/* SRC0 */ R64_T(MADD_temp / 2),
892
/* SRC1 */ 0, /* This won't be used
895
/* SRC2 (Shift) */ shift_2s_comp,
896
/* DST */ R64TP_T(MADD_temp / 2)));
900
/* >> (shift + 31) */
901
shift_2s_comp = *((uint32_t *)&shift);
902
PVR_PDS_MODE_TOGGLE(code,
904
pvr_pds_inst_encode_stflp64(
906
/* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
907
/* IM */ 1, /* enable immediate */
908
/* SRC0 */ R64_T(MADD_temp / 2),
909
/* SRC1 */ 0, /* This won't be used
912
/* SRC2 (Shift) */ shift_2s_comp,
913
/* DST */ R64TP_T(MADD_temp / 2)));
915
if (input_program->flags &
916
PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
917
/* Add base instance. */
921
pvr_pds_inst_encode_add32(
925
/* src0 */ R32_T(MADD_temp),
926
/* src1 */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
927
/* dst */ R32TP_T(MADD_temp)));
928
} else if (input_program->flags &
929
PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
930
/* Add base instance. */
931
PVR_PDS_MODE_TOGGLE(code,
933
pvr_pds_inst_encode_add32(
937
/* src0 */ R32_T(MADD_temp),
938
/* src1 */ R32_C(base_instance),
939
/* dst */ R32TP_T(MADD_temp)));
943
"DMA Vertex Index will be sourced from 'MADD_temp'");
945
} else if (vertex_dma->divisor == 0) {
946
if (base_instance_ID_temp == PVR_INVALID_TEMP) {
947
base_instance_ID_temp = pvr_get_temps(&temp_usage,
949
"uBaseInstanceIDTemp");
952
/* Load 0 into base_instance_ID_temp. */
953
PVR_PDS_MODE_TOGGLE(code,
955
pvr_pds_inst_encode_limm(
957
/* src1 */ base_instance_ID_temp,
961
if (input_program->flags &
962
PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
963
/* Add base instance. */
967
pvr_pds_inst_encode_add32(
971
/* src0 */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
972
/* src1 */ R32_T(base_instance_ID_temp),
973
/* dst */ R32TP_T(base_instance_ID_temp)));
975
} else if (input_program->flags &
976
PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
977
/* Add base instance. */
981
pvr_pds_inst_encode_add32(
985
/* src0 */ R32_C(base_instance),
986
/* src1 */ R32_T(base_instance_ID_temp),
987
/* dst */ R32TP_T(base_instance_ID_temp)));
991
"DMA Vertex Index will be sourced from 'uBaseInstanceIdTemp'");
992
index = base_instance_ID_temp;
995
"DMA Vertex Index will be sourced from 'uInstanceIdTemp'");
996
index = instance_ID_temp;
1000
"DMA Vertex Index will be sourced from 'uIndexIdTemp'");
1001
index = index_id_temp;
1004
/* DDMAD Const Usage [__XX_---] */
1005
pvr_write_pds_const_map_entry_vertex_attribute_address(
1009
use_robust_vertex_fetch);
1011
/* DDMAD Const Usage [__XXX---] */
1013
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1014
sizeof(*literal_entry));
1015
literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1016
literal_entry->const_offset = const_base + 3;
1017
literal_entry->literal_value = vertex_dma->stride;
1019
control_word = vertex_dma->size_in_dwords
1020
<< PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1021
control_word |= vertex_dma->destination
1022
<< PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
1023
control_word |= (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1024
PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED);
1026
/* DDMADT instructions will do a dummy doutd when OOB if
1027
* PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN is set but as the driver
1028
* would need to do another doutd after an OOB DDMADT to provide the 'in
1029
* bounds' data the DDMADT can't be set as LAST.
1031
* This requires us to include a final dummy DDMAD.LAST instruction.
1033
* Pseudocode taken from SeriesXE2017.PDS Instruction Controller
1036
* DDMAD src0,src1,src2,src3
1038
* calculated_source_address := src0*src1+src2
1039
* base_address := src2
1040
* dma_parameters := src3[31:0]
1041
* buffer_size := src3[63:33]
1046
* if (calculated_source_address[39:0] + (burst_size<<2) <=
1047
* base_address[39:0] + buffer_size) {
1049
* DOUTD calculated_source_address,dma_parameters
1052
* if (last_instance == 1) {
1053
* dma_parameters[BURST_SIZE] := 0
1054
* DOUTD calculated_source_address,dma_parameters
1059
* DOUTD calculated_source_address,dma_parameters
1063
if (last_DMA && (!PVR_HAS_FEATURE(dev_info, pds_ddmadt) ||
1064
!use_robust_vertex_fetch)) {
1065
pvr_debug_pds_note("LAST DDMAD");
1066
control_word |= PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1069
/* DDMAD Const Usage [_XXXX---] */
1071
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1072
sizeof(*literal_entry));
1073
literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1074
literal_entry->const_offset = (const_base + 6);
1075
literal_entry->literal_value = control_word;
1077
if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1078
/* DDMAD Const Usage [XXXXX---]
1079
* With DDMADT an extra 32bits of SRC3 contains the information for
1080
* performing out-of-bounds tests on the DMA.
1083
if (use_robust_vertex_fetch) {
1084
struct pvr_pds_const_map_entry_vertex_attr_ddmadt_oob_buffer_size
1087
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1088
sizeof(*obb_buffer_size));
1090
obb_buffer_size->type =
1091
PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTR_DDMADT_OOB_BUFFER_SIZE;
1092
obb_buffer_size->const_offset = const_base + 7;
1093
obb_buffer_size->binding_index = vertex_dma->binding_index;
1096
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1097
sizeof(*literal_entry));
1098
literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1099
literal_entry->const_offset = const_base + 7;
1100
literal_entry->literal_value = 0;
1103
PVR_PDS_MODE_TOGGLE(
1106
pvr_pds_inst_encode_ddmad(0, /* cc */
1108
R32_C(const_base + 3), /* SRC0 (REGS32) */
1109
index, /* SRC1 (REGS32T) */
1110
R64_C((const_base + 4) >> 1), /* SRC2
1113
R64_C((const_base + 6) >> 1) /* SRC3
1118
if (use_robust_vertex_fetch) {
1119
/* If not out of bounds, skip next DDMAD instructions. */
1120
PVR_PDS_MODE_TOGGLE(code,
1122
pvr_pds_inst_encode_ddmad(
1125
R32_C(const_base + 3), /* SRC0 (REGS32) */
1126
R32_T(zero_temp), /* SRC1 (REGS32T) */
1127
R64_C((const_base + 4) >> 1), /* SRC2
1130
R64_C((const_base + 6) >> 1) /* SRC3
1135
/* Now the driver must have a dummy DDMAD marked as last. */
1137
uint32_t dummy_dma_const = pvr_find_constant(const_usage,
1140
uint32_t zero_const =
1141
pvr_find_constant(const_usage, RESERVE_64BIT, "uZeroConst");
1144
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1145
sizeof(*literal_entry));
1146
literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1147
literal_entry->const_offset = zero_const;
1148
literal_entry->literal_value = 0;
1151
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1152
sizeof(*literal_entry));
1153
literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1154
literal_entry->const_offset = zero_const + 1;
1155
literal_entry->literal_value = 0;
1158
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1159
sizeof(*literal_entry));
1160
literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1161
literal_entry->const_offset = dummy_dma_const;
1162
literal_entry->literal_value = 0;
1164
literal_entry->literal_value |=
1165
0 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1166
literal_entry->literal_value |=
1167
(PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1168
PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED);
1169
literal_entry->literal_value |=
1170
PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1173
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1174
sizeof(*literal_entry));
1175
literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1176
literal_entry->const_offset = dummy_dma_const + 1;
1177
literal_entry->literal_value = 0;
1179
PVR_PDS_MODE_TOGGLE(code,
1181
pvr_pds_inst_encode_ddmad(
1184
R32_C(zero_const), /* SRC0 (REGS32)
1186
R32_T(zero_temp), /* SRC1 (REGS32T)
1188
R64_C((dummy_dma_const) >> 1), /* SRC2
1191
R64_C((dummy_dma_const) >> 1) /* SRC3
1198
if (use_robust_vertex_fetch) {
1199
struct pvr_const_map_entry_vertex_attribute_max_index
1202
pvr_debug("RobustVertexFetch DDMAD");
1204
const uint32_t max_index_const =
1205
pvr_find_constant(const_usage, RESERVE_32BIT, "max_index_const");
1208
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1209
sizeof(*max_index_entry));
1210
max_index_entry->const_offset = max_index_const;
1211
max_index_entry->type =
1212
PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_MAX_INDEX;
1213
max_index_entry->binding_index = vertex_dma->binding_index;
1214
max_index_entry->offset = vertex_dma->offset;
1215
max_index_entry->stride = vertex_dma->stride;
1216
max_index_entry->size_in_dwords = vertex_dma->size_in_dwords;
1217
max_index_entry->component_size_in_bytes =
1218
vertex_dma->component_size_in_bytes;
1220
PVR_PDS_MODE_TOGGLE(
1223
pvr_pds_inst_encode_add32(0, /* cc */
1225
PVR_ROGUE_PDSINST_LOP_NONE, /* SNA */
1226
R32_C(max_index_const), /* SRC0
1229
R32_T(zero_temp), /* SRC1 (REGS32) */
1230
R32TP_T(max_index_temp) /* DST
1235
PVR_PDS_MODE_TOGGLE(code,
1237
pvr_pds_inst_encode_stflp32(
1240
PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
1241
index, /* SRC0 (REGS32T) */
1242
0, /* SRC1 (REGS32) */
1243
0, /* SRC2 (REG32TP) */
1244
R32TP_T(current_index_temp) /* DST
1249
PVR_PDS_MODE_TOGGLE(
1252
pvr_pds_inst_encode_cmp(
1254
PVR_ROGUE_PDSINST_COP_GT, /* Operation */
1255
R64TP_T(current_index_temp >> 1), /* SRC
1258
R64_T(max_index_temp >> 1) /* SRC1 (REGS64) */
1261
PVR_PDS_MODE_TOGGLE(code,
1263
pvr_pds_inst_encode_stflp32(
1266
PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
1267
zero_temp, /* SRC0 (REGS32T) */
1268
0, /* SRC1 (REGS32) */
1269
0, /* SRC2 (REG32TP) */
1270
R32TP_T(current_index_temp) /* DST
1275
PVR_PDS_MODE_TOGGLE(code,
1277
pvr_pds_inst_encode_ddmad(
1280
R32_C(const_base + 3), /* SRC0 (REGS32) */
1281
current_index_temp, /* SRC1 (REGS32T) */
1282
R64_C((const_base + 4) >> 1), /* SRC2
1285
(const_base + 6) >> 1 /* SRC3 (REGS64C) */
1288
PVR_PDS_MODE_TOGGLE(code,
1290
pvr_pds_inst_encode_ddmad(
1293
/* src0 */ R32_C(const_base + 3),
1295
/* src1 */ R64_C((const_base + 4) >> 1),
1296
/* src3 */ (const_base + 6) >> 1));
1301
if (input_program->flags & PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED) {
1302
bool last_DMA = (++running_dma_count == total_dma_count);
1304
PVR_PDS_MODE_TOGGLE(
1307
pvr_encode_direct_write(
1311
R64_C(write_vertex_control),
1314
input_program->vertex_id_register,
1315
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1319
if (input_program->flags & PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED) {
1320
bool last_DMA = (++running_dma_count == total_dma_count);
1322
PVR_PDS_MODE_TOGGLE(
1325
pvr_encode_direct_write(
1329
R64_C(write_instance_control),
1332
input_program->instance_id_register,
1333
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1337
if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
1338
bool last_DMA = (++running_dma_count == total_dma_count);
1340
if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
1341
/* Base instance comes from ptemp 1. */
1342
PVR_PDS_MODE_TOGGLE(
1345
pvr_encode_direct_write(
1349
R64_C(write_base_instance_control),
1350
R64_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP >> 1),
1352
input_program->base_instance_register,
1353
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1356
uint32_t data_mask = (base_instance & 1) ? 0x2 : 0x1;
1358
/* Base instance comes from driver constant. */
1359
PVR_PDS_MODE_TOGGLE(
1362
pvr_encode_direct_write(
1366
R64_C(write_base_instance_control),
1367
R64_C(base_instance >> 1),
1369
input_program->base_instance_register,
1370
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1375
if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED) {
1376
bool last_DMA = (++running_dma_count == total_dma_count);
1378
if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
1379
/* Base vertex comes from ptemp 0 (initialized by PDS hardware). */
1380
PVR_PDS_MODE_TOGGLE(
1383
pvr_encode_direct_write(
1387
R64_C(write_base_vertex_control),
1390
input_program->base_vertex_register,
1391
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1394
uint32_t data_mask = (base_vertex & 1) ? 0x2 : 0x1;
1396
/* Base vertex comes from driver constant (literal 0). */
1397
PVR_PDS_MODE_TOGGLE(
1400
pvr_encode_direct_write(
1404
R64_C(write_base_vertex_control),
1405
R64_C(base_vertex >> 1),
1407
input_program->base_vertex_register,
1408
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1413
if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED) {
1414
bool last_DMA = (++running_dma_count == total_dma_count);
1416
if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
1417
/* Draw index comes from ptemp 3. */
1418
PVR_PDS_MODE_TOGGLE(
1421
pvr_encode_direct_write(
1425
R64_C(pvr_write_draw_index_control),
1428
input_program->draw_index_register,
1429
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1432
uint32_t data_mask = (draw_index & 1) ? 0x2 : 0x1;
1434
/* Draw index comes from constant (literal 0). */
1435
PVR_PDS_MODE_TOGGLE(
1438
pvr_encode_direct_write(
1442
R64_C(pvr_write_draw_index_control),
1443
R64_C(draw_index >> 1),
1445
input_program->draw_index_register,
1446
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1451
doutu_address_entry =
1452
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1453
sizeof(*doutu_address_entry));
1454
doutu_address_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS;
1455
doutu_address_entry->const_offset = 0;
1456
doutu_address_entry->doutu_control = input_program->usc_task_control.src0;
1458
if (use_robust_vertex_fetch) {
1460
PVR_PDS_MODE_TOGGLE(
1463
pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC */
1465
PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETCC */
1470
PVR_PDS_MODE_TOGGLE(code, instruction, pvr_pds_encode_doutu(1, 1, 0));
1471
PVR_PDS_MODE_TOGGLE(code, instruction, pvr_pds_inst_encode_halt(0));
1473
assert(running_dma_count == total_dma_count);
1475
for (uint32_t i = 0; i < ARRAY_SIZE(const_usage); i++) {
1476
if (const_usage[i] == 0)
1479
info->data_size_in_dwords =
1480
8 * i + (32 - __builtin_clz((uint32_t)const_usage[i]));
1483
info->temps_required = temp_usage.temps_needed;
1484
info->entry_count = entry_write_state.entry_count;
1485
info->entries_written_size_in_bytes =
1486
entry_write_state.entries_size_in_bytes;
1487
info->code_size_in_dwords = instruction;
1489
pvr_debug("=================================================\n");
1492
/* Generates the PDS descriptor upload program together with its const map.
 *
 * For every descriptor set and every buffer described by input_program, a
 * const map entry is appended through entry_write_state and an instruction
 * built by pvr_encode_burst_cs() is handed to PVR_PDS_MODE_TOGGLE() along with
 * code_section. When input_program->secondary_program_present is set, a DOUTU
 * instruction and a DOUTU_ADDRESS const map entry are added afterwards.
 *
 * input_program: descriptor sets, buffers and flags to generate for (only
 *                read here).
 * code_section:  passed to PVR_PDS_MODE_TOGGLE() as the code destination.
 * info:          receives data_size_in_dwords, entry_count,
 *                entries_written_size_in_bytes and code_size_in_dwords; it is
 *                also used to initialise entry_write_state.
 */
void pvr_pds_generate_descriptor_upload_program(
1493
struct pvr_descriptor_program_input *input_program,
1494
uint32_t *code_section,
1495
struct pvr_pds_info *info)
1497
unsigned int num_consts64;
1498
unsigned int num_consts32;
1499
unsigned int next_const64;
1500
unsigned int next_const32;
1501
unsigned int instruction = 0;
1502
uint32_t compile_time_buffer_index = 0;
1504
unsigned int total_dma_count = 0;
1505
unsigned int running_dma_count = 0;
1507
struct pvr_pds_const_map_entry_write_state entry_write_state;
1509
/* Calculate the total register usage so we can stick 32-bit consts
1510
* after 64. Each DOUTD/DDMAD requires 1 32-bit constant and 1 64-bit
1513
num_consts32 = input_program->descriptor_set_count;
1514
num_consts64 = input_program->descriptor_set_count;
1515
total_dma_count = input_program->descriptor_set_count;
1517
pvr_init_pds_const_map_entry_write_state(info, &entry_write_state);
1519
for (unsigned int index = 0; index < input_program->buffer_count; index++) {
1520
struct pvr_pds_buffer *buffer = &input_program->buffers[index];
1522
/* This switch statement looks pointless but we want to optimize DMAs
1523
* that can be done as a DOUTW.
1525
switch (buffer->type) {
1527
/* 1 DOUTD per compile time buffer: */
1536
/* DOUTU for the secondary update program requires a 64-bit constant. */
1537
if (input_program->secondary_program_present)
1540
/* Total data size in dwords: each 64-bit constant occupies two dwords. */
info->data_size_in_dwords = (num_consts64 * 2) + (num_consts32);
1542
/* Start counting constants. */
1544
/* 32-bit constants are placed directly after all of the 64-bit ones. */
next_const32 = num_consts64 * 2;
1546
/* NOTE(review): next_const64 is read below but its first assignment is not
 * visible in this excerpt - confirm it is initialised before this loop.
 */
/* For each descriptor set perform a DOUTD. */
1547
for (unsigned int descriptor_index = 0;
1548
descriptor_index < input_program->descriptor_set_count;
1549
descriptor_index++) {
1550
struct pvr_const_map_entry_descriptor_set *descriptor_set_entry;
1551
struct pvr_pds_descriptor_set *descriptor_set =
1552
&input_program->descriptor_sets[descriptor_index];
1554
/* halt is only true for the very last DMA, and only when no secondary
 * program is present.
 */
bool last_DMA = (++running_dma_count == total_dma_count);
1555
bool halt = last_DMA && !input_program->secondary_program_present;
1557
descriptor_set_entry =
1558
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1559
sizeof(*descriptor_set_entry));
1560
descriptor_set_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DESCRIPTOR_SET;
1561
/* const_offset is expressed in dwords, hence the 64-bit index times two. */
descriptor_set_entry->const_offset = next_const64 * 2;
1562
descriptor_set_entry->descriptor_set = descriptor_set->descriptor_set;
1563
descriptor_set_entry->primary = descriptor_set->primary;
1564
descriptor_set_entry->offset_in_dwords = descriptor_set->offset_in_dwords;
1566
PVR_PDS_MODE_TOGGLE(code_section,
1568
pvr_encode_burst_cs(&entry_write_state,
1573
descriptor_set->size_in_dwords,
1574
descriptor_set->destination));
1580
/* Second pass over the buffers: append one const map entry per buffer, chosen
 * by buffer->type, then encode the transfer for it.
 */
for (unsigned int index = 0; index < input_program->buffer_count; index++) {
1581
struct pvr_pds_buffer *buffer = &input_program->buffers[index];
1583
bool last_DMA = (++running_dma_count == total_dma_count);
1584
bool halt = last_DMA && !input_program->secondary_program_present;
1586
switch (buffer->type) {
1587
case PVR_BUFFER_TYPE_PUSH_CONSTS: {
1588
struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1590
special_buffer_entry =
1591
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1592
sizeof(*special_buffer_entry));
1593
special_buffer_entry->type =
1594
PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1595
special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_PUSH_CONSTS;
1596
special_buffer_entry->buffer_index = buffer->source_offset;
1599
case PVR_BUFFER_TYPE_DYNAMIC: {
1600
struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1602
special_buffer_entry =
1603
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1604
sizeof(*special_buffer_entry));
1605
special_buffer_entry->type =
1606
PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1607
special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_DYNAMIC;
1608
special_buffer_entry->buffer_index = buffer->source_offset;
1611
case PVR_BUFFER_TYPES_COMPILE_TIME: {
1612
struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1614
special_buffer_entry =
1615
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1616
sizeof(*special_buffer_entry));
1617
special_buffer_entry->type =
1618
PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1619
special_buffer_entry->buffer_type = PVR_BUFFER_TYPES_COMPILE_TIME;
1620
/* Compile time buffers are numbered in the order they are encountered. */
special_buffer_entry->buffer_index = compile_time_buffer_index++;
1623
case PVR_BUFFER_TYPES_BUFFER_LENGTHS: {
1624
struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1626
special_buffer_entry =
1627
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1628
sizeof(*special_buffer_entry));
1629
special_buffer_entry->type =
1630
PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1631
special_buffer_entry->buffer_type = PVR_BUFFER_TYPES_BUFFER_LENGTHS;
1634
case PVR_BUFFER_TYPE_BLEND_CONSTS: {
1635
struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1637
special_buffer_entry =
1638
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1639
sizeof(*special_buffer_entry));
1640
special_buffer_entry->type =
1641
PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1642
special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_BLEND_CONSTS;
1643
/* For blend constants the index field carries the used-constants mask. */
special_buffer_entry->buffer_index =
1644
input_program->blend_constants_used_mask;
1647
case PVR_BUFFER_TYPE_UBO: {
1648
struct pvr_const_map_entry_constant_buffer *constant_buffer_entry;
1650
constant_buffer_entry = pvr_prepare_next_pds_const_map_entry(
1652
sizeof(*constant_buffer_entry));
1653
constant_buffer_entry->type =
1654
PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER;
1655
constant_buffer_entry->buffer_id = buffer->buffer_id;
1656
constant_buffer_entry->desc_set = buffer->desc_set;
1657
constant_buffer_entry->binding = buffer->binding;
1658
constant_buffer_entry->offset = buffer->source_offset;
1659
constant_buffer_entry->size_in_dwords = buffer->size_in_dwords;
1662
case PVR_BUFFER_TYPES_UBO_ZEROING: {
1663
struct pvr_const_map_entry_constant_buffer_zeroing
1664
*constant_buffer_entry;
1666
constant_buffer_entry = pvr_prepare_next_pds_const_map_entry(
1668
sizeof(*constant_buffer_entry));
1669
constant_buffer_entry->type =
1670
PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER_ZEROING;
1671
constant_buffer_entry->buffer_id = buffer->buffer_id;
1672
constant_buffer_entry->offset = buffer->source_offset;
1673
constant_buffer_entry->size_in_dwords = buffer->size_in_dwords;
1678
/* Whatever entry type was appended above, its const_offset is set here
 * through the write state's entry pointer.
 */
entry_write_state.entry->const_offset = next_const64 * 2;
1680
PVR_PDS_MODE_TOGGLE(code_section,
1682
pvr_encode_burst_cs(&entry_write_state,
1687
buffer->size_in_dwords,
1688
buffer->destination));
1694
/* running_dma_count was incremented once per descriptor set and once per
 * buffer. A mismatch with total_dma_count is only reported on stderr here.
 */
if (total_dma_count != running_dma_count)
1695
fprintf(stderr, "Mismatch in DMA count\n");
1697
if (input_program->secondary_program_present) {
1698
struct pvr_const_map_entry_doutu_address *doutu_address;
1700
PVR_PDS_MODE_TOGGLE(code_section,
1702
pvr_pds_encode_doutu(false, true, next_const64));
1705
pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1706
sizeof(*doutu_address));
1707
doutu_address->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS;
1708
doutu_address->const_offset = next_const64 * 2;
1709
doutu_address->doutu_control = input_program->secondary_task_control.src0;
1714
/* instruction is still 0 here only if nothing was emitted above; when the
 * caller sets must_not_be_empty, a halt instruction is emitted in that case.
 */
if (instruction == 0 && input_program->must_not_be_empty) {
1715
PVR_PDS_MODE_TOGGLE(code_section,
1717
pvr_pds_inst_encode_halt(
1721
/* Report the const map and code sizes back to the caller through info. */
info->entry_count = entry_write_state.entry_count;
1722
info->entries_written_size_in_bytes =
1723
entry_write_state.entries_size_in_bytes;
1724
info->code_size_in_dwords = instruction;