~mmach/netext73/mesa-haswell

« back to all changes in this revision

Viewing changes to src/imagination/vulkan/pds/pvr_xgl_pds.c

  • Committer: mmach
  • Date: 2022-09-22 19:56:13 UTC
  • Revision ID: netbit73@gmail.com-20220922195613-wtik9mmy20tmor0i
2022-09-22 21:17:09

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*
2
 
 * Copyright © 2022 Imagination Technologies Ltd.
3
 
 *
4
 
 * Permission is hereby granted, free of charge, to any person obtaining a copy
5
 
 * of this software and associated documentation files (the "Software"), to deal
6
 
 * in the Software without restriction, including without limitation the rights
7
 
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
 
 * copies of the Software, and to permit persons to whom the Software is
9
 
 * furnished to do so, subject to the following conditions:
10
 
 *
11
 
 * The above copyright notice and this permission notice (including the next
12
 
 * paragraph) shall be included in all copies or substantial portions of the
13
 
 * Software.
14
 
 *
15
 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
 
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
 
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
 
 * SOFTWARE.
22
 
 */
23
 
 
24
 
#include <assert.h>
25
 
#include <stdbool.h>
26
 
#include <stdint.h>
27
 
#include <stdio.h>
28
 
#include <string.h>
29
 
 
30
 
#include "pvr_device_info.h"
31
 
#include "pvr_pds.h"
32
 
#include "pvr_rogue_pds_defs.h"
33
 
#include "pvr_rogue_pds_disasm.h"
34
 
#include "pvr_rogue_pds_encode.h"
35
 
#include "util/log.h"
36
 
#include "util/macros.h"
37
 
 
38
 
/* Register operand helpers: each adds the lower bound of a register bank to a
 * bank-relative index. Suffixes: C = const, T = temp, P = ptemp. The "TP"
 * families only provide temp and ptemp variants.
 */

/* 32-bit operands (REGS32). */
#define R32_C(x) ((x) + PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER)
#define R32_T(x) ((x) + PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER)
#define R32_P(x) ((x) + PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER)

/* 32-bit operands (REGS32TP). */
#define R32TP_T(x) ((x) + PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER)
#define R32TP_P(x) ((x) + PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER)

/* 64-bit operands (REGS64). */
#define R64_C(x) ((x) + PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER)
#define R64_T(x) ((x) + PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER)
#define R64_P(x) ((x) + PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER)

/* 64-bit operands (REGS64TP). */
#define R64TP_T(x) ((x) + PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER)
#define R64TP_P(x) ((x) + PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER)
51
 
 
52
 
/* Index of the 32-bit PTemp that carries the base instance for draw indirect. */
#define PVR_INDIRECT_BASE_INSTANCE_PTEMP 1U

/* Each DDMAD instruction in the PDS vertex program gets this many constants
 * reserved for it.
 */
#define PVR_PDS_DDMAD_NUM_CONSTS 8
57
 
 
58
 
#if defined(TRACE_PDS)
/* Pretty-printing helpers. They log through mesa_logd() when TRACE_PDS is
 * defined and expand to nothing otherwise.
 */

/* Log a constant register allocation: index, width in bits and a label. */
#   define pvr_debug_pds_const(reg, size, annotation) \
      mesa_logd("const[%d]   @  (%dbits)  %s", reg, size, annotation)

/* Log a temp register allocation: index, width in bits and a label. */
#   define pvr_debug_pds_temp(reg, size, annotation) \
      mesa_logd("temp[%d]    @  (%dbits)  %s", reg, size, annotation)

/* Log a printf-style note; the first argument must be a string literal. */
#   define pvr_debug_pds_note(...) mesa_logd("              // " __VA_ARGS__)

/* Log the name of 'flag' when all of its bits are set in 'flags'.
 *
 * Wrapped in do/while(0) with parenthesized arguments so the macro behaves as
 * a single statement, including in an unbraced if/else.
 */
#   define pvr_debug_pds_flag(flags, flag)    \
      do {                                    \
         if (((flags) & (flag)) == (flag))    \
            mesa_logd(" > %s", #flag);        \
      } while (0)

/* Log a plain string. It is passed as data, never as the format string. */
#   define pvr_debug(annotation) mesa_logd("%s", annotation)

#else
#   define pvr_debug_pds_const(reg, size, annotation)
#   define pvr_debug_pds_temp(reg, size, annotation)
#   define pvr_debug_pds_note(...)
#   define pvr_debug_pds_flag(flags, flag)
#   define pvr_debug(annotation)
#endif
80
 
 
81
 
/* Cursor used while appending variable-sized entries to a PDS const map. */
struct pvr_pds_const_map_entry_write_state {
   /* PDS info that owns the entries buffer being written. */
   const struct pvr_pds_info *PDS_info;

   /* Most recently prepared entry; starts at the first slot of the buffer. */
   struct pvr_const_map_entry *entry;

   /* Size of the entry at 'entry'; 0 until the first entry is prepared. */
   size_t size_of_last_entry_in_bytes;

   /* Number of entries prepared so far. */
   uint32_t entry_count;

   /* Accumulated size of all prepared entries. */
   size_t entries_size_in_bytes;
};
88
 
 
89
 
static void pvr_init_pds_const_map_entry_write_state(
90
 
   struct pvr_pds_info *PDS_info,
91
 
   struct pvr_pds_const_map_entry_write_state *entry_write_state)
92
 
{
93
 
   entry_write_state->PDS_info = PDS_info;
94
 
   entry_write_state->entry = PDS_info->entries;
95
 
   entry_write_state->size_of_last_entry_in_bytes = 0;
96
 
   entry_write_state->entry_count = 0;
97
 
   entry_write_state->entries_size_in_bytes = 0;
98
 
}
99
 
 
100
 
/* Returns a pointer to the next struct pvr_const_map_entry. */
101
 
static void *pvr_prepare_next_pds_const_map_entry(
102
 
   struct pvr_pds_const_map_entry_write_state *entry_write_state,
103
 
   size_t size_of_next_entry_in_bytes)
104
 
{
105
 
   /* Move on to the next entry. */
106
 
   uint8_t *next_entry = ((uint8_t *)entry_write_state->entry +
107
 
                          entry_write_state->size_of_last_entry_in_bytes);
108
 
   entry_write_state->entry = (struct pvr_const_map_entry *)next_entry;
109
 
 
110
 
   entry_write_state->size_of_last_entry_in_bytes = size_of_next_entry_in_bytes;
111
 
   entry_write_state->entry_count++;
112
 
   entry_write_state->entries_size_in_bytes += size_of_next_entry_in_bytes;
113
 
 
114
 
   /* Check if we can write into the next entry. */
115
 
   assert(entry_write_state->entries_size_in_bytes <=
116
 
          entry_write_state->PDS_info->entries_size_in_bytes);
117
 
 
118
 
   return entry_write_state->entry;
119
 
}
120
 
 
121
 
static void pvr_write_pds_const_map_entry_vertex_attribute_address(
122
 
   struct pvr_pds_const_map_entry_write_state *entry_write_state,
123
 
   const struct pvr_pds_vertex_dma *DMA,
124
 
   uint32_t const_val,
125
 
   bool use_robust_vertex_fetch)
126
 
{
127
 
   pvr_debug_pds_note("DMA %d dwords, stride %d, offset %d, bindingIdx %d",
128
 
                      DMA->size_in_dwords,
129
 
                      DMA->stride,
130
 
                      DMA->offset,
131
 
                      DMA->binding_index);
132
 
 
133
 
   if (use_robust_vertex_fetch) {
134
 
      struct pvr_const_map_entry_robust_vertex_attribute_address
135
 
         *robust_attribute_entry;
136
 
 
137
 
      robust_attribute_entry =
138
 
         pvr_prepare_next_pds_const_map_entry(entry_write_state,
139
 
                                              sizeof(*robust_attribute_entry));
140
 
      robust_attribute_entry->type =
141
 
         PVR_PDS_CONST_MAP_ENTRY_TYPE_ROBUST_VERTEX_ATTRIBUTE_ADDRESS;
142
 
      robust_attribute_entry->const_offset = const_val;
143
 
      robust_attribute_entry->binding_index = DMA->binding_index;
144
 
      robust_attribute_entry->component_size_in_bytes =
145
 
         DMA->component_size_in_bytes;
146
 
      robust_attribute_entry->offset = DMA->offset;
147
 
      robust_attribute_entry->stride = DMA->stride;
148
 
      robust_attribute_entry->size_in_dwords = DMA->size_in_dwords;
149
 
      robust_attribute_entry->robustness_buffer_offset =
150
 
         DMA->robustness_buffer_offset;
151
 
   } else {
152
 
      struct pvr_const_map_entry_vertex_attribute_address *attribute_entry;
153
 
 
154
 
      attribute_entry =
155
 
         pvr_prepare_next_pds_const_map_entry(entry_write_state,
156
 
                                              sizeof(*attribute_entry));
157
 
      attribute_entry->type =
158
 
         PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_ADDRESS;
159
 
      attribute_entry->const_offset = const_val;
160
 
      attribute_entry->binding_index = DMA->binding_index;
161
 
      attribute_entry->offset = DMA->offset;
162
 
      attribute_entry->stride = DMA->stride;
163
 
      attribute_entry->size_in_dwords = DMA->size_in_dwords;
164
 
   }
165
 
}
166
 
 
167
 
/* Encodes a DOUT instruction targeting DOUTU.
 *
 * cc, end and src0 are forwarded to pvr_pds_inst_encode_dout(); its third
 * operand is always 0 for this destination.
 */
static ALWAYS_INLINE uint32_t pvr_pds_encode_doutu(uint32_t cc,
                                                   uint32_t end,
                                                   uint32_t src0)
{
   const uint32_t zero_operand = 0;
   const uint32_t encoded =
      pvr_pds_inst_encode_dout(cc,
                               end,
                               zero_operand,
                               src0,
                               PVR_ROGUE_PDSINST_DSTDOUT_DOUTU);

   return encoded;
}
177
 
 
178
 
static uint32_t
179
 
pvr_encode_burst(struct pvr_pds_const_map_entry_write_state *entry_write_state,
180
 
                 bool last_DMA,
181
 
                 bool halt,
182
 
                 unsigned int const32,
183
 
                 unsigned int const64,
184
 
                 unsigned int dma_size_in_dwords,
185
 
                 unsigned int destination,
186
 
                 unsigned int store)
187
 
{
188
 
   uint32_t literal_value;
189
 
 
190
 
   /* Encode literal value. */
191
 
   literal_value = dma_size_in_dwords
192
 
                   << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT;
193
 
   literal_value |= destination
194
 
                    << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT;
195
 
   literal_value |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED |
196
 
                    store;
197
 
 
198
 
   if (last_DMA)
199
 
      literal_value |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN;
200
 
 
201
 
   /* Create const map entry. */
202
 
   struct pvr_const_map_entry_literal32 *literal_entry;
203
 
 
204
 
   literal_entry = pvr_prepare_next_pds_const_map_entry(entry_write_state,
205
 
                                                        sizeof(*literal_entry));
206
 
   literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
207
 
   literal_entry->const_offset = const32;
208
 
   literal_entry->literal_value = literal_value;
209
 
 
210
 
   /* Encode DOUTD */
211
 
   return pvr_pds_inst_encode_dout(0,
212
 
                                   halt,
213
 
                                   R32_C(const32),
214
 
                                   R64_C(const64),
215
 
                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTD);
216
 
}
217
 
 
218
 
/* pvr_encode_burst() with the store argument fixed to
 * PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE.
 */
#define pvr_encode_burst_cs(psDataEntry,                                 \
                            last_DMA,                                    \
                            halt,                                        \
                            const32,                                     \
                            const64,                                     \
                            dma_size_in_dwords,                          \
                            destination)                                 \
   pvr_encode_burst(psDataEntry,                                         \
                    last_DMA,                                            \
                    halt,                                                \
                    const32,                                             \
                    const64,                                             \
                    dma_size_in_dwords,                                  \
                    destination,                                         \
                    PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE)
234
 
 
235
 
static uint32_t pvr_encode_direct_write(
236
 
   struct pvr_pds_const_map_entry_write_state *entry_write_state,
237
 
   bool last_DMA,
238
 
   bool halt,
239
 
   unsigned int const32,
240
 
   unsigned int const64,
241
 
   uint32_t data_mask,
242
 
   unsigned int destination,
243
 
   uint32_t destination_store,
244
 
   const struct pvr_device_info *dev_info)
245
 
{
246
 
   struct pvr_const_map_entry_literal32 *literal_entry;
247
 
 
248
 
   uint32_t instruction =
249
 
      pvr_pds_inst_encode_dout(0,
250
 
                               halt,
251
 
                               const32,
252
 
                               const64,
253
 
                               PVR_ROGUE_PDSINST_DSTDOUT_DOUTW);
254
 
 
255
 
   literal_entry = pvr_prepare_next_pds_const_map_entry(entry_write_state,
256
 
                                                        sizeof(*literal_entry));
257
 
   literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
258
 
   literal_entry->const_offset = const32;
259
 
   literal_entry->literal_value = destination_store;
260
 
 
261
 
   if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
262
 
      literal_entry->literal_value |=
263
 
         PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED;
264
 
   }
265
 
 
266
 
   literal_entry->literal_value |=
267
 
      destination << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT;
268
 
 
269
 
   if (data_mask == 0x1) {
270
 
      literal_entry->literal_value |=
271
 
         PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER;
272
 
   } else if (data_mask == 0x2) {
273
 
      literal_entry->literal_value |=
274
 
         PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER;
275
 
   } else {
276
 
      literal_entry->literal_value |=
277
 
         PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64;
278
 
   }
279
 
 
280
 
   if (last_DMA) {
281
 
      literal_entry->literal_value |=
282
 
         PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
283
 
   }
284
 
 
285
 
   return instruction;
286
 
}
287
 
 
288
 
/* Constant and Temporary register allocation
 * - reserve space for a 32-bit register or a 64-bit register
 * - returned indices are offsets to 32-bit register locations
 * - 64-bit registers need to be aligned to even indices.
 */
#define RESERVE_32BIT 1U
#define RESERVE_64BIT 2U

/* Allocation entry points. The human-readable register name is only forwarded
 * in DEBUG builds; otherwise NULL is passed in its place.
 *
 * Both variants expand to a plain call expression with no trailing semicolon,
 * so they can be used anywhere an expression is allowed.
 */
#if defined(DEBUG)
#   define pvr_find_constant(usage, words, name) \
      pvr_find_constant2(usage, words, name)
#   define pvr_get_temps(usage, words, name) pvr_get_temps2(usage, words, name)
#else
#   define pvr_find_constant(usage, words, name) \
      pvr_find_constant2(usage, words, NULL)
#   define pvr_get_temps(usage, words, name) pvr_get_temps2(usage, words, NULL)
#endif
305
 
 
306
 
static uint32_t
307
 
pvr_find_constant2(uint8_t *const_usage, uint8_t words, const char *const_name)
308
 
{
309
 
   uint32_t const_index = ~0U;
310
 
   uint32_t step = words;
311
 
   uint8_t mask = (1 << words) - 1;
312
 
 
313
 
   assert(words == 1 || words == 2);
314
 
 
315
 
   /* Find a register at 'step' alignment that satisfies the mask. */
316
 
   for (uint32_t i = 0; i < PVR_MAX_VERTEX_ATTRIB_DMAS; i++) {
317
 
      for (uint32_t b = 0; b < PVR_PDS_DDMAD_NUM_CONSTS; b += step) {
318
 
         if ((const_usage[i] & (mask << b)) != 0)
319
 
            continue;
320
 
         const_usage[i] |= (mask << b);
321
 
         const_index = i * 8 + b;
322
 
         pvr_debug_pds_const(const_index, words * 32, const_name);
323
 
         return const_index;
324
 
      }
325
 
   }
326
 
 
327
 
   unreachable("Unexpected: Space cannot be found for constant");
328
 
   return ~0U;
329
 
}
330
 
 
331
 
/* Number of temps tracked by struct pvr_temp_usage (one bit each). */
#define PVR_MAX_PDS_TEMPS 32

/* Allocation state for PDS temp registers. */
struct pvr_temp_usage {
   /* Bitmask of allocated temps; bit N set means temp N is in use. */
   uint32_t temp_usage;

   /* Running total of temps handed out. */
   uint8_t temp_used;

   /* Index of the highest allocated temp plus one. */
   uint8_t temps_needed;
};

/* Sentinel returned when no temp could be allocated. */
#define PVR_INVALID_TEMP UINT8_C(~0)
339
 
 
340
 
static uint8_t pvr_get_temps2(struct pvr_temp_usage *temps,
341
 
                              uint8_t temps_needed,
342
 
                              const char *temp_name)
343
 
{
344
 
   uint8_t step = temps_needed;
345
 
   uint8_t mask = (1 << temps_needed) - 1;
346
 
 
347
 
   assert(temps_needed == 1 || temps_needed == 2);
348
 
   assert(temps->temp_used + temps_needed <= PVR_MAX_PDS_TEMPS);
349
 
 
350
 
   for (uint8_t i = 0; i < PVR_MAX_PDS_TEMPS; i += step) {
351
 
      if ((temps->temp_usage & (mask << i)) != 0)
352
 
         continue;
353
 
 
354
 
      const size_t clzBits = 8 * sizeof(unsigned int);
355
 
 
356
 
      temps->temp_usage |= (mask << i);
357
 
      temps->temp_used += temps_needed;
358
 
      temps->temps_needed =
359
 
         clzBits - __builtin_clz((unsigned int)temps->temp_usage);
360
 
 
361
 
      pvr_debug_pds_temp(i, temps_needed * 32, temp_name);
362
 
 
363
 
      return i;
364
 
   }
365
 
 
366
 
   unreachable("Unexpected: Space cannot be found for temps");
367
 
   return PVR_INVALID_TEMP;
368
 
}
369
 
 
370
 
/**
 * Wrapper macro to add a toggle for "count mode", allowing us to calculate the
 * size of a PDS program without actually attempting to store it.
 *
 * The macro expands to a single statement (do/while(0)), so it is safe in an
 * unbraced if/else and must be followed by a semicolon.
 *
 * \param dest The array/memory pointer where the PDS program should be stored.
 *             If it is NULL, only the counter is advanced (count mode) and
 *             \p statement is not evaluated at all.
 * \param counter The local counter that holds the total instruction count. It
 *                is incremented by one in both modes.
 * \param statement Expression producing the instruction word to store at
 *                  dest[counter] when \p dest is not NULL. It is evaluated
 *                  exactly once, so any side effects it has happen once even
 *                  when PVR_PDS_PRINT_INST() uses its argument.
 */

#define PVR_PDS_MODE_TOGGLE(dest, counter, statement)  \
   do {                                                \
      if (!(dest)) {                                   \
         (counter)++;                                  \
      } else {                                         \
         const uint32_t pvr_pds_inst_ = (statement);   \
         (dest)[(counter)++] = pvr_pds_inst_;          \
         PVR_PDS_PRINT_INST(pvr_pds_inst_);            \
      }                                                \
   } while (0)
389
 
 
390
 
/**
391
 
 * Generates the PDS vertex primary program for the dma's listed in the input
392
 
 * structure. Produces the constant map for the Vulkan driver based upon the
393
 
 * requirements of the instructions added to the program.
394
 
 *
395
 
 * PDS Data Layout
396
 
 * ---------------
397
 
 *
398
 
 * The PDS data is optimized for the DDMAD layout, with the data for those
399
 
 * instructions laid out first. The data required for other instructions is laid
400
 
 * out in the entries unused by the DDMADs.
401
 
 *
402
 
 * DDMAD layout
403
 
 * \verbatim
404
 
 *      bank | index | usage
405
 
 *      0    |  0:1  | temps (current index)[-]
406
 
 *      2    |  2:3  | stride[32]
407
 
 *      1    |  4:5  | base address[64]
408
 
 *      3    |  6:7  | ctrl[64]
409
 
 * \endverbatim
410
 
 *
411
 
 *  Each DMA whose stride > 0 requires one entry, laid out as above. We stride
412
 
 *      over the banks to ensure that each ddmad reads each of its operands from a
413
 
 *      different bank (i.e. remove bank clashes)
414
 
 *
415
 
 *      Note: This is "wasting" const[0:1] and const[2], however these free
416
 
 *      registers will be used by other, non-ddmad instructions.
417
 
 *
418
 
 *      The const register usage is maintained in the const_usage array, the
419
 
 * DDMAD instructions, for example, will utilize the top 5 registers in each
420
 
 * block of 8 hence a 'usage mask' of 0xF8 (0b11111000).
421
 
 *
422
 
 *      Constant Map
423
 
 *      ------------
424
 
 *
425
 
 *      The constant map is built up as we add PDS instructions and passed back
426
 
 * for the driver to fill in the PDS data section with the correct parameters
427
 
 * for each draw call.
428
 
 *
429
 
 * \param input_program PDS Program description.
430
 
 * \param code Buffer to be filled in with the PDS program. If NULL is provided,
431
 
 *             automatically switch to count-mode, preventing writes to
432
 
 *             unallocated memory.
433
 
 * \param info PDS info structure filled in for the driver, contains the
434
 
 *             constant map.
435
 
 * \param use_robust_vertex_fetch Do vertex fetches apply range checking.
436
 
 * \param dev_info pvr device information struct.
437
 
 */
438
 
void pvr_pds_generate_vertex_primary_program(
439
 
   struct pvr_pds_vertex_primary_program_input *input_program,
440
 
   uint32_t *code,
441
 
   struct pvr_pds_info *info,
442
 
   bool use_robust_vertex_fetch,
443
 
   const struct pvr_device_info *dev_info)
444
 
{
445
 
   struct pvr_pds_const_map_entry_write_state entry_write_state;
446
 
   struct pvr_const_map_entry_doutu_address *doutu_address_entry;
447
 
 
448
 
   uint32_t instruction = 0; /* index into code */
449
 
   uint32_t index; /* index used for current attribute, either vertex or
450
 
                    * instance.
451
 
                    */
452
 
 
453
 
   uint32_t total_dma_count = 0;
454
 
   uint32_t running_dma_count = 0;
455
 
 
456
 
   uint32_t write_instance_control = ~0;
457
 
   uint32_t write_vertex_control = ~0;
458
 
   uint32_t write_base_instance_control = ~0;
459
 
   uint32_t write_base_vertex_control = ~0;
460
 
   uint32_t pvr_write_draw_index_control = ~0;
461
 
 
462
 
   uint32_t ddmad_count = 0;
463
 
   uint32_t doutw_count = 0;
464
 
 
465
 
   uint32_t base_instance = 0;
466
 
   uint32_t base_vertex = 0;
467
 
   uint32_t draw_index = 0;
468
 
 
469
 
   uint8_t const_usage[PVR_MAX_VERTEX_ATTRIB_DMAS] = { 0 };
470
 
 
471
 
   struct pvr_temp_usage temp_usage = { 0 };
472
 
 
473
 
   uint32_t zero_temp = PVR_INVALID_TEMP;
474
 
 
475
 
   uint32_t max_index_temp = PVR_INVALID_TEMP;
476
 
   uint32_t current_index_temp = PVR_INVALID_TEMP;
477
 
 
478
 
   uint32_t index_id_temp = PVR_INVALID_TEMP;
479
 
   uint32_t base_instance_ID_temp = PVR_INVALID_TEMP;
480
 
   uint32_t instance_ID_temp = PVR_INVALID_TEMP;
481
 
 
482
 
   /* Debug tracing of program flags. */
483
 
   pvr_debug("pvr_pds_generate_vertex_primary_program");
484
 
   pvr_debug("=================================================");
485
 
   pvr_debug_pds_flag(input_program->flags,
486
 
                      PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED);
487
 
   pvr_debug_pds_flag(input_program->flags,
488
 
                      PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED);
489
 
   pvr_debug_pds_flag(input_program->flags,
490
 
                      PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT);
491
 
   pvr_debug_pds_flag(input_program->flags,
492
 
                      PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT);
493
 
   pvr_debug_pds_flag(input_program->flags,
494
 
                      PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED);
495
 
   pvr_debug_pds_flag(input_program->flags,
496
 
                      PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED);
497
 
   pvr_debug_pds_flag(input_program->flags,
498
 
                      PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED);
499
 
   pvr_debug(" ");
500
 
 
501
 
   pvr_init_pds_const_map_entry_write_state(info, &entry_write_state);
502
 
 
503
 
   /* At a minimum we need 2 dwords for the DOUTU, but since we allocate in
504
 
    * blocks of 4 we can reserve dwords for the instance/vertex DOUTW.
505
 
    */
506
 
   info->data_size_in_dwords = 4;
507
 
 
508
 
   /* Reserve 2 temps - these are automatically filled in by the VDM
509
 
    *
510
 
    * For instanced draw calls we manually increment the instance id by the
511
 
    * base-instance offset which is either provided as a constant, or in a
512
 
    * ptemp (for draw indirect)
513
 
    *
514
 
    * temp - contents
515
 
    * ---------------
516
 
    * 0    - index id (pre-filled)
517
 
    * 1    - base instance + instance id
518
 
    */
519
 
   index_id_temp = pvr_get_temps(&temp_usage, RESERVE_32BIT, "VDM Index id");
520
 
   instance_ID_temp =
521
 
      pvr_get_temps(&temp_usage, RESERVE_32BIT, "VDM Instance id");
522
 
 
523
 
   /* Reserve the lowest 2 dwords for DOUTU.
524
 
    * [------XX]
525
 
    */
526
 
   const_usage[0] = 0x03;
527
 
 
528
 
   /* Reserve consts for all the DDMAD's. */
529
 
   for (uint32_t dma = 0; dma < input_program->dma_count; dma++) {
530
 
      /* Mark the consts required by this ddmad "in-use".
531
 
       * [XXXXX---]
532
 
       */
533
 
      const_usage[ddmad_count++] |= 0xf8;
534
 
   }
535
 
 
536
 
   /* Start off by assuming we can fit everything in the 8 dwords/ddmad
537
 
    * footprint, if any DOUTD/DOUTW falls outside we will increase this
538
 
    * counter.
539
 
    */
540
 
   if (ddmad_count)
541
 
      info->data_size_in_dwords = PVR_PDS_DDMAD_NUM_CONSTS * ddmad_count;
542
 
 
543
 
   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED) {
544
 
      doutw_count++;
545
 
      write_vertex_control =
546
 
         pvr_find_constant(const_usage, RESERVE_32BIT, "Vertex id DOUTW Ctrl");
547
 
   }
548
 
 
549
 
   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED) {
550
 
      doutw_count++;
551
 
      write_instance_control = pvr_find_constant(const_usage,
552
 
                                                 RESERVE_32BIT,
553
 
                                                 "Instance id DOUTW Ctrl");
554
 
   }
555
 
 
556
 
   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
557
 
      doutw_count++;
558
 
      write_base_instance_control =
559
 
         pvr_find_constant(const_usage,
560
 
                           RESERVE_32BIT,
561
 
                           "Base Instance DOUTW Ctrl");
562
 
   }
563
 
 
564
 
   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED) {
565
 
      doutw_count++;
566
 
      write_base_vertex_control = pvr_find_constant(const_usage,
567
 
                                                    RESERVE_32BIT,
568
 
                                                    "Base Vertex DOUTW Ctrl");
569
 
 
570
 
      /* Load base vertex from constant for non-indirect variants. */
571
 
      if ((input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) ==
572
 
          0) {
573
 
         struct pvr_const_map_entry_base_vertex *psBaseVertexEntry =
574
 
            (struct pvr_const_map_entry_base_vertex *)entry_write_state.entry;
575
 
 
576
 
         base_vertex =
577
 
            pvr_find_constant(const_usage, RESERVE_32BIT, "base_vertex");
578
 
 
579
 
         psBaseVertexEntry =
580
 
            pvr_prepare_next_pds_const_map_entry(&entry_write_state,
581
 
                                                 sizeof(*psBaseVertexEntry));
582
 
         psBaseVertexEntry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_VERTEX;
583
 
         psBaseVertexEntry->const_offset = base_vertex;
584
 
      }
585
 
   }
586
 
 
587
 
   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED) {
588
 
      doutw_count++;
589
 
      pvr_write_draw_index_control =
590
 
         pvr_find_constant(const_usage, RESERVE_32BIT, "Draw Index DOUTW Ctrl");
591
 
 
592
 
      /* Set draw index to 0 for non-indirect variants. */
593
 
      if ((input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) ==
594
 
          0) {
595
 
         struct pvr_const_map_entry_literal32 *literal_entry;
596
 
 
597
 
         draw_index =
598
 
            pvr_find_constant(const_usage, RESERVE_32BIT, "draw_index");
599
 
 
600
 
         literal_entry =
601
 
            pvr_prepare_next_pds_const_map_entry(&entry_write_state,
602
 
                                                 sizeof(*literal_entry));
603
 
         literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
604
 
         literal_entry->const_offset = draw_index;
605
 
         literal_entry->literal_value = 0;
606
 
      }
607
 
   }
608
 
 
609
 
   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
610
 
      /* Load absolute instance id into uiInstanceIdTemp. */
611
 
      PVR_PDS_MODE_TOGGLE(
612
 
         code,
613
 
         instruction,
614
 
         pvr_pds_inst_encode_add32(
615
 
            /* cc    */ 0,
616
 
            /* alum  */ 0,
617
 
            /* sna   */ 0,
618
 
            /* src0  */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
619
 
            /* src1  */ R32_T(instance_ID_temp),
620
 
            /* dst   */ R32TP_T(instance_ID_temp)));
621
 
   } else if (input_program->flags &
622
 
              PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
623
 
      struct pvr_const_map_entry_base_instance *base_instance_entry =
624
 
         (struct pvr_const_map_entry_base_instance *)entry_write_state.entry;
625
 
 
626
 
      base_instance =
627
 
         pvr_find_constant(const_usage, RESERVE_32BIT, "base_instance");
628
 
 
629
 
      PVR_PDS_MODE_TOGGLE(code,
630
 
                          instruction,
631
 
                          pvr_pds_inst_encode_add32(
632
 
                             /* cc    */ 0,
633
 
                             /* alum  */ 0,
634
 
                             /* sna   */ 0,
635
 
                             /* src0  */ R32_C(base_instance),
636
 
                             /* src1  */ R32_T(instance_ID_temp),
637
 
                             /* dst   */ R32TP_T(instance_ID_temp)));
638
 
 
639
 
      base_instance_entry =
640
 
         pvr_prepare_next_pds_const_map_entry(&entry_write_state,
641
 
                                              sizeof(*base_instance_entry));
642
 
      base_instance_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE;
643
 
      base_instance_entry->const_offset = base_instance;
644
 
   } else if (input_program->flags &
645
 
              PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
646
 
      struct pvr_const_map_entry_base_instance *base_instance_entry =
647
 
         (struct pvr_const_map_entry_base_instance *)entry_write_state.entry;
648
 
 
649
 
      base_instance = pvr_find_constant(const_usage,
650
 
                                        RESERVE_32BIT,
651
 
                                        "base_instance (Driver Const)");
652
 
 
653
 
      /* Base instance provided by the driver. */
654
 
      base_instance_entry =
655
 
         pvr_prepare_next_pds_const_map_entry(&entry_write_state,
656
 
                                              sizeof(*base_instance_entry));
657
 
      base_instance_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE;
658
 
      base_instance_entry->const_offset = base_instance;
659
 
   }
660
 
 
661
 
   total_dma_count = ddmad_count;
662
 
 
663
 
   total_dma_count += doutw_count;
664
 
 
665
 
   if (use_robust_vertex_fetch) {
666
 
      pvr_debug_pds_note("RobustBufferVertexFetch Initialization");
667
 
 
668
 
      if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
669
 
         zero_temp = pvr_get_temps(&temp_usage, RESERVE_32BIT, "zero_temp");
670
 
 
671
 
         /* Load 0 into instance_ID_temp. */
672
 
         PVR_PDS_MODE_TOGGLE(code,
673
 
                             instruction,
674
 
                             pvr_pds_inst_encode_limm(0, /* cc */
675
 
                                                      zero_temp, /* SRC1 */
676
 
                                                      0, /* SRC0 */
677
 
                                                      0 /* GR */
678
 
                                                      ));
679
 
      } else {
680
 
         zero_temp = pvr_get_temps(&temp_usage, RESERVE_64BIT, "zero_temp");
681
 
 
682
 
         max_index_temp =
683
 
            pvr_get_temps(&temp_usage, RESERVE_64BIT, "uMaxIndex");
684
 
         current_index_temp =
685
 
            pvr_get_temps(&temp_usage, RESERVE_64BIT, "uCurrentIndex");
686
 
 
687
 
         PVR_PDS_MODE_TOGGLE(code,
688
 
                             instruction,
689
 
                             pvr_pds_inst_encode_stflp64(
690
 
                                0, /* cc */
691
 
                                PVR_ROGUE_PDSINST_LOP_XOR, /* LOP */
692
 
                                1, /* IM */
693
 
                                R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
694
 
                                                          */
695
 
                                R64TP_T(zero_temp >> 1), /* SRC1 (REGS64TP)
696
 
                                                          */
697
 
                                0, /* SRC2 (REGS32) */
698
 
                                R64TP_T(zero_temp >> 1) /* DST (REG64TP) */
699
 
                                ));
700
 
         PVR_PDS_MODE_TOGGLE(code,
701
 
                             instruction,
702
 
                             pvr_pds_inst_encode_stflp64(
703
 
                                0, /* cc */
704
 
                                PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
705
 
                                1, /* IM */
706
 
                                R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
707
 
                                                          */
708
 
                                0, /* SRC1 (REGS64TP) */
709
 
                                0, /* SRC2 (REGS32) */
710
 
                                R64TP_T(current_index_temp >> 1) /* DST */
711
 
                                /* (REG64TP) */
712
 
                                ));
713
 
         PVR_PDS_MODE_TOGGLE(code,
714
 
                             instruction,
715
 
                             pvr_pds_inst_encode_stflp64(
716
 
                                0, /* cc */
717
 
                                PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
718
 
                                1, /* IM */
719
 
                                R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
720
 
                                                          */
721
 
                                0, /* SRC1 (REGS64TP) */
722
 
                                0, /* SRC2 (REGS32) */
723
 
                                R64TP_T(max_index_temp >> 1) /* DST */
724
 
                                /* (REG64TP) */
725
 
                                ));
726
 
      }
727
 
   }
728
 
 
729
 
   if (input_program->dma_count && use_robust_vertex_fetch) {
730
 
      PVR_PDS_MODE_TOGGLE(
731
 
         code,
732
 
         instruction,
733
 
         pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCC */
734
 
                                 0, /* Neg */
735
 
                                 PVR_HAS_FEATURE(dev_info, pds_ddmadt)
736
 
                                    ? PVR_ROGUE_PDSINST_PREDICATE_OOB
737
 
                                    : PVR_ROGUE_PDSINST_PREDICATE_P0, /* SETC */
738
 
                                 1 /* Addr */
739
 
                                 ));
740
 
   }
741
 
 
742
 
   for (uint32_t dma = 0; dma < input_program->dma_count; dma++) {
743
 
      uint32_t const_base = dma * PVR_PDS_DDMAD_NUM_CONSTS;
744
 
      uint32_t control_word;
745
 
      struct pvr_const_map_entry_literal32 *literal_entry;
746
 
 
747
 
      struct pvr_pds_vertex_dma *vertex_dma = &input_program->dma_list[dma];
748
 
      bool last_DMA = (++running_dma_count == total_dma_count);
749
 
 
750
 
      pvr_debug_pds_note("Vertex Attribute DMA %d (last=%d)", dma, last_DMA);
751
 
 
752
 
      /* The id we use to index into this dma. */
753
 
      if (vertex_dma->flags & PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE) {
754
 
         pvr_debug_pds_note("Instance Rate (divisor = %d)",
755
 
                            vertex_dma->divisor);
756
 
 
757
 
         /* 4    - madd 0 - needs to be 64-bit aligned
758
 
          * 5    - madd 1
759
 
          */
760
 
         if (vertex_dma->divisor > 1) {
761
 
            const uint32_t adjusted_instance_ID_temp =
762
 
               pvr_get_temps(&temp_usage,
763
 
                             RESERVE_64BIT,
764
 
                             "adjusted_instance_ID_temp");
765
 
            const uint32_t MADD_temp =
766
 
               pvr_get_temps(&temp_usage, RESERVE_64BIT, "MADD_temp");
767
 
 
768
 
            /* 1. Remove base instance value from temp 1 to get instance id
769
 
             * 2. Divide the instance id by the divisor - Iout = (Iin *
770
 
             *    Multiplier) >> (shift+31)
771
 
             * 3. Add the base instance back on.
772
 
             *
773
 
             * Need two zero temps for the add part of the later MAD.
774
 
             */
775
 
 
776
 
            PVR_PDS_MODE_TOGGLE(code,
777
 
                                instruction,
778
 
                                pvr_pds_inst_encode_add64(
779
 
                                   /* cc    */ 0,
780
 
                                   /* alum  */ 0,
781
 
                                   /* sna   */ 1,
782
 
                                   /* src0  */ R64_T(MADD_temp >> 1),
783
 
                                   /* src1  */ R64_T(MADD_temp >> 1),
784
 
                                   /* dst   */ R64TP_T(MADD_temp >> 1)));
785
 
 
786
 
            if (input_program->flags &
787
 
                PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
788
 
               /* Subtract base instance from temp 1, put into
789
 
                * adjusted_instance_ID_temp.
790
 
                */
791
 
               PVR_PDS_MODE_TOGGLE(
792
 
                  code,
793
 
                  instruction,
794
 
                  pvr_pds_inst_encode_add32(
795
 
                     /* cc    */ 0,
796
 
                     /* alum  */ 0,
797
 
                     /* sna   */ 1,
798
 
                     /* src0  */ R32_T(instance_ID_temp),
799
 
                     /* src1  */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
800
 
                     /* dst   */ R32TP_T(adjusted_instance_ID_temp)));
801
 
            } else if (input_program->flags &
802
 
                       PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
803
 
               /* Subtract base instance from temp 1, put into
804
 
                * adjusted_instance_ID_temp.
805
 
                */
806
 
               PVR_PDS_MODE_TOGGLE(
807
 
                  code,
808
 
                  instruction,
809
 
                  pvr_pds_inst_encode_add32(
810
 
                     /* cc    */ 0,
811
 
                     /* alum  */ 0,
812
 
                     /* sna   */ 1,
813
 
                     /* src0  */ R32_T(instance_ID_temp),
814
 
                     /* src1  */ R32_C(base_instance),
815
 
                     /* dst   */ R32TP_T(adjusted_instance_ID_temp)));
816
 
            } else {
817
 
               /* Copy instance from temp 1 to adjusted_instance_ID_temp.
818
 
                */
819
 
               PVR_PDS_MODE_TOGGLE(
820
 
                  code,
821
 
                  instruction,
822
 
                  pvr_pds_inst_encode_add32(
823
 
                     /* cc    */ 0,
824
 
                     /* alum  */ 0,
825
 
                     /* sna   */ 0,
826
 
                     /* src0  */ R32_T(instance_ID_temp),
827
 
                     /* src1  */ R32_T(MADD_temp), /* MADD_temp is set
828
 
                                                    * to 0 at this point.
829
 
                                                    */
830
 
                     /* dst   */ R32TP_T(adjusted_instance_ID_temp)));
831
 
            }
832
 
 
833
 
            /* shift = the bit of the next highest power of two. */
834
 
            uint32_t shift_unsigned =
835
 
               (31 - __builtin_clz(vertex_dma->divisor - 1)) + 1;
836
 
            int32_t shift = (int32_t)shift_unsigned;
837
 
            uint32_t shift_2s_comp;
838
 
 
839
 
            pvr_debug_pds_note(
840
 
               "Perform instance rate divide (as integer multiply and rshift)");
841
 
 
842
 
            const uint32_t multipier_constant =
843
 
               pvr_find_constant(const_usage,
844
 
                                 RESERVE_32BIT,
845
 
                                 "MultiplierConstant (for InstanceDivisor)");
846
 
 
847
 
            /* multiplier = ( 2^(shift + 31) + (divisor - 1) ) / divisor,
848
 
               note: the division above is integer division. */
849
 
            uint64_t multipier64 =
850
 
               (uint64_t)((((uint64_t)1 << ((uint64_t)shift_unsigned + 31)) +
851
 
                           ((uint64_t)vertex_dma->divisor - (uint64_t)1)) /
852
 
                          (uint64_t)vertex_dma->divisor);
853
 
            uint32_t multiplier = (uint32_t)multipier64;
854
 
 
855
 
            pvr_debug_pds_note(" - Value of MultiplierConstant = %u",
856
 
                               multiplier);
857
 
            pvr_debug_pds_note(" - Value of Shift = %d", shift);
858
 
 
859
 
            literal_entry =
860
 
               pvr_prepare_next_pds_const_map_entry(&entry_write_state,
861
 
                                                    sizeof(*literal_entry));
862
 
            literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
863
 
            literal_entry->const_offset = multipier_constant;
864
 
            literal_entry->literal_value = multiplier;
865
 
 
866
 
            /* (Iin * Multiplier) */
867
 
            PVR_PDS_MODE_TOGGLE(
868
 
               code,
869
 
               instruction,
870
 
               pvr_rogue_inst_encode_mad(0, /* Sign of add is positive */
871
 
                                         0, /* Unsigned ALU mode */
872
 
                                         0, /* Unconditional */
873
 
                                         R32_C(multipier_constant),
874
 
                                         R32_T(adjusted_instance_ID_temp),
875
 
                                         R64_T(MADD_temp / 2),
876
 
                                         R64TP_T(MADD_temp / 2)));
877
 
 
878
 
            /*  >> (shift + 31) */
879
 
            shift += 31;
880
 
            shift *= -1;
881
 
 
882
 
            if (shift < -31) {
883
 
               /* >> (31) */
884
 
               shift_2s_comp = 0xFFFE1;
885
 
               PVR_PDS_MODE_TOGGLE(code,
886
 
                                   instruction,
887
 
                                   pvr_pds_inst_encode_stflp64(
888
 
                                      /* cc */ 0,
889
 
                                      /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
890
 
                                      /* IM */ 1, /*  enable immediate */
891
 
                                      /* SRC0 */ R64_T(MADD_temp / 2),
892
 
                                      /* SRC1 */ 0, /* This won't be used
893
 
                                                       in a shift
894
 
                                                       operation. */
895
 
                                      /* SRC2 (Shift) */ shift_2s_comp,
896
 
                                      /* DST */ R64TP_T(MADD_temp / 2)));
897
 
               shift += 31;
898
 
            }
899
 
 
900
 
            /* >> (shift + 31) */
901
 
            shift_2s_comp = *((uint32_t *)&shift);
902
 
            PVR_PDS_MODE_TOGGLE(code,
903
 
                                instruction,
904
 
                                pvr_pds_inst_encode_stflp64(
905
 
                                   /* cc */ 0,
906
 
                                   /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
907
 
                                   /* IM */ 1, /*  enable immediate */
908
 
                                   /* SRC0 */ R64_T(MADD_temp / 2),
909
 
                                   /* SRC1 */ 0, /* This won't be used
910
 
                                                  * in a shift
911
 
                                                  * operation. */
912
 
                                   /* SRC2 (Shift) */ shift_2s_comp,
913
 
                                   /* DST */ R64TP_T(MADD_temp / 2)));
914
 
 
915
 
            if (input_program->flags &
916
 
                PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
917
 
               /* Add base instance. */
918
 
               PVR_PDS_MODE_TOGGLE(
919
 
                  code,
920
 
                  instruction,
921
 
                  pvr_pds_inst_encode_add32(
922
 
                     /* cc    */ 0,
923
 
                     /* alum  */ 0,
924
 
                     /* sna   */ 0,
925
 
                     /* src0  */ R32_T(MADD_temp),
926
 
                     /* src1  */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
927
 
                     /* dst   */ R32TP_T(MADD_temp)));
928
 
            } else if (input_program->flags &
929
 
                       PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
930
 
               /* Add base instance. */
931
 
               PVR_PDS_MODE_TOGGLE(code,
932
 
                                   instruction,
933
 
                                   pvr_pds_inst_encode_add32(
934
 
                                      /* cc    */ 0,
935
 
                                      /* alum  */ 0,
936
 
                                      /* sna   */ 0,
937
 
                                      /* src0  */ R32_T(MADD_temp),
938
 
                                      /* src1  */ R32_C(base_instance),
939
 
                                      /* dst   */ R32TP_T(MADD_temp)));
940
 
            }
941
 
 
942
 
            pvr_debug_pds_note(
943
 
               "DMA Vertex Index will be sourced from 'MADD_temp'");
944
 
            index = MADD_temp;
945
 
         } else if (vertex_dma->divisor == 0) {
946
 
            if (base_instance_ID_temp == PVR_INVALID_TEMP) {
947
 
               base_instance_ID_temp = pvr_get_temps(&temp_usage,
948
 
                                                     RESERVE_32BIT,
949
 
                                                     "uBaseInstanceIDTemp");
950
 
            }
951
 
 
952
 
            /* Load 0 into base_instance_ID_temp. */
953
 
            PVR_PDS_MODE_TOGGLE(code,
954
 
                                instruction,
955
 
                                pvr_pds_inst_encode_limm(
956
 
                                   /* cc       */ 0,
957
 
                                   /* src1     */ base_instance_ID_temp,
958
 
                                   /* src0     */ 0,
959
 
                                   /* gr       */ 0));
960
 
 
961
 
            if (input_program->flags &
962
 
                PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
963
 
               /* Add base instance. */
964
 
               PVR_PDS_MODE_TOGGLE(
965
 
                  code,
966
 
                  instruction,
967
 
                  pvr_pds_inst_encode_add32(
968
 
                     /* cc    */ 0,
969
 
                     /* alum  */ 0,
970
 
                     /* sna   */ 0,
971
 
                     /* src0  */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
972
 
                     /* src1  */ R32_T(base_instance_ID_temp),
973
 
                     /* dst   */ R32TP_T(base_instance_ID_temp)));
974
 
 
975
 
            } else if (input_program->flags &
976
 
                       PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
977
 
               /* Add base instance. */
978
 
               PVR_PDS_MODE_TOGGLE(
979
 
                  code,
980
 
                  instruction,
981
 
                  pvr_pds_inst_encode_add32(
982
 
                     /* cc    */ 0,
983
 
                     /* alum  */ 0,
984
 
                     /* sna   */ 0,
985
 
                     /* src0  */ R32_C(base_instance),
986
 
                     /* src1  */ R32_T(base_instance_ID_temp),
987
 
                     /* dst   */ R32TP_T(base_instance_ID_temp)));
988
 
            }
989
 
 
990
 
            pvr_debug_pds_note(
991
 
               "DMA Vertex Index will be sourced from 'uBaseInstanceIdTemp'");
992
 
            index = base_instance_ID_temp;
993
 
         } else {
994
 
            pvr_debug_pds_note(
995
 
               "DMA Vertex Index will be sourced from 'uInstanceIdTemp'");
996
 
            index = instance_ID_temp;
997
 
         }
998
 
      } else {
999
 
         pvr_debug_pds_note(
1000
 
            "DMA Vertex Index will be sourced from 'uIndexIdTemp'");
1001
 
         index = index_id_temp;
1002
 
      }
1003
 
 
1004
 
      /* DDMAD Const Usage [__XX_---] */
1005
 
      pvr_write_pds_const_map_entry_vertex_attribute_address(
1006
 
         &entry_write_state,
1007
 
         vertex_dma,
1008
 
         const_base + 4,
1009
 
         use_robust_vertex_fetch);
1010
 
 
1011
 
      /* DDMAD Const Usage [__XXX---] */
1012
 
      literal_entry =
1013
 
         pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1014
 
                                              sizeof(*literal_entry));
1015
 
      literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1016
 
      literal_entry->const_offset = const_base + 3;
1017
 
      literal_entry->literal_value = vertex_dma->stride;
1018
 
 
1019
 
      control_word = vertex_dma->size_in_dwords
1020
 
                     << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1021
 
      control_word |= vertex_dma->destination
1022
 
                      << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
1023
 
      control_word |= (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1024
 
                       PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED);
1025
 
 
1026
 
      /* DDMADT instructions will do a dummy doutd when OOB if
1027
 
       * PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN is set but as the driver
1028
 
       * would need to do another doutd after an OOB DDMADT to provide the 'in
1029
 
       * bounds' data the DDMADT can't be set as LAST.
1030
 
       *
1031
 
       * This requires us to include a final dummy DDMAD.LAST instruction.
1032
 
       *
1033
 
       * Pseudocode taken from SeriesXE2017.PDS Instruction Controller
1034
 
       * Specification.doc
1035
 
       *
1036
 
       *        DDMAD src0,src1,src2,src3
1037
 
       *
1038
 
       *        calculated_source_address := src0*src1+src2
1039
 
       *        base_address              := src2
1040
 
       *        dma_parameters            := src3[31:0]
1041
 
       *        buffer_size               := src3[63:33]
1042
 
       *        test                      := src3[32]
1043
 
       *
1044
 
       *        if (test == 1) {
1045
 
       *           // DDMAD(T)
1046
 
       *           if (calculated_source_address[39:0] + (burst_size<<2) <=
1047
 
       *         base_address[39:0] + buffer_size) {
1048
 
       *        OOB := 0
1049
 
       *        DOUTD calculated_source_address,dma_parameters
1050
 
       *     } else {
1051
 
       *        OOB := 1
1052
 
       *        if (last_instance == 1) {
1053
 
       *           dma_parameters[BURST_SIZE] := 0
1054
 
       *           DOUTD calculated_source_address,dma_parameters
1055
 
       *              }
1056
 
       *           }
1057
 
       *        } else {
1058
 
       *           // DDMAD
1059
 
       *           DOUTD calculated_source_address,dma_parameters
1060
 
       *        }
1061
 
       */
1062
 
 
1063
 
      if (last_DMA && (!PVR_HAS_FEATURE(dev_info, pds_ddmadt) ||
1064
 
                       !use_robust_vertex_fetch)) {
1065
 
         pvr_debug_pds_note("LAST DDMAD");
1066
 
         control_word |= PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1067
 
      }
1068
 
 
1069
 
      /* DDMAD Const Usage [_XXXX---] */
1070
 
      literal_entry =
1071
 
         pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1072
 
                                              sizeof(*literal_entry));
1073
 
      literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1074
 
      literal_entry->const_offset = (const_base + 6);
1075
 
      literal_entry->literal_value = control_word;
1076
 
 
1077
 
      if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1078
 
         /* DDMAD Const Usage [XXXXX---]
1079
 
          * With DDMADT an extra 32bits of SRC3 contains the information for
1080
 
          * performing out-of-bounds tests on the DMA.
1081
 
          */
1082
 
 
1083
 
         if (use_robust_vertex_fetch) {
1084
 
            struct pvr_pds_const_map_entry_vertex_attr_ddmadt_oob_buffer_size
1085
 
               *obb_buffer_size;
1086
 
            obb_buffer_size =
1087
 
               pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1088
 
                                                    sizeof(*obb_buffer_size));
1089
 
 
1090
 
            obb_buffer_size->type =
1091
 
               PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTR_DDMADT_OOB_BUFFER_SIZE;
1092
 
            obb_buffer_size->const_offset = const_base + 7;
1093
 
            obb_buffer_size->binding_index = vertex_dma->binding_index;
1094
 
         } else {
1095
 
            literal_entry =
1096
 
               pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1097
 
                                                    sizeof(*literal_entry));
1098
 
            literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1099
 
            literal_entry->const_offset = const_base + 7;
1100
 
            literal_entry->literal_value = 0;
1101
 
         }
1102
 
 
1103
 
         PVR_PDS_MODE_TOGGLE(
1104
 
            code,
1105
 
            instruction,
1106
 
            pvr_pds_inst_encode_ddmad(0, /* cc */
1107
 
                                      0, /* END */
1108
 
                                      R32_C(const_base + 3), /* SRC0 (REGS32) */
1109
 
                                      index, /* SRC1 (REGS32T) */
1110
 
                                      R64_C((const_base + 4) >> 1), /* SRC2
1111
 
                                                                     * (REGS64)
1112
 
                                                                     */
1113
 
                                      R64_C((const_base + 6) >> 1) /* SRC3
1114
 
                                                                    * (REGS64C)
1115
 
                                                                    */
1116
 
                                      ));
1117
 
 
1118
 
         if (use_robust_vertex_fetch) {
1119
 
            /* If not out of bounds, skip next DDMAD instructions. */
1120
 
            PVR_PDS_MODE_TOGGLE(code,
1121
 
                                instruction,
1122
 
                                pvr_pds_inst_encode_ddmad(
1123
 
                                   1, /* cc */
1124
 
                                   0, /* END */
1125
 
                                   R32_C(const_base + 3), /* SRC0 (REGS32) */
1126
 
                                   R32_T(zero_temp), /* SRC1 (REGS32T) */
1127
 
                                   R64_C((const_base + 4) >> 1), /* SRC2
1128
 
                                                                  * (REGS64)
1129
 
                                                                  */
1130
 
                                   R64_C((const_base + 6) >> 1) /* SRC3
1131
 
                                                                 * (REGS64C)
1132
 
                                                                 */
1133
 
                                   ));
1134
 
 
1135
 
            /* Now the driver must have a dummy DDMAD marked as last. */
1136
 
            if (last_DMA) {
1137
 
               uint32_t dummy_dma_const = pvr_find_constant(const_usage,
1138
 
                                                            RESERVE_64BIT,
1139
 
                                                            "uDummyDMAConst");
1140
 
               uint32_t zero_const =
1141
 
                  pvr_find_constant(const_usage, RESERVE_64BIT, "uZeroConst");
1142
 
 
1143
 
               literal_entry =
1144
 
                  pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1145
 
                                                       sizeof(*literal_entry));
1146
 
               literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1147
 
               literal_entry->const_offset = zero_const;
1148
 
               literal_entry->literal_value = 0;
1149
 
 
1150
 
               literal_entry =
1151
 
                  pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1152
 
                                                       sizeof(*literal_entry));
1153
 
               literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1154
 
               literal_entry->const_offset = zero_const + 1;
1155
 
               literal_entry->literal_value = 0;
1156
 
 
1157
 
               literal_entry =
1158
 
                  pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1159
 
                                                       sizeof(*literal_entry));
1160
 
               literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1161
 
               literal_entry->const_offset = dummy_dma_const;
1162
 
               literal_entry->literal_value = 0;
1163
 
 
1164
 
               literal_entry->literal_value |=
1165
 
                  0 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1166
 
               literal_entry->literal_value |=
1167
 
                  (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1168
 
                   PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED);
1169
 
               literal_entry->literal_value |=
1170
 
                  PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1171
 
 
1172
 
               literal_entry =
1173
 
                  pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1174
 
                                                       sizeof(*literal_entry));
1175
 
               literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1176
 
               literal_entry->const_offset = dummy_dma_const + 1;
1177
 
               literal_entry->literal_value = 0;
1178
 
 
1179
 
               PVR_PDS_MODE_TOGGLE(code,
1180
 
                                   instruction,
1181
 
                                   pvr_pds_inst_encode_ddmad(
1182
 
                                      0, /* cc */
1183
 
                                      0, /* END */
1184
 
                                      R32_C(zero_const), /* SRC0 (REGS32)
1185
 
                                                          */
1186
 
                                      R32_T(zero_temp), /* SRC1 (REGS32T)
1187
 
                                                         */
1188
 
                                      R64_C((dummy_dma_const) >> 1), /* SRC2
1189
 
                                                                        (REGS64)
1190
 
                                                                     */
1191
 
                                      R64_C((dummy_dma_const) >> 1) /* SRC3
1192
 
                                                                       (REGS64C)
1193
 
                                                                    */
1194
 
                                      ));
1195
 
            }
1196
 
         }
1197
 
      } else {
1198
 
         if (use_robust_vertex_fetch) {
1199
 
            struct pvr_const_map_entry_vertex_attribute_max_index
1200
 
               *max_index_entry;
1201
 
 
1202
 
            pvr_debug("RobustVertexFetch DDMAD");
1203
 
 
1204
 
            const uint32_t max_index_const =
1205
 
               pvr_find_constant(const_usage, RESERVE_32BIT, "max_index_const");
1206
 
 
1207
 
            max_index_entry =
1208
 
               pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1209
 
                                                    sizeof(*max_index_entry));
1210
 
            max_index_entry->const_offset = max_index_const;
1211
 
            max_index_entry->type =
1212
 
               PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_MAX_INDEX;
1213
 
            max_index_entry->binding_index = vertex_dma->binding_index;
1214
 
            max_index_entry->offset = vertex_dma->offset;
1215
 
            max_index_entry->stride = vertex_dma->stride;
1216
 
            max_index_entry->size_in_dwords = vertex_dma->size_in_dwords;
1217
 
            max_index_entry->component_size_in_bytes =
1218
 
               vertex_dma->component_size_in_bytes;
1219
 
 
1220
 
            PVR_PDS_MODE_TOGGLE(
1221
 
               code,
1222
 
               instruction,
1223
 
               pvr_pds_inst_encode_add32(0, /* cc */
1224
 
                                         0, /* ALUM */
1225
 
                                         PVR_ROGUE_PDSINST_LOP_NONE, /* SNA */
1226
 
                                         R32_C(max_index_const), /* SRC0
1227
 
                                                                  * (REGS32)
1228
 
                                                                  */
1229
 
                                         R32_T(zero_temp), /* SRC1 (REGS32) */
1230
 
                                         R32TP_T(max_index_temp) /* DST
1231
 
                                                                  * (REG32TP)
1232
 
                                                                  */
1233
 
                                         ));
1234
 
 
1235
 
            PVR_PDS_MODE_TOGGLE(code,
1236
 
                                instruction,
1237
 
                                pvr_pds_inst_encode_stflp32(
1238
 
                                   1, /* IM */
1239
 
                                   0, /* cc */
1240
 
                                   PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
1241
 
                                   index, /* SRC0 (REGS32T) */
1242
 
                                   0, /* SRC1 (REGS32) */
1243
 
                                   0, /* SRC2 (REG32TP) */
1244
 
                                   R32TP_T(current_index_temp) /* DST
1245
 
                                                                * (REG32TP)
1246
 
                                                                */
1247
 
                                   ));
1248
 
 
1249
 
            PVR_PDS_MODE_TOGGLE(
1250
 
               code,
1251
 
               instruction,
1252
 
               pvr_pds_inst_encode_cmp(
1253
 
                  0, /* cc enable */
1254
 
                  PVR_ROGUE_PDSINST_COP_GT, /* Operation */
1255
 
                  R64TP_T(current_index_temp >> 1), /* SRC
1256
 
                                                     * (REGS64TP)
1257
 
                                                     */
1258
 
                  R64_T(max_index_temp >> 1) /* SRC1 (REGS64) */
1259
 
                  ));
1260
 
 
1261
 
            PVR_PDS_MODE_TOGGLE(code,
1262
 
                                instruction,
1263
 
                                pvr_pds_inst_encode_stflp32(
1264
 
                                   1, /* IM */
1265
 
                                   1, /* cc */
1266
 
                                   PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
1267
 
                                   zero_temp, /* SRC0 (REGS32T) */
1268
 
                                   0, /* SRC1 (REGS32) */
1269
 
                                   0, /* SRC2 (REG32TP) */
1270
 
                                   R32TP_T(current_index_temp) /* DST
1271
 
                                                                * (REG32TP)
1272
 
                                                                */
1273
 
                                   ));
1274
 
 
1275
 
            PVR_PDS_MODE_TOGGLE(code,
1276
 
                                instruction,
1277
 
                                pvr_pds_inst_encode_ddmad(
1278
 
                                   0, /* cc  */
1279
 
                                   0, /* END */
1280
 
                                   R32_C(const_base + 3), /* SRC0 (REGS32) */
1281
 
                                   current_index_temp, /* SRC1 (REGS32T) */
1282
 
                                   R64_C((const_base + 4) >> 1), /* SRC2
1283
 
                                                                  * (REGS64)
1284
 
                                                                  */
1285
 
                                   (const_base + 6) >> 1 /* SRC3 (REGS64C) */
1286
 
                                   ));
1287
 
         } else {
1288
 
            PVR_PDS_MODE_TOGGLE(code,
1289
 
                                instruction,
1290
 
                                pvr_pds_inst_encode_ddmad(
1291
 
                                   /* cc    */ 0,
1292
 
                                   /* end   */ 0,
1293
 
                                   /* src0  */ R32_C(const_base + 3),
1294
 
                                   /* src2  */ (index),
1295
 
                                   /* src1  */ R64_C((const_base + 4) >> 1),
1296
 
                                   /* src3  */ (const_base + 6) >> 1));
1297
 
         }
1298
 
      }
1299
 
   }
1300
 
 
1301
 
   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED) {
1302
 
      bool last_DMA = (++running_dma_count == total_dma_count);
1303
 
 
1304
 
      PVR_PDS_MODE_TOGGLE(
1305
 
         code,
1306
 
         instruction,
1307
 
         pvr_encode_direct_write(
1308
 
            &entry_write_state,
1309
 
            last_DMA,
1310
 
            false,
1311
 
            R64_C(write_vertex_control),
1312
 
            R64_T(0),
1313
 
            0x1,
1314
 
            input_program->vertex_id_register,
1315
 
            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1316
 
            dev_info));
1317
 
   }
1318
 
 
1319
 
   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED) {
1320
 
      bool last_DMA = (++running_dma_count == total_dma_count);
1321
 
 
1322
 
      PVR_PDS_MODE_TOGGLE(
1323
 
         code,
1324
 
         instruction,
1325
 
         pvr_encode_direct_write(
1326
 
            &entry_write_state,
1327
 
            last_DMA,
1328
 
            false,
1329
 
            R64_C(write_instance_control),
1330
 
            R64_T(0),
1331
 
            0x2,
1332
 
            input_program->instance_id_register,
1333
 
            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1334
 
            dev_info));
1335
 
   }
1336
 
 
1337
 
   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
1338
 
      bool last_DMA = (++running_dma_count == total_dma_count);
1339
 
 
1340
 
      if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
1341
 
         /* Base instance comes from ptemp 1. */
1342
 
         PVR_PDS_MODE_TOGGLE(
1343
 
            code,
1344
 
            instruction,
1345
 
            pvr_encode_direct_write(
1346
 
               &entry_write_state,
1347
 
               last_DMA,
1348
 
               false,
1349
 
               R64_C(write_base_instance_control),
1350
 
               R64_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP >> 1),
1351
 
               0x2,
1352
 
               input_program->base_instance_register,
1353
 
               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1354
 
               dev_info));
1355
 
      } else {
1356
 
         uint32_t data_mask = (base_instance & 1) ? 0x2 : 0x1;
1357
 
 
1358
 
         /* Base instance comes from driver constant. */
1359
 
         PVR_PDS_MODE_TOGGLE(
1360
 
            code,
1361
 
            instruction,
1362
 
            pvr_encode_direct_write(
1363
 
               &entry_write_state,
1364
 
               last_DMA,
1365
 
               false,
1366
 
               R64_C(write_base_instance_control),
1367
 
               R64_C(base_instance >> 1),
1368
 
               data_mask,
1369
 
               input_program->base_instance_register,
1370
 
               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1371
 
               dev_info));
1372
 
      }
1373
 
   }
1374
 
 
1375
 
   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED) {
1376
 
      bool last_DMA = (++running_dma_count == total_dma_count);
1377
 
 
1378
 
      if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
1379
 
         /* Base vertex comes from ptemp 0 (initialized by PDS hardware). */
1380
 
         PVR_PDS_MODE_TOGGLE(
1381
 
            code,
1382
 
            instruction,
1383
 
            pvr_encode_direct_write(
1384
 
               &entry_write_state,
1385
 
               last_DMA,
1386
 
               false,
1387
 
               R64_C(write_base_vertex_control),
1388
 
               R64_P(0),
1389
 
               0x1,
1390
 
               input_program->base_vertex_register,
1391
 
               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1392
 
               dev_info));
1393
 
      } else {
1394
 
         uint32_t data_mask = (base_vertex & 1) ? 0x2 : 0x1;
1395
 
 
1396
 
         /* Base vertex comes from driver constant (literal 0). */
1397
 
         PVR_PDS_MODE_TOGGLE(
1398
 
            code,
1399
 
            instruction,
1400
 
            pvr_encode_direct_write(
1401
 
               &entry_write_state,
1402
 
               last_DMA,
1403
 
               false,
1404
 
               R64_C(write_base_vertex_control),
1405
 
               R64_C(base_vertex >> 1),
1406
 
               data_mask,
1407
 
               input_program->base_vertex_register,
1408
 
               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1409
 
               dev_info));
1410
 
      }
1411
 
   }
1412
 
 
1413
 
   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED) {
1414
 
      bool last_DMA = (++running_dma_count == total_dma_count);
1415
 
 
1416
 
      if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
1417
 
         /* Draw index comes from ptemp 3. */
1418
 
         PVR_PDS_MODE_TOGGLE(
1419
 
            code,
1420
 
            instruction,
1421
 
            pvr_encode_direct_write(
1422
 
               &entry_write_state,
1423
 
               last_DMA,
1424
 
               false,
1425
 
               R64_C(pvr_write_draw_index_control),
1426
 
               R64_P(1),
1427
 
               0x2,
1428
 
               input_program->draw_index_register,
1429
 
               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1430
 
               dev_info));
1431
 
      } else {
1432
 
         uint32_t data_mask = (draw_index & 1) ? 0x2 : 0x1;
1433
 
 
1434
 
         /* Draw index comes from constant (literal 0). */
1435
 
         PVR_PDS_MODE_TOGGLE(
1436
 
            code,
1437
 
            instruction,
1438
 
            pvr_encode_direct_write(
1439
 
               &entry_write_state,
1440
 
               last_DMA,
1441
 
               false,
1442
 
               R64_C(pvr_write_draw_index_control),
1443
 
               R64_C(draw_index >> 1),
1444
 
               data_mask,
1445
 
               input_program->draw_index_register,
1446
 
               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1447
 
               dev_info));
1448
 
      }
1449
 
   }
1450
 
 
1451
 
   doutu_address_entry =
1452
 
      pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1453
 
                                           sizeof(*doutu_address_entry));
1454
 
   doutu_address_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS;
1455
 
   doutu_address_entry->const_offset = 0;
1456
 
   doutu_address_entry->doutu_control = input_program->usc_task_control.src0;
1457
 
 
1458
 
   if (use_robust_vertex_fetch) {
1459
 
      /* Restore IF0 */
1460
 
      PVR_PDS_MODE_TOGGLE(
1461
 
         code,
1462
 
         instruction,
1463
 
         pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC */
1464
 
                                 0, /* Neg */
1465
 
                                 PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETCC */
1466
 
                                 1 /* Addr */
1467
 
                                 ));
1468
 
   }
1469
 
 
1470
 
   PVR_PDS_MODE_TOGGLE(code, instruction, pvr_pds_encode_doutu(1, 1, 0));
1471
 
   PVR_PDS_MODE_TOGGLE(code, instruction, pvr_pds_inst_encode_halt(0));
1472
 
 
1473
 
   assert(running_dma_count == total_dma_count);
1474
 
 
1475
 
   for (uint32_t i = 0; i < ARRAY_SIZE(const_usage); i++) {
1476
 
      if (const_usage[i] == 0)
1477
 
         break;
1478
 
 
1479
 
      info->data_size_in_dwords =
1480
 
         8 * i + (32 - __builtin_clz((uint32_t)const_usage[i]));
1481
 
   }
1482
 
 
1483
 
   info->temps_required = temp_usage.temps_needed;
1484
 
   info->entry_count = entry_write_state.entry_count;
1485
 
   info->entries_written_size_in_bytes =
1486
 
      entry_write_state.entries_size_in_bytes;
1487
 
   info->code_size_in_dwords = instruction;
1488
 
 
1489
 
   pvr_debug("=================================================\n");
1490
 
}
1491
 
 
1492
 
void pvr_pds_generate_descriptor_upload_program(
1493
 
   struct pvr_descriptor_program_input *input_program,
1494
 
   uint32_t *code_section,
1495
 
   struct pvr_pds_info *info)
1496
 
{
1497
 
   unsigned int num_consts64;
1498
 
   unsigned int num_consts32;
1499
 
   unsigned int next_const64;
1500
 
   unsigned int next_const32;
1501
 
   unsigned int instruction = 0;
1502
 
   uint32_t compile_time_buffer_index = 0;
1503
 
 
1504
 
   unsigned int total_dma_count = 0;
1505
 
   unsigned int running_dma_count = 0;
1506
 
 
1507
 
   struct pvr_pds_const_map_entry_write_state entry_write_state;
1508
 
 
1509
 
   /* Calculate the total register usage so we can stick 32-bit consts
1510
 
    * after 64. Each DOUTD/DDMAD requires 1 32-bit constant and 1 64-bit
1511
 
    * constant.
1512
 
    */
1513
 
   num_consts32 = input_program->descriptor_set_count;
1514
 
   num_consts64 = input_program->descriptor_set_count;
1515
 
   total_dma_count = input_program->descriptor_set_count;
1516
 
 
1517
 
   pvr_init_pds_const_map_entry_write_state(info, &entry_write_state);
1518
 
 
1519
 
   for (unsigned int index = 0; index < input_program->buffer_count; index++) {
1520
 
      struct pvr_pds_buffer *buffer = &input_program->buffers[index];
1521
 
 
1522
 
      /* This switch statement looks pointless but we want to optimize DMAs
1523
 
       * that can be done as a DOUTW.
1524
 
       */
1525
 
      switch (buffer->type) {
1526
 
      default: {
1527
 
         /* 1 DOUTD per compile time buffer: */
1528
 
         num_consts32++;
1529
 
         num_consts64++;
1530
 
         total_dma_count++;
1531
 
         break;
1532
 
      }
1533
 
      }
1534
 
   }
1535
 
 
1536
 
   /* DOUTU for the secondary update program requires a 64-bit constant. */
1537
 
   if (input_program->secondary_program_present)
1538
 
      num_consts64++;
1539
 
 
1540
 
   info->data_size_in_dwords = (num_consts64 * 2) + (num_consts32);
1541
 
 
1542
 
   /* Start counting constants. */
1543
 
   next_const64 = 0;
1544
 
   next_const32 = num_consts64 * 2;
1545
 
 
1546
 
   /* For each descriptor set perform a DOUTD. */
1547
 
   for (unsigned int descriptor_index = 0;
1548
 
        descriptor_index < input_program->descriptor_set_count;
1549
 
        descriptor_index++) {
1550
 
      struct pvr_const_map_entry_descriptor_set *descriptor_set_entry;
1551
 
      struct pvr_pds_descriptor_set *descriptor_set =
1552
 
         &input_program->descriptor_sets[descriptor_index];
1553
 
 
1554
 
      bool last_DMA = (++running_dma_count == total_dma_count);
1555
 
      bool halt = last_DMA && !input_program->secondary_program_present;
1556
 
 
1557
 
      descriptor_set_entry =
1558
 
         pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1559
 
                                              sizeof(*descriptor_set_entry));
1560
 
      descriptor_set_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DESCRIPTOR_SET;
1561
 
      descriptor_set_entry->const_offset = next_const64 * 2;
1562
 
      descriptor_set_entry->descriptor_set = descriptor_set->descriptor_set;
1563
 
      descriptor_set_entry->primary = descriptor_set->primary;
1564
 
      descriptor_set_entry->offset_in_dwords = descriptor_set->offset_in_dwords;
1565
 
 
1566
 
      PVR_PDS_MODE_TOGGLE(code_section,
1567
 
                          instruction,
1568
 
                          pvr_encode_burst_cs(&entry_write_state,
1569
 
                                              last_DMA,
1570
 
                                              halt,
1571
 
                                              next_const32,
1572
 
                                              next_const64,
1573
 
                                              descriptor_set->size_in_dwords,
1574
 
                                              descriptor_set->destination));
1575
 
 
1576
 
      next_const64++;
1577
 
      next_const32++;
1578
 
   }
1579
 
 
1580
 
   for (unsigned int index = 0; index < input_program->buffer_count; index++) {
1581
 
      struct pvr_pds_buffer *buffer = &input_program->buffers[index];
1582
 
 
1583
 
      bool last_DMA = (++running_dma_count == total_dma_count);
1584
 
      bool halt = last_DMA && !input_program->secondary_program_present;
1585
 
 
1586
 
      switch (buffer->type) {
1587
 
      case PVR_BUFFER_TYPE_PUSH_CONSTS: {
1588
 
         struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1589
 
 
1590
 
         special_buffer_entry =
1591
 
            pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1592
 
                                                 sizeof(*special_buffer_entry));
1593
 
         special_buffer_entry->type =
1594
 
            PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1595
 
         special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_PUSH_CONSTS;
1596
 
         special_buffer_entry->buffer_index = buffer->source_offset;
1597
 
         break;
1598
 
      }
1599
 
      case PVR_BUFFER_TYPE_DYNAMIC: {
1600
 
         struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1601
 
 
1602
 
         special_buffer_entry =
1603
 
            pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1604
 
                                                 sizeof(*special_buffer_entry));
1605
 
         special_buffer_entry->type =
1606
 
            PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1607
 
         special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_DYNAMIC;
1608
 
         special_buffer_entry->buffer_index = buffer->source_offset;
1609
 
         break;
1610
 
      }
1611
 
      case PVR_BUFFER_TYPES_COMPILE_TIME: {
1612
 
         struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1613
 
 
1614
 
         special_buffer_entry =
1615
 
            pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1616
 
                                                 sizeof(*special_buffer_entry));
1617
 
         special_buffer_entry->type =
1618
 
            PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1619
 
         special_buffer_entry->buffer_type = PVR_BUFFER_TYPES_COMPILE_TIME;
1620
 
         special_buffer_entry->buffer_index = compile_time_buffer_index++;
1621
 
         break;
1622
 
      }
1623
 
      case PVR_BUFFER_TYPES_BUFFER_LENGTHS: {
1624
 
         struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1625
 
 
1626
 
         special_buffer_entry =
1627
 
            pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1628
 
                                                 sizeof(*special_buffer_entry));
1629
 
         special_buffer_entry->type =
1630
 
            PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1631
 
         special_buffer_entry->buffer_type = PVR_BUFFER_TYPES_BUFFER_LENGTHS;
1632
 
         break;
1633
 
      }
1634
 
      case PVR_BUFFER_TYPE_BLEND_CONSTS: {
1635
 
         struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1636
 
 
1637
 
         special_buffer_entry =
1638
 
            pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1639
 
                                                 sizeof(*special_buffer_entry));
1640
 
         special_buffer_entry->type =
1641
 
            PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1642
 
         special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_BLEND_CONSTS;
1643
 
         special_buffer_entry->buffer_index =
1644
 
            input_program->blend_constants_used_mask;
1645
 
         break;
1646
 
      }
1647
 
      case PVR_BUFFER_TYPE_UBO: {
1648
 
         struct pvr_const_map_entry_constant_buffer *constant_buffer_entry;
1649
 
 
1650
 
         constant_buffer_entry = pvr_prepare_next_pds_const_map_entry(
1651
 
            &entry_write_state,
1652
 
            sizeof(*constant_buffer_entry));
1653
 
         constant_buffer_entry->type =
1654
 
            PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER;
1655
 
         constant_buffer_entry->buffer_id = buffer->buffer_id;
1656
 
         constant_buffer_entry->desc_set = buffer->desc_set;
1657
 
         constant_buffer_entry->binding = buffer->binding;
1658
 
         constant_buffer_entry->offset = buffer->source_offset;
1659
 
         constant_buffer_entry->size_in_dwords = buffer->size_in_dwords;
1660
 
         break;
1661
 
      }
1662
 
      case PVR_BUFFER_TYPES_UBO_ZEROING: {
1663
 
         struct pvr_const_map_entry_constant_buffer_zeroing
1664
 
            *constant_buffer_entry;
1665
 
 
1666
 
         constant_buffer_entry = pvr_prepare_next_pds_const_map_entry(
1667
 
            &entry_write_state,
1668
 
            sizeof(*constant_buffer_entry));
1669
 
         constant_buffer_entry->type =
1670
 
            PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER_ZEROING;
1671
 
         constant_buffer_entry->buffer_id = buffer->buffer_id;
1672
 
         constant_buffer_entry->offset = buffer->source_offset;
1673
 
         constant_buffer_entry->size_in_dwords = buffer->size_in_dwords;
1674
 
         break;
1675
 
      }
1676
 
      }
1677
 
 
1678
 
      entry_write_state.entry->const_offset = next_const64 * 2;
1679
 
 
1680
 
      PVR_PDS_MODE_TOGGLE(code_section,
1681
 
                          instruction,
1682
 
                          pvr_encode_burst_cs(&entry_write_state,
1683
 
                                              last_DMA,
1684
 
                                              halt,
1685
 
                                              next_const32,
1686
 
                                              next_const64,
1687
 
                                              buffer->size_in_dwords,
1688
 
                                              buffer->destination));
1689
 
 
1690
 
      next_const64++;
1691
 
      next_const32++;
1692
 
   }
1693
 
 
1694
 
   if (total_dma_count != running_dma_count)
1695
 
      fprintf(stderr, "Mismatch in DMA count\n");
1696
 
 
1697
 
   if (input_program->secondary_program_present) {
1698
 
      struct pvr_const_map_entry_doutu_address *doutu_address;
1699
 
 
1700
 
      PVR_PDS_MODE_TOGGLE(code_section,
1701
 
                          instruction,
1702
 
                          pvr_pds_encode_doutu(false, true, next_const64));
1703
 
 
1704
 
      doutu_address =
1705
 
         pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1706
 
                                              sizeof(*doutu_address));
1707
 
      doutu_address->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS;
1708
 
      doutu_address->const_offset = next_const64 * 2;
1709
 
      doutu_address->doutu_control = input_program->secondary_task_control.src0;
1710
 
 
1711
 
      next_const64++;
1712
 
   }
1713
 
 
1714
 
   if (instruction == 0 && input_program->must_not_be_empty) {
1715
 
      PVR_PDS_MODE_TOGGLE(code_section,
1716
 
                          instruction,
1717
 
                          pvr_pds_inst_encode_halt(
1718
 
                             /* cc */ false));
1719
 
   }
1720
 
 
1721
 
   info->entry_count = entry_write_state.entry_count;
1722
 
   info->entries_written_size_in_bytes =
1723
 
      entry_write_state.entries_size_in_bytes;
1724
 
   info->code_size_in_dwords = instruction;
1725
 
}