/*
 * Copyright (C) 2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
28
#include "compiler/nir/nir.h"
29
#include "util/u_dynarray.h"
30
#include "util/hash_table.h"
32
/* On Valhall, the driver gives the hardware a table of resource tables.
 * Resources are addressed as the index of the table together with the index of
 * the resource within the table. For simplicity, we put one type of resource
 * in each table and fix the numbering of the tables.
 *
 * This numbering is arbitrary. It is a software ABI between the
 * Gallium driver and the Valhall compiler.
 */
40
enum pan_resource_table {
43
PAN_TABLE_ATTRIBUTE_BUFFER,
48
PAN_NUM_RESOURCE_TABLES
51
/* Indices for named (non-XFB) varyings that are present. These are packed
 * tightly so they correspond to a bitfield present (P) indexed by (1 <<
 * PAN_VARY_*). This has the nice property that you can lookup the buffer index
 * of a given special field given a shift S by:
 *
 *      idx = popcount(P & ((1 << S) - 1))
 *
 * That is... look at all of the varyings that come earlier and count them, the
 * count is the new index since plus one. Likewise, the total number of special
 * buffers required is simply popcount(P)
 */
63
enum pan_special_varying {
65
PAN_VARY_POSITION = 1,
67
PAN_VARY_PNTCOORD = 3,
69
PAN_VARY_FRAGCOORD = 5,
75
/* Maximum number of attribute descriptors required for varyings. These include
76
* up to MAX_VARYING source level varyings plus a descriptor each non-GENERAL
78
#define PAN_MAX_VARYINGS (MAX_VARYING + PAN_VARY_MAX - 1)
80
/* Define the general compiler entry point */
82
#define MAX_SYSVAL_COUNT 32
84
/* Allow 2D of sysval IDs, while allowing nonparametric sysvals to equal
85
* their class for equal comparison */
87
#define PAN_SYSVAL(type, no) (((no) << 16) | PAN_SYSVAL_##type)
88
#define PAN_SYSVAL_TYPE(sysval) ((sysval) & 0xffff)
89
#define PAN_SYSVAL_ID(sysval) ((sysval) >> 16)
91
/* Define some common types. We start at one for easy indexing of hash
92
* tables internal to the compiler */
95
PAN_SYSVAL_VIEWPORT_SCALE = 1,
96
PAN_SYSVAL_VIEWPORT_OFFSET = 2,
97
PAN_SYSVAL_TEXTURE_SIZE = 3,
99
PAN_SYSVAL_NUM_WORK_GROUPS = 5,
100
PAN_SYSVAL_SAMPLER = 7,
101
PAN_SYSVAL_LOCAL_GROUP_SIZE = 8,
102
PAN_SYSVAL_WORK_DIM = 9,
103
PAN_SYSVAL_IMAGE_SIZE = 10,
104
PAN_SYSVAL_SAMPLE_POSITIONS = 11,
105
PAN_SYSVAL_MULTISAMPLED = 12,
106
PAN_SYSVAL_RT_CONVERSION = 13,
107
PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS = 14,
108
PAN_SYSVAL_DRAWID = 15,
109
PAN_SYSVAL_BLEND_CONSTANTS = 16,
112
#define PAN_TXS_SYSVAL_ID(texidx, dim, is_array) \
113
((texidx) | ((dim) << 7) | ((is_array) ? (1 << 9) : 0))
115
#define PAN_SYSVAL_ID_TO_TXS_TEX_IDX(id) ((id) & 0x7f)
116
#define PAN_SYSVAL_ID_TO_TXS_DIM(id) (((id) >> 7) & 0x3)
117
#define PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(id) !!((id) & (1 << 9))
119
/* Special attribute slots for vertex builtins. Sort of arbitrary but let's be
120
* consistent with the blob so we can compare traces easier. */
124
PAN_INSTANCE_ID = 17,
128
struct panfrost_sysvals {
129
/* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */
130
unsigned sysvals[MAX_SYSVAL_COUNT];
131
unsigned sysval_count;
134
/* Architecturally, Bifrost/Valhall can address 128 FAU slots of 64-bits each.
 * In practice, the maximum number of FAU slots is limited by implementation.
 * All known Bifrost and Valhall devices limit to 64 FAU slots. Therefore the
 * maximum number of 32-bit words is 128, since there are 2 words per FAU slot.
 *
 * Midgard can push at most 92 words, so this bound suffices. The Midgard
 * compiler pushes less than this, as Midgard uses register-mapped uniforms
 * instead of FAU, preventing large numbers of uniforms to be pushed for
 * nontrivial programs.
 */
144
#define PAN_MAX_PUSH 128
146
/* Architectural invariants (Midgard and Bifrost): UBO must be <= 2^16 bytes so
 * an offset to a word must be < 2^16. There are less than 2^8 UBOs */
149
struct panfrost_ubo_word {
154
struct panfrost_ubo_push {
156
struct panfrost_ubo_word words[PAN_MAX_PUSH];
159
/* Helper for searching the above. Note this is O(N) to the number of pushed
 * constants, do not run in the draw call hot path */
163
pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs);
165
struct hash_table_u64 *
166
panfrost_init_sysvals(struct panfrost_sysvals *sysvals, void *memctx);
169
pan_lookup_sysval(struct hash_table_u64 *sysval_to_id,
170
struct panfrost_sysvals *sysvals,
174
panfrost_sysval_for_instr(nir_instr *instr, nir_dest *dest);
176
struct panfrost_compile_inputs {
178
bool is_blend, is_blit;
182
uint64_t bifrost_blend_desc;
189
enum pipe_format rt_formats[8];
190
uint8_t raw_fmt_mask;
201
struct pan_shader_varying {
202
gl_varying_slot location;
203
enum pipe_format format;
206
struct bifrost_shader_blend_info {
208
uint32_t return_offset;
210
/* mali_bifrost_register_file_format corresponding to nir_alu_type */
215
* Unpacked form of a v7 message preload descriptor, produced by the compiler's
216
* message preload optimization. By splitting out this struct, the compiler does
217
* not need to know about data structure packing, avoiding a dependency on
220
struct bifrost_message_preload {
221
/* Whether to preload this message */
224
/* Varying to load from */
225
unsigned varying_index;
227
/* Register type, FP32 otherwise */
230
/* Number of components, ignored if texturing */
231
unsigned num_components;
233
/* If texture is set, performs a texture instruction according to
234
* texture_index, skip, and zero_lod. If texture is unset, only the
235
* varying load is performed.
237
bool texture, skip, zero_lod;
238
unsigned texture_index;
241
struct bifrost_shader_info {
242
struct bifrost_shader_blend_info blend[8];
243
nir_alu_type blend_src1_type;
245
struct bifrost_message_preload messages[2];
248
struct midgard_shader_info {
252
struct pan_shader_info {
253
gl_shader_stage stage;
254
unsigned work_reg_count;
258
/* Bit mask of preloaded registers */
263
bool reads_frag_coord;
264
bool reads_point_coord;
269
bool writes_coverage;
272
bool early_fragment_tests;
273
bool can_early_z, can_fpk;
274
BITSET_WORD outputs_read;
275
BITSET_WORD outputs_written;
279
bool writes_point_size;
281
/* If the primary shader writes point size, the Valhall
282
* driver may need a variant that does not write point
283
* size. Offset to such a shader in the program binary.
285
* Zero if no such variant is required.
287
* Only used with IDVS on Valhall.
289
unsigned no_psiz_offset;
291
/* Set if Index-Driven Vertex Shading is in use */
294
/* If IDVS is used, whether a varying shader is used */
295
bool secondary_enable;
297
/* If a varying shader is used, the varying shader's
298
* offset in the program binary
300
unsigned secondary_offset;
302
/* If IDVS is in use, number of work registers used by
305
unsigned secondary_work_reg_count;
307
/* If IDVS is in use, bit mask of preloaded registers
308
* used by the varying shader
310
uint64_t secondary_preload;
314
/* Is it legal to merge workgroups? This is true if the
315
* shader uses neither barriers nor shared memory.
317
* Used by the Valhall hardware.
319
bool allow_merging_workgroups;
323
/* Does the shader contains a barrier? or (for fragment shaders) does it
324
* require helper invocations, which demand the same ordering guarantees
325
* of the hardware? These notions are unified in the hardware, so we
326
* unify them here as well.
328
bool contains_barrier;
331
uint64_t outputs_written;
333
unsigned sampler_count;
334
unsigned texture_count;
336
unsigned attribute_count;
339
unsigned input_count;
340
struct pan_shader_varying input[PAN_MAX_VARYINGS];
341
unsigned output_count;
342
struct pan_shader_varying output[PAN_MAX_VARYINGS];
345
struct panfrost_sysvals sysvals;
347
/* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access
348
* Uniforms (Bifrost) */
349
struct panfrost_ubo_push push;
354
struct bifrost_shader_info bifrost;
355
struct midgard_shader_info midgard;
359
typedef struct pan_block {
360
/* Link to next block. Must be first for mir_get_block */
361
struct list_head link;
363
/* List of instructions emitted for the current block */
364
struct list_head instructions;
366
/* Index of the block in source order */
369
/* Control flow graph */
370
struct pan_block *successors[2];
371
struct set *predecessors;
372
bool unconditional_jumps;
374
/* In liveness analysis, these are live masks (per-component) for
375
* indices for the block. Scalar compilers have the luxury of using
376
* simple bit fields, but for us, liveness is a vector idea. */
381
struct pan_instruction {
382
struct list_head link;
385
#define pan_foreach_instr_in_block_rev(block, v) \
386
list_for_each_entry_rev(struct pan_instruction, v, &block->instructions, link)
388
#define pan_foreach_successor(blk, v) \
391
for (_v = (pan_block **) &blk->successors[0], \
393
v != NULL && _v < (pan_block **) &blk->successors[2]; \
396
#define pan_foreach_predecessor(blk, v) \
397
struct set_entry *_entry_##v; \
398
struct pan_block *v; \
399
for (_entry_##v = _mesa_set_next_entry(blk->predecessors, NULL), \
400
v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL); \
401
_entry_##v != NULL; \
402
_entry_##v = _mesa_set_next_entry(blk->predecessors, _entry_##v), \
403
v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL))
405
static inline pan_block *
406
pan_exit_block(struct list_head *blocks)
408
pan_block *last = list_last_entry(blocks, pan_block, link);
409
assert(!last->successors[0] && !last->successors[1]);
413
typedef void (*pan_liveness_update)(uint16_t *, void *, unsigned max);
415
void pan_liveness_gen(uint16_t *live, unsigned node, unsigned max, uint16_t mask);
416
void pan_liveness_kill(uint16_t *live, unsigned node, unsigned max, uint16_t mask);
417
bool pan_liveness_get(uint16_t *live, unsigned node, uint16_t max);
419
void pan_compute_liveness(struct list_head *blocks,
421
pan_liveness_update callback);
423
void pan_free_liveness(struct list_head *blocks);
426
pan_to_bytemask(unsigned bytes, unsigned mask);
428
void pan_block_add_successor(pan_block *block, pan_block *successor);
431
#define PAN_IS_REG (1)
433
static inline unsigned
434
pan_ssa_index(nir_ssa_def *ssa)
436
/* Off-by-one ensures BIR_NO_ARG is skipped */
437
return ((ssa->index + 1) << 1) | 0;
440
static inline unsigned
441
pan_src_index(nir_src *src)
444
return pan_ssa_index(src->ssa);
446
assert(!src->reg.indirect);
447
return (src->reg.reg->index << 1) | PAN_IS_REG;
451
static inline unsigned
452
pan_dest_index(nir_dest *dst)
455
return pan_ssa_index(&dst->ssa);
457
assert(!dst->reg.indirect);
458
return (dst->reg.reg->index << 1) | PAN_IS_REG;
462
/* IR printing helpers */
463
void pan_print_alu_type(nir_alu_type t, FILE *fp);
465
/* Until it can be upstreamed.. */
466
bool pan_has_source_mod(nir_alu_src *src, nir_op op);
467
bool pan_has_dest_mod(nir_dest **dest, nir_op op);
469
/* NIR passes to do some backend-specific lowering */
471
#define PAN_WRITEOUT_C 1
472
#define PAN_WRITEOUT_Z 2
473
#define PAN_WRITEOUT_S 4
474
#define PAN_WRITEOUT_2 8
476
bool pan_nir_lower_zs_store(nir_shader *nir);
478
bool pan_nir_lower_64bit_intrin(nir_shader *shader);
480
bool pan_lower_helper_invocation(nir_shader *shader);
481
bool pan_lower_sample_pos(nir_shader *shader);