2
Copyright (C) Intel Corp. 2006. All Rights Reserved.
3
Intel funded Tungsten Graphics to
4
develop this 3D driver.
6
Permission is hereby granted, free of charge, to any person obtaining
7
a copy of this software and associated documentation files (the
8
"Software"), to deal in the Software without restriction, including
9
without limitation the rights to use, copy, modify, merge, publish,
10
distribute, sublicense, and/or sell copies of the Software, and to
11
permit persons to whom the Software is furnished to do so, subject to
12
the following conditions:
14
The above copyright notice and this permission notice (including the
15
next paragraph) shall be included in all copies or substantial
16
portions of the Software.
18
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
**********************************************************************/
29
* Keith Whitwell <keithw@vmware.com>
39
#include "brw_compiler.h"
40
#include "brw_eu_defines.h"
42
#include "brw_disasm_info.h"
44
#include "util/bitset.h"
50
#define BRW_EU_MAX_INSN_STACK 5
52
struct brw_insn_state {
53
/* One of BRW_EXECUTE_* */
56
/* Group in units of channels */
59
/* Compression control on gfx4-5 */
62
/* One of BRW_MASK_* */
63
unsigned mask_control:1;
65
/* Scheduling info for Gfx12+ */
70
/* One of BRW_ALIGN_* */
71
unsigned access_mode:1;
73
/* One of BRW_PREDICATE_* */
74
enum brw_predicate predicate:4;
78
/* Flag subreg. Bottom bit is subreg, top bit is reg */
79
unsigned flag_subreg:2;
81
bool acc_wr_control:1;
85
/* A helper for accessing the last instruction emitted. This makes it easy
86
* to set various bits on an instruction without having to create temporary
87
* variables and assign the emitted instruction to them.
89
#define brw_last_inst (&p->store[p->nr_insn - 1])
95
unsigned int next_insn_offset;
99
/* Allow clients to push/pop instruction state:
101
struct brw_insn_state stack[BRW_EU_MAX_INSN_STACK];
102
struct brw_insn_state *current;
104
/** Whether or not the user wants automatic exec sizes
106
* If true, codegen will try to automatically infer the exec size of an
107
* instruction from the width of the destination register. If false, it
108
* will take whatever is set by brw_set_default_exec_size verbatim.
110
* This is set to true by default in brw_init_codegen.
112
bool automatic_exec_sizes;
114
bool single_program_flow;
115
const struct intel_device_info *devinfo;
117
/* Control flow stacks:
118
* - if_stack contains IF and ELSE instructions which must be patched
119
* (and popped) once the matching ENDIF instruction is encountered.
121
* Just store the instruction pointer (an index).
125
int if_stack_array_size;
128
* loop_stack contains the instruction pointers of the starts of loops which
129
* must be patched (and popped) once the matching WHILE instruction is
134
* pre-gfx6, the BREAK and CONT instructions had to tell how many IF/ENDIF
135
* blocks they were popping out of, to fix up the mask stack. This tracks
136
* the IF/ENDIF nesting in each current nested loop level.
138
int *if_depth_in_loop;
139
int loop_stack_depth;
140
int loop_stack_array_size;
142
struct brw_shader_reloc *relocs;
144
int reloc_array_size;
150
struct brw_label *next;
153
void brw_pop_insn_state( struct brw_codegen *p );
154
void brw_push_insn_state( struct brw_codegen *p );
155
unsigned brw_get_default_exec_size(struct brw_codegen *p);
156
unsigned brw_get_default_group(struct brw_codegen *p);
157
unsigned brw_get_default_access_mode(struct brw_codegen *p);
158
struct tgl_swsb brw_get_default_swsb(struct brw_codegen *p);
159
void brw_set_default_exec_size(struct brw_codegen *p, unsigned value);
160
void brw_set_default_mask_control( struct brw_codegen *p, unsigned value );
161
void brw_set_default_saturate( struct brw_codegen *p, bool enable );
162
void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode );
163
void brw_inst_set_compression(const struct intel_device_info *devinfo,
164
brw_inst *inst, bool on);
165
void brw_set_default_compression(struct brw_codegen *p, bool on);
166
void brw_inst_set_group(const struct intel_device_info *devinfo,
167
brw_inst *inst, unsigned group);
168
void brw_set_default_group(struct brw_codegen *p, unsigned group);
169
void brw_set_default_compression_control(struct brw_codegen *p, enum brw_compression c);
170
void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc);
171
void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse);
172
void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg);
173
void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value);
174
void brw_set_default_swsb(struct brw_codegen *p, struct tgl_swsb value);
176
void brw_init_codegen(const struct intel_device_info *, struct brw_codegen *p,
178
bool brw_has_jip(const struct intel_device_info *devinfo, enum opcode opcode);
179
bool brw_has_uip(const struct intel_device_info *devinfo, enum opcode opcode);
180
const struct brw_label *brw_find_label(const struct brw_label *root, int offset);
181
void brw_create_label(struct brw_label **labels, int offset, void *mem_ctx);
182
int brw_disassemble_inst(FILE *file, const struct intel_device_info *devinfo,
183
const struct brw_inst *inst, bool is_compacted,
184
int offset, const struct brw_label *root_label);
186
brw_label *brw_label_assembly(const struct intel_device_info *devinfo,
187
const void *assembly, int start, int end,
189
void brw_disassemble_with_labels(const struct intel_device_info *devinfo,
190
const void *assembly, int start, int end, FILE *out);
191
void brw_disassemble(const struct intel_device_info *devinfo,
192
const void *assembly, int start, int end,
193
const struct brw_label *root_label, FILE *out);
194
const struct brw_shader_reloc *brw_get_shader_relocs(struct brw_codegen *p,
195
unsigned *num_relocs);
196
const unsigned *brw_get_program( struct brw_codegen *p, unsigned *sz );
198
bool brw_try_override_assembly(struct brw_codegen *p, int start_offset,
199
const char *identifier);
201
void brw_realign(struct brw_codegen *p, unsigned align);
202
int brw_append_data(struct brw_codegen *p, void *data,
203
unsigned size, unsigned align);
204
brw_inst *brw_next_insn(struct brw_codegen *p, unsigned opcode);
205
void brw_add_reloc(struct brw_codegen *p, uint32_t id,
206
enum brw_shader_reloc_type type,
207
uint32_t offset, uint32_t delta);
208
void brw_set_dest(struct brw_codegen *p, brw_inst *insn, struct brw_reg dest);
209
void brw_set_src0(struct brw_codegen *p, brw_inst *insn, struct brw_reg reg);
211
void gfx6_resolve_implied_move(struct brw_codegen *p,
213
unsigned msg_reg_nr);
215
/* Helpers for regular instructions:
218
brw_inst *brw_##OP(struct brw_codegen *p, \
219
struct brw_reg dest, \
220
struct brw_reg src0);
223
brw_inst *brw_##OP(struct brw_codegen *p, \
224
struct brw_reg dest, \
225
struct brw_reg src0, \
226
struct brw_reg src1);
229
brw_inst *brw_##OP(struct brw_codegen *p, \
230
struct brw_reg dest, \
231
struct brw_reg src0, \
232
struct brw_reg src1, \
233
struct brw_reg src2);
286
/* Helpers for SEND instruction:
290
* Construct a message descriptor immediate with the specified common
291
* descriptor controls.
293
static inline uint32_t
294
brw_message_desc(const struct intel_device_info *devinfo,
296
unsigned response_length,
299
if (devinfo->ver >= 5) {
300
return (SET_BITS(msg_length, 28, 25) |
301
SET_BITS(response_length, 24, 20) |
302
SET_BITS(header_present, 19, 19));
304
return (SET_BITS(msg_length, 23, 20) |
305
SET_BITS(response_length, 19, 16));
309
static inline unsigned
310
brw_message_desc_mlen(const struct intel_device_info *devinfo, uint32_t desc)
312
if (devinfo->ver >= 5)
313
return GET_BITS(desc, 28, 25);
315
return GET_BITS(desc, 23, 20);
318
static inline unsigned
319
brw_message_desc_rlen(const struct intel_device_info *devinfo, uint32_t desc)
321
if (devinfo->ver >= 5)
322
return GET_BITS(desc, 24, 20);
324
return GET_BITS(desc, 19, 16);
328
brw_message_desc_header_present(ASSERTED
329
const struct intel_device_info *devinfo,
332
assert(devinfo->ver >= 5);
333
return GET_BITS(desc, 19, 19);
336
static inline unsigned
337
brw_message_ex_desc(UNUSED const struct intel_device_info *devinfo,
338
unsigned ex_msg_length)
340
return SET_BITS(ex_msg_length, 9, 6);
343
static inline unsigned
344
brw_message_ex_desc_ex_mlen(UNUSED const struct intel_device_info *devinfo,
347
return GET_BITS(ex_desc, 9, 6);
350
static inline uint32_t
351
brw_urb_desc(const struct intel_device_info *devinfo,
353
bool per_slot_offset_present,
354
bool channel_mask_present,
355
unsigned global_offset)
357
if (devinfo->ver >= 8) {
358
return (SET_BITS(per_slot_offset_present, 17, 17) |
359
SET_BITS(channel_mask_present, 15, 15) |
360
SET_BITS(global_offset, 14, 4) |
361
SET_BITS(msg_type, 3, 0));
362
} else if (devinfo->ver >= 7) {
363
assert(!channel_mask_present);
364
return (SET_BITS(per_slot_offset_present, 16, 16) |
365
SET_BITS(global_offset, 13, 3) |
366
SET_BITS(msg_type, 3, 0));
368
unreachable("unhandled URB write generation");
372
static inline uint32_t
373
brw_urb_desc_msg_type(ASSERTED const struct intel_device_info *devinfo,
376
assert(devinfo->ver >= 7);
377
return GET_BITS(desc, 3, 0);
380
static inline uint32_t
381
brw_urb_fence_desc(const struct intel_device_info *devinfo)
383
assert(devinfo->has_lsc);
384
return brw_urb_desc(devinfo, GFX125_URB_OPCODE_FENCE, false, false, 0);
388
* Construct a message descriptor immediate with the specified sampler
391
static inline uint32_t
392
brw_sampler_desc(const struct intel_device_info *devinfo,
393
unsigned binding_table_index,
397
unsigned return_format)
399
const unsigned desc = (SET_BITS(binding_table_index, 7, 0) |
400
SET_BITS(sampler, 11, 8));
402
/* From the CHV Bspec: Shared Functions - Message Descriptor -
405
* SIMD Mode[2] 29 This field is the upper bit of the 3-bit
408
if (devinfo->ver >= 8)
409
return desc | SET_BITS(msg_type, 16, 12) |
410
SET_BITS(simd_mode & 0x3, 18, 17) |
411
SET_BITS(simd_mode >> 2, 29, 29) |
412
SET_BITS(return_format, 30, 30);
413
if (devinfo->ver >= 7)
414
return (desc | SET_BITS(msg_type, 16, 12) |
415
SET_BITS(simd_mode, 18, 17));
416
else if (devinfo->ver >= 5)
417
return (desc | SET_BITS(msg_type, 15, 12) |
418
SET_BITS(simd_mode, 17, 16));
419
else if (devinfo->verx10 >= 45)
420
return desc | SET_BITS(msg_type, 15, 12);
422
return (desc | SET_BITS(return_format, 13, 12) |
423
SET_BITS(msg_type, 15, 14));
426
static inline unsigned
427
brw_sampler_desc_binding_table_index(UNUSED
428
const struct intel_device_info *devinfo,
431
return GET_BITS(desc, 7, 0);
434
static inline unsigned
435
brw_sampler_desc_sampler(UNUSED const struct intel_device_info *devinfo,
438
return GET_BITS(desc, 11, 8);
441
static inline unsigned
442
brw_sampler_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
444
if (devinfo->ver >= 7)
445
return GET_BITS(desc, 16, 12);
446
else if (devinfo->verx10 >= 45)
447
return GET_BITS(desc, 15, 12);
449
return GET_BITS(desc, 15, 14);
452
static inline unsigned
453
brw_sampler_desc_simd_mode(const struct intel_device_info *devinfo,
456
assert(devinfo->ver >= 5);
457
if (devinfo->ver >= 8)
458
return GET_BITS(desc, 18, 17) | GET_BITS(desc, 29, 29) << 2;
459
else if (devinfo->ver >= 7)
460
return GET_BITS(desc, 18, 17);
462
return GET_BITS(desc, 17, 16);
465
static inline unsigned
466
brw_sampler_desc_return_format(ASSERTED const struct intel_device_info *devinfo,
469
assert(devinfo->verx10 == 40 || devinfo->ver >= 8);
470
if (devinfo->ver >= 8)
471
return GET_BITS(desc, 30, 30);
473
return GET_BITS(desc, 13, 12);
477
* Construct a message descriptor for the dataport
479
static inline uint32_t
480
brw_dp_desc(const struct intel_device_info *devinfo,
481
unsigned binding_table_index,
483
unsigned msg_control)
485
/* Prior to gfx6, things are too inconsistent; use the dp_read/write_desc
488
assert(devinfo->ver >= 6);
489
const unsigned desc = SET_BITS(binding_table_index, 7, 0);
490
if (devinfo->ver >= 8) {
491
return (desc | SET_BITS(msg_control, 13, 8) |
492
SET_BITS(msg_type, 18, 14));
493
} else if (devinfo->ver >= 7) {
494
return (desc | SET_BITS(msg_control, 13, 8) |
495
SET_BITS(msg_type, 17, 14));
497
return (desc | SET_BITS(msg_control, 12, 8) |
498
SET_BITS(msg_type, 16, 13));
502
static inline unsigned
503
brw_dp_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
506
return GET_BITS(desc, 7, 0);
509
static inline unsigned
510
brw_dp_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
512
assert(devinfo->ver >= 6);
513
if (devinfo->ver >= 8)
514
return GET_BITS(desc, 18, 14);
515
else if (devinfo->ver >= 7)
516
return GET_BITS(desc, 17, 14);
518
return GET_BITS(desc, 16, 13);
521
static inline unsigned
522
brw_dp_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
524
assert(devinfo->ver >= 6);
525
if (devinfo->ver >= 7)
526
return GET_BITS(desc, 13, 8);
528
return GET_BITS(desc, 12, 8);
532
* Construct a message descriptor immediate with the specified dataport read
535
static inline uint32_t
536
brw_dp_read_desc(const struct intel_device_info *devinfo,
537
unsigned binding_table_index,
538
unsigned msg_control,
540
unsigned target_cache)
542
if (devinfo->ver >= 6)
543
return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control);
544
else if (devinfo->verx10 >= 45)
545
return (SET_BITS(binding_table_index, 7, 0) |
546
SET_BITS(msg_control, 10, 8) |
547
SET_BITS(msg_type, 13, 11) |
548
SET_BITS(target_cache, 15, 14));
550
return (SET_BITS(binding_table_index, 7, 0) |
551
SET_BITS(msg_control, 11, 8) |
552
SET_BITS(msg_type, 13, 12) |
553
SET_BITS(target_cache, 15, 14));
556
static inline unsigned
557
brw_dp_read_desc_msg_type(const struct intel_device_info *devinfo,
560
if (devinfo->ver >= 6)
561
return brw_dp_desc_msg_type(devinfo, desc);
562
else if (devinfo->verx10 >= 45)
563
return GET_BITS(desc, 13, 11);
565
return GET_BITS(desc, 13, 12);
568
static inline unsigned
569
brw_dp_read_desc_msg_control(const struct intel_device_info *devinfo,
572
if (devinfo->ver >= 6)
573
return brw_dp_desc_msg_control(devinfo, desc);
574
else if (devinfo->verx10 >= 45)
575
return GET_BITS(desc, 10, 8);
577
return GET_BITS(desc, 11, 8);
581
* Construct a message descriptor immediate with the specified dataport write
584
static inline uint32_t
585
brw_dp_write_desc(const struct intel_device_info *devinfo,
586
unsigned binding_table_index,
587
unsigned msg_control,
589
unsigned send_commit_msg)
591
assert(devinfo->ver <= 6 || !send_commit_msg);
592
if (devinfo->ver >= 6) {
593
return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control) |
594
SET_BITS(send_commit_msg, 17, 17);
596
return (SET_BITS(binding_table_index, 7, 0) |
597
SET_BITS(msg_control, 11, 8) |
598
SET_BITS(msg_type, 14, 12) |
599
SET_BITS(send_commit_msg, 15, 15));
603
static inline unsigned
604
brw_dp_write_desc_msg_type(const struct intel_device_info *devinfo,
607
if (devinfo->ver >= 6)
608
return brw_dp_desc_msg_type(devinfo, desc);
610
return GET_BITS(desc, 14, 12);
613
static inline unsigned
614
brw_dp_write_desc_msg_control(const struct intel_device_info *devinfo,
617
if (devinfo->ver >= 6)
618
return brw_dp_desc_msg_control(devinfo, desc);
620
return GET_BITS(desc, 11, 8);
624
brw_dp_write_desc_write_commit(const struct intel_device_info *devinfo,
627
assert(devinfo->ver <= 6);
628
if (devinfo->ver >= 6)
629
return GET_BITS(desc, 17, 17);
631
return GET_BITS(desc, 15, 15);
635
* Construct a message descriptor immediate with the specified dataport
636
* surface function controls.
638
static inline uint32_t
639
brw_dp_surface_desc(const struct intel_device_info *devinfo,
641
unsigned msg_control)
643
assert(devinfo->ver >= 7);
644
/* We'll OR in the binding table index later */
645
return brw_dp_desc(devinfo, 0, msg_type, msg_control);
648
static inline uint32_t
649
brw_dp_untyped_atomic_desc(const struct intel_device_info *devinfo,
650
unsigned exec_size, /**< 0 for SIMD4x2 */
652
bool response_expected)
654
assert(exec_size <= 8 || exec_size == 16);
657
if (devinfo->verx10 >= 75) {
659
msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
661
msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
664
msg_type = GFX7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
667
const unsigned msg_control =
668
SET_BITS(atomic_op, 3, 0) |
669
SET_BITS(0 < exec_size && exec_size <= 8, 4, 4) |
670
SET_BITS(response_expected, 5, 5);
672
return brw_dp_surface_desc(devinfo, msg_type, msg_control);
675
static inline uint32_t
676
brw_dp_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
679
bool response_expected)
681
assert(exec_size <= 8 || exec_size == 16);
682
assert(devinfo->ver >= 9);
684
assert(exec_size > 0);
685
const unsigned msg_type = GFX9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP;
687
const unsigned msg_control =
688
SET_BITS(atomic_op, 1, 0) |
689
SET_BITS(exec_size <= 8, 4, 4) |
690
SET_BITS(response_expected, 5, 5);
692
return brw_dp_surface_desc(devinfo, msg_type, msg_control);
/**
 * Compute a 4-bit channel mask in which the low num_channels bits are
 * cleared and the remaining bits (up to bit 3) are set.
 *
 * For example 0 channels yields 0xf, 1 yields 0xe and 4 yields 0x0.
 * The shift is performed on an unsigned operand so no signed overflow
 * can occur; num_channels must still be smaller than the width of
 * unsigned.
 */
static inline unsigned
brw_mdc_cmask(unsigned num_channels)
{
   /* See also MDC_CMASK in the SKL PRM Vol 2d. */
   return 0xfu & (0xfu << num_channels);
}
/**
 * Compute the LSC channel mask for num_channels channels.
 *
 * num_channels must be in the range 1..4 (asserted); the result is
 * BITSET_MASK(num_channels).
 */
static inline unsigned
lsc_cmask(unsigned num_channels)
{
   assert(num_channels > 0 && num_channels <= 4);
   return BITSET_MASK(num_channels);
}
709
static inline uint32_t
710
brw_dp_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
711
unsigned exec_size, /**< 0 for SIMD4x2 */
712
unsigned num_channels,
715
assert(exec_size <= 8 || exec_size == 16);
719
if (devinfo->verx10 >= 75) {
720
msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE;
722
msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_WRITE;
726
if (devinfo->verx10 >= 75) {
727
msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;
729
msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_READ;
733
/* SIMD4x2 is only valid for read messages on IVB; use SIMD8 instead */
734
if (write && devinfo->verx10 == 70 && exec_size == 0)
737
/* See also MDC_SM3 in the SKL PRM Vol 2d. */
738
const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
739
exec_size <= 8 ? 2 : 1;
741
const unsigned msg_control =
742
SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
743
SET_BITS(simd_mode, 5, 4);
745
return brw_dp_surface_desc(devinfo, msg_type, msg_control);
748
static inline unsigned
749
brw_mdc_ds(unsigned bit_size)
753
return GFX7_BYTE_SCATTERED_DATA_ELEMENT_BYTE;
755
return GFX7_BYTE_SCATTERED_DATA_ELEMENT_WORD;
757
return GFX7_BYTE_SCATTERED_DATA_ELEMENT_DWORD;
759
unreachable("Unsupported bit_size for byte scattered messages");
763
static inline uint32_t
764
brw_dp_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
769
assert(exec_size <= 8 || exec_size == 16);
771
assert(devinfo->verx10 >= 75);
772
const unsigned msg_type =
773
write ? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE :
774
HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ;
776
assert(exec_size > 0);
777
const unsigned msg_control =
778
SET_BITS(exec_size == 16, 0, 0) |
779
SET_BITS(brw_mdc_ds(bit_size), 3, 2);
781
return brw_dp_surface_desc(devinfo, msg_type, msg_control);
784
static inline uint32_t
785
brw_dp_dword_scattered_rw_desc(const struct intel_device_info *devinfo,
789
assert(exec_size == 8 || exec_size == 16);
793
if (devinfo->ver >= 6) {
794
msg_type = GFX6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
796
msg_type = BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
799
if (devinfo->ver >= 7) {
800
msg_type = GFX7_DATAPORT_DC_DWORD_SCATTERED_READ;
801
} else if (devinfo->verx10 >= 45) {
802
msg_type = G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
804
msg_type = BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
808
const unsigned msg_control =
809
SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */
810
SET_BITS(exec_size == 16, 0, 0);
812
return brw_dp_surface_desc(devinfo, msg_type, msg_control);
815
static inline uint32_t
816
brw_dp_oword_block_rw_desc(const struct intel_device_info *devinfo,
821
/* Writes can only have addresses aligned by OWORDs (16 Bytes). */
822
assert(!write || align_16B);
824
const unsigned msg_type =
825
write ? GFX7_DATAPORT_DC_OWORD_BLOCK_WRITE :
826
align_16B ? GFX7_DATAPORT_DC_OWORD_BLOCK_READ :
827
GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ;
829
const unsigned msg_control =
830
SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);
832
return brw_dp_surface_desc(devinfo, msg_type, msg_control);
835
static inline uint32_t
836
brw_dp_a64_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
837
unsigned exec_size, /**< 0 for SIMD4x2 */
838
unsigned num_channels,
841
assert(exec_size <= 8 || exec_size == 16);
842
assert(devinfo->ver >= 8);
845
write ? GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE :
846
GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ;
848
/* See also MDC_SM3 in the SKL PRM Vol 2d. */
849
const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
850
exec_size <= 8 ? 2 : 1;
852
const unsigned msg_control =
853
SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
854
SET_BITS(simd_mode, 5, 4);
856
return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
857
msg_type, msg_control);
860
static inline uint32_t
861
brw_dp_a64_oword_block_rw_desc(const struct intel_device_info *devinfo,
866
/* Writes can only have addresses aligned by OWORDs (16 Bytes). */
867
assert(!write || align_16B);
870
write ? GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE :
871
GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ;
873
unsigned msg_control =
874
SET_BITS(!align_16B, 4, 3) |
875
SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);
877
return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
878
msg_type, msg_control);
882
* Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
885
static inline uint32_t
886
brw_mdc_a64_ds(unsigned elems)
894
unreachable("Unsupported elmeent count for A64 scattered message");
898
static inline uint32_t
899
brw_dp_a64_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
900
unsigned exec_size, /**< 0 for SIMD4x2 */
904
assert(exec_size <= 8 || exec_size == 16);
905
assert(devinfo->ver >= 8);
908
write ? GFX8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE :
909
GFX9_DATAPORT_DC_PORT1_A64_SCATTERED_READ;
911
const unsigned msg_control =
912
SET_BITS(GFX8_A64_SCATTERED_SUBTYPE_BYTE, 1, 0) |
913
SET_BITS(brw_mdc_a64_ds(bit_size / 8), 3, 2) |
914
SET_BITS(exec_size == 16, 4, 4);
916
return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
917
msg_type, msg_control);
920
static inline uint32_t
921
brw_dp_a64_untyped_atomic_desc(const struct intel_device_info *devinfo,
922
ASSERTED unsigned exec_size, /**< 0 for SIMD4x2 */
925
bool response_expected)
927
assert(exec_size == 8);
928
assert(devinfo->ver >= 8);
929
assert(bit_size == 16 || bit_size == 32 || bit_size == 64);
930
assert(devinfo->ver >= 12 || bit_size >= 32);
932
const unsigned msg_type = bit_size == 16 ?
933
GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP :
934
GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;
936
const unsigned msg_control =
937
SET_BITS(atomic_op, 3, 0) |
938
SET_BITS(bit_size == 64, 4, 4) |
939
SET_BITS(response_expected, 5, 5);
941
return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
942
msg_type, msg_control);
945
static inline uint32_t
946
brw_dp_a64_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
947
ASSERTED unsigned exec_size,
950
bool response_expected)
952
assert(exec_size == 8);
953
assert(devinfo->ver >= 9);
954
assert(bit_size == 16 || bit_size == 32);
955
assert(devinfo->ver >= 12 || bit_size == 32);
957
assert(exec_size > 0);
958
const unsigned msg_type = bit_size == 32 ?
959
GFX9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP :
960
GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP;
962
const unsigned msg_control =
963
SET_BITS(atomic_op, 1, 0) |
964
SET_BITS(response_expected, 5, 5);
966
return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
967
msg_type, msg_control);
970
static inline uint32_t
971
brw_dp_typed_atomic_desc(const struct intel_device_info *devinfo,
975
bool response_expected)
977
assert(exec_size > 0 || exec_group == 0);
978
assert(exec_group % 8 == 0);
981
if (devinfo->verx10 >= 75) {
982
if (exec_size == 0) {
983
msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2;
985
msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP;
988
/* SIMD4x2 typed surface R/W messages only exist on HSW+ */
989
assert(exec_size > 0);
990
msg_type = GFX7_DATAPORT_RC_TYPED_ATOMIC_OP;
993
const bool high_sample_mask = (exec_group / 8) % 2 == 1;
995
const unsigned msg_control =
996
SET_BITS(atomic_op, 3, 0) |
997
SET_BITS(high_sample_mask, 4, 4) |
998
SET_BITS(response_expected, 5, 5);
1000
return brw_dp_surface_desc(devinfo, msg_type, msg_control);
1003
static inline uint32_t
1004
brw_dp_typed_surface_rw_desc(const struct intel_device_info *devinfo,
1006
unsigned exec_group,
1007
unsigned num_channels,
1010
assert(exec_size > 0 || exec_group == 0);
1011
assert(exec_group % 8 == 0);
1013
/* Typed surface reads and writes don't support SIMD16 */
1014
assert(exec_size <= 8);
1018
if (devinfo->verx10 >= 75) {
1019
msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE;
1021
msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_WRITE;
1024
if (devinfo->verx10 >= 75) {
1025
msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ;
1027
msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_READ;
1031
/* See also MDC_SG3 in the SKL PRM Vol 2d. */
1032
unsigned msg_control;
1033
if (devinfo->verx10 >= 75) {
1034
/* See also MDC_SG3 in the SKL PRM Vol 2d. */
1035
const unsigned slot_group = exec_size == 0 ? 0 : /* SIMD4x2 */
1036
1 + ((exec_group / 8) % 2);
1039
SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
1040
SET_BITS(slot_group, 5, 4);
1042
/* SIMD4x2 typed surface R/W messages only exist on HSW+ */
1043
assert(exec_size > 0);
1044
const unsigned slot_group = ((exec_group / 8) % 2);
1047
SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
1048
SET_BITS(slot_group, 5, 5);
1051
return brw_dp_surface_desc(devinfo, msg_type, msg_control);
1054
static inline uint32_t
1055
brw_fb_desc(const struct intel_device_info *devinfo,
1056
unsigned binding_table_index,
1058
unsigned msg_control)
1060
/* Prior to gfx6, things are too inconsistent; use the fb_(read|write)_desc
1063
assert(devinfo->ver >= 6);
1064
const unsigned desc = SET_BITS(binding_table_index, 7, 0);
1065
if (devinfo->ver >= 7) {
1066
return (desc | SET_BITS(msg_control, 13, 8) |
1067
SET_BITS(msg_type, 17, 14));
1069
return (desc | SET_BITS(msg_control, 12, 8) |
1070
SET_BITS(msg_type, 16, 13));
1074
static inline unsigned
1075
brw_fb_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
1078
return GET_BITS(desc, 7, 0);
1081
static inline uint32_t
1082
brw_fb_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
1084
assert(devinfo->ver >= 6);
1085
if (devinfo->ver >= 7)
1086
return GET_BITS(desc, 13, 8);
1088
return GET_BITS(desc, 12, 8);
1091
static inline unsigned
1092
brw_fb_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
1094
assert(devinfo->ver >= 6);
1095
if (devinfo->ver >= 7)
1096
return GET_BITS(desc, 17, 14);
1098
return GET_BITS(desc, 16, 13);
1101
static inline uint32_t
1102
brw_fb_read_desc(const struct intel_device_info *devinfo,
1103
unsigned binding_table_index,
1104
unsigned msg_control,
1108
assert(devinfo->ver >= 9);
1109
assert(exec_size == 8 || exec_size == 16);
1111
return brw_fb_desc(devinfo, binding_table_index,
1112
GFX9_DATAPORT_RC_RENDER_TARGET_READ, msg_control) |
1113
SET_BITS(per_sample, 13, 13) |
1114
SET_BITS(exec_size == 8, 8, 8) /* Render Target Message Subtype */;
1117
static inline uint32_t
1118
brw_fb_write_desc(const struct intel_device_info *devinfo,
1119
unsigned binding_table_index,
1120
unsigned msg_control,
1121
bool last_render_target,
1124
const unsigned msg_type =
1126
GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE :
1127
BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1129
assert(devinfo->ver >= 10 || !coarse_write);
1131
if (devinfo->ver >= 6) {
1132
return brw_fb_desc(devinfo, binding_table_index, msg_type, msg_control) |
1133
SET_BITS(last_render_target, 12, 12) |
1134
SET_BITS(coarse_write, 18, 18);
1136
return (SET_BITS(binding_table_index, 7, 0) |
1137
SET_BITS(msg_control, 11, 8) |
1138
SET_BITS(last_render_target, 11, 11) |
1139
SET_BITS(msg_type, 14, 12));
1143
static inline unsigned
1144
brw_fb_write_desc_msg_type(const struct intel_device_info *devinfo,
1147
if (devinfo->ver >= 6)
1148
return brw_fb_desc_msg_type(devinfo, desc);
1150
return GET_BITS(desc, 14, 12);
1153
static inline unsigned
1154
brw_fb_write_desc_msg_control(const struct intel_device_info *devinfo,
1157
if (devinfo->ver >= 6)
1158
return brw_fb_desc_msg_control(devinfo, desc);
1160
return GET_BITS(desc, 11, 8);
1164
brw_fb_write_desc_last_render_target(const struct intel_device_info *devinfo,
1167
if (devinfo->ver >= 6)
1168
return GET_BITS(desc, 12, 12);
1170
return GET_BITS(desc, 11, 11);
1174
brw_fb_write_desc_write_commit(const struct intel_device_info *devinfo,
1177
assert(devinfo->ver <= 6);
1178
if (devinfo->ver >= 6)
1179
return GET_BITS(desc, 17, 17);
1181
return GET_BITS(desc, 15, 15);
1185
brw_fb_write_desc_coarse_write(const struct intel_device_info *devinfo,
1188
assert(devinfo->ver >= 10);
1189
return GET_BITS(desc, 18, 18);
1193
lsc_opcode_has_cmask(enum lsc_opcode opcode)
1195
return opcode == LSC_OP_LOAD_CMASK || opcode == LSC_OP_STORE_CMASK;
1199
lsc_opcode_has_transpose(enum lsc_opcode opcode)
1201
return opcode == LSC_OP_LOAD || opcode == LSC_OP_STORE;
1204
static inline uint32_t
1205
lsc_data_size_bytes(enum lsc_data_size data_size)
1207
switch (data_size) {
1208
case LSC_DATA_SIZE_D8:
1210
case LSC_DATA_SIZE_D16:
1212
case LSC_DATA_SIZE_D32:
1213
case LSC_DATA_SIZE_D8U32:
1214
case LSC_DATA_SIZE_D16U32:
1215
case LSC_DATA_SIZE_D16BF32:
1217
case LSC_DATA_SIZE_D64:
1220
unreachable("Unsupported data payload size.");
1224
static inline uint32_t
1225
lsc_addr_size_bytes(enum lsc_addr_size addr_size)
1227
switch (addr_size) {
1228
case LSC_ADDR_SIZE_A16: return 2;
1229
case LSC_ADDR_SIZE_A32: return 4;
1230
case LSC_ADDR_SIZE_A64: return 8;
1232
unreachable("Unsupported address size.");
1236
static inline uint32_t
1237
lsc_vector_length(enum lsc_vect_size vect_size)
1239
switch (vect_size) {
1240
case LSC_VECT_SIZE_V1: return 1;
1241
case LSC_VECT_SIZE_V2: return 2;
1242
case LSC_VECT_SIZE_V3: return 3;
1243
case LSC_VECT_SIZE_V4: return 4;
1244
case LSC_VECT_SIZE_V8: return 8;
1245
case LSC_VECT_SIZE_V16: return 16;
1246
case LSC_VECT_SIZE_V32: return 32;
1247
case LSC_VECT_SIZE_V64: return 64;
1249
unreachable("Unsupported size of vector");
1253
static inline enum lsc_vect_size
1254
lsc_vect_size(unsigned vect_size)
1257
case 1: return LSC_VECT_SIZE_V1;
1258
case 2: return LSC_VECT_SIZE_V2;
1259
case 3: return LSC_VECT_SIZE_V3;
1260
case 4: return LSC_VECT_SIZE_V4;
1261
case 8: return LSC_VECT_SIZE_V8;
1262
case 16: return LSC_VECT_SIZE_V16;
1263
case 32: return LSC_VECT_SIZE_V32;
1264
case 64: return LSC_VECT_SIZE_V64;
1266
unreachable("Unsupported vector size for dataport");
1270
/**
 * Build an LSC message descriptor.
 *
 * Fields packed below: opcode in bits 5:0, address size in 8:7, data size in
 * 11:9, transpose in 15, cache control in 19:17, destination length in
 * 24:20, src0 length in 28:25 and address surface type in 30:29.  Bits 15:12
 * carry a channel mask when lsc_opcode_has_cmask(opcode) is true; otherwise
 * bits 14:12 carry the vector-size encoding of \p num_channels.
 *
 * The destination length is 0 when \p has_dest is false.
 *
 * NOTE(review): several short lines of this function (braces, the second
 * argument of both DIV_ROUND_UP calls, the msg_desc declaration, the `else`
 * and the final return) are absent from this copy and bare line-number
 * residue is interleaved — restore from the upstream header.
 */
static inline uint32_t
1271
lsc_msg_desc(UNUSED const struct intel_device_info *devinfo,
1272
enum lsc_opcode opcode, unsigned simd_size,
1273
enum lsc_addr_surface_type addr_type,
1274
enum lsc_addr_size addr_sz, unsigned num_coordinates,
1275
enum lsc_data_size data_sz, unsigned num_channels,
1276
bool transpose, unsigned cache_ctrl, bool has_dest)
1278
assert(devinfo->has_lsc);
1280
/* Payload bytes returned: element size * channels * SIMD lanes. */
unsigned dest_length = !has_dest ? 0 :
1281
DIV_ROUND_UP(lsc_data_size_bytes(data_sz) * num_channels * simd_size,
1284
/* Address payload bytes: address size * coordinates * SIMD lanes. */
unsigned src0_length =
1285
DIV_ROUND_UP(lsc_addr_size_bytes(addr_sz) * num_coordinates * simd_size,
1288
/* Transpose may only be requested for opcodes that support it. */
assert(!transpose || lsc_opcode_has_transpose(opcode));
1291
SET_BITS(opcode, 5, 0) |
1292
SET_BITS(addr_sz, 8, 7) |
1293
SET_BITS(data_sz, 11, 9) |
1294
SET_BITS(transpose, 15, 15) |
1295
SET_BITS(cache_ctrl, 19, 17) |
1296
SET_BITS(dest_length, 24, 20) |
1297
SET_BITS(src0_length, 28, 25) |
1298
SET_BITS(addr_type, 30, 29);
1300
if (lsc_opcode_has_cmask(opcode))
1301
msg_desc |= SET_BITS(lsc_cmask(num_channels), 15, 12);
1303
msg_desc |= SET_BITS(lsc_vect_size(num_channels), 14, 12);
1308
static inline enum lsc_opcode
1309
lsc_msg_desc_opcode(UNUSED const struct intel_device_info *devinfo,
1312
assert(devinfo->has_lsc);
1313
return (enum lsc_opcode) GET_BITS(desc, 5, 0);
1316
static inline enum lsc_addr_size
1317
lsc_msg_desc_addr_size(UNUSED const struct intel_device_info *devinfo,
1320
assert(devinfo->has_lsc);
1321
return (enum lsc_addr_size) GET_BITS(desc, 8, 7);
1324
static inline enum lsc_data_size
1325
lsc_msg_desc_data_size(UNUSED const struct intel_device_info *devinfo,
1328
assert(devinfo->has_lsc);
1329
return (enum lsc_data_size) GET_BITS(desc, 11, 9);
1332
static inline enum lsc_vect_size
1333
lsc_msg_desc_vect_size(UNUSED const struct intel_device_info *devinfo,
1336
assert(devinfo->has_lsc);
1337
assert(!lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
1338
return (enum lsc_vect_size) GET_BITS(desc, 14, 12);
1341
static inline enum lsc_cmask
1342
lsc_msg_desc_cmask(UNUSED const struct intel_device_info *devinfo,
1345
assert(devinfo->has_lsc);
1346
assert(lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
1347
return (enum lsc_cmask) GET_BITS(desc, 15, 12);
1351
/** Extract the transpose flag (bit 15) from the message descriptor \p desc. */
static inline bool
lsc_msg_desc_transpose(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 15, 15);
}
1358
static inline unsigned
1359
lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info *devinfo,
1362
assert(devinfo->has_lsc);
1363
return GET_BITS(desc, 19, 17);
1366
static inline unsigned
1367
lsc_msg_desc_dest_len(const struct intel_device_info *devinfo,
1370
assert(devinfo->has_lsc);
1371
return GET_BITS(desc, 24, 20);
1374
static inline unsigned
1375
lsc_msg_desc_src0_len(const struct intel_device_info *devinfo,
1378
assert(devinfo->has_lsc);
1379
return GET_BITS(desc, 28, 25);
1382
static inline enum lsc_addr_surface_type
1383
lsc_msg_desc_addr_type(UNUSED const struct intel_device_info *devinfo,
1386
assert(devinfo->has_lsc);
1387
return (enum lsc_addr_surface_type) GET_BITS(desc, 30, 29);
1390
static inline uint32_t
1391
lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo,
1392
enum lsc_fence_scope scope,
1393
enum lsc_flush_type flush_type,
1396
assert(devinfo->has_lsc);
1397
return SET_BITS(LSC_OP_FENCE, 5, 0) |
1398
SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) |
1399
SET_BITS(scope, 11, 9) |
1400
SET_BITS(flush_type, 14, 12) |
1401
SET_BITS(route_to_lsc, 18, 18) |
1402
SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29);
1405
static inline enum lsc_fence_scope
1406
lsc_fence_msg_desc_scope(UNUSED const struct intel_device_info *devinfo,
1409
assert(devinfo->has_lsc);
1410
return (enum lsc_fence_scope) GET_BITS(desc, 11, 9);
1413
static inline enum lsc_flush_type
1414
lsc_fence_msg_desc_flush_type(UNUSED const struct intel_device_info *devinfo,
1417
assert(devinfo->has_lsc);
1418
return (enum lsc_flush_type) GET_BITS(desc, 14, 12);
1421
static inline enum lsc_backup_fence_routing
1422
lsc_fence_msg_desc_backup_routing(UNUSED const struct intel_device_info *devinfo,
1425
assert(devinfo->has_lsc);
1426
return (enum lsc_backup_fence_routing) GET_BITS(desc, 18, 18);
1429
static inline uint32_t
1430
lsc_bti_ex_desc(const struct intel_device_info *devinfo, unsigned bti)
1432
assert(devinfo->has_lsc);
1433
return SET_BITS(bti, 31, 24) |
1434
SET_BITS(0, 23, 12); /* base offset */
1437
static inline unsigned
1438
lsc_bti_ex_desc_base_offset(const struct intel_device_info *devinfo,
1441
assert(devinfo->has_lsc);
1442
return GET_BITS(ex_desc, 23, 12);
1445
static inline unsigned
1446
lsc_bti_ex_desc_index(const struct intel_device_info *devinfo,
1449
assert(devinfo->has_lsc);
1450
return GET_BITS(ex_desc, 31, 24);
1453
static inline unsigned
1454
lsc_flat_ex_desc_base_offset(const struct intel_device_info *devinfo,
1457
assert(devinfo->has_lsc);
1458
return GET_BITS(ex_desc, 31, 12);
1461
static inline uint32_t
1462
lsc_bss_ex_desc(const struct intel_device_info *devinfo,
1463
unsigned surface_state_index)
1465
assert(devinfo->has_lsc);
1466
return SET_BITS(surface_state_index, 31, 6);
1469
static inline unsigned
1470
lsc_bss_ex_desc_index(const struct intel_device_info *devinfo,
1473
assert(devinfo->has_lsc);
1474
return GET_BITS(ex_desc, 31, 6);
1477
/**
 * Encode an execution size as a one-bit SIMD-mode value: 0 for SIMD8 and
 * 1 for SIMD16.  Other execution sizes are rejected by the assertion.
 */
static inline uint32_t
brw_mdc_sm2(unsigned exec_size)
{
   assert(exec_size == 8 || exec_size == 16);
   return exec_size > 8;
}
1484
/**
 * Decode a one-bit SIMD-mode value back into an execution size: 0 -> 8 and
 * 1 -> 16.  This is the inverse of brw_mdc_sm2().
 *
 * NOTE(review): the body is reconstructed from the brw_mdc_sm2() encoding —
 * confirm against the upstream header.
 */
static inline uint32_t
brw_mdc_sm2_exec_size(uint32_t sm2)
{
   assert(sm2 <= 1);
   return 8 << sm2;
}
1491
static inline uint32_t
1492
brw_btd_spawn_desc(ASSERTED const struct intel_device_info *devinfo,
1493
unsigned exec_size, unsigned msg_type)
1495
assert(devinfo->has_ray_tracing);
1497
return SET_BITS(0, 19, 19) | /* No header */
1498
SET_BITS(msg_type, 17, 14) |
1499
SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
1502
static inline uint32_t
1503
brw_btd_spawn_msg_type(UNUSED const struct intel_device_info *devinfo,
1506
return GET_BITS(desc, 17, 14);
1509
static inline uint32_t
1510
brw_btd_spawn_exec_size(UNUSED const struct intel_device_info *devinfo,
1513
return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
1516
static inline uint32_t
1517
brw_rt_trace_ray_desc(ASSERTED const struct intel_device_info *devinfo,
1520
assert(devinfo->has_ray_tracing);
1522
return SET_BITS(0, 19, 19) | /* No header */
1523
SET_BITS(0, 17, 14) | /* Message type */
1524
SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
1527
static inline uint32_t
1528
brw_rt_trace_ray_desc_exec_size(UNUSED const struct intel_device_info *devinfo,
1531
return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
1535
* Construct a message descriptor immediate with the specified pixel
1536
* interpolator function controls.
1538
/*
 * Packs the pixel interpolator controls into a descriptor: slot_group in
 * bit 11, msg_type in bits 13:12, noperspective (normalized to 0/1) in
 * bit 14, coarse_pixel_rate in bit 15 and simd_mode in bit 16.
 *
 * NOTE(review): the parameter lines declaring msg_type, noperspective and
 * simd_mode, and the braces, are absent from this copy and bare line-number
 * residue is interleaved — restore from the upstream header.
 */
static inline uint32_t
1539
brw_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo,
1542
bool coarse_pixel_rate,
1544
unsigned slot_group)
1546
/* coarse_pixel_rate may only be set when devinfo->ver >= 10. */
assert(devinfo->ver >= 10 || !coarse_pixel_rate);
1547
return (SET_BITS(slot_group, 11, 11) |
1548
SET_BITS(msg_type, 13, 12) |
1549
SET_BITS(!!noperspective, 14, 14) |
1550
SET_BITS(coarse_pixel_rate, 15, 15) |
1551
SET_BITS(simd_mode, 16, 16));
1554
void brw_urb_WRITE(struct brw_codegen *p,
1555
struct brw_reg dest,
1556
unsigned msg_reg_nr,
1557
struct brw_reg src0,
1558
enum brw_urb_write_flags flags,
1559
unsigned msg_length,
1560
unsigned response_length,
1565
* Send message to shared unit \p sfid with a possibly indirect descriptor \p
1566
* desc. If \p desc is not an immediate it will be transparently loaded to an
1567
* address register using an OR instruction.
1570
brw_send_indirect_message(struct brw_codegen *p,
1573
struct brw_reg payload,
1574
struct brw_reg desc,
1579
brw_send_indirect_split_message(struct brw_codegen *p,
1582
struct brw_reg payload0,
1583
struct brw_reg payload1,
1584
struct brw_reg desc,
1586
struct brw_reg ex_desc,
1587
unsigned ex_desc_imm,
1590
void brw_ff_sync(struct brw_codegen *p,
1591
struct brw_reg dest,
1592
unsigned msg_reg_nr,
1593
struct brw_reg src0,
1595
unsigned response_length,
1598
void brw_svb_write(struct brw_codegen *p,
1599
struct brw_reg dest,
1600
unsigned msg_reg_nr,
1601
struct brw_reg src0,
1602
unsigned binding_table_index,
1603
bool send_commit_msg);
1605
brw_inst *brw_fb_WRITE(struct brw_codegen *p,
1606
struct brw_reg payload,
1607
struct brw_reg implied_header,
1608
unsigned msg_control,
1609
unsigned binding_table_index,
1610
unsigned msg_length,
1611
unsigned response_length,
1613
bool last_render_target,
1614
bool header_present);
1616
brw_inst *gfx9_fb_READ(struct brw_codegen *p,
1618
struct brw_reg payload,
1619
unsigned binding_table_index,
1620
unsigned msg_length,
1621
unsigned response_length,
1624
void brw_SAMPLE(struct brw_codegen *p,
1625
struct brw_reg dest,
1626
unsigned msg_reg_nr,
1627
struct brw_reg src0,
1628
unsigned binding_table_index,
1631
unsigned response_length,
1632
unsigned msg_length,
1633
unsigned header_present,
1635
unsigned return_format);
1637
void brw_adjust_sampler_state_pointer(struct brw_codegen *p,
1638
struct brw_reg header,
1639
struct brw_reg sampler_index);
1641
void gfx4_math(struct brw_codegen *p,
1642
struct brw_reg dest,
1644
unsigned msg_reg_nr,
1646
unsigned precision );
1648
void gfx6_math(struct brw_codegen *p,
1649
struct brw_reg dest,
1651
struct brw_reg src0,
1652
struct brw_reg src1);
1654
void brw_oword_block_read(struct brw_codegen *p,
1655
struct brw_reg dest,
1658
uint32_t bind_table_index);
1660
unsigned brw_scratch_surface_idx(const struct brw_codegen *p);
1662
void brw_oword_block_read_scratch(struct brw_codegen *p,
1663
struct brw_reg dest,
1668
void brw_oword_block_write_scratch(struct brw_codegen *p,
1673
void gfx7_block_read_scratch(struct brw_codegen *p,
1674
struct brw_reg dest,
1679
* Return the generation-specific jump distance scaling factor.
1681
* Given the number of instructions to jump, we need to scale by
1682
* some number to obtain the actual jump distance to program in an
1685
static inline unsigned
1686
brw_jump_scale(const struct intel_device_info *devinfo)
1688
/* Broadwell measures jump targets in bytes. */
1689
if (devinfo->ver >= 8)
1692
/* Ironlake and later measure jump targets in 64-bit data chunks (in order
1693
* (to support compaction), so each 128-bit instruction requires 2 chunks.
1695
if (devinfo->ver >= 5)
1698
/* Gfx4 simply uses the number of 128-bit instructions. */
1702
void brw_barrier(struct brw_codegen *p, struct brw_reg src);
1704
/* If/else/endif. Works by manipulating the execution flags on each
1707
brw_inst *brw_IF(struct brw_codegen *p, unsigned execute_size);
1708
brw_inst *gfx6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional,
1709
struct brw_reg src0, struct brw_reg src1);
1711
void brw_ELSE(struct brw_codegen *p);
1712
void brw_ENDIF(struct brw_codegen *p);
1716
brw_inst *brw_DO(struct brw_codegen *p, unsigned execute_size);
1718
brw_inst *brw_WHILE(struct brw_codegen *p);
1720
brw_inst *brw_BREAK(struct brw_codegen *p);
1721
brw_inst *brw_CONT(struct brw_codegen *p);
1722
brw_inst *brw_HALT(struct brw_codegen *p);
1726
void brw_land_fwd_jump(struct brw_codegen *p, int jmp_insn_idx);
1728
brw_inst *brw_JMPI(struct brw_codegen *p, struct brw_reg index,
1729
unsigned predicate_control);
1731
void brw_NOP(struct brw_codegen *p);
1733
void brw_WAIT(struct brw_codegen *p);
1735
void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func);
1737
/* Special case: there is never a destination, execution size will be
1740
void brw_CMP(struct brw_codegen *p,
1741
struct brw_reg dest,
1742
unsigned conditional,
1743
struct brw_reg src0,
1744
struct brw_reg src1);
1746
void brw_CMPN(struct brw_codegen *p,
1747
struct brw_reg dest,
1748
unsigned conditional,
1749
struct brw_reg src0,
1750
struct brw_reg src1);
1753
brw_untyped_atomic(struct brw_codegen *p,
1755
struct brw_reg payload,
1756
struct brw_reg surface,
1758
unsigned msg_length,
1759
bool response_expected,
1760
bool header_present);
1763
brw_untyped_surface_read(struct brw_codegen *p,
1765
struct brw_reg payload,
1766
struct brw_reg surface,
1767
unsigned msg_length,
1768
unsigned num_channels);
1771
brw_untyped_surface_write(struct brw_codegen *p,
1772
struct brw_reg payload,
1773
struct brw_reg surface,
1774
unsigned msg_length,
1775
unsigned num_channels,
1776
bool header_present);
1779
brw_memory_fence(struct brw_codegen *p,
1782
enum opcode send_op,
1783
enum brw_message_target sfid,
1789
brw_pixel_interpolator_query(struct brw_codegen *p,
1790
struct brw_reg dest,
1793
bool coarse_pixel_rate,
1795
struct brw_reg data,
1796
unsigned msg_length,
1797
unsigned response_length);
1800
brw_find_live_channel(struct brw_codegen *p,
1802
struct brw_reg mask,
1806
brw_broadcast(struct brw_codegen *p,
1809
struct brw_reg idx);
1812
brw_float_controls_mode(struct brw_codegen *p,
1813
unsigned mode, unsigned mask);
1816
brw_update_reloc_imm(const struct intel_device_info *devinfo,
1821
brw_MOV_reloc_imm(struct brw_codegen *p,
1823
enum brw_reg_type src_type,
1826
/***********************************************************************
1830
void brw_copy_indirect_to_indirect(struct brw_codegen *p,
1831
struct brw_indirect dst_ptr,
1832
struct brw_indirect src_ptr,
1835
void brw_copy_from_indirect(struct brw_codegen *p,
1837
struct brw_indirect ptr,
1840
void brw_copy4(struct brw_codegen *p,
1845
void brw_copy8(struct brw_codegen *p,
1850
void brw_math_invert( struct brw_codegen *p,
1852
struct brw_reg src);
1854
void brw_set_src1(struct brw_codegen *p, brw_inst *insn, struct brw_reg reg);
1856
void brw_set_desc_ex(struct brw_codegen *p, brw_inst *insn,
1857
unsigned desc, unsigned ex_desc);
1860
/**
 * Set the message descriptor of \p insn to \p desc, with an extended
 * descriptor of 0.  Convenience wrapper around brw_set_desc_ex().
 */
static inline void
brw_set_desc(struct brw_codegen *p, brw_inst *insn, unsigned desc)
{
   brw_set_desc_ex(p, insn, desc, 0);
}
1865
void brw_set_uip_jip(struct brw_codegen *p, int start_offset);
1867
enum brw_conditional_mod brw_negate_cmod(enum brw_conditional_mod cmod);
1868
enum brw_conditional_mod brw_swap_cmod(enum brw_conditional_mod cmod);
1870
/* brw_eu_compact.c */
1871
void brw_compact_instructions(struct brw_codegen *p, int start_offset,
1872
struct disasm_info *disasm);
1873
void brw_uncompact_instruction(const struct intel_device_info *devinfo,
1874
brw_inst *dst, brw_compact_inst *src);
1875
bool brw_try_compact_instruction(const struct intel_device_info *devinfo,
1876
brw_compact_inst *dst, const brw_inst *src);
1878
void brw_debug_compact_uncompact(const struct intel_device_info *devinfo,
1879
brw_inst *orig, brw_inst *uncompacted);
1881
/* brw_eu_validate.c */
1882
bool brw_validate_instruction(const struct intel_device_info *devinfo,
1883
const brw_inst *inst, int offset,
1884
struct disasm_info *disasm);
1885
bool brw_validate_instructions(const struct intel_device_info *devinfo,
1886
const void *assembly, int start_offset, int end_offset,
1887
struct disasm_info *disasm);
1890
/*
 * Locates the instruction at byte \p offset inside \p store and tests its
 * compaction-control bit.
 *
 * NOTE(review): the return-type line, the braces and both return statements
 * are absent from this copy — presumably the result is the offset of the
 * following instruction, advanced by the compacted or full instruction
 * size; restore from the upstream header.
 */
next_offset(const struct intel_device_info *devinfo, void *store, int offset)
1892
/* Byte-wise pointer arithmetic to reach the instruction at `offset`. */
brw_inst *insn = (brw_inst *)((char *)store + offset);
1894
if (brw_inst_cmpt_control(devinfo, insn))
1900
struct opcode_desc {
1909
const struct opcode_desc *
1910
brw_opcode_desc(const struct intel_device_info *devinfo, enum opcode opcode);
1912
const struct opcode_desc *
1913
brw_opcode_desc_from_hw(const struct intel_device_info *devinfo, unsigned hw);
1915
static inline unsigned
1916
brw_opcode_encode(const struct intel_device_info *devinfo, enum opcode opcode)
1918
return brw_opcode_desc(devinfo, opcode)->hw;
1921
static inline enum opcode
1922
brw_opcode_decode(const struct intel_device_info *devinfo, unsigned hw)
1924
const struct opcode_desc *desc = brw_opcode_desc_from_hw(devinfo, hw);
1925
return desc ? (enum opcode)desc->ir : BRW_OPCODE_ILLEGAL;
1929
brw_inst_set_opcode(const struct intel_device_info *devinfo,
1930
brw_inst *inst, enum opcode opcode)
1932
brw_inst_set_hw_opcode(devinfo, inst, brw_opcode_encode(devinfo, opcode));
1935
static inline enum opcode
1936
brw_inst_opcode(const struct intel_device_info *devinfo, const brw_inst *inst)
1938
return brw_opcode_decode(devinfo, brw_inst_hw_opcode(devinfo, inst));
1942
is_3src(const struct intel_device_info *devinfo, enum opcode opcode)
1944
const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode);
1945
return desc && desc->nsrc == 3;
1948
/** Maximum SEND message length */
1949
#define BRW_MAX_MSG_LENGTH 15
1951
/** First MRF register used by spills */
1952
#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)
1954
/** First MRF register used by pull loads */
1955
#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)