2
Copyright (C) Intel Corp. 2006. All Rights Reserved.
3
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4
develop this 3D driver.
6
Permission is hereby granted, free of charge, to any person obtaining
7
a copy of this software and associated documentation files (the
8
"Software"), to deal in the Software without restriction, including
9
without limitation the rights to use, copy, modify, merge, publish,
10
distribute, sublicense, and/or sell copies of the Software, and to
11
permit persons to whom the Software is furnished to do so, subject to
12
the following conditions:
14
The above copyright notice and this permission notice (including the
15
next paragraph) shall be included in all copies or substantial
16
portions of the Software.
18
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
**********************************************************************/
29
* Keith Whitwell <keith@tungstengraphics.com>
37
/* Number of elements in a true array (not a pointer).  The argument is
 * parenthesized before subscripting so that expression arguments are
 * indexed as a whole.
 */
#define ARRAY_SIZE(A) (sizeof(A) / sizeof((A)[0]))
39
/***********************************************************************
40
* Internal helper for constructing instructions
43
static void guess_execution_size(struct brw_compile *p,
44
struct brw_instruction *insn,
47
if (reg.width == BRW_WIDTH_8 && p->compressed)
48
insn->header.execution_size = BRW_EXECUTE_16;
50
insn->header.execution_size = reg.width;
55
* Prior to Sandybridge, the SEND instruction accepted non-MRF source
56
* registers, implicitly moving the operand to a message register.
58
* On Sandybridge, this is no longer the case. This function performs the
59
* explicit move; it should be called before emitting a SEND instruction.
62
gen6_resolve_implied_move(struct brw_compile *p,
69
if (src->file == BRW_MESSAGE_REGISTER_FILE)
72
if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
73
brw_push_insn_state(p);
74
brw_set_mask_control(p, BRW_MASK_DISABLE);
75
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
76
brw_MOV(p, __retype_ud(brw_message_reg(msg_reg_nr)), __retype_ud(*src));
77
brw_pop_insn_state(p);
79
*src = brw_message_reg(msg_reg_nr);
83
gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
85
/* From the BSpec / ISA Reference / send - [DevIVB+]:
86
* "The send with EOT should use register space R112-R127 for <src>. This is
87
* to enable loading of a new thread into the same slot while the message
88
* with EOT for current thread is pending dispatch."
90
* Since we're pretending to have 16 MRFs anyway, we may as well use the
91
* registers required for messages with EOT.
93
if (p->gen >= 070 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
94
reg->file = BRW_GENERAL_REGISTER_FILE;
100
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
103
if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
104
dest.file != BRW_MESSAGE_REGISTER_FILE)
105
assert(dest.nr < 128);
107
gen7_convert_mrf_to_grf(p, &dest);
109
insn->bits1.da1.dest_reg_file = dest.file;
110
insn->bits1.da1.dest_reg_type = dest.type;
111
insn->bits1.da1.dest_address_mode = dest.address_mode;
113
if (dest.address_mode == BRW_ADDRESS_DIRECT) {
114
insn->bits1.da1.dest_reg_nr = dest.nr;
116
if (insn->header.access_mode == BRW_ALIGN_1) {
117
insn->bits1.da1.dest_subreg_nr = dest.subnr;
118
if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
119
dest.hstride = BRW_HORIZONTAL_STRIDE_1;
120
insn->bits1.da1.dest_horiz_stride = dest.hstride;
122
insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
123
insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
124
/* even ignored in da16, still need to set as '01' */
125
insn->bits1.da16.dest_horiz_stride = 1;
128
insn->bits1.ia1.dest_subreg_nr = dest.subnr;
130
/* These are different sizes in align1 vs align16:
132
if (insn->header.access_mode == BRW_ALIGN_1) {
133
insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
134
if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
135
dest.hstride = BRW_HORIZONTAL_STRIDE_1;
136
insn->bits1.ia1.dest_horiz_stride = dest.hstride;
139
insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
140
/* even though it is ignored in ia16, still need to set as '01' */
141
insn->bits1.ia16.dest_horiz_stride = 1;
145
guess_execution_size(p, insn, dest);
148
static const int reg_type_size[8] = {
159
validate_reg(struct brw_instruction *insn, struct brw_reg reg)
161
int hstride_for_reg[] = {0, 1, 2, 4};
162
int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
163
int width_for_reg[] = {1, 2, 4, 8, 16};
164
int execsize_for_reg[] = {1, 2, 4, 8, 16};
165
int width, hstride, vstride, execsize;
167
if (reg.file == BRW_IMMEDIATE_VALUE) {
168
/* 3.3.6: Region Parameters. Restriction: Immediate vectors
169
* mean the destination has to be 128-bit aligned and the
170
* destination horiz stride has to be a word.
172
if (reg.type == BRW_REGISTER_TYPE_V) {
173
assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
174
reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
180
/* Match only the ARF null register.  BRW_ARF_NULL is a register
 * number, so it has to be compared against reg.nr; comparing it
 * against reg.file merely tested the file field a second time.
 */
if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
    reg.nr == BRW_ARF_NULL)
184
assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
185
assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg));
186
assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg));
187
assert(insn->header.execution_size >= 0 && insn->header.execution_size < ARRAY_SIZE(execsize_for_reg));
189
hstride = hstride_for_reg[reg.hstride];
191
if (reg.vstride == 0xf) {
194
vstride = vstride_for_reg[reg.vstride];
197
width = width_for_reg[reg.width];
199
execsize = execsize_for_reg[insn->header.execution_size];
201
/* Restrictions from 3.3.10: Register Region Restrictions. */
203
assert(execsize >= width);
206
if (execsize == width && hstride != 0) {
207
assert(vstride == -1 || vstride == width * hstride);
211
if (execsize == width && hstride == 0) {
212
/* no restriction on vstride. */
217
assert(hstride == 0);
221
if (execsize == 1 && width == 1) {
222
assert(hstride == 0);
223
assert(vstride == 0);
227
if (vstride == 0 && hstride == 0) {
231
/* 10. Check destination issues. */
235
brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
238
/* Bound the register number for operands outside the architecture and
 * message register files.  The check keys off the register *file*
 * (reg.type is the operand's data type), using the same guard as
 * brw_set_dest().
 */
if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE &&
    reg.file != BRW_MESSAGE_REGISTER_FILE)
	assert(reg.nr < 128);
241
gen7_convert_mrf_to_grf(p, ®);
243
validate_reg(insn, reg);
245
insn->bits1.da1.src0_reg_file = reg.file;
246
insn->bits1.da1.src0_reg_type = reg.type;
247
insn->bits2.da1.src0_abs = reg.abs;
248
insn->bits2.da1.src0_negate = reg.negate;
249
insn->bits2.da1.src0_address_mode = reg.address_mode;
251
if (reg.file == BRW_IMMEDIATE_VALUE) {
252
insn->bits3.ud = reg.dw1.ud;
254
/* Required to set some fields in src1 as well:
256
insn->bits1.da1.src1_reg_file = 0; /* arf */
257
insn->bits1.da1.src1_reg_type = reg.type;
259
if (reg.address_mode == BRW_ADDRESS_DIRECT) {
260
if (insn->header.access_mode == BRW_ALIGN_1) {
261
insn->bits2.da1.src0_subreg_nr = reg.subnr;
262
insn->bits2.da1.src0_reg_nr = reg.nr;
264
insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
265
insn->bits2.da16.src0_reg_nr = reg.nr;
268
insn->bits2.ia1.src0_subreg_nr = reg.subnr;
270
if (insn->header.access_mode == BRW_ALIGN_1) {
271
insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
273
insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
277
if (insn->header.access_mode == BRW_ALIGN_1) {
278
if (reg.width == BRW_WIDTH_1 &&
279
insn->header.execution_size == BRW_EXECUTE_1) {
280
insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
281
insn->bits2.da1.src0_width = BRW_WIDTH_1;
282
insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
284
insn->bits2.da1.src0_horiz_stride = reg.hstride;
285
insn->bits2.da1.src0_width = reg.width;
286
insn->bits2.da1.src0_vert_stride = reg.vstride;
289
insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
290
insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
291
insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
292
insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
294
/* This is an oddity of the fact we're using the same
295
* descriptions for registers in align_16 as align_1:
297
if (reg.vstride == BRW_VERTICAL_STRIDE_8)
298
insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
300
insn->bits2.da16.src0_vert_stride = reg.vstride;
305
void brw_set_src1(struct brw_compile *p,
306
struct brw_instruction *insn,
309
assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
310
assert(reg.nr < 128);
312
gen7_convert_mrf_to_grf(p, ®);
314
validate_reg(insn, reg);
316
insn->bits1.da1.src1_reg_file = reg.file;
317
insn->bits1.da1.src1_reg_type = reg.type;
318
insn->bits3.da1.src1_abs = reg.abs;
319
insn->bits3.da1.src1_negate = reg.negate;
321
/* Only src1 can be immediate in two-argument instructions. */
322
assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
324
if (reg.file == BRW_IMMEDIATE_VALUE) {
325
insn->bits3.ud = reg.dw1.ud;
327
/* This is a hardware restriction, which may or may not be lifted
330
assert (reg.address_mode == BRW_ADDRESS_DIRECT);
331
/* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
333
if (insn->header.access_mode == BRW_ALIGN_1) {
334
insn->bits3.da1.src1_subreg_nr = reg.subnr;
335
insn->bits3.da1.src1_reg_nr = reg.nr;
337
insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
338
insn->bits3.da16.src1_reg_nr = reg.nr;
341
if (insn->header.access_mode == BRW_ALIGN_1) {
342
if (reg.width == BRW_WIDTH_1 &&
343
insn->header.execution_size == BRW_EXECUTE_1) {
344
insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
345
insn->bits3.da1.src1_width = BRW_WIDTH_1;
346
insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
348
insn->bits3.da1.src1_horiz_stride = reg.hstride;
349
insn->bits3.da1.src1_width = reg.width;
350
insn->bits3.da1.src1_vert_stride = reg.vstride;
353
insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
354
insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
355
insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
356
insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
358
/* This is an oddity of the fact we're using the same
359
* descriptions for registers in align_16 as align_1:
361
if (reg.vstride == BRW_VERTICAL_STRIDE_8)
362
insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
364
insn->bits3.da16.src1_vert_stride = reg.vstride;
370
* Set the Message Descriptor and Extended Message Descriptor fields
373
* \note This zeroes out the Function Control bits, so it must be called
374
* \b before filling out any message-specific data. Callers can
375
* choose not to fill in irrelevant bits; they will be zero.
378
brw_set_message_descriptor(struct brw_compile *p,
379
struct brw_instruction *inst,
380
enum brw_message_target sfid,
382
unsigned response_length,
386
brw_set_src1(p, inst, brw_imm_d(0));
389
inst->bits3.generic_gen5.header_present = header_present;
390
inst->bits3.generic_gen5.response_length = response_length;
391
inst->bits3.generic_gen5.msg_length = msg_length;
392
inst->bits3.generic_gen5.end_of_thread = end_of_thread;
395
/* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
396
inst->header.destreg__conditionalmod = sfid;
398
/* Set Extended Message Descriptor (ex_desc) */
399
inst->bits2.send_gen5.sfid = sfid;
400
inst->bits2.send_gen5.end_of_thread = end_of_thread;
403
inst->bits3.generic.response_length = response_length;
404
inst->bits3.generic.msg_length = msg_length;
405
inst->bits3.generic.msg_target = sfid;
406
inst->bits3.generic.end_of_thread = end_of_thread;
411
static void brw_set_math_message(struct brw_compile *p,
412
struct brw_instruction *insn,
414
unsigned integer_type,
420
unsigned response_length;
422
/* Infer message length from the function */
424
case BRW_MATH_FUNCTION_POW:
425
case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
426
case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
427
case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
435
/* Infer response length from the function */
437
case BRW_MATH_FUNCTION_SINCOS:
438
case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
446
brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
447
msg_length, response_length,
450
insn->bits3.math_gen5.function = function;
451
insn->bits3.math_gen5.int_type = integer_type;
452
insn->bits3.math_gen5.precision = low_precision;
453
insn->bits3.math_gen5.saturate = saturate;
454
insn->bits3.math_gen5.data_type = dataType;
455
insn->bits3.math_gen5.snapshot = 0;
457
insn->bits3.math.function = function;
458
insn->bits3.math.int_type = integer_type;
459
insn->bits3.math.precision = low_precision;
460
insn->bits3.math.saturate = saturate;
461
insn->bits3.math.data_type = dataType;
465
static void brw_set_ff_sync_message(struct brw_compile *p,
466
struct brw_instruction *insn,
468
unsigned response_length,
471
brw_set_message_descriptor(p, insn, BRW_SFID_URB,
473
true, end_of_thread);
474
insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
475
insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
476
insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
477
insn->bits3.urb_gen5.allocate = allocate;
478
insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
479
insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
482
static void brw_set_urb_message(struct brw_compile *p,
483
struct brw_instruction *insn,
487
unsigned response_length,
491
unsigned swizzle_control)
493
brw_set_message_descriptor(p, insn, BRW_SFID_URB,
494
msg_length, response_length, true, end_of_thread);
496
insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */
497
insn->bits3.urb_gen7.offset = offset;
498
assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
499
insn->bits3.urb_gen7.swizzle_control = swizzle_control;
500
/* per_slot_offset = 0 makes it ignore offsets in message header */
501
insn->bits3.urb_gen7.per_slot_offset = 0;
502
insn->bits3.urb_gen7.complete = complete;
503
} else if (p->gen >= 050) {
504
insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
505
insn->bits3.urb_gen5.offset = offset;
506
insn->bits3.urb_gen5.swizzle_control = swizzle_control;
507
insn->bits3.urb_gen5.allocate = allocate;
508
insn->bits3.urb_gen5.used = used; /* ? */
509
insn->bits3.urb_gen5.complete = complete;
511
insn->bits3.urb.opcode = 0; /* ? */
512
insn->bits3.urb.offset = offset;
513
insn->bits3.urb.swizzle_control = swizzle_control;
514
insn->bits3.urb.allocate = allocate;
515
insn->bits3.urb.used = used; /* ? */
516
insn->bits3.urb.complete = complete;
521
brw_set_dp_write_message(struct brw_compile *p,
522
struct brw_instruction *insn,
523
unsigned binding_table_index,
524
unsigned msg_control,
528
bool last_render_target,
529
unsigned response_length,
531
bool send_commit_msg)
536
/* Use the Render Cache for RT writes; otherwise use the Data Cache */
537
if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
538
sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
540
sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
541
} else if (p->gen >= 060) {
542
/* Use the render cache for all write messages. */
543
sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
545
sfid = BRW_SFID_DATAPORT_WRITE;
548
brw_set_message_descriptor(p, insn, sfid,
549
msg_length, response_length,
550
header_present, end_of_thread);
553
insn->bits3.gen7_dp.binding_table_index = binding_table_index;
554
insn->bits3.gen7_dp.msg_control = msg_control;
555
insn->bits3.gen7_dp.last_render_target = last_render_target;
556
insn->bits3.gen7_dp.msg_type = msg_type;
557
} else if (p->gen >= 060) {
558
insn->bits3.gen6_dp.binding_table_index = binding_table_index;
559
insn->bits3.gen6_dp.msg_control = msg_control;
560
insn->bits3.gen6_dp.last_render_target = last_render_target;
561
insn->bits3.gen6_dp.msg_type = msg_type;
562
insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
563
} else if (p->gen >= 050) {
564
insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
565
insn->bits3.dp_write_gen5.msg_control = msg_control;
566
insn->bits3.dp_write_gen5.last_render_target = last_render_target;
567
insn->bits3.dp_write_gen5.msg_type = msg_type;
568
insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
570
insn->bits3.dp_write.binding_table_index = binding_table_index;
571
insn->bits3.dp_write.msg_control = msg_control;
572
insn->bits3.dp_write.last_render_target = last_render_target;
573
insn->bits3.dp_write.msg_type = msg_type;
574
insn->bits3.dp_write.send_commit_msg = send_commit_msg;
579
brw_set_dp_read_message(struct brw_compile *p,
580
struct brw_instruction *insn,
581
unsigned binding_table_index,
582
unsigned msg_control,
584
unsigned target_cache,
586
unsigned response_length)
591
sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
592
} else if (p->gen >= 060) {
593
if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
594
sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
596
sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
598
sfid = BRW_SFID_DATAPORT_READ;
601
brw_set_message_descriptor(p, insn, sfid,
602
msg_length, response_length,
606
insn->bits3.gen7_dp.binding_table_index = binding_table_index;
607
insn->bits3.gen7_dp.msg_control = msg_control;
608
insn->bits3.gen7_dp.last_render_target = 0;
609
insn->bits3.gen7_dp.msg_type = msg_type;
610
} else if (p->gen >= 060) {
611
insn->bits3.gen6_dp.binding_table_index = binding_table_index;
612
insn->bits3.gen6_dp.msg_control = msg_control;
613
insn->bits3.gen6_dp.last_render_target = 0;
614
insn->bits3.gen6_dp.msg_type = msg_type;
615
insn->bits3.gen6_dp.send_commit_msg = 0;
616
} else if (p->gen >= 050) {
617
insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
618
insn->bits3.dp_read_gen5.msg_control = msg_control;
619
insn->bits3.dp_read_gen5.msg_type = msg_type;
620
insn->bits3.dp_read_gen5.target_cache = target_cache;
621
} else if (p->gen >= 045) {
622
insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
623
insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
624
insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
625
insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/
627
insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
628
insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
629
insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
630
insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
634
static void brw_set_sampler_message(struct brw_compile *p,
635
struct brw_instruction *insn,
636
unsigned binding_table_index,
639
unsigned response_length,
644
brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER,
645
msg_length, response_length,
646
header_present, false);
649
insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
650
insn->bits3.sampler_gen7.sampler = sampler;
651
insn->bits3.sampler_gen7.msg_type = msg_type;
652
insn->bits3.sampler_gen7.simd_mode = simd_mode;
653
} else if (p->gen >= 050) {
654
insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
655
insn->bits3.sampler_gen5.sampler = sampler;
656
insn->bits3.sampler_gen5.msg_type = msg_type;
657
insn->bits3.sampler_gen5.simd_mode = simd_mode;
658
} else if (p->gen >= 045) {
659
insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
660
insn->bits3.sampler_g4x.sampler = sampler;
661
insn->bits3.sampler_g4x.msg_type = msg_type;
663
insn->bits3.sampler.binding_table_index = binding_table_index;
664
insn->bits3.sampler.sampler = sampler;
665
insn->bits3.sampler.msg_type = msg_type;
666
insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
671
void brw_NOP(struct brw_compile *p)
673
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_NOP);
674
brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
675
brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
676
brw_set_src1(p, insn, brw_imm_ud(0x0));
679
/***********************************************************************
680
* Comparisons, if/else/endif
684
push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
686
p->if_stack[p->if_stack_depth] = inst;
689
if (p->if_stack_array_size <= p->if_stack_depth) {
690
p->if_stack_array_size *= 2;
691
p->if_stack = realloc(p->if_stack, sizeof(struct brw_instruction *)*p->if_stack_array_size);
695
/* EU takes the value from the flag register and pushes it onto some
696
* sort of a stack (presumably merging with any flag value already on
697
* the stack). Within an if block, the flags at the top of the stack
698
* control execution on each channel of the unit, eg. on each of the
699
* 16 pixel values in our wm programs.
701
* When the matching 'else' instruction is reached (presumably by
702
* countdown of the instruction count patched in by our ELSE/ENDIF
703
* functions), the relevant flags are inverted.
705
* When the matching 'endif' instruction is reached, the flags are
706
* popped off. If the stack is now empty, normal execution resumes.
708
struct brw_instruction *
709
brw_IF(struct brw_compile *p, unsigned execute_size)
711
struct brw_instruction *insn;
713
insn = brw_next_insn(p, BRW_OPCODE_IF);
715
/* Override the defaults for this instruction: */
717
brw_set_dest(p, insn, brw_ip_reg());
718
brw_set_src0(p, insn, brw_ip_reg());
719
brw_set_src1(p, insn, brw_imm_d(0x0));
720
} else if (p->gen < 070) {
721
brw_set_dest(p, insn, brw_imm_w(0));
722
insn->bits1.branch_gen6.jump_count = 0;
723
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
724
brw_set_src1(p, insn, __retype_d(brw_null_reg()));
726
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
727
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
728
brw_set_src1(p, insn, brw_imm_ud(0));
729
insn->bits3.break_cont.jip = 0;
730
insn->bits3.break_cont.uip = 0;
733
insn->header.execution_size = execute_size;
734
insn->header.compression_control = BRW_COMPRESSION_NONE;
735
insn->header.predicate_control = BRW_PREDICATE_NORMAL;
736
insn->header.mask_control = BRW_MASK_ENABLE;
737
if (!p->single_program_flow)
738
insn->header.thread_control = BRW_THREAD_SWITCH;
740
p->current->header.predicate_control = BRW_PREDICATE_NONE;
742
push_if_stack(p, insn);
746
/* This function is only used for gen6-style IF instructions with an
747
* embedded comparison (conditional modifier). It is not used on gen7.
749
struct brw_instruction *
750
gen6_IF(struct brw_compile *p, uint32_t conditional,
751
struct brw_reg src0, struct brw_reg src1)
753
struct brw_instruction *insn;
755
insn = brw_next_insn(p, BRW_OPCODE_IF);
757
brw_set_dest(p, insn, brw_imm_w(0));
759
insn->header.execution_size = BRW_EXECUTE_16;
761
insn->header.execution_size = BRW_EXECUTE_8;
763
insn->bits1.branch_gen6.jump_count = 0;
764
brw_set_src0(p, insn, src0);
765
brw_set_src1(p, insn, src1);
767
assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
768
assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
769
insn->header.destreg__conditionalmod = conditional;
771
if (!p->single_program_flow)
772
insn->header.thread_control = BRW_THREAD_SWITCH;
774
push_if_stack(p, insn);
779
* In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
782
convert_IF_ELSE_to_ADD(struct brw_compile *p,
783
struct brw_instruction *if_inst,
784
struct brw_instruction *else_inst)
786
/* The next instruction (where the ENDIF would be, if it existed) */
787
struct brw_instruction *next_inst = &p->store[p->nr_insn];
789
assert(p->single_program_flow);
790
assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
791
assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
792
assert(if_inst->header.execution_size == BRW_EXECUTE_1);
794
/* Convert IF to an ADD instruction that moves the instruction pointer
795
* to the first instruction of the ELSE block. If there is no ELSE
796
* block, point to where ENDIF would be. Reverse the predicate.
798
* There's no need to execute an ENDIF since we don't need to do any
799
* stack operations, and if we're currently executing, we just want to
802
if_inst->header.opcode = BRW_OPCODE_ADD;
803
if_inst->header.predicate_inverse = 1;
805
if (else_inst != NULL) {
806
/* Convert ELSE to an ADD instruction that points where the ENDIF
809
else_inst->header.opcode = BRW_OPCODE_ADD;
811
if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
812
else_inst->bits3.ud = (next_inst - else_inst) * 16;
814
if_inst->bits3.ud = (next_inst - if_inst) * 16;
819
* Patch IF and ELSE instructions with appropriate jump targets.
822
patch_IF_ELSE(struct brw_compile *p,
823
struct brw_instruction *if_inst,
824
struct brw_instruction *else_inst,
825
struct brw_instruction *endif_inst)
829
assert(!p->single_program_flow);
830
assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
831
assert(endif_inst != NULL);
832
assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
834
/* Jump count is for 64bit data chunk each, so one 128bit instruction
840
assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
841
endif_inst->header.execution_size = if_inst->header.execution_size;
843
if (else_inst == NULL) {
844
/* Patch IF -> ENDIF */
846
/* Turn it into an IFF, which means no mask stack operations for
847
* all-false and jumping past the ENDIF.
849
if_inst->header.opcode = BRW_OPCODE_IFF;
850
if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
851
if_inst->bits3.if_else.pop_count = 0;
852
if_inst->bits3.if_else.pad0 = 0;
853
} else if (p->gen < 070) {
854
/* As of gen6, there is no IFF and IF must point to the ENDIF. */
855
if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
857
if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
858
if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
861
else_inst->header.execution_size = if_inst->header.execution_size;
863
/* Patch IF -> ELSE */
865
if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
866
if_inst->bits3.if_else.pop_count = 0;
867
if_inst->bits3.if_else.pad0 = 0;
868
} else if (p->gen < 070) { /* gen6 only: gen7+ IF targets are written via break_cont.jip/uip below */
869
if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
872
/* Patch ELSE -> ENDIF */
874
/* BRW_OPCODE_ELSE pre-gen6 should point just past the
877
else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
878
else_inst->bits3.if_else.pop_count = 1;
879
else_inst->bits3.if_else.pad0 = 0;
880
} else if (p->gen < 070) {
881
/* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
882
else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
884
/* The IF instruction's JIP should point just past the ELSE */
885
if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
886
/* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
887
if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
888
else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
894
brw_ELSE(struct brw_compile *p)
896
struct brw_instruction *insn;
898
insn = brw_next_insn(p, BRW_OPCODE_ELSE);
901
brw_set_dest(p, insn, brw_ip_reg());
902
brw_set_src0(p, insn, brw_ip_reg());
903
brw_set_src1(p, insn, brw_imm_d(0x0));
904
} else if (p->gen < 070) {
905
brw_set_dest(p, insn, brw_imm_w(0));
906
insn->bits1.branch_gen6.jump_count = 0;
907
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
908
brw_set_src1(p, insn, __retype_d(brw_null_reg()));
910
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
911
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
912
brw_set_src1(p, insn, brw_imm_ud(0));
913
insn->bits3.break_cont.jip = 0;
914
insn->bits3.break_cont.uip = 0;
917
insn->header.compression_control = BRW_COMPRESSION_NONE;
918
insn->header.mask_control = BRW_MASK_ENABLE;
919
if (!p->single_program_flow)
920
insn->header.thread_control = BRW_THREAD_SWITCH;
922
push_if_stack(p, insn);
926
brw_ENDIF(struct brw_compile *p)
928
struct brw_instruction *insn;
929
struct brw_instruction *else_inst = NULL;
930
struct brw_instruction *if_inst = NULL;
932
/* Pop the IF and (optional) ELSE instructions from the stack */
934
if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
935
else_inst = p->if_stack[p->if_stack_depth];
938
if_inst = p->if_stack[p->if_stack_depth];
940
if (p->single_program_flow) {
941
/* ENDIF is useless; don't bother emitting it. */
942
convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
946
insn = brw_next_insn(p, BRW_OPCODE_ENDIF);
949
brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
950
brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
951
brw_set_src1(p, insn, brw_imm_d(0x0));
952
} else if (p->gen < 070) {
953
brw_set_dest(p, insn, brw_imm_w(0));
954
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
955
brw_set_src1(p, insn, __retype_d(brw_null_reg()));
957
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
958
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
959
brw_set_src1(p, insn, brw_imm_ud(0));
962
insn->header.compression_control = BRW_COMPRESSION_NONE;
963
insn->header.mask_control = BRW_MASK_ENABLE;
964
insn->header.thread_control = BRW_THREAD_SWITCH;
966
/* Also pop item off the stack in the endif instruction: */
968
insn->bits3.if_else.jump_count = 0;
969
insn->bits3.if_else.pop_count = 1;
970
insn->bits3.if_else.pad0 = 0;
971
} else if (p->gen < 070) {
972
insn->bits1.branch_gen6.jump_count = 2;
974
insn->bits3.break_cont.jip = 2;
976
patch_IF_ELSE(p, if_inst, else_inst, insn);
979
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
981
struct brw_instruction *insn;
983
insn = brw_next_insn(p, BRW_OPCODE_BREAK);
985
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
986
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
987
brw_set_src1(p, insn, brw_imm_d(0x0));
989
brw_set_dest(p, insn, brw_ip_reg());
990
brw_set_src0(p, insn, brw_ip_reg());
991
brw_set_src1(p, insn, brw_imm_d(0x0));
992
insn->bits3.if_else.pad0 = 0;
993
insn->bits3.if_else.pop_count = pop_count;
995
insn->header.compression_control = BRW_COMPRESSION_NONE;
996
insn->header.execution_size = BRW_EXECUTE_8;
1001
struct brw_instruction *gen6_CONT(struct brw_compile *p,
1002
struct brw_instruction *do_insn)
1004
struct brw_instruction *insn;
1006
insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
1007
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
1008
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
1009
brw_set_dest(p, insn, brw_ip_reg());
1010
brw_set_src0(p, insn, brw_ip_reg());
1011
brw_set_src1(p, insn, brw_imm_d(0x0));
1013
insn->header.compression_control = BRW_COMPRESSION_NONE;
1014
insn->header.execution_size = BRW_EXECUTE_8;
1018
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
1020
struct brw_instruction *insn;
1021
insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
1022
brw_set_dest(p, insn, brw_ip_reg());
1023
brw_set_src0(p, insn, brw_ip_reg());
1024
brw_set_src1(p, insn, brw_imm_d(0x0));
1025
insn->header.compression_control = BRW_COMPRESSION_NONE;
1026
insn->header.execution_size = BRW_EXECUTE_8;
1027
/* insn->header.mask_control = BRW_MASK_DISABLE; */
1028
insn->bits3.if_else.pad0 = 0;
1029
insn->bits3.if_else.pop_count = pop_count;
1035
* The DO/WHILE is just an unterminated loop -- break or continue are
1036
* used for control within the loop. We have a few ways they can be
1039
* For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1040
* jip and no DO instruction.
1042
* For non-uniform control flow pre-gen6, there's a DO instruction to
1043
* push the mask, and a WHILE to jump back, and BREAK to get out and
1046
* For gen6, there's no more mask stack, so no need for DO. WHILE
1047
* just points back to the first instruction of the loop.
1049
struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
1051
if (p->gen >= 060 || p->single_program_flow) {
1052
return &p->store[p->nr_insn];
1054
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_DO);
1056
/* Override the defaults for this instruction:
1058
brw_set_dest(p, insn, brw_null_reg());
1059
brw_set_src0(p, insn, brw_null_reg());
1060
brw_set_src1(p, insn, brw_null_reg());
1062
insn->header.compression_control = BRW_COMPRESSION_NONE;
1063
insn->header.execution_size = execute_size;
1064
insn->header.predicate_control = BRW_PREDICATE_NONE;
1065
/* insn->header.mask_control = BRW_MASK_ENABLE; */
1066
/* insn->header.mask_control = BRW_MASK_DISABLE; */
1072
/*
 * Emit the instruction that closes a loop by jumping back to do_insn.
 *
 * Encodings visible below, selected by p->gen:
 *  - gen >= 070: WHILE with null dest/src0, a zero UD immediate as src1
 *    and the backward distance br * (do_insn - insn) stored in
 *    bits3.break_cont.jip; execution size 8.
 *  - gen >= 060: WHILE whose distance is stored in
 *    bits1.branch_gen6.jump_count; execution size 8.
 *  - p->single_program_flow: a plain ADD on the IP register with an
 *    immediate of (do_insn - insn) * 16; execution size 1.
 *  - remaining case: asserts do_insn is a DO, emits a WHILE on the IP
 *    register that copies do_insn's execution size and stores
 *    br * (do_insn - insn + 1) in bits3.if_else.jump_count.
 *
 * The final two statements clear compression_control on insn and reset
 * the default predicate control held in p->current.
 *
 * NOTE(review): the definition of the scale factor `br`, the else
 * branches/closing braces and the return statement are not visible in
 * this excerpt -- confirm against the full file.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
1073
struct brw_instruction *do_insn)
1075
struct brw_instruction *insn;
1081
if (p->gen >= 070) {
1082
insn = brw_next_insn(p, BRW_OPCODE_WHILE);
1084
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
1085
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
1086
brw_set_src1(p, insn, brw_imm_ud(0));
1087
insn->bits3.break_cont.jip = br * (do_insn - insn);
1089
insn->header.execution_size = BRW_EXECUTE_8;
1090
} else if (p->gen >= 060) {
1091
insn = brw_next_insn(p, BRW_OPCODE_WHILE);
1093
brw_set_dest(p, insn, brw_imm_w(0));
1094
insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
1095
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
1096
brw_set_src1(p, insn, __retype_d(brw_null_reg()));
1098
insn->header.execution_size = BRW_EXECUTE_8;
1100
if (p->single_program_flow) {
1101
insn = brw_next_insn(p, BRW_OPCODE_ADD);
1103
brw_set_dest(p, insn, brw_ip_reg());
1104
brw_set_src0(p, insn, brw_ip_reg());
1105
brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
1106
insn->header.execution_size = BRW_EXECUTE_1;
1108
insn = brw_next_insn(p, BRW_OPCODE_WHILE);
1110
assert(do_insn->header.opcode == BRW_OPCODE_DO);
1112
brw_set_dest(p, insn, brw_ip_reg());
1113
brw_set_src0(p, insn, brw_ip_reg());
1114
brw_set_src1(p, insn, brw_imm_d(0));
1116
insn->header.execution_size = do_insn->header.execution_size;
1117
insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
1118
insn->bits3.if_else.pop_count = 0;
1119
insn->bits3.if_else.pad0 = 0;
1122
insn->header.compression_control = BRW_COMPRESSION_NONE;
1123
p->current->header.predicate_control = BRW_PREDICATE_NONE;
1130
/*
 * Patch a previously emitted forward JMPI so that it lands on the next
 * instruction slot to be emitted (&p->store[p->nr_insn]).
 *
 * Asserts that jmp_insn is a JMPI whose src1 is an immediate, then writes
 * jmpi * ((landing - jmp_insn) - 1) into jmp_insn->bits3.ud.
 *
 * NOTE(review): the definition of the scale factor `jmpi` and the
 * function braces are not visible in this excerpt.
 */
void brw_land_fwd_jump(struct brw_compile *p,
1131
struct brw_instruction *jmp_insn)
1133
struct brw_instruction *landing = &p->store[p->nr_insn];
1139
assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1140
assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1142
jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1147
/* To integrate with the above, it makes sense that the comparison
1148
* instruction should populate the flag register. It might be simpler
1149
* just to use the flag reg for most WM tasks?
1151
/*
 * Emit a CMP instruction comparing src0 with src1.
 *
 * `conditional` is stored in the header's destreg__conditionalmod field
 * and dest/src0/src1 are programmed as given.  When dest is in the
 * architecture register file (plus a further condition, see below), the
 * default instruction state in p->current is switched to
 * BRW_PREDICATE_NORMAL and p->flag_value is set to 0xff.
 *
 * NOTE(review): the second operand of the `&&` in the final condition and
 * the closing braces are not visible in this excerpt.
 */
void brw_CMP(struct brw_compile *p,
1152
struct brw_reg dest,
1153
unsigned conditional,
1154
struct brw_reg src0,
1155
struct brw_reg src1)
1157
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_CMP);
1159
insn->header.destreg__conditionalmod = conditional;
1160
brw_set_dest(p, insn, dest);
1161
brw_set_src0(p, insn, src0);
1162
brw_set_src1(p, insn, src1);
1164
/* Make it so that future instructions will use the computed flag
1165
* value until brw_set_predicate_control_flag_value() is called
1168
if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1170
p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1171
p->flag_value = 0xff;
1175
/* Issue 'wait' instruction for n1, host could program MMIO
1176
to wake up thread. */
1177
void brw_WAIT(struct brw_compile *p)
1179
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_WAIT);
1180
struct brw_reg src = brw_notification_1_reg();
1182
brw_set_dest(p, insn, src);
1183
brw_set_src0(p, insn, src);
1184
brw_set_src1(p, insn, brw_null_reg());
1185
insn->header.execution_size = 0; /* must */
1186
insn->header.predicate_control = 0;
1187
insn->header.compression_control = 0;
1190
/***********************************************************************
1191
* Helpers for the various SEND message types:
1194
/** Extended math function, float[8].
1196
/*
 * Emit an extended-math operation `function` on `src`, writing `dest`.
 *
 * p->gen >= 060: a MATH instruction is emitted.  dest and src must be in
 * the general register file with horizontal stride 1, src must not be
 * negated, and unless `function` is one of the two INT_DIV variants src
 * must be of type F.  `function` goes into destreg__conditionalmod and
 * `saturate` into the header; src1 is the null register.
 *
 * Second instruction sequence: a SEND with predicate control 0 and
 * msg_reg_nr in destreg__conditionalmod; the message is described through
 * brw_set_math_message(), passing src.type == BRW_REGISTER_TYPE_D as one
 * of its arguments.
 *
 * NOTE(review): several parameter lines (function, saturate, src, ...),
 * the else/closing braces and the tail of the brw_set_math_message() call
 * are not visible in this excerpt.
 */
void brw_math(struct brw_compile *p,
1197
struct brw_reg dest,
1200
unsigned msg_reg_nr,
1205
if (p->gen >= 060) {
1206
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);
1208
assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1209
assert(src.file == BRW_GENERAL_REGISTER_FILE);
1211
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1212
assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
1214
/* Source modifiers are ignored for extended math instructions. */
1215
assert(!src.negate);
1218
if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1219
function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1220
assert(src.type == BRW_REGISTER_TYPE_F);
1223
/* Math is the same ISA format as other opcodes, except that CondModifier
1224
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1226
insn->header.destreg__conditionalmod = function;
1227
insn->header.saturate = saturate;
1229
brw_set_dest(p, insn, dest);
1230
brw_set_src0(p, insn, src);
1231
brw_set_src1(p, insn, brw_null_reg());
1233
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
1234
/* Example code doesn't set predicate_control for send
1237
insn->header.predicate_control = 0;
1238
insn->header.destreg__conditionalmod = msg_reg_nr;
1240
brw_set_dest(p, insn, dest);
1241
brw_set_src0(p, insn, src);
1242
brw_set_math_message(p, insn, function,
1243
src.type == BRW_REGISTER_TYPE_D,
1250
/** Extended math function, float[8].
1252
/*
 * Emit a two-source MATH instruction computing `function` of src0 and
 * src1 into dest.
 *
 * Asserts that dest, src0 and src1 are all in the general register file
 * with a horizontal stride of 1 and that neither source is negated.
 * Unless `function` is one of the two INT_DIV variants, both sources must
 * be of type F.  `function` is stored in the header's
 * destreg__conditionalmod field.
 *
 * NOTE(review): the `function` parameter line and the braces are not
 * visible in this excerpt.
 */
void brw_math2(struct brw_compile *p,
1253
struct brw_reg dest,
1255
struct brw_reg src0,
1256
struct brw_reg src1)
1258
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);
1260
assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1261
assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1262
assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1264
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1265
assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1266
assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1268
if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1269
function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1270
assert(src0.type == BRW_REGISTER_TYPE_F);
1271
assert(src1.type == BRW_REGISTER_TYPE_F);
1274
/* Source modifiers are ignored for extended math instructions. */
1275
assert(!src0.negate);
1277
assert(!src1.negate);
1280
/* Math is the same ISA format as other opcodes, except that CondModifier
1281
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1283
insn->header.destreg__conditionalmod = function;
1285
brw_set_dest(p, insn, dest);
1286
brw_set_src0(p, insn, src0);
1287
brw_set_src1(p, insn, src1);
1291
* Extended math function, float[16].
1292
* Use 2 send instructions.
1294
/*
 * Emit an extended-math operation `function` on `src` into `dest`.
 *
 * p->gen >= 060: one MATH instruction carrying `function` and `saturate`
 * in its header, src as src0 and the null register as src1; src must not
 * be negated.
 *
 * SEND sequence: inside a brw_push_insn_state()/brw_pop_insn_state()
 * pair, with the predicate flag value set to 0xff and compression
 * disabled, two SEND instructions are emitted.  The first uses msg_reg_nr
 * and dest; the second is marked BRW_COMPRESSION_2NDHALF and uses
 * msg_reg_nr + 1 and __offset(dest, 1).  Both describe the message via
 * brw_set_math_message() with BRW_MATH_INTEGER_UNSIGNED and
 * BRW_MATH_DATA_VECTOR among the arguments.
 *
 * NOTE(review): several parameter lines, the gen6 early exit/else
 * structure and parts of the brw_set_math_message() argument lists are
 * not visible in this excerpt.
 */
void brw_math_16(struct brw_compile *p,
1295
struct brw_reg dest,
1298
unsigned msg_reg_nr,
1302
struct brw_instruction *insn;
1304
if (p->gen >= 060) {
1305
insn = brw_next_insn(p, BRW_OPCODE_MATH);
1307
/* Math is the same ISA format as other opcodes, except that CondModifier
1308
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1310
insn->header.destreg__conditionalmod = function;
1311
insn->header.saturate = saturate;
1313
/* Source modifiers are ignored for extended math instructions. */
1314
assert(!src.negate);
1317
brw_set_dest(p, insn, dest);
1318
brw_set_src0(p, insn, src);
1319
brw_set_src1(p, insn, brw_null_reg());
1323
/* First instruction:
1325
brw_push_insn_state(p);
1326
brw_set_predicate_control_flag_value(p, 0xff);
1327
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1329
insn = brw_next_insn(p, BRW_OPCODE_SEND);
1330
insn->header.destreg__conditionalmod = msg_reg_nr;
1332
brw_set_dest(p, insn, dest);
1333
brw_set_src0(p, insn, src);
1334
brw_set_math_message(p, insn, function,
1335
BRW_MATH_INTEGER_UNSIGNED,
1338
BRW_MATH_DATA_VECTOR);
1340
/* Second instruction:
1342
insn = brw_next_insn(p, BRW_OPCODE_SEND);
1343
insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1344
insn->header.destreg__conditionalmod = msg_reg_nr+1;
1346
brw_set_dest(p, insn, __offset(dest,1));
1347
brw_set_src0(p, insn, src);
1348
brw_set_math_message(p, insn, function,
1349
BRW_MATH_INTEGER_UNSIGNED,
1352
BRW_MATH_DATA_VECTOR);
1354
brw_pop_insn_state(p);
1358
/**
 * Write a block of OWORDs (half a GRF each) to the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
1364
/*
 * Emit the message-header setup and the SEND for an OWORD block write
 * through the stateless binding table entry (255).
 *
 * num_regs == 1 selects BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; the other
 * visible assignment selects BRW_DATAPORT_OWORD_BLOCK_4_OWORDS.  The
 * header is built in `mrf` (retyped to UD) as a copy of g0, with `offset`
 * placed in element 2, under a pushed instruction state that disables
 * masking and compression.
 *
 * For the SEND: any compression on the new instruction is cleared, the
 * predicate must be BRW_PREDICATE_NONE, and mrf.nr is stored in
 * destreg__conditionalmod.  With p->gen >= 060 the destination is a null
 * UW vec16 register, src0 is mrf and no commit message is requested;
 * the other visible assignments use the null register as src0 and set
 * send_commit_msg to 1.  send_commit_msg is also passed as the
 * response length.
 *
 * NOTE(review): the mrf/num_regs/offset parameter lines, the else
 * branches, the pre-gen6 `dest` assignment, the instruction that writes
 * the offset and most brw_set_dp_write_message() arguments are not
 * visible in this excerpt.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
1369
uint32_t msg_control, msg_type;
1375
mrf = __retype_ud(mrf);
1377
if (num_regs == 1) {
1378
msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1381
msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1385
/* Set up the message header. This is g0, with g0.2 filled with
1386
* the offset. We don't want to leave our offset around in g0 or
1387
* it'll screw up texture samples, so set it up inside the message
1391
brw_push_insn_state(p);
1392
brw_set_mask_control(p, BRW_MASK_DISABLE);
1393
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1395
brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
1397
/* set message header global offset field (reg 0, element 2) */
1399
__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
1400
brw_imm_ud(offset));
1402
brw_pop_insn_state(p);
1406
struct brw_reg dest;
1407
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
1408
int send_commit_msg;
1409
struct brw_reg src_header = __retype_uw(brw_vec8_grf(0, 0));
1411
if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
1412
insn->header.compression_control = BRW_COMPRESSION_NONE;
1413
src_header = vec16(src_header);
1415
assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1416
insn->header.destreg__conditionalmod = mrf.nr;
1418
/* Until gen6, writes followed by reads from the same location
1419
* are not guaranteed to be ordered unless write_commit is set.
1420
* If set, then a no-op write is issued to the destination
1421
* register to set a dependency, and a read from the destination
1422
* can be used to ensure the ordering.
1424
* For gen6, only writes between different threads need ordering
1425
* protection. Our use of DP writes is all about register
1426
* spilling within a thread.
1428
if (p->gen >= 060) {
1429
dest = __retype_uw(vec16(brw_null_reg()));
1430
send_commit_msg = 0;
1433
send_commit_msg = 1;
1436
brw_set_dest(p, insn, dest);
1437
if (p->gen >= 060) {
1438
brw_set_src0(p, insn, mrf);
1440
brw_set_src0(p, insn, brw_null_reg());
1444
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1446
msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1448
brw_set_dp_write_message(p,
1450
255, /* binding table index (255=stateless) */
1454
true, /* header_present */
1455
0, /* pixel scoreboard */
1456
send_commit_msg, /* response_length */
1464
/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 */
1471
/*
 * Emit the message-header setup and the SEND for an OWORD block read
 * through the stateless binding table entry (255) into `dest`.
 *
 * num_regs == 1 selects BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; the other
 * visible assignment selects BRW_DATAPORT_OWORD_BLOCK_4_OWORDS.  The
 * header is built in `mrf` (retyped to UD) as a copy of g0 with `offset`
 * placed in element 2, under a pushed state with compression and masking
 * disabled.  The SEND must be unpredicated, is uncompressed, carries
 * mrf.nr in destreg__conditionalmod and writes `dest` retyped to UW.
 * With p->gen >= 060 src0 is mrf; the other visible call uses the null
 * register.  The message is an OWORD_BLOCK_READ targeting the render
 * cache.
 *
 * NOTE(review): the return type, the mrf/num_regs/offset parameter lines,
 * the else branches and most brw_set_dp_read_message() arguments are not
 * visible in this excerpt.
 */
brw_oword_block_read_scratch(struct brw_compile *p,
1472
struct brw_reg dest,
1477
uint32_t msg_control;
1483
mrf = __retype_ud(mrf);
1484
dest = __retype_uw(dest);
1486
if (num_regs == 1) {
1487
msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1490
msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1495
brw_push_insn_state(p);
1496
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1497
brw_set_mask_control(p, BRW_MASK_DISABLE);
1499
brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
1501
/* set message header global offset field (reg 0, element 2) */
1503
__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
1504
brw_imm_ud(offset));
1506
brw_pop_insn_state(p);
1510
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
1512
assert(insn->header.predicate_control == 0);
1513
insn->header.compression_control = BRW_COMPRESSION_NONE;
1514
insn->header.destreg__conditionalmod = mrf.nr;
1516
brw_set_dest(p, insn, dest); /* UW? */
1517
if (p->gen >= 060) {
1518
brw_set_src0(p, insn, mrf);
1520
brw_set_src0(p, insn, brw_null_reg());
1523
brw_set_dp_read_message(p,
1525
255, /* binding table index (255=stateless) */
1527
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1528
BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
1535
* Read a float[4] vector from the data port Data Cache (const buffer).
1536
* Location (in buffer) should be a multiple of 16.
1537
* Used for fetching shader constants.
1539
/*
 * Emit an OWORD block read from the data cache into `dest`.
 *
 * Everything is emitted inside one brw_push_insn_state() /
 * brw_pop_insn_state() pair with predication, compression and masking
 * disabled.  The header is built in `mrf` (retyped to UD) as a copy of g0
 * with `offset` placed in element 2.  The SEND carries mrf.nr in
 * destreg__conditionalmod and writes `dest` recast as a UW vec8.  With
 * p->gen >= 060 src0 is mrf; the other visible call uses the null
 * register.  The message is BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW /
 * OWORD_BLOCK_READ on the data cache with a response length of 1.
 *
 * NOTE(review): the mrf/offset parameter lines, the offset adjustment
 * announced by the "units of owords" comment, the else branch and some
 * brw_set_dp_read_message() arguments are not visible in this excerpt.
 */
void brw_oword_block_read(struct brw_compile *p,
1540
struct brw_reg dest,
1543
uint32_t bind_table_index)
1545
struct brw_instruction *insn;
1547
/* On newer hardware, offset is in units of owords. */
1551
mrf = __retype_ud(mrf);
1553
brw_push_insn_state(p);
1554
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1555
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1556
brw_set_mask_control(p, BRW_MASK_DISABLE);
1558
brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
1560
/* set message header global offset field (reg 0, element 2) */
1562
__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
1563
brw_imm_ud(offset));
1565
insn = brw_next_insn(p, BRW_OPCODE_SEND);
1566
insn->header.destreg__conditionalmod = mrf.nr;
1568
/* cast dest to a uword[8] vector */
1569
dest = __retype_uw(vec8(dest));
1571
brw_set_dest(p, insn, dest);
1572
if (p->gen >= 060) {
1573
brw_set_src0(p, insn, mrf);
1575
brw_set_src0(p, insn, brw_null_reg());
1578
brw_set_dp_read_message(p,
1581
BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
1582
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
1583
BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1585
1); /* response_length (1 reg, 2 owords!) */
1587
brw_pop_insn_state(p);
1591
* Read a set of dwords from the data port Data Cache (const buffer).
1593
* Location (in buffer) appears as UD offsets in the register after
1594
* the provided mrf header reg.
1596
/*
 * Emit a DWORD scattered read from the data cache into `dest`.
 *
 * The message header is a copy of g0 moved into `mrf` (retyped to UD)
 * under a pushed state with predication, compression and masking
 * disabled.  The SEND carries mrf.nr in destreg__conditionalmod, writes
 * `dest` recast as a UW vec8 and uses the null register as src0.  The
 * message is BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS /
 * DWORD_SCATTERED_READ on the data cache with a response length of 1.
 *
 * NOTE(review): the mrf parameter line, the braces and some
 * brw_set_dp_read_message() arguments are not visible in this excerpt.
 */
void brw_dword_scattered_read(struct brw_compile *p,
1597
struct brw_reg dest,
1599
uint32_t bind_table_index)
1601
struct brw_instruction *insn;
1603
mrf = __retype_ud(mrf);
1605
brw_push_insn_state(p);
1606
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1607
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1608
brw_set_mask_control(p, BRW_MASK_DISABLE);
1609
brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
1610
brw_pop_insn_state(p);
1612
insn = brw_next_insn(p, BRW_OPCODE_SEND);
1613
insn->header.destreg__conditionalmod = mrf.nr;
1615
/* cast dest to a uword[8] vector */
1616
dest = __retype_uw(vec8(dest));
1618
brw_set_dest(p, insn, dest);
1619
brw_set_src0(p, insn, brw_null_reg());
1621
brw_set_dp_read_message(p,
1624
BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
1625
BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
1626
BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1628
1); /* response_length */
1632
* Read float[4] constant(s) from VS constant buffer.
1633
* For relative addressing, two float[4] constants will be read into 'dest'.
1634
* Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1636
/*
 * Emit an OWORD block read of a VS constant into `dest`.
 *
 * `location` is moved into element 2 of message register 1 (msg_reg_nr)
 * under a pushed state using Align1 access with compression, masking and
 * predication disabled.  The SEND itself is unpredicated, uncompressed,
 * has masking disabled and carries msg_reg_nr in
 * destreg__conditionalmod.  With p->gen >= 060 src0 is
 * brw_message_reg(msg_reg_nr); the other visible call uses the null
 * register.  The message is an OWORD_BLOCK_READ on the data cache with a
 * response length of 1.
 *
 * NOTE(review): the `location` parameter line, the else branch and some
 * brw_set_dp_read_message() arguments are not visible in this excerpt.
 */
void brw_dp_READ_4_vs(struct brw_compile *p,
1637
struct brw_reg dest,
1639
unsigned bind_table_index)
1641
struct brw_instruction *insn;
1642
unsigned msg_reg_nr = 1;
1647
/* Setup MRF[1] with location/offset into const buffer */
1648
brw_push_insn_state(p);
1649
brw_set_access_mode(p, BRW_ALIGN_1);
1650
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1651
brw_set_mask_control(p, BRW_MASK_DISABLE);
1652
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1653
brw_MOV(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2)),
1654
brw_imm_ud(location));
1655
brw_pop_insn_state(p);
1657
insn = brw_next_insn(p, BRW_OPCODE_SEND);
1659
insn->header.predicate_control = BRW_PREDICATE_NONE;
1660
insn->header.compression_control = BRW_COMPRESSION_NONE;
1661
insn->header.destreg__conditionalmod = msg_reg_nr;
1662
insn->header.mask_control = BRW_MASK_DISABLE;
1664
brw_set_dest(p, insn, dest);
1665
if (p->gen >= 060) {
1666
brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
1668
brw_set_src0(p, insn, brw_null_reg());
1671
brw_set_dp_read_message(p,
1675
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1676
BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1678
1); /* response_length (1 Oword) */
1682
* Read a float[4] constant per vertex from VS constant buffer, with
1683
* relative addressing.
1685
/*
 * Emit an OWORD dual block read of VS constants with relative addressing.
 *
 * Message register 1 (typed D) is loaded with addr_reg + offset under a
 * pushed state using Align1 access with compression, masking and
 * predication disabled.  g0 is then passed through
 * gen6_resolve_implied_move() and used as src0 of an unpredicated,
 * uncompressed SEND with masking disabled and 0 in
 * destreg__conditionalmod.  msg_type is chosen from the GEN6_, G45_
 * (p->gen >= 045) or BRW_ OWORD_DUAL_BLOCK_READ constants, and the
 * message uses BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD on the data cache
 * with a response length of 1.
 *
 * NOTE(review): the `offset` parameter line, the msg_type declaration,
 * the first gen test of the msg_type chain and some
 * brw_set_dp_read_message() arguments are not visible in this excerpt.
 */
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
1686
struct brw_reg dest,
1687
struct brw_reg addr_reg,
1689
unsigned bind_table_index)
1691
struct brw_reg src = brw_vec8_grf(0, 0);
1692
struct brw_instruction *insn;
1695
/* Setup MRF[1] with offset into const buffer */
1696
brw_push_insn_state(p);
1697
brw_set_access_mode(p, BRW_ALIGN_1);
1698
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1699
brw_set_mask_control(p, BRW_MASK_DISABLE);
1700
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1702
/* M1.0 is block offset 0, M1.4 is block offset 1, all other
1705
brw_ADD(p, __retype_d(brw_message_reg(1)),
1706
addr_reg, brw_imm_d(offset));
1707
brw_pop_insn_state(p);
1709
gen6_resolve_implied_move(p, &src, 0);
1711
insn = brw_next_insn(p, BRW_OPCODE_SEND);
1712
insn->header.predicate_control = BRW_PREDICATE_NONE;
1713
insn->header.compression_control = BRW_COMPRESSION_NONE;
1714
insn->header.destreg__conditionalmod = 0;
1715
insn->header.mask_control = BRW_MASK_DISABLE;
1717
brw_set_dest(p, insn, dest);
1718
brw_set_src0(p, insn, src);
1721
msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1722
else if (p->gen >= 045)
1723
msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1725
msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1727
brw_set_dp_read_message(p,
1730
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
1732
BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1734
1); /* response_length */
1737
/*
 * Emit a render-target write message.
 *
 * The destination is the null register retyped to UW, widened to vec16
 * when dispatch_width is 16 and to vec8 by the other visible assignment.
 * A SENDC is requested when p->gen >= 060 and binding_table_index is 0;
 * the other visible call emits a plain SEND.  The instruction is
 * unpredicated and uncompressed.  With p->gen >= 060 src0 is replaced by
 * brw_message_reg(msg_reg_nr) and the GEN6_ render-target-write message
 * type is used; the remaining assignments store msg_reg_nr in
 * destreg__conditionalmod and use the BRW_ message type.  The descriptor
 * is written by brw_set_dp_write_message() with send_commit_msg = 0.
 *
 * NOTE(review): the dispatch_width and eot parameter lines, the msg_type
 * declaration, the else branches and most descriptor arguments are not
 * visible in this excerpt.
 */
void brw_fb_WRITE(struct brw_compile *p,
1739
unsigned msg_reg_nr,
1740
struct brw_reg src0,
1741
unsigned msg_control,
1742
unsigned binding_table_index,
1743
unsigned msg_length,
1744
unsigned response_length,
1746
bool header_present)
1748
struct brw_instruction *insn;
1750
struct brw_reg dest;
1752
if (dispatch_width == 16)
1753
dest = __retype_uw(vec16(brw_null_reg()));
1755
dest = __retype_uw(vec8(brw_null_reg()));
1757
if (p->gen >= 060 && binding_table_index == 0) {
1758
insn = brw_next_insn(p, BRW_OPCODE_SENDC);
1760
insn = brw_next_insn(p, BRW_OPCODE_SEND);
1762
/* The execution mask is ignored for render target writes. */
1763
insn->header.predicate_control = 0;
1764
insn->header.compression_control = BRW_COMPRESSION_NONE;
1766
if (p->gen >= 060) {
1767
/* headerless version, just submit color payload */
1768
src0 = brw_message_reg(msg_reg_nr);
1770
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1772
insn->header.destreg__conditionalmod = msg_reg_nr;
1774
msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1777
brw_set_dest(p, insn, dest);
1778
brw_set_src0(p, insn, src0);
1779
brw_set_dp_write_message(p,
1781
binding_table_index,
1789
0 /* send_commit_msg */);
1793
* Texture sample instruction.
1794
* Note: the msg_type plus msg_length values determine exactly what kind
1795
* of sampling operation is performed. See volume 4, page 161 of docs.
1797
/*
 * Emit a sampler SEND, optionally preceded by a message-header setup.
 *
 * When p->gen < 050 or writemask is not WRITEMASK_XYZW, a header is built
 * in message register msg_reg_nr: g0 is copied into it and the inverted
 * writemask (masked with WRITEMASK_XYZW, shifted left by 12) is written
 * to element 2, under a pushed state with compression and masking
 * disabled; src0 is then replaced by the null register retyped to UW.
 *
 * src0 is passed through gen6_resolve_implied_move() and an unpredicated,
 * uncompressed SEND is emitted carrying msg_reg_nr in
 * destreg__conditionalmod; the descriptor is written by
 * brw_set_sampler_message().
 *
 * NOTE(review): several parameter lines (sampler, writemask, msg_type,
 * ...), the closing braces and most brw_set_sampler_message() arguments
 * are not visible in this excerpt.
 */
void brw_SAMPLE(struct brw_compile *p,
1798
struct brw_reg dest,
1799
unsigned msg_reg_nr,
1800
struct brw_reg src0,
1801
unsigned binding_table_index,
1805
unsigned response_length,
1806
unsigned msg_length,
1807
bool header_present,
1812
if (p->gen < 050 || writemask != WRITEMASK_XYZW) {
1813
struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1815
writemask = ~writemask & WRITEMASK_XYZW;
1817
brw_push_insn_state(p);
1819
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1820
brw_set_mask_control(p, BRW_MASK_DISABLE);
1822
brw_MOV(p, __retype_ud(m1), __retype_ud(brw_vec8_grf(0,0)));
1823
brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(writemask << 12));
1825
brw_pop_insn_state(p);
1827
src0 = __retype_uw(brw_null_reg());
1831
struct brw_instruction *insn;
1833
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
1835
insn = brw_next_insn(p, BRW_OPCODE_SEND);
1836
insn->header.predicate_control = 0; /* XXX */
1837
insn->header.compression_control = BRW_COMPRESSION_NONE;
1839
insn->header.destreg__conditionalmod = msg_reg_nr;
1841
brw_set_dest(p, insn, dest);
1842
brw_set_src0(p, insn, src0);
1843
brw_set_sampler_message(p, insn,
1844
binding_table_index,
1854
/* All these variables are pretty confusing - we might be better off
1855
* using bitmasks and macros for this, in the old style. Or perhaps
1856
* just having the caller instantiate the fields in dword3 itself.
1858
/*
 * Emit a URB write SEND.
 *
 * src0 is first passed through gen6_resolve_implied_move().  With
 * p->gen >= 070, element 5 of message register msg_reg_nr is set to
 * g0.5 | 0xff00 using Align1 access inside a push/pop of the instruction
 * state (the in-code comment describes this as enabling the channel
 * masks in the message header).  The SEND then gets dest, src0 and a
 * zero immediate src1, msg_reg_nr in destreg__conditionalmod, and its
 * descriptor is written by brw_set_urb_message().  msg_length must be
 * below BRW_MAX_MRF.
 *
 * NOTE(review): several parameter lines (allocate, used, eot, offset,
 * swizzle, ...), the closing braces and the brw_set_urb_message()
 * arguments are not visible in this excerpt.
 */
void brw_urb_WRITE(struct brw_compile *p,
1859
struct brw_reg dest,
1860
unsigned msg_reg_nr,
1861
struct brw_reg src0,
1864
unsigned msg_length,
1865
unsigned response_length,
1867
bool writes_complete,
1871
struct brw_instruction *insn;
1873
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
1875
if (p->gen >= 070) {
1876
/* Enable Channel Masks in the URB_WRITE_HWORD message header */
1877
brw_push_insn_state(p);
1878
brw_set_access_mode(p, BRW_ALIGN_1);
1879
brw_OR(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5)),
1880
__retype_ud(brw_vec1_grf(0, 5)),
1881
brw_imm_ud(0xff00));
1882
brw_pop_insn_state(p);
1885
insn = brw_next_insn(p, BRW_OPCODE_SEND);
1887
assert(msg_length < BRW_MAX_MRF);
1889
brw_set_dest(p, insn, dest);
1890
brw_set_src0(p, insn, src0);
1891
brw_set_src1(p, insn, brw_imm_d(0));
1894
insn->header.destreg__conditionalmod = msg_reg_nr;
1896
brw_set_urb_message(p,
1909
brw_find_next_block_end(struct brw_compile *p, int start)
1913
for (ip = start + 1; ip < p->nr_insn; ip++) {
1914
struct brw_instruction *insn = &p->store[ip];
1916
switch (insn->header.opcode) {
1917
case BRW_OPCODE_ENDIF:
1918
case BRW_OPCODE_ELSE:
1919
case BRW_OPCODE_WHILE:
1923
assert(!"not reached");
1927
/* There is no DO instruction on gen6, so to find the end of the loop
1928
* we have to see if the loop is jumping back before our start
1932
brw_find_loop_end(struct brw_compile *p, int start)
1937
for (ip = start + 1; ip < p->nr_insn; ip++) {
1938
struct brw_instruction *insn = &p->store[ip];
1940
if (insn->header.opcode == BRW_OPCODE_WHILE) {
1941
int jip = p->gen <= 070 ? insn->bits1.branch_gen6.jump_count
1942
: insn->bits3.break_cont.jip;
1943
if (ip + jip / br <= start)
1947
assert(!"not reached");
1951
/* After program generation, go back and update the UIP and JIP of
1952
* BREAK and CONT instructions to their correct locations.
1955
brw_set_uip_jip(struct brw_compile *p)
1963
for (ip = 0; ip < p->nr_insn; ip++) {
1964
struct brw_instruction *insn = &p->store[ip];
1966
switch (insn->header.opcode) {
1967
case BRW_OPCODE_BREAK:
1968
insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
1969
/* Gen7 UIP points to WHILE; Gen6 points just after it */
1970
insn->bits3.break_cont.uip =
1971
br * (brw_find_loop_end(p, ip) - ip + (p->gen <= 070 ? 1 : 0));
1973
case BRW_OPCODE_CONTINUE:
1974
insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
1975
insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
1977
assert(insn->bits3.break_cont.uip != 0);
1978
assert(insn->bits3.break_cont.jip != 0);
1984
void brw_ff_sync(struct brw_compile *p,
1985
struct brw_reg dest,
1986
unsigned msg_reg_nr,
1987
struct brw_reg src0,
1989
unsigned response_length,
1992
struct brw_instruction *insn;
1994
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
1996
insn = brw_next_insn(p, BRW_OPCODE_SEND);
1997
brw_set_dest(p, insn, dest);
1998
brw_set_src0(p, insn, src0);
1999
brw_set_src1(p, insn, brw_imm_d(0));
2002
insn->header.destreg__conditionalmod = msg_reg_nr;
2004
brw_set_ff_sync_message(p,