~ubuntu-branches/ubuntu/trusty/xserver-xorg-video-intel-lts-xenial/trusty-updates

« back to all changes in this revision

Viewing changes to src/sna/brw/brw_eu_emit.c

  • Committer: Package Import Robot
  • Author(s): Timo Aaltonen
  • Date: 2016-05-03 14:02:35 UTC
  • Revision ID: package-import@ubuntu.com-20160503140235-syaq8uojka8imy1a
Tags: upstream-2.99.917+git20160325
Import upstream version 2.99.917+git20160325

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
   Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 
3
   Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 
4
   develop this 3D driver.
 
5
 
 
6
   Permission is hereby granted, free of charge, to any person obtaining
 
7
   a copy of this software and associated documentation files (the
 
8
   "Software"), to deal in the Software without restriction, including
 
9
   without limitation the rights to use, copy, modify, merge, publish,
 
10
   distribute, sublicense, and/or sell copies of the Software, and to
 
11
   permit persons to whom the Software is furnished to do so, subject to
 
12
   the following conditions:
 
13
 
 
14
   The above copyright notice and this permission notice (including the
 
15
   next paragraph) shall be included in all copies or substantial
 
16
   portions of the Software.
 
17
 
 
18
   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 
19
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 
20
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 
21
   IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 
22
   LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 
23
   OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 
24
   WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
25
 
 
26
 **********************************************************************/
 
27
/*
 
28
 * Authors:
 
29
 *   Keith Whitwell <keith@tungstengraphics.com>
 
30
 */
 
31
 
 
32
#include "brw_eu.h"
 
33
 
 
34
#include <string.h>
 
35
#include <stdlib.h>
 
36
 
 
37
/* Element count of a true array (not a pointer).  The argument is
 * parenthesized so that any array-valued expression can be passed safely.
 */
#define ARRAY_SIZE(A) (sizeof(A) / sizeof((A)[0]))
 
38
 
 
39
/***********************************************************************
 
40
 * Internal helper for constructing instructions
 
41
 */
 
42
 
 
43
static void guess_execution_size(struct brw_compile *p,
 
44
                                 struct brw_instruction *insn,
 
45
                                 struct brw_reg reg)
 
46
{
 
47
        if (reg.width == BRW_WIDTH_8 && p->compressed)
 
48
                insn->header.execution_size = BRW_EXECUTE_16;
 
49
        else
 
50
                insn->header.execution_size = reg.width;
 
51
}
 
52
 
 
53
 
 
54
/**
 
55
 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
 
56
 * registers, implicitly moving the operand to a message register.
 
57
 *
 
58
 * On Sandybridge, this is no longer the case.  This function performs the
 
59
 * explicit move; it should be called before emitting a SEND instruction.
 
60
 */
 
61
void
 
62
gen6_resolve_implied_move(struct brw_compile *p,
 
63
                          struct brw_reg *src,
 
64
                          unsigned msg_reg_nr)
 
65
{
 
66
        if (p->gen < 060)
 
67
                return;
 
68
 
 
69
        if (src->file == BRW_MESSAGE_REGISTER_FILE)
 
70
                return;
 
71
 
 
72
        if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
 
73
                brw_push_insn_state(p);
 
74
                brw_set_mask_control(p, BRW_MASK_DISABLE);
 
75
                brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
76
                brw_MOV(p, __retype_ud(brw_message_reg(msg_reg_nr)), __retype_ud(*src));
 
77
                brw_pop_insn_state(p);
 
78
        }
 
79
        *src = brw_message_reg(msg_reg_nr);
 
80
}
 
81
 
 
82
static void
 
83
gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
 
84
{
 
85
        /* From the BSpec / ISA Reference / send - [DevIVB+]:
 
86
         * "The send with EOT should use register space R112-R127 for <src>. This is
 
87
         *  to enable loading of a new thread into the same slot while the message
 
88
         *  with EOT for current thread is pending dispatch."
 
89
         *
 
90
         * Since we're pretending to have 16 MRFs anyway, we may as well use the
 
91
         * registers required for messages with EOT.
 
92
         */
 
93
        if (p->gen >= 070 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
 
94
                reg->file = BRW_GENERAL_REGISTER_FILE;
 
95
                reg->nr += 111;
 
96
        }
 
97
}
 
98
 
 
99
void
 
100
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
 
101
             struct brw_reg dest)
 
102
{
 
103
        if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
 
104
            dest.file != BRW_MESSAGE_REGISTER_FILE)
 
105
                assert(dest.nr < 128);
 
106
 
 
107
        gen7_convert_mrf_to_grf(p, &dest);
 
108
 
 
109
        insn->bits1.da1.dest_reg_file = dest.file;
 
110
        insn->bits1.da1.dest_reg_type = dest.type;
 
111
        insn->bits1.da1.dest_address_mode = dest.address_mode;
 
112
 
 
113
        if (dest.address_mode == BRW_ADDRESS_DIRECT) {
 
114
                insn->bits1.da1.dest_reg_nr = dest.nr;
 
115
 
 
116
                if (insn->header.access_mode == BRW_ALIGN_1) {
 
117
                        insn->bits1.da1.dest_subreg_nr = dest.subnr;
 
118
                        if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
 
119
                                dest.hstride = BRW_HORIZONTAL_STRIDE_1;
 
120
                        insn->bits1.da1.dest_horiz_stride = dest.hstride;
 
121
                } else {
 
122
                        insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
 
123
                        insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
 
124
                        /* even ignored in da16, still need to set as '01' */
 
125
                        insn->bits1.da16.dest_horiz_stride = 1;
 
126
                }
 
127
        } else {
 
128
                insn->bits1.ia1.dest_subreg_nr = dest.subnr;
 
129
 
 
130
                /* These are different sizes in align1 vs align16:
 
131
                */
 
132
                if (insn->header.access_mode == BRW_ALIGN_1) {
 
133
                        insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
 
134
                        if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
 
135
                                dest.hstride = BRW_HORIZONTAL_STRIDE_1;
 
136
                        insn->bits1.ia1.dest_horiz_stride = dest.hstride;
 
137
                }
 
138
                else {
 
139
                        insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
 
140
                        /* even ignored in da16, still need to set as '01' */
 
141
                        insn->bits1.ia16.dest_horiz_stride = 1;
 
142
                }
 
143
        }
 
144
 
 
145
        guess_execution_size(p, insn, dest);
 
146
}
 
147
 
 
148
/* Per-register-type element size, indexed by the 3-bit register type
 * encoding (validate_reg() indexes it with dest_reg_type).  Slot 6 has no
 * entry and therefore reads as 0.
 */
static const int reg_type_size[8] = {
        4, 4,   /* types 0, 1 */
        2, 2,   /* types 2, 3 */
        1, 1,   /* types 4, 5 */
        0,      /* type 6: unspecified */
        4       /* type 7 */
};
 
157
 
 
158
static void
 
159
validate_reg(struct brw_instruction *insn, struct brw_reg reg)
 
160
{
 
161
        int hstride_for_reg[] = {0, 1, 2, 4};
 
162
        int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
 
163
        int width_for_reg[] = {1, 2, 4, 8, 16};
 
164
        int execsize_for_reg[] = {1, 2, 4, 8, 16};
 
165
        int width, hstride, vstride, execsize;
 
166
 
 
167
        if (reg.file == BRW_IMMEDIATE_VALUE) {
 
168
                /* 3.3.6: Region Parameters.  Restriction: Immediate vectors
 
169
                 * mean the destination has to be 128-bit aligned and the
 
170
                 * destination horiz stride has to be a word.
 
171
                 */
 
172
                if (reg.type == BRW_REGISTER_TYPE_V) {
 
173
                        assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
 
174
                               reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
 
175
                }
 
176
 
 
177
                return;
 
178
        }
 
179
 
 
180
        if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
 
181
            reg.file == BRW_ARF_NULL)
 
182
                return;
 
183
 
 
184
        assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
 
185
        assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg));
 
186
        assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg));
 
187
        assert(insn->header.execution_size >= 0 && insn->header.execution_size < ARRAY_SIZE(execsize_for_reg));
 
188
 
 
189
        hstride = hstride_for_reg[reg.hstride];
 
190
 
 
191
        if (reg.vstride == 0xf) {
 
192
                vstride = -1;
 
193
        } else {
 
194
                vstride = vstride_for_reg[reg.vstride];
 
195
        }
 
196
 
 
197
        width = width_for_reg[reg.width];
 
198
 
 
199
        execsize = execsize_for_reg[insn->header.execution_size];
 
200
 
 
201
        /* Restrictions from 3.3.10: Register Region Restrictions. */
 
202
        /* 3. */
 
203
        assert(execsize >= width);
 
204
 
 
205
        /* 4. */
 
206
        if (execsize == width && hstride != 0) {
 
207
                assert(vstride == -1 || vstride == width * hstride);
 
208
        }
 
209
 
 
210
        /* 5. */
 
211
        if (execsize == width && hstride == 0) {
 
212
                /* no restriction on vstride. */
 
213
        }
 
214
 
 
215
        /* 6. */
 
216
        if (width == 1) {
 
217
                assert(hstride == 0);
 
218
        }
 
219
 
 
220
        /* 7. */
 
221
        if (execsize == 1 && width == 1) {
 
222
                assert(hstride == 0);
 
223
                assert(vstride == 0);
 
224
        }
 
225
 
 
226
        /* 8. */
 
227
        if (vstride == 0 && hstride == 0) {
 
228
                assert(width == 1);
 
229
        }
 
230
 
 
231
        /* 10. Check destination issues. */
 
232
}
 
233
 
 
234
void
 
235
brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
 
236
             struct brw_reg reg)
 
237
{
 
238
        if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
 
239
                assert(reg.nr < 128);
 
240
 
 
241
        gen7_convert_mrf_to_grf(p, &reg);
 
242
 
 
243
        validate_reg(insn, reg);
 
244
 
 
245
        insn->bits1.da1.src0_reg_file = reg.file;
 
246
        insn->bits1.da1.src0_reg_type = reg.type;
 
247
        insn->bits2.da1.src0_abs = reg.abs;
 
248
        insn->bits2.da1.src0_negate = reg.negate;
 
249
        insn->bits2.da1.src0_address_mode = reg.address_mode;
 
250
 
 
251
        if (reg.file == BRW_IMMEDIATE_VALUE) {
 
252
                insn->bits3.ud = reg.dw1.ud;
 
253
 
 
254
                /* Required to set some fields in src1 as well:
 
255
                */
 
256
                insn->bits1.da1.src1_reg_file = 0; /* arf */
 
257
                insn->bits1.da1.src1_reg_type = reg.type;
 
258
        } else {
 
259
                if (reg.address_mode == BRW_ADDRESS_DIRECT) {
 
260
                        if (insn->header.access_mode == BRW_ALIGN_1) {
 
261
                                insn->bits2.da1.src0_subreg_nr = reg.subnr;
 
262
                                insn->bits2.da1.src0_reg_nr = reg.nr;
 
263
                        } else {
 
264
                                insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
 
265
                                insn->bits2.da16.src0_reg_nr = reg.nr;
 
266
                        }
 
267
                } else {
 
268
                        insn->bits2.ia1.src0_subreg_nr = reg.subnr;
 
269
 
 
270
                        if (insn->header.access_mode == BRW_ALIGN_1) {
 
271
                                insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
 
272
                        } else {
 
273
                                insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
 
274
                        }
 
275
                }
 
276
 
 
277
                if (insn->header.access_mode == BRW_ALIGN_1) {
 
278
                        if (reg.width == BRW_WIDTH_1 &&
 
279
                            insn->header.execution_size == BRW_EXECUTE_1) {
 
280
                                insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
 
281
                                insn->bits2.da1.src0_width = BRW_WIDTH_1;
 
282
                                insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
 
283
                        } else {
 
284
                                insn->bits2.da1.src0_horiz_stride = reg.hstride;
 
285
                                insn->bits2.da1.src0_width = reg.width;
 
286
                                insn->bits2.da1.src0_vert_stride = reg.vstride;
 
287
                        }
 
288
                } else {
 
289
                        insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
 
290
                        insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
 
291
                        insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
 
292
                        insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
 
293
 
 
294
                        /* This is an oddity of the fact we're using the same
 
295
                         * descriptions for registers in align_16 as align_1:
 
296
                         */
 
297
                        if (reg.vstride == BRW_VERTICAL_STRIDE_8)
 
298
                                insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
 
299
                        else
 
300
                                insn->bits2.da16.src0_vert_stride = reg.vstride;
 
301
                }
 
302
        }
 
303
}
 
304
 
 
305
void brw_set_src1(struct brw_compile *p,
 
306
                  struct brw_instruction *insn,
 
307
                  struct brw_reg reg)
 
308
{
 
309
        assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
 
310
        assert(reg.nr < 128);
 
311
 
 
312
        gen7_convert_mrf_to_grf(p, &reg);
 
313
 
 
314
        validate_reg(insn, reg);
 
315
 
 
316
        insn->bits1.da1.src1_reg_file = reg.file;
 
317
        insn->bits1.da1.src1_reg_type = reg.type;
 
318
        insn->bits3.da1.src1_abs = reg.abs;
 
319
        insn->bits3.da1.src1_negate = reg.negate;
 
320
 
 
321
        /* Only src1 can be immediate in two-argument instructions. */
 
322
        assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
 
323
 
 
324
        if (reg.file == BRW_IMMEDIATE_VALUE) {
 
325
                insn->bits3.ud = reg.dw1.ud;
 
326
        } else {
 
327
                /* This is a hardware restriction, which may or may not be lifted
 
328
                 * in the future:
 
329
                 */
 
330
                assert (reg.address_mode == BRW_ADDRESS_DIRECT);
 
331
                /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
 
332
 
 
333
                if (insn->header.access_mode == BRW_ALIGN_1) {
 
334
                        insn->bits3.da1.src1_subreg_nr = reg.subnr;
 
335
                        insn->bits3.da1.src1_reg_nr = reg.nr;
 
336
                } else {
 
337
                        insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
 
338
                        insn->bits3.da16.src1_reg_nr = reg.nr;
 
339
                }
 
340
 
 
341
                if (insn->header.access_mode == BRW_ALIGN_1) {
 
342
                        if (reg.width == BRW_WIDTH_1 &&
 
343
                            insn->header.execution_size == BRW_EXECUTE_1) {
 
344
                                insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
 
345
                                insn->bits3.da1.src1_width = BRW_WIDTH_1;
 
346
                                insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
 
347
                        } else {
 
348
                                insn->bits3.da1.src1_horiz_stride = reg.hstride;
 
349
                                insn->bits3.da1.src1_width = reg.width;
 
350
                                insn->bits3.da1.src1_vert_stride = reg.vstride;
 
351
                        }
 
352
                } else {
 
353
                        insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
 
354
                        insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
 
355
                        insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
 
356
                        insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
 
357
 
 
358
                        /* This is an oddity of the fact we're using the same
 
359
                         * descriptions for registers in align_16 as align_1:
 
360
                         */
 
361
                        if (reg.vstride == BRW_VERTICAL_STRIDE_8)
 
362
                                insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
 
363
                        else
 
364
                                insn->bits3.da16.src1_vert_stride = reg.vstride;
 
365
                }
 
366
        }
 
367
}
 
368
 
 
369
/**
 
370
 * Set the Message Descriptor and Extended Message Descriptor fields
 
371
 * for SEND messages.
 
372
 *
 
373
 * \note This zeroes out the Function Control bits, so it must be called
 
374
 *       \b before filling out any message-specific data.  Callers can
 
375
 *       choose not to fill in irrelevant bits; they will be zero.
 
376
 */
 
377
static void
 
378
brw_set_message_descriptor(struct brw_compile *p,
 
379
                           struct brw_instruction *inst,
 
380
                           enum brw_message_target sfid,
 
381
                           unsigned msg_length,
 
382
                           unsigned response_length,
 
383
                           bool header_present,
 
384
                           bool end_of_thread)
 
385
{
 
386
        brw_set_src1(p, inst, brw_imm_d(0));
 
387
 
 
388
        if (p->gen >= 050) {
 
389
                inst->bits3.generic_gen5.header_present = header_present;
 
390
                inst->bits3.generic_gen5.response_length = response_length;
 
391
                inst->bits3.generic_gen5.msg_length = msg_length;
 
392
                inst->bits3.generic_gen5.end_of_thread = end_of_thread;
 
393
 
 
394
                if (p->gen >= 060) {
 
395
                        /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
 
396
                        inst->header.destreg__conditionalmod = sfid;
 
397
                } else {
 
398
                        /* Set Extended Message Descriptor (ex_desc) */
 
399
                        inst->bits2.send_gen5.sfid = sfid;
 
400
                        inst->bits2.send_gen5.end_of_thread = end_of_thread;
 
401
                }
 
402
        } else {
 
403
                inst->bits3.generic.response_length = response_length;
 
404
                inst->bits3.generic.msg_length = msg_length;
 
405
                inst->bits3.generic.msg_target = sfid;
 
406
                inst->bits3.generic.end_of_thread = end_of_thread;
 
407
        }
 
408
}
 
409
 
 
410
 
 
411
static void brw_set_math_message(struct brw_compile *p,
 
412
                                 struct brw_instruction *insn,
 
413
                                 unsigned function,
 
414
                                 unsigned integer_type,
 
415
                                 bool low_precision,
 
416
                                 bool saturate,
 
417
                                 unsigned dataType)
 
418
{
 
419
        unsigned msg_length;
 
420
        unsigned response_length;
 
421
 
 
422
        /* Infer message length from the function */
 
423
        switch (function) {
 
424
        case BRW_MATH_FUNCTION_POW:
 
425
        case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
 
426
        case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
 
427
        case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
 
428
                msg_length = 2;
 
429
                break;
 
430
        default:
 
431
                msg_length = 1;
 
432
                break;
 
433
        }
 
434
 
 
435
        /* Infer response length from the function */
 
436
        switch (function) {
 
437
        case BRW_MATH_FUNCTION_SINCOS:
 
438
        case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
 
439
                response_length = 2;
 
440
                break;
 
441
        default:
 
442
                response_length = 1;
 
443
                break;
 
444
        }
 
445
 
 
446
        brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
 
447
                                   msg_length, response_length,
 
448
                                   false, false);
 
449
        if (p->gen == 050) {
 
450
                insn->bits3.math_gen5.function = function;
 
451
                insn->bits3.math_gen5.int_type = integer_type;
 
452
                insn->bits3.math_gen5.precision = low_precision;
 
453
                insn->bits3.math_gen5.saturate = saturate;
 
454
                insn->bits3.math_gen5.data_type = dataType;
 
455
                insn->bits3.math_gen5.snapshot = 0;
 
456
        } else {
 
457
                insn->bits3.math.function = function;
 
458
                insn->bits3.math.int_type = integer_type;
 
459
                insn->bits3.math.precision = low_precision;
 
460
                insn->bits3.math.saturate = saturate;
 
461
                insn->bits3.math.data_type = dataType;
 
462
        }
 
463
}
 
464
 
 
465
static void brw_set_ff_sync_message(struct brw_compile *p,
 
466
                                    struct brw_instruction *insn,
 
467
                                    bool allocate,
 
468
                                    unsigned response_length,
 
469
                                    bool end_of_thread)
 
470
{
 
471
        brw_set_message_descriptor(p, insn, BRW_SFID_URB,
 
472
                                   1, response_length,
 
473
                                   true, end_of_thread);
 
474
        insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
 
475
        insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
 
476
        insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
 
477
        insn->bits3.urb_gen5.allocate = allocate;
 
478
        insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
 
479
        insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
 
480
}
 
481
 
 
482
static void brw_set_urb_message(struct brw_compile *p,
 
483
                                struct brw_instruction *insn,
 
484
                                bool allocate,
 
485
                                bool used,
 
486
                                unsigned msg_length,
 
487
                                unsigned response_length,
 
488
                                bool end_of_thread,
 
489
                                bool complete,
 
490
                                unsigned offset,
 
491
                                unsigned swizzle_control)
 
492
{
 
493
        brw_set_message_descriptor(p, insn, BRW_SFID_URB,
 
494
                                   msg_length, response_length, true, end_of_thread);
 
495
        if (p->gen >= 070) {
 
496
                insn->bits3.urb_gen7.opcode = 0;        /* URB_WRITE_HWORD */
 
497
                insn->bits3.urb_gen7.offset = offset;
 
498
                assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
 
499
                insn->bits3.urb_gen7.swizzle_control = swizzle_control;
 
500
                /* per_slot_offset = 0 makes it ignore offsets in message header */
 
501
                insn->bits3.urb_gen7.per_slot_offset = 0;
 
502
                insn->bits3.urb_gen7.complete = complete;
 
503
        } else if (p->gen >= 050) {
 
504
                insn->bits3.urb_gen5.opcode = 0;        /* URB_WRITE */
 
505
                insn->bits3.urb_gen5.offset = offset;
 
506
                insn->bits3.urb_gen5.swizzle_control = swizzle_control;
 
507
                insn->bits3.urb_gen5.allocate = allocate;
 
508
                insn->bits3.urb_gen5.used = used;       /* ? */
 
509
                insn->bits3.urb_gen5.complete = complete;
 
510
        } else {
 
511
                insn->bits3.urb.opcode = 0;     /* ? */
 
512
                insn->bits3.urb.offset = offset;
 
513
                insn->bits3.urb.swizzle_control = swizzle_control;
 
514
                insn->bits3.urb.allocate = allocate;
 
515
                insn->bits3.urb.used = used;    /* ? */
 
516
                insn->bits3.urb.complete = complete;
 
517
        }
 
518
}
 
519
 
 
520
void
 
521
brw_set_dp_write_message(struct brw_compile *p,
 
522
                         struct brw_instruction *insn,
 
523
                         unsigned binding_table_index,
 
524
                         unsigned msg_control,
 
525
                         unsigned msg_type,
 
526
                         unsigned msg_length,
 
527
                         bool header_present,
 
528
                         bool last_render_target,
 
529
                         unsigned response_length,
 
530
                         bool end_of_thread,
 
531
                         bool send_commit_msg)
 
532
{
 
533
        unsigned sfid;
 
534
 
 
535
        if (p->gen >= 070) {
 
536
                /* Use the Render Cache for RT writes; otherwise use the Data Cache */
 
537
                if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
 
538
                        sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
 
539
                else
 
540
                        sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
 
541
        } else if (p->gen >= 060) {
 
542
                /* Use the render cache for all write messages. */
 
543
                sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
 
544
        } else {
 
545
                sfid = BRW_SFID_DATAPORT_WRITE;
 
546
        }
 
547
 
 
548
        brw_set_message_descriptor(p, insn, sfid,
 
549
                                   msg_length, response_length,
 
550
                                   header_present, end_of_thread);
 
551
 
 
552
        if (p->gen >= 070) {
 
553
                insn->bits3.gen7_dp.binding_table_index = binding_table_index;
 
554
                insn->bits3.gen7_dp.msg_control = msg_control;
 
555
                insn->bits3.gen7_dp.last_render_target = last_render_target;
 
556
                insn->bits3.gen7_dp.msg_type = msg_type;
 
557
        } else if (p->gen >= 060) {
 
558
                insn->bits3.gen6_dp.binding_table_index = binding_table_index;
 
559
                insn->bits3.gen6_dp.msg_control = msg_control;
 
560
                insn->bits3.gen6_dp.last_render_target = last_render_target;
 
561
                insn->bits3.gen6_dp.msg_type = msg_type;
 
562
                insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
 
563
        } else if (p->gen >= 050) {
 
564
                insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
 
565
                insn->bits3.dp_write_gen5.msg_control = msg_control;
 
566
                insn->bits3.dp_write_gen5.last_render_target = last_render_target;
 
567
                insn->bits3.dp_write_gen5.msg_type = msg_type;
 
568
                insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
 
569
        } else {
 
570
                insn->bits3.dp_write.binding_table_index = binding_table_index;
 
571
                insn->bits3.dp_write.msg_control = msg_control;
 
572
                insn->bits3.dp_write.last_render_target = last_render_target;
 
573
                insn->bits3.dp_write.msg_type = msg_type;
 
574
                insn->bits3.dp_write.send_commit_msg = send_commit_msg;
 
575
        }
 
576
}
 
577
 
 
578
void
 
579
brw_set_dp_read_message(struct brw_compile *p,
 
580
                        struct brw_instruction *insn,
 
581
                        unsigned binding_table_index,
 
582
                        unsigned msg_control,
 
583
                        unsigned msg_type,
 
584
                        unsigned target_cache,
 
585
                        unsigned msg_length,
 
586
                        unsigned response_length)
 
587
{
 
588
        unsigned sfid;
 
589
 
 
590
        if (p->gen >= 070) {
 
591
                sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
 
592
        } else if (p->gen >= 060) {
 
593
                if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
 
594
                        sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
 
595
                else
 
596
                        sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
 
597
        } else {
 
598
                sfid = BRW_SFID_DATAPORT_READ;
 
599
        }
 
600
 
 
601
        brw_set_message_descriptor(p, insn, sfid,
 
602
                                   msg_length, response_length,
 
603
                                   true, false);
 
604
 
 
605
        if (p->gen >= 070) {
 
606
                insn->bits3.gen7_dp.binding_table_index = binding_table_index;
 
607
                insn->bits3.gen7_dp.msg_control = msg_control;
 
608
                insn->bits3.gen7_dp.last_render_target = 0;
 
609
                insn->bits3.gen7_dp.msg_type = msg_type;
 
610
        } else if (p->gen >= 060) {
 
611
                insn->bits3.gen6_dp.binding_table_index = binding_table_index;
 
612
                insn->bits3.gen6_dp.msg_control = msg_control;
 
613
                insn->bits3.gen6_dp.last_render_target = 0;
 
614
                insn->bits3.gen6_dp.msg_type = msg_type;
 
615
                insn->bits3.gen6_dp.send_commit_msg = 0;
 
616
        } else if (p->gen >= 050) {
 
617
                insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
 
618
                insn->bits3.dp_read_gen5.msg_control = msg_control;
 
619
                insn->bits3.dp_read_gen5.msg_type = msg_type;
 
620
                insn->bits3.dp_read_gen5.target_cache = target_cache;
 
621
        } else if (p->gen >= 045) {
 
622
                insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
 
623
                insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
 
624
                insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
 
625
                insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
 
626
        } else {
 
627
                insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
 
628
                insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
 
629
                insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
 
630
                insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
 
631
        }
 
632
}
 
633
 
 
634
static void brw_set_sampler_message(struct brw_compile *p,
 
635
                                    struct brw_instruction *insn,
 
636
                                    unsigned binding_table_index,
 
637
                                    unsigned sampler,
 
638
                                    unsigned msg_type,
 
639
                                    unsigned response_length,
 
640
                                    unsigned msg_length,
 
641
                                    bool header_present,
 
642
                                    unsigned simd_mode)
 
643
{
 
644
        brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER,
 
645
                                   msg_length, response_length,
 
646
                                   header_present, false);
 
647
 
 
648
        if (p->gen >= 070) {
 
649
                insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
 
650
                insn->bits3.sampler_gen7.sampler = sampler;
 
651
                insn->bits3.sampler_gen7.msg_type = msg_type;
 
652
                insn->bits3.sampler_gen7.simd_mode = simd_mode;
 
653
        } else if (p->gen >= 050) {
 
654
                insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
 
655
                insn->bits3.sampler_gen5.sampler = sampler;
 
656
                insn->bits3.sampler_gen5.msg_type = msg_type;
 
657
                insn->bits3.sampler_gen5.simd_mode = simd_mode;
 
658
        } else if (p->gen >= 045) {
 
659
                insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
 
660
                insn->bits3.sampler_g4x.sampler = sampler;
 
661
                insn->bits3.sampler_g4x.msg_type = msg_type;
 
662
        } else {
 
663
                insn->bits3.sampler.binding_table_index = binding_table_index;
 
664
                insn->bits3.sampler.sampler = sampler;
 
665
                insn->bits3.sampler.msg_type = msg_type;
 
666
                insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
 
667
        }
 
668
}
 
669
 
 
670
 
 
671
void brw_NOP(struct brw_compile *p)
 
672
{
 
673
        struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_NOP);
 
674
        brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
 
675
        brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
 
676
        brw_set_src1(p, insn, brw_imm_ud(0x0));
 
677
}
 
678
 
 
679
/***********************************************************************
 
680
 * Comparisons, if/else/endif
 
681
 */
 
682
 
 
683
static void
 
684
push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
 
685
{
 
686
        p->if_stack[p->if_stack_depth] = inst;
 
687
 
 
688
        p->if_stack_depth++;
 
689
        if (p->if_stack_array_size <= p->if_stack_depth) {
 
690
                p->if_stack_array_size *= 2;
 
691
                p->if_stack = realloc(p->if_stack, sizeof(struct brw_instruction *)*p->if_stack_array_size);
 
692
        }
 
693
}
 
694
 
 
695
/* EU takes the value from the flag register and pushes it onto some
 
696
 * sort of a stack (presumably merging with any flag value already on
 
697
 * the stack).  Within an if block, the flags at the top of the stack
 
698
 * control execution on each channel of the unit, eg. on each of the
 
699
 * 16 pixel values in our wm programs.
 
700
 *
 
701
 * When the matching 'else' instruction is reached (presumably by
 
702
 * countdown of the instruction count patched in by our ELSE/ENDIF
 
703
 * functions), the relevant flags are inverted.
 
704
 *
 
705
 * When the matching 'endif' instruction is reached, the flags are
 
706
 * popped off.  If the stack is now empty, normal execution resumes.
 
707
 */
 
708
struct brw_instruction *
 
709
brw_IF(struct brw_compile *p, unsigned execute_size)
 
710
{
 
711
        struct brw_instruction *insn;
 
712
 
 
713
        insn = brw_next_insn(p, BRW_OPCODE_IF);
 
714
 
 
715
        /* Override the defaults for this instruction: */
 
716
        if (p->gen < 060) {
 
717
                brw_set_dest(p, insn, brw_ip_reg());
 
718
                brw_set_src0(p, insn, brw_ip_reg());
 
719
                brw_set_src1(p, insn, brw_imm_d(0x0));
 
720
        } else if (p->gen < 070) {
 
721
                brw_set_dest(p, insn, brw_imm_w(0));
 
722
                insn->bits1.branch_gen6.jump_count = 0;
 
723
                brw_set_src0(p, insn, __retype_d(brw_null_reg()));
 
724
                brw_set_src1(p, insn, __retype_d(brw_null_reg()));
 
725
        } else {
 
726
                brw_set_dest(p, insn, __retype_d(brw_null_reg()));
 
727
                brw_set_src0(p, insn, __retype_d(brw_null_reg()));
 
728
                brw_set_src1(p, insn, brw_imm_ud(0));
 
729
                insn->bits3.break_cont.jip = 0;
 
730
                insn->bits3.break_cont.uip = 0;
 
731
        }
 
732
 
 
733
        insn->header.execution_size = execute_size;
 
734
        insn->header.compression_control = BRW_COMPRESSION_NONE;
 
735
        insn->header.predicate_control = BRW_PREDICATE_NORMAL;
 
736
        insn->header.mask_control = BRW_MASK_ENABLE;
 
737
        if (!p->single_program_flow)
 
738
                insn->header.thread_control = BRW_THREAD_SWITCH;
 
739
 
 
740
        p->current->header.predicate_control = BRW_PREDICATE_NONE;
 
741
 
 
742
        push_if_stack(p, insn);
 
743
        return insn;
 
744
}
 
745
 
 
746
/* This function is only used for gen6-style IF instructions with an
 
747
 * embedded comparison (conditional modifier).  It is not used on gen7.
 
748
 */
 
749
struct brw_instruction *
 
750
gen6_IF(struct brw_compile *p, uint32_t conditional,
 
751
        struct brw_reg src0, struct brw_reg src1)
 
752
{
 
753
        struct brw_instruction *insn;
 
754
 
 
755
        insn = brw_next_insn(p, BRW_OPCODE_IF);
 
756
 
 
757
        brw_set_dest(p, insn, brw_imm_w(0));
 
758
        if (p->compressed) {
 
759
                insn->header.execution_size = BRW_EXECUTE_16;
 
760
        } else {
 
761
                insn->header.execution_size = BRW_EXECUTE_8;
 
762
        }
 
763
        insn->bits1.branch_gen6.jump_count = 0;
 
764
        brw_set_src0(p, insn, src0);
 
765
        brw_set_src1(p, insn, src1);
 
766
 
 
767
        assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
 
768
        assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
 
769
        insn->header.destreg__conditionalmod = conditional;
 
770
 
 
771
        if (!p->single_program_flow)
 
772
                insn->header.thread_control = BRW_THREAD_SWITCH;
 
773
 
 
774
        push_if_stack(p, insn);
 
775
        return insn;
 
776
}
 
777
 
 
778
/**
 
779
 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
 
780
 */
 
781
static void
 
782
convert_IF_ELSE_to_ADD(struct brw_compile *p,
 
783
                       struct brw_instruction *if_inst,
 
784
                       struct brw_instruction *else_inst)
 
785
{
 
786
        /* The next instruction (where the ENDIF would be, if it existed) */
 
787
        struct brw_instruction *next_inst = &p->store[p->nr_insn];
 
788
 
 
789
        assert(p->single_program_flow);
 
790
        assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
 
791
        assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
 
792
        assert(if_inst->header.execution_size == BRW_EXECUTE_1);
 
793
 
 
794
        /* Convert IF to an ADD instruction that moves the instruction pointer
 
795
         * to the first instruction of the ELSE block.  If there is no ELSE
 
796
         * block, point to where ENDIF would be.  Reverse the predicate.
 
797
         *
 
798
         * There's no need to execute an ENDIF since we don't need to do any
 
799
         * stack operations, and if we're currently executing, we just want to
 
800
         * continue normally.
 
801
         */
 
802
        if_inst->header.opcode = BRW_OPCODE_ADD;
 
803
        if_inst->header.predicate_inverse = 1;
 
804
 
 
805
        if (else_inst != NULL) {
 
806
                /* Convert ELSE to an ADD instruction that points where the ENDIF
 
807
                 * would be.
 
808
                 */
 
809
                else_inst->header.opcode = BRW_OPCODE_ADD;
 
810
 
 
811
                if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
 
812
                else_inst->bits3.ud = (next_inst - else_inst) * 16;
 
813
        } else {
 
814
                if_inst->bits3.ud = (next_inst - if_inst) * 16;
 
815
        }
 
816
}
 
817
 
 
818
/**
 
819
 * Patch IF and ELSE instructions with appropriate jump targets.
 
820
 */
 
821
static void
 
822
patch_IF_ELSE(struct brw_compile *p,
 
823
              struct brw_instruction *if_inst,
 
824
              struct brw_instruction *else_inst,
 
825
              struct brw_instruction *endif_inst)
 
826
{
 
827
        unsigned br = 1;
 
828
 
 
829
        assert(!p->single_program_flow);
 
830
        assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
 
831
        assert(endif_inst != NULL);
 
832
        assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
 
833
 
 
834
        /* Jump count is for 64bit data chunk each, so one 128bit instruction
 
835
         * requires 2 chunks.
 
836
         */
 
837
        if (p->gen >= 050)
 
838
                br = 2;
 
839
 
 
840
        assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
 
841
        endif_inst->header.execution_size = if_inst->header.execution_size;
 
842
 
 
843
        if (else_inst == NULL) {
 
844
                /* Patch IF -> ENDIF */
 
845
                if (p->gen < 060) {
 
846
                        /* Turn it into an IFF, which means no mask stack operations for
 
847
                         * all-false and jumping past the ENDIF.
 
848
                         */
 
849
                        if_inst->header.opcode = BRW_OPCODE_IFF;
 
850
                        if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
 
851
                        if_inst->bits3.if_else.pop_count = 0;
 
852
                        if_inst->bits3.if_else.pad0 = 0;
 
853
                } else if (p->gen < 070) {
 
854
                        /* As of gen6, there is no IFF and IF must point to the ENDIF. */
 
855
                        if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
 
856
                } else {
 
857
                        if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
 
858
                        if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
 
859
                }
 
860
        } else {
 
861
                else_inst->header.execution_size = if_inst->header.execution_size;
 
862
 
 
863
                /* Patch IF -> ELSE */
 
864
                if (p->gen < 060) {
 
865
                        if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
 
866
                        if_inst->bits3.if_else.pop_count = 0;
 
867
                        if_inst->bits3.if_else.pad0 = 0;
 
868
                } else if (p->gen <= 070) {
 
869
                        if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
 
870
                }
 
871
 
 
872
                /* Patch ELSE -> ENDIF */
 
873
                if (p->gen < 060) {
 
874
                        /* BRW_OPCODE_ELSE pre-gen6 should point just past the
 
875
                         * matching ENDIF.
 
876
                         */
 
877
                        else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
 
878
                        else_inst->bits3.if_else.pop_count = 1;
 
879
                        else_inst->bits3.if_else.pad0 = 0;
 
880
                } else if (p->gen < 070) {
 
881
                        /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
 
882
                        else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
 
883
                } else {
 
884
                        /* The IF instruction's JIP should point just past the ELSE */
 
885
                        if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
 
886
                        /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
 
887
                        if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
 
888
                        else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
 
889
                }
 
890
        }
 
891
}
 
892
 
 
893
void
 
894
brw_ELSE(struct brw_compile *p)
 
895
{
 
896
        struct brw_instruction *insn;
 
897
 
 
898
        insn = brw_next_insn(p, BRW_OPCODE_ELSE);
 
899
 
 
900
        if (p->gen < 060) {
 
901
                brw_set_dest(p, insn, brw_ip_reg());
 
902
                brw_set_src0(p, insn, brw_ip_reg());
 
903
                brw_set_src1(p, insn, brw_imm_d(0x0));
 
904
        } else if (p->gen < 070) {
 
905
                brw_set_dest(p, insn, brw_imm_w(0));
 
906
                insn->bits1.branch_gen6.jump_count = 0;
 
907
                brw_set_src0(p, insn, __retype_d(brw_null_reg()));
 
908
                brw_set_src1(p, insn, __retype_d(brw_null_reg()));
 
909
        } else {
 
910
                brw_set_dest(p, insn, __retype_d(brw_null_reg()));
 
911
                brw_set_src0(p, insn, __retype_d(brw_null_reg()));
 
912
                brw_set_src1(p, insn, brw_imm_ud(0));
 
913
                insn->bits3.break_cont.jip = 0;
 
914
                insn->bits3.break_cont.uip = 0;
 
915
        }
 
916
 
 
917
        insn->header.compression_control = BRW_COMPRESSION_NONE;
 
918
        insn->header.mask_control = BRW_MASK_ENABLE;
 
919
        if (!p->single_program_flow)
 
920
                insn->header.thread_control = BRW_THREAD_SWITCH;
 
921
 
 
922
        push_if_stack(p, insn);
 
923
}
 
924
 
 
925
void
 
926
brw_ENDIF(struct brw_compile *p)
 
927
{
 
928
        struct brw_instruction *insn;
 
929
        struct brw_instruction *else_inst = NULL;
 
930
        struct brw_instruction *if_inst = NULL;
 
931
 
 
932
        /* Pop the IF and (optional) ELSE instructions from the stack */
 
933
        p->if_stack_depth--;
 
934
        if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
 
935
                else_inst = p->if_stack[p->if_stack_depth];
 
936
                p->if_stack_depth--;
 
937
        }
 
938
        if_inst = p->if_stack[p->if_stack_depth];
 
939
 
 
940
        if (p->single_program_flow) {
 
941
                /* ENDIF is useless; don't bother emitting it. */
 
942
                convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
 
943
                return;
 
944
        }
 
945
 
 
946
        insn = brw_next_insn(p, BRW_OPCODE_ENDIF);
 
947
 
 
948
        if (p->gen < 060) {
 
949
                brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
 
950
                brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
 
951
                brw_set_src1(p, insn, brw_imm_d(0x0));
 
952
        } else if (p->gen < 070) {
 
953
                brw_set_dest(p, insn, brw_imm_w(0));
 
954
                brw_set_src0(p, insn, __retype_d(brw_null_reg()));
 
955
                brw_set_src1(p, insn, __retype_d(brw_null_reg()));
 
956
        } else {
 
957
                brw_set_dest(p, insn, __retype_d(brw_null_reg()));
 
958
                brw_set_src0(p, insn, __retype_d(brw_null_reg()));
 
959
                brw_set_src1(p, insn, brw_imm_ud(0));
 
960
        }
 
961
 
 
962
        insn->header.compression_control = BRW_COMPRESSION_NONE;
 
963
        insn->header.mask_control = BRW_MASK_ENABLE;
 
964
        insn->header.thread_control = BRW_THREAD_SWITCH;
 
965
 
 
966
        /* Also pop item off the stack in the endif instruction: */
 
967
        if (p->gen < 060) {
 
968
                insn->bits3.if_else.jump_count = 0;
 
969
                insn->bits3.if_else.pop_count = 1;
 
970
                insn->bits3.if_else.pad0 = 0;
 
971
        } else if (p->gen < 070) {
 
972
                insn->bits1.branch_gen6.jump_count = 2;
 
973
        } else {
 
974
                insn->bits3.break_cont.jip = 2;
 
975
        }
 
976
        patch_IF_ELSE(p, if_inst, else_inst, insn);
 
977
}
 
978
 
 
979
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
 
980
{
 
981
        struct brw_instruction *insn;
 
982
 
 
983
        insn = brw_next_insn(p, BRW_OPCODE_BREAK);
 
984
        if (p->gen >= 060) {
 
985
                brw_set_dest(p, insn, __retype_d(brw_null_reg()));
 
986
                brw_set_src0(p, insn, __retype_d(brw_null_reg()));
 
987
                brw_set_src1(p, insn, brw_imm_d(0x0));
 
988
        } else {
 
989
                brw_set_dest(p, insn, brw_ip_reg());
 
990
                brw_set_src0(p, insn, brw_ip_reg());
 
991
                brw_set_src1(p, insn, brw_imm_d(0x0));
 
992
                insn->bits3.if_else.pad0 = 0;
 
993
                insn->bits3.if_else.pop_count = pop_count;
 
994
        }
 
995
        insn->header.compression_control = BRW_COMPRESSION_NONE;
 
996
        insn->header.execution_size = BRW_EXECUTE_8;
 
997
 
 
998
        return insn;
 
999
}
 
1000
 
 
1001
struct brw_instruction *gen6_CONT(struct brw_compile *p,
 
1002
                                  struct brw_instruction *do_insn)
 
1003
{
 
1004
        struct brw_instruction *insn;
 
1005
 
 
1006
        insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
 
1007
        brw_set_dest(p, insn, __retype_d(brw_null_reg()));
 
1008
        brw_set_src0(p, insn, __retype_d(brw_null_reg()));
 
1009
        brw_set_dest(p, insn, brw_ip_reg());
 
1010
        brw_set_src0(p, insn, brw_ip_reg());
 
1011
        brw_set_src1(p, insn, brw_imm_d(0x0));
 
1012
 
 
1013
        insn->header.compression_control = BRW_COMPRESSION_NONE;
 
1014
        insn->header.execution_size = BRW_EXECUTE_8;
 
1015
        return insn;
 
1016
}
 
1017
 
 
1018
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
 
1019
{
 
1020
        struct brw_instruction *insn;
 
1021
        insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
 
1022
        brw_set_dest(p, insn, brw_ip_reg());
 
1023
        brw_set_src0(p, insn, brw_ip_reg());
 
1024
        brw_set_src1(p, insn, brw_imm_d(0x0));
 
1025
        insn->header.compression_control = BRW_COMPRESSION_NONE;
 
1026
        insn->header.execution_size = BRW_EXECUTE_8;
 
1027
        /* insn->header.mask_control = BRW_MASK_DISABLE; */
 
1028
        insn->bits3.if_else.pad0 = 0;
 
1029
        insn->bits3.if_else.pop_count = pop_count;
 
1030
        return insn;
 
1031
}
 
1032
 
 
1033
/* DO/WHILE loop:
 
1034
 *
 
1035
 * The DO/WHILE is just an unterminated loop -- break or continue are
 
1036
 * used for control within the loop.  We have a few ways they can be
 
1037
 * done.
 
1038
 *
 
1039
 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
 
1040
 * jip and no DO instruction.
 
1041
 *
 
1042
 * For non-uniform control flow pre-gen6, there's a DO instruction to
 
1043
 * push the mask, and a WHILE to jump back, and BREAK to get out and
 
1044
 * pop the mask.
 
1045
 *
 
1046
 * For gen6, there's no more mask stack, so no need for DO.  WHILE
 
1047
 * just points back to the first instruction of the loop.
 
1048
 */
 
1049
struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
 
1050
{
 
1051
        if (p->gen >= 060 || p->single_program_flow) {
 
1052
                return &p->store[p->nr_insn];
 
1053
        } else {
 
1054
                struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_DO);
 
1055
 
 
1056
                /* Override the defaults for this instruction:
 
1057
                */
 
1058
                brw_set_dest(p, insn, brw_null_reg());
 
1059
                brw_set_src0(p, insn, brw_null_reg());
 
1060
                brw_set_src1(p, insn, brw_null_reg());
 
1061
 
 
1062
                insn->header.compression_control = BRW_COMPRESSION_NONE;
 
1063
                insn->header.execution_size = execute_size;
 
1064
                insn->header.predicate_control = BRW_PREDICATE_NONE;
 
1065
                /* insn->header.mask_control = BRW_MASK_ENABLE; */
 
1066
                /* insn->header.mask_control = BRW_MASK_DISABLE; */
 
1067
 
 
1068
                return insn;
 
1069
        }
 
1070
}
 
1071
 
 
1072
struct brw_instruction *brw_WHILE(struct brw_compile *p,
 
1073
                                  struct brw_instruction *do_insn)
 
1074
{
 
1075
        struct brw_instruction *insn;
 
1076
        unsigned br = 1;
 
1077
 
 
1078
        if (p->gen >= 050)
 
1079
                br = 2;
 
1080
 
 
1081
        if (p->gen >= 070) {
 
1082
                insn = brw_next_insn(p, BRW_OPCODE_WHILE);
 
1083
 
 
1084
                brw_set_dest(p, insn, __retype_d(brw_null_reg()));
 
1085
                brw_set_src0(p, insn, __retype_d(brw_null_reg()));
 
1086
                brw_set_src1(p, insn, brw_imm_ud(0));
 
1087
                insn->bits3.break_cont.jip = br * (do_insn - insn);
 
1088
 
 
1089
                insn->header.execution_size = BRW_EXECUTE_8;
 
1090
        } else if (p->gen >= 060) {
 
1091
                insn = brw_next_insn(p, BRW_OPCODE_WHILE);
 
1092
 
 
1093
                brw_set_dest(p, insn, brw_imm_w(0));
 
1094
                insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
 
1095
                brw_set_src0(p, insn, __retype_d(brw_null_reg()));
 
1096
                brw_set_src1(p, insn, __retype_d(brw_null_reg()));
 
1097
 
 
1098
                insn->header.execution_size = BRW_EXECUTE_8;
 
1099
        } else {
 
1100
                if (p->single_program_flow) {
 
1101
                        insn = brw_next_insn(p, BRW_OPCODE_ADD);
 
1102
 
 
1103
                        brw_set_dest(p, insn, brw_ip_reg());
 
1104
                        brw_set_src0(p, insn, brw_ip_reg());
 
1105
                        brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
 
1106
                        insn->header.execution_size = BRW_EXECUTE_1;
 
1107
                } else {
 
1108
                        insn = brw_next_insn(p, BRW_OPCODE_WHILE);
 
1109
 
 
1110
                        assert(do_insn->header.opcode == BRW_OPCODE_DO);
 
1111
 
 
1112
                        brw_set_dest(p, insn, brw_ip_reg());
 
1113
                        brw_set_src0(p, insn, brw_ip_reg());
 
1114
                        brw_set_src1(p, insn, brw_imm_d(0));
 
1115
 
 
1116
                        insn->header.execution_size = do_insn->header.execution_size;
 
1117
                        insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
 
1118
                        insn->bits3.if_else.pop_count = 0;
 
1119
                        insn->bits3.if_else.pad0 = 0;
 
1120
                }
 
1121
        }
 
1122
        insn->header.compression_control = BRW_COMPRESSION_NONE;
 
1123
        p->current->header.predicate_control = BRW_PREDICATE_NONE;
 
1124
 
 
1125
        return insn;
 
1126
}
 
1127
 
 
1128
/* FORWARD JUMPS:
 
1129
 */
 
1130
void brw_land_fwd_jump(struct brw_compile *p,
 
1131
                       struct brw_instruction *jmp_insn)
 
1132
{
 
1133
        struct brw_instruction *landing = &p->store[p->nr_insn];
 
1134
        unsigned jmpi = 1;
 
1135
 
 
1136
        if (p->gen >= 050)
 
1137
                jmpi = 2;
 
1138
 
 
1139
        assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
 
1140
        assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
 
1141
 
 
1142
        jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
 
1143
}
 
1144
 
 
1145
 
 
1146
 
 
1147
/* To integrate with the above, it makes sense that the comparison
 
1148
 * instruction should populate the flag register.  It might be simpler
 
1149
 * just to use the flag reg for most WM tasks?
 
1150
 */
 
1151
void brw_CMP(struct brw_compile *p,
 
1152
             struct brw_reg dest,
 
1153
             unsigned conditional,
 
1154
             struct brw_reg src0,
 
1155
             struct brw_reg src1)
 
1156
{
 
1157
        struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_CMP);
 
1158
 
 
1159
        insn->header.destreg__conditionalmod = conditional;
 
1160
        brw_set_dest(p, insn, dest);
 
1161
        brw_set_src0(p, insn, src0);
 
1162
        brw_set_src1(p, insn, src1);
 
1163
 
 
1164
        /* Make it so that future instructions will use the computed flag
 
1165
         * value until brw_set_predicate_control_flag_value() is called
 
1166
         * again.  
 
1167
         */
 
1168
        if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
 
1169
            dest.nr == 0) {
 
1170
                p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
 
1171
                p->flag_value = 0xff;
 
1172
        }
 
1173
}
 
1174
 
 
1175
/* Issue 'wait' instruction for n1, host could program MMIO
 
1176
   to wake up thread. */
 
1177
void brw_WAIT(struct brw_compile *p)
 
1178
{
 
1179
        struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_WAIT);
 
1180
        struct brw_reg src = brw_notification_1_reg();
 
1181
 
 
1182
        brw_set_dest(p, insn, src);
 
1183
        brw_set_src0(p, insn, src);
 
1184
        brw_set_src1(p, insn, brw_null_reg());
 
1185
        insn->header.execution_size = 0; /* must */
 
1186
        insn->header.predicate_control = 0;
 
1187
        insn->header.compression_control = 0;
 
1188
}
 
1189
 
 
1190
/***********************************************************************
 
1191
 * Helpers for the various SEND message types:
 
1192
 */
 
1193
 
 
1194
/** Extended math function, float[8].
 
1195
 */
 
1196
void brw_math(struct brw_compile *p,
 
1197
              struct brw_reg dest,
 
1198
              unsigned function,
 
1199
              unsigned saturate,
 
1200
              unsigned msg_reg_nr,
 
1201
              struct brw_reg src,
 
1202
              unsigned data_type,
 
1203
              unsigned precision)
 
1204
{
 
1205
        if (p->gen >= 060) {
 
1206
                struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);
 
1207
 
 
1208
                assert(dest.file == BRW_GENERAL_REGISTER_FILE);
 
1209
                assert(src.file == BRW_GENERAL_REGISTER_FILE);
 
1210
 
 
1211
                assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
 
1212
                assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
 
1213
 
 
1214
                /* Source modifiers are ignored for extended math instructions. */
 
1215
                assert(!src.negate);
 
1216
                assert(!src.abs);
 
1217
 
 
1218
                if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
 
1219
                    function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
 
1220
                        assert(src.type == BRW_REGISTER_TYPE_F);
 
1221
                }
 
1222
 
 
1223
                /* Math is the same ISA format as other opcodes, except that CondModifier
 
1224
                 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
 
1225
                 */
 
1226
                insn->header.destreg__conditionalmod = function;
 
1227
                insn->header.saturate = saturate;
 
1228
 
 
1229
                brw_set_dest(p, insn, dest);
 
1230
                brw_set_src0(p, insn, src);
 
1231
                brw_set_src1(p, insn, brw_null_reg());
 
1232
        } else {
 
1233
                struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
 
1234
                /* Example code doesn't set predicate_control for send
 
1235
                 * instructions.
 
1236
                 */
 
1237
                insn->header.predicate_control = 0;
 
1238
                insn->header.destreg__conditionalmod = msg_reg_nr;
 
1239
 
 
1240
                brw_set_dest(p, insn, dest);
 
1241
                brw_set_src0(p, insn, src);
 
1242
                brw_set_math_message(p, insn, function,
 
1243
                                     src.type == BRW_REGISTER_TYPE_D,
 
1244
                                     precision,
 
1245
                                     saturate,
 
1246
                                     data_type);
 
1247
        }
 
1248
}
 
1249
 
 
1250
/** Extended math function, float[8].
 
1251
 */
 
1252
void brw_math2(struct brw_compile *p,
 
1253
               struct brw_reg dest,
 
1254
               unsigned function,
 
1255
               struct brw_reg src0,
 
1256
               struct brw_reg src1)
 
1257
{
 
1258
        struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);
 
1259
 
 
1260
        assert(dest.file == BRW_GENERAL_REGISTER_FILE);
 
1261
        assert(src0.file == BRW_GENERAL_REGISTER_FILE);
 
1262
        assert(src1.file == BRW_GENERAL_REGISTER_FILE);
 
1263
 
 
1264
        assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
 
1265
        assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
 
1266
        assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
 
1267
 
 
1268
        if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
 
1269
            function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
 
1270
                assert(src0.type == BRW_REGISTER_TYPE_F);
 
1271
                assert(src1.type == BRW_REGISTER_TYPE_F);
 
1272
        }
 
1273
 
 
1274
        /* Source modifiers are ignored for extended math instructions. */
 
1275
        assert(!src0.negate);
 
1276
        assert(!src0.abs);
 
1277
        assert(!src1.negate);
 
1278
        assert(!src1.abs);
 
1279
 
 
1280
        /* Math is the same ISA format as other opcodes, except that CondModifier
 
1281
         * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
 
1282
         */
 
1283
        insn->header.destreg__conditionalmod = function;
 
1284
 
 
1285
        brw_set_dest(p, insn, dest);
 
1286
        brw_set_src0(p, insn, src0);
 
1287
        brw_set_src1(p, insn, src1);
 
1288
}
 
1289
 
 
1290
/**
 
1291
 * Extended math function, float[16].
 
1292
 * Use 2 send instructions.
 
1293
 */
 
1294
void brw_math_16(struct brw_compile *p,
 
1295
                 struct brw_reg dest,
 
1296
                 unsigned function,
 
1297
                 unsigned saturate,
 
1298
                 unsigned msg_reg_nr,
 
1299
                 struct brw_reg src,
 
1300
                 unsigned precision)
 
1301
{
 
1302
        struct brw_instruction *insn;
 
1303
 
 
1304
        if (p->gen >= 060) {
 
1305
                insn = brw_next_insn(p, BRW_OPCODE_MATH);
 
1306
 
 
1307
                /* Math is the same ISA format as other opcodes, except that CondModifier
 
1308
                 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
 
1309
                 */
 
1310
                insn->header.destreg__conditionalmod = function;
 
1311
                insn->header.saturate = saturate;
 
1312
 
 
1313
                /* Source modifiers are ignored for extended math instructions. */
 
1314
                assert(!src.negate);
 
1315
                assert(!src.abs);
 
1316
 
 
1317
                brw_set_dest(p, insn, dest);
 
1318
                brw_set_src0(p, insn, src);
 
1319
                brw_set_src1(p, insn, brw_null_reg());
 
1320
                return;
 
1321
        }
 
1322
 
 
1323
        /* First instruction:
 
1324
        */
 
1325
        brw_push_insn_state(p);
 
1326
        brw_set_predicate_control_flag_value(p, 0xff);
 
1327
        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
1328
 
 
1329
        insn = brw_next_insn(p, BRW_OPCODE_SEND);
 
1330
        insn->header.destreg__conditionalmod = msg_reg_nr;
 
1331
 
 
1332
        brw_set_dest(p, insn, dest);
 
1333
        brw_set_src0(p, insn, src);
 
1334
        brw_set_math_message(p, insn, function,
 
1335
                             BRW_MATH_INTEGER_UNSIGNED,
 
1336
                             precision,
 
1337
                             saturate,
 
1338
                             BRW_MATH_DATA_VECTOR);
 
1339
 
 
1340
        /* Second instruction:
 
1341
        */
 
1342
        insn = brw_next_insn(p, BRW_OPCODE_SEND);
 
1343
        insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
 
1344
        insn->header.destreg__conditionalmod = msg_reg_nr+1;
 
1345
 
 
1346
        brw_set_dest(p, insn, __offset(dest,1));
 
1347
        brw_set_src0(p, insn, src);
 
1348
        brw_set_math_message(p, insn, function,
 
1349
                             BRW_MATH_INTEGER_UNSIGNED,
 
1350
                             precision,
 
1351
                             saturate,
 
1352
                             BRW_MATH_DATA_VECTOR);
 
1353
 
 
1354
        brw_pop_insn_state(p);
 
1355
}
 
1356
 
 
1357
/**
 
1358
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 
1359
 * using a constant offset per channel.
 
1360
 *
 
1361
 * The offset must be aligned to oword size (16 bytes).  Used for
 
1362
 * register spilling.
 
1363
 */
 
1364
void brw_oword_block_write_scratch(struct brw_compile *p,
 
1365
                                   struct brw_reg mrf,
 
1366
                                   int num_regs,
 
1367
                                   unsigned offset)
 
1368
{
 
1369
        uint32_t msg_control, msg_type;
 
1370
        int mlen;
 
1371
 
 
1372
        if (p->gen >= 060)
 
1373
                offset /= 16;
 
1374
 
 
1375
        mrf = __retype_ud(mrf);
 
1376
 
 
1377
        if (num_regs == 1) {
 
1378
                msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
 
1379
                mlen = 2;
 
1380
        } else {
 
1381
                msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
 
1382
                mlen = 3;
 
1383
        }
 
1384
 
 
1385
        /* Set up the message header.  This is g0, with g0.2 filled with
 
1386
         * the offset.  We don't want to leave our offset around in g0 or
 
1387
         * it'll screw up texture samples, so set it up inside the message
 
1388
         * reg.
 
1389
         */
 
1390
        {
 
1391
                brw_push_insn_state(p);
 
1392
                brw_set_mask_control(p, BRW_MASK_DISABLE);
 
1393
                brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
1394
 
 
1395
                brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
 
1396
 
 
1397
                /* set message header global offset field (reg 0, element 2) */
 
1398
                brw_MOV(p,
 
1399
                        __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
 
1400
                        brw_imm_ud(offset));
 
1401
 
 
1402
                brw_pop_insn_state(p);
 
1403
        }
 
1404
 
 
1405
        {
 
1406
                struct brw_reg dest;
 
1407
                struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
 
1408
                int send_commit_msg;
 
1409
                struct brw_reg src_header = __retype_uw(brw_vec8_grf(0, 0));
 
1410
 
 
1411
                if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
 
1412
                        insn->header.compression_control = BRW_COMPRESSION_NONE;
 
1413
                        src_header = vec16(src_header);
 
1414
                }
 
1415
                assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
 
1416
                insn->header.destreg__conditionalmod = mrf.nr;
 
1417
 
 
1418
                /* Until gen6, writes followed by reads from the same location
 
1419
                 * are not guaranteed to be ordered unless write_commit is set.
 
1420
                 * If set, then a no-op write is issued to the destination
 
1421
                 * register to set a dependency, and a read from the destination
 
1422
                 * can be used to ensure the ordering.
 
1423
                 *
 
1424
                 * For gen6, only writes between different threads need ordering
 
1425
                 * protection.  Our use of DP writes is all about register
 
1426
                 * spilling within a thread.
 
1427
                 */
 
1428
                if (p->gen >= 060) {
 
1429
                        dest = __retype_uw(vec16(brw_null_reg()));
 
1430
                        send_commit_msg = 0;
 
1431
                } else {
 
1432
                        dest = src_header;
 
1433
                        send_commit_msg = 1;
 
1434
                }
 
1435
 
 
1436
                brw_set_dest(p, insn, dest);
 
1437
                if (p->gen >= 060) {
 
1438
                        brw_set_src0(p, insn, mrf);
 
1439
                } else {
 
1440
                        brw_set_src0(p, insn, brw_null_reg());
 
1441
                }
 
1442
 
 
1443
                if (p->gen >= 060)
 
1444
                        msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
 
1445
                else
 
1446
                        msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
 
1447
 
 
1448
                brw_set_dp_write_message(p,
 
1449
                                         insn,
 
1450
                                         255, /* binding table index (255=stateless) */
 
1451
                                         msg_control,
 
1452
                                         msg_type,
 
1453
                                         mlen,
 
1454
                                         true, /* header_present */
 
1455
                                         0, /* pixel scoreboard */
 
1456
                                         send_commit_msg, /* response_length */
 
1457
                                         0, /* eot */
 
1458
                                         send_commit_msg);
 
1459
        }
 
1460
}
 
1461
 
 
1462
 
 
1463
/**
 
1464
 * Read a block of owords (half a GRF each) from the scratch buffer
 
1465
 * using a constant index per channel.
 
1466
 *
 
1467
 * Offset must be aligned to oword size (16 bytes).  Used for register
 
1468
 * spilling.
 
1469
 */
 
1470
void
 
1471
brw_oword_block_read_scratch(struct brw_compile *p,
 
1472
                             struct brw_reg dest,
 
1473
                             struct brw_reg mrf,
 
1474
                             int num_regs,
 
1475
                             unsigned offset)
 
1476
{
 
1477
        uint32_t msg_control;
 
1478
        int rlen;
 
1479
 
 
1480
        if (p->gen >= 060)
 
1481
                offset /= 16;
 
1482
 
 
1483
        mrf = __retype_ud(mrf);
 
1484
        dest = __retype_uw(dest);
 
1485
 
 
1486
        if (num_regs == 1) {
 
1487
                msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
 
1488
                rlen = 1;
 
1489
        } else {
 
1490
                msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
 
1491
                rlen = 2;
 
1492
        }
 
1493
 
 
1494
        {
 
1495
                brw_push_insn_state(p);
 
1496
                brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
1497
                brw_set_mask_control(p, BRW_MASK_DISABLE);
 
1498
 
 
1499
                brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
 
1500
 
 
1501
                /* set message header global offset field (reg 0, element 2) */
 
1502
                brw_MOV(p,
 
1503
                        __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
 
1504
                        brw_imm_ud(offset));
 
1505
 
 
1506
                brw_pop_insn_state(p);
 
1507
        }
 
1508
 
 
1509
        {
 
1510
                struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
 
1511
 
 
1512
                assert(insn->header.predicate_control == 0);
 
1513
                insn->header.compression_control = BRW_COMPRESSION_NONE;
 
1514
                insn->header.destreg__conditionalmod = mrf.nr;
 
1515
 
 
1516
                brw_set_dest(p, insn, dest); /* UW? */
 
1517
                if (p->gen >= 060) {
 
1518
                        brw_set_src0(p, insn, mrf);
 
1519
                } else {
 
1520
                        brw_set_src0(p, insn, brw_null_reg());
 
1521
                }
 
1522
 
 
1523
                brw_set_dp_read_message(p,
 
1524
                                        insn,
 
1525
                                        255, /* binding table index (255=stateless) */
 
1526
                                        msg_control,
 
1527
                                        BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
 
1528
                                        BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
 
1529
                                        1, /* msg_length */
 
1530
                                        rlen);
 
1531
        }
 
1532
}
 
1533
 
 
1534
/**
 
1535
 * Read a float[4] vector from the data port Data Cache (const buffer).
 
1536
 * Location (in buffer) should be a multiple of 16.
 
1537
 * Used for fetching shader constants.
 
1538
 */
 
1539
void brw_oword_block_read(struct brw_compile *p,
 
1540
                          struct brw_reg dest,
 
1541
                          struct brw_reg mrf,
 
1542
                          uint32_t offset,
 
1543
                          uint32_t bind_table_index)
 
1544
{
 
1545
        struct brw_instruction *insn;
 
1546
 
 
1547
        /* On newer hardware, offset is in units of owords. */
 
1548
        if (p->gen >= 060)
 
1549
                offset /= 16;
 
1550
 
 
1551
        mrf = __retype_ud(mrf);
 
1552
 
 
1553
        brw_push_insn_state(p);
 
1554
        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
1555
        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
1556
        brw_set_mask_control(p, BRW_MASK_DISABLE);
 
1557
 
 
1558
        brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
 
1559
 
 
1560
        /* set message header global offset field (reg 0, element 2) */
 
1561
        brw_MOV(p,
 
1562
                __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
 
1563
                brw_imm_ud(offset));
 
1564
 
 
1565
        insn = brw_next_insn(p, BRW_OPCODE_SEND);
 
1566
        insn->header.destreg__conditionalmod = mrf.nr;
 
1567
 
 
1568
        /* cast dest to a uword[8] vector */
 
1569
        dest = __retype_uw(vec8(dest));
 
1570
 
 
1571
        brw_set_dest(p, insn, dest);
 
1572
        if (p->gen >= 060) {
 
1573
                brw_set_src0(p, insn, mrf);
 
1574
        } else {
 
1575
                brw_set_src0(p, insn, brw_null_reg());
 
1576
        }
 
1577
 
 
1578
        brw_set_dp_read_message(p,
 
1579
                                insn,
 
1580
                                bind_table_index,
 
1581
                                BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
 
1582
                                BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
 
1583
                                BRW_DATAPORT_READ_TARGET_DATA_CACHE,
 
1584
                                1, /* msg_length */
 
1585
                                1); /* response_length (1 reg, 2 owords!) */
 
1586
 
 
1587
        brw_pop_insn_state(p);
 
1588
}
 
1589
 
 
1590
/**
 
1591
 * Read a set of dwords from the data port Data Cache (const buffer).
 
1592
 *
 
1593
 * Location (in buffer) appears as UD offsets in the register after
 
1594
 * the provided mrf header reg.
 
1595
 */
 
1596
void brw_dword_scattered_read(struct brw_compile *p,
 
1597
                              struct brw_reg dest,
 
1598
                              struct brw_reg mrf,
 
1599
                              uint32_t bind_table_index)
 
1600
{
 
1601
        struct brw_instruction *insn;
 
1602
 
 
1603
        mrf = __retype_ud(mrf);
 
1604
 
 
1605
        brw_push_insn_state(p);
 
1606
        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
1607
        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
1608
        brw_set_mask_control(p, BRW_MASK_DISABLE);
 
1609
        brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
 
1610
        brw_pop_insn_state(p);
 
1611
 
 
1612
        insn = brw_next_insn(p, BRW_OPCODE_SEND);
 
1613
        insn->header.destreg__conditionalmod = mrf.nr;
 
1614
 
 
1615
        /* cast dest to a uword[8] vector */
 
1616
        dest = __retype_uw(vec8(dest));
 
1617
 
 
1618
        brw_set_dest(p, insn, dest);
 
1619
        brw_set_src0(p, insn, brw_null_reg());
 
1620
 
 
1621
        brw_set_dp_read_message(p,
 
1622
                                insn,
 
1623
                                bind_table_index,
 
1624
                                BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
 
1625
                                BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
 
1626
                                BRW_DATAPORT_READ_TARGET_DATA_CACHE,
 
1627
                                2, /* msg_length */
 
1628
                                1); /* response_length */
 
1629
}
 
1630
 
 
1631
/**
 
1632
 * Read float[4] constant(s) from VS constant buffer.
 
1633
 * For relative addressing, two float[4] constants will be read into 'dest'.
 
1634
 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
 
1635
 */
 
1636
void brw_dp_READ_4_vs(struct brw_compile *p,
 
1637
                      struct brw_reg dest,
 
1638
                      unsigned location,
 
1639
                      unsigned bind_table_index)
 
1640
{
 
1641
        struct brw_instruction *insn;
 
1642
        unsigned msg_reg_nr = 1;
 
1643
 
 
1644
        if (p->gen >= 060)
 
1645
                location /= 16;
 
1646
 
 
1647
        /* Setup MRF[1] with location/offset into const buffer */
 
1648
        brw_push_insn_state(p);
 
1649
        brw_set_access_mode(p, BRW_ALIGN_1);
 
1650
        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
1651
        brw_set_mask_control(p, BRW_MASK_DISABLE);
 
1652
        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
1653
        brw_MOV(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2)),
 
1654
                brw_imm_ud(location));
 
1655
        brw_pop_insn_state(p);
 
1656
 
 
1657
        insn = brw_next_insn(p, BRW_OPCODE_SEND);
 
1658
 
 
1659
        insn->header.predicate_control = BRW_PREDICATE_NONE;
 
1660
        insn->header.compression_control = BRW_COMPRESSION_NONE;
 
1661
        insn->header.destreg__conditionalmod = msg_reg_nr;
 
1662
        insn->header.mask_control = BRW_MASK_DISABLE;
 
1663
 
 
1664
        brw_set_dest(p, insn, dest);
 
1665
        if (p->gen >= 060) {
 
1666
                brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
 
1667
        } else {
 
1668
                brw_set_src0(p, insn, brw_null_reg());
 
1669
        }
 
1670
 
 
1671
        brw_set_dp_read_message(p,
 
1672
                                insn,
 
1673
                                bind_table_index,
 
1674
                                0,
 
1675
                                BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
 
1676
                                BRW_DATAPORT_READ_TARGET_DATA_CACHE,
 
1677
                                1, /* msg_length */
 
1678
                                1); /* response_length (1 Oword) */
 
1679
}
 
1680
 
 
1681
/**
 
1682
 * Read a float[4] constant per vertex from VS constant buffer, with
 
1683
 * relative addressing.
 
1684
 */
 
1685
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
 
1686
                               struct brw_reg dest,
 
1687
                               struct brw_reg addr_reg,
 
1688
                               unsigned offset,
 
1689
                               unsigned bind_table_index)
 
1690
{
 
1691
        struct brw_reg src = brw_vec8_grf(0, 0);
 
1692
        struct brw_instruction *insn;
 
1693
        int msg_type;
 
1694
 
 
1695
        /* Setup MRF[1] with offset into const buffer */
 
1696
        brw_push_insn_state(p);
 
1697
        brw_set_access_mode(p, BRW_ALIGN_1);
 
1698
        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
1699
        brw_set_mask_control(p, BRW_MASK_DISABLE);
 
1700
        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
1701
 
 
1702
        /* M1.0 is block offset 0, M1.4 is block offset 1, all other
 
1703
         * fields ignored.
 
1704
         */
 
1705
        brw_ADD(p, __retype_d(brw_message_reg(1)),
 
1706
                addr_reg, brw_imm_d(offset));
 
1707
        brw_pop_insn_state(p);
 
1708
 
 
1709
        gen6_resolve_implied_move(p, &src, 0);
 
1710
 
 
1711
        insn = brw_next_insn(p, BRW_OPCODE_SEND);
 
1712
        insn->header.predicate_control = BRW_PREDICATE_NONE;
 
1713
        insn->header.compression_control = BRW_COMPRESSION_NONE;
 
1714
        insn->header.destreg__conditionalmod = 0;
 
1715
        insn->header.mask_control = BRW_MASK_DISABLE;
 
1716
 
 
1717
        brw_set_dest(p, insn, dest);
 
1718
        brw_set_src0(p, insn, src);
 
1719
 
 
1720
        if (p->gen >= 060)
 
1721
                msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
 
1722
        else if (p->gen >= 045)
 
1723
                msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
 
1724
        else
 
1725
                msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
 
1726
 
 
1727
        brw_set_dp_read_message(p,
 
1728
                                insn,
 
1729
                                bind_table_index,
 
1730
                                BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
 
1731
                                msg_type,
 
1732
                                BRW_DATAPORT_READ_TARGET_DATA_CACHE,
 
1733
                                2, /* msg_length */
 
1734
                                1); /* response_length */
 
1735
}
 
1736
 
 
1737
void brw_fb_WRITE(struct brw_compile *p,
 
1738
                  int dispatch_width,
 
1739
                  unsigned msg_reg_nr,
 
1740
                  struct brw_reg src0,
 
1741
                  unsigned msg_control,
 
1742
                  unsigned binding_table_index,
 
1743
                  unsigned msg_length,
 
1744
                  unsigned response_length,
 
1745
                  bool eot,
 
1746
                  bool header_present)
 
1747
{
 
1748
        struct brw_instruction *insn;
 
1749
        unsigned msg_type;
 
1750
        struct brw_reg dest;
 
1751
 
 
1752
        if (dispatch_width == 16)
 
1753
                dest = __retype_uw(vec16(brw_null_reg()));
 
1754
        else
 
1755
                dest = __retype_uw(vec8(brw_null_reg()));
 
1756
 
 
1757
        if (p->gen >= 060 && binding_table_index == 0) {
 
1758
                insn = brw_next_insn(p, BRW_OPCODE_SENDC);
 
1759
        } else {
 
1760
                insn = brw_next_insn(p, BRW_OPCODE_SEND);
 
1761
        }
 
1762
        /* The execution mask is ignored for render target writes. */
 
1763
        insn->header.predicate_control = 0;
 
1764
        insn->header.compression_control = BRW_COMPRESSION_NONE;
 
1765
 
 
1766
        if (p->gen >= 060) {
 
1767
                /* headerless version, just submit color payload */
 
1768
                src0 = brw_message_reg(msg_reg_nr);
 
1769
 
 
1770
                msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
 
1771
        } else {
 
1772
                insn->header.destreg__conditionalmod = msg_reg_nr;
 
1773
 
 
1774
                msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
 
1775
        }
 
1776
 
 
1777
        brw_set_dest(p, insn, dest);
 
1778
        brw_set_src0(p, insn, src0);
 
1779
        brw_set_dp_write_message(p,
 
1780
                                 insn,
 
1781
                                 binding_table_index,
 
1782
                                 msg_control,
 
1783
                                 msg_type,
 
1784
                                 msg_length,
 
1785
                                 header_present,
 
1786
                                 eot,
 
1787
                                 response_length,
 
1788
                                 eot,
 
1789
                                 0 /* send_commit_msg */);
 
1790
}
 
1791
 
 
1792
/**
 
1793
 * Texture sample instruction.
 
1794
 * Note: the msg_type plus msg_length values determine exactly what kind
 
1795
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 
1796
 */
 
1797
void brw_SAMPLE(struct brw_compile *p,
 
1798
                struct brw_reg dest,
 
1799
                unsigned msg_reg_nr,
 
1800
                struct brw_reg src0,
 
1801
                unsigned binding_table_index,
 
1802
                unsigned sampler,
 
1803
                unsigned writemask,
 
1804
                unsigned msg_type,
 
1805
                unsigned response_length,
 
1806
                unsigned msg_length,
 
1807
                bool header_present,
 
1808
                unsigned simd_mode)
 
1809
{
 
1810
        assert(writemask);
 
1811
 
 
1812
        if (p->gen < 050 || writemask != WRITEMASK_XYZW) {
 
1813
                struct brw_reg m1 = brw_message_reg(msg_reg_nr);
 
1814
 
 
1815
                writemask = ~writemask & WRITEMASK_XYZW;
 
1816
 
 
1817
                brw_push_insn_state(p);
 
1818
 
 
1819
                brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
1820
                brw_set_mask_control(p, BRW_MASK_DISABLE);
 
1821
 
 
1822
                brw_MOV(p, __retype_ud(m1), __retype_ud(brw_vec8_grf(0,0)));
 
1823
                brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(writemask << 12));
 
1824
 
 
1825
                brw_pop_insn_state(p);
 
1826
 
 
1827
                src0 = __retype_uw(brw_null_reg());
 
1828
        }
 
1829
 
 
1830
        {
 
1831
                struct brw_instruction *insn;
 
1832
 
 
1833
                gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
1834
 
 
1835
                insn = brw_next_insn(p, BRW_OPCODE_SEND);
 
1836
                insn->header.predicate_control = 0; /* XXX */
 
1837
                insn->header.compression_control = BRW_COMPRESSION_NONE;
 
1838
                if (p->gen < 060)
 
1839
                        insn->header.destreg__conditionalmod = msg_reg_nr;
 
1840
 
 
1841
                brw_set_dest(p, insn, dest);
 
1842
                brw_set_src0(p, insn, src0);
 
1843
                brw_set_sampler_message(p, insn,
 
1844
                                        binding_table_index,
 
1845
                                        sampler,
 
1846
                                        msg_type,
 
1847
                                        response_length,
 
1848
                                        msg_length,
 
1849
                                        header_present,
 
1850
                                        simd_mode);
 
1851
        }
 
1852
}
 
1853
 
 
1854
/* All these variables are pretty confusing - we might be better off
 
1855
 * using bitmasks and macros for this, in the old style.  Or perhaps
 
1856
 * just having the caller instantiate the fields in dword3 itself.
 
1857
 */
 
1858
void brw_urb_WRITE(struct brw_compile *p,
 
1859
                   struct brw_reg dest,
 
1860
                   unsigned msg_reg_nr,
 
1861
                   struct brw_reg src0,
 
1862
                   bool allocate,
 
1863
                   bool used,
 
1864
                   unsigned msg_length,
 
1865
                   unsigned response_length,
 
1866
                   bool eot,
 
1867
                   bool writes_complete,
 
1868
                   unsigned offset,
 
1869
                   unsigned swizzle)
 
1870
{
 
1871
        struct brw_instruction *insn;
 
1872
 
 
1873
        gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
1874
 
 
1875
        if (p->gen >= 070) {
 
1876
                /* Enable Channel Masks in the URB_WRITE_HWORD message header */
 
1877
                brw_push_insn_state(p);
 
1878
                brw_set_access_mode(p, BRW_ALIGN_1);
 
1879
                brw_OR(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5)),
 
1880
                       __retype_ud(brw_vec1_grf(0, 5)),
 
1881
                       brw_imm_ud(0xff00));
 
1882
                brw_pop_insn_state(p);
 
1883
        }
 
1884
 
 
1885
        insn = brw_next_insn(p, BRW_OPCODE_SEND);
 
1886
 
 
1887
        assert(msg_length < BRW_MAX_MRF);
 
1888
 
 
1889
        brw_set_dest(p, insn, dest);
 
1890
        brw_set_src0(p, insn, src0);
 
1891
        brw_set_src1(p, insn, brw_imm_d(0));
 
1892
 
 
1893
        if (p->gen <= 060)
 
1894
                insn->header.destreg__conditionalmod = msg_reg_nr;
 
1895
 
 
1896
        brw_set_urb_message(p,
 
1897
                            insn,
 
1898
                            allocate,
 
1899
                            used,
 
1900
                            msg_length,
 
1901
                            response_length,
 
1902
                            eot,
 
1903
                            writes_complete,
 
1904
                            offset,
 
1905
                            swizzle);
 
1906
}
 
1907
 
 
1908
static int
 
1909
brw_find_next_block_end(struct brw_compile *p, int start)
 
1910
{
 
1911
        int ip;
 
1912
 
 
1913
        for (ip = start + 1; ip < p->nr_insn; ip++) {
 
1914
                struct brw_instruction *insn = &p->store[ip];
 
1915
 
 
1916
                switch (insn->header.opcode) {
 
1917
                case BRW_OPCODE_ENDIF:
 
1918
                case BRW_OPCODE_ELSE:
 
1919
                case BRW_OPCODE_WHILE:
 
1920
                        return ip;
 
1921
                }
 
1922
        }
 
1923
        assert(!"not reached");
 
1924
        return start + 1;
 
1925
}
 
1926
 
 
1927
/* There is no DO instruction on gen6, so to find the end of the loop
 
1928
 * we have to see if the loop is jumping back before our start
 
1929
 * instruction.
 
1930
 */
 
1931
static int
 
1932
brw_find_loop_end(struct brw_compile *p, int start)
 
1933
{
 
1934
        int ip;
 
1935
        int br = 2;
 
1936
 
 
1937
        for (ip = start + 1; ip < p->nr_insn; ip++) {
 
1938
                struct brw_instruction *insn = &p->store[ip];
 
1939
 
 
1940
                if (insn->header.opcode == BRW_OPCODE_WHILE) {
 
1941
                        int jip = p->gen <= 070 ? insn->bits1.branch_gen6.jump_count
 
1942
                                : insn->bits3.break_cont.jip;
 
1943
                        if (ip + jip / br <= start)
 
1944
                                return ip;
 
1945
                }
 
1946
        }
 
1947
        assert(!"not reached");
 
1948
        return start + 1;
 
1949
}
 
1950
 
 
1951
/* After program generation, go back and update the UIP and JIP of
 
1952
 * BREAK and CONT instructions to their correct locations.
 
1953
 */
 
1954
void
 
1955
brw_set_uip_jip(struct brw_compile *p)
 
1956
{
 
1957
        int ip;
 
1958
        int br = 2;
 
1959
 
 
1960
        if (p->gen <= 060)
 
1961
                return;
 
1962
 
 
1963
        for (ip = 0; ip < p->nr_insn; ip++) {
 
1964
                struct brw_instruction *insn = &p->store[ip];
 
1965
 
 
1966
                switch (insn->header.opcode) {
 
1967
                case BRW_OPCODE_BREAK:
 
1968
                        insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
 
1969
                        /* Gen7 UIP points to WHILE; Gen6 points just after it */
 
1970
                        insn->bits3.break_cont.uip =
 
1971
                                br * (brw_find_loop_end(p, ip) - ip + (p->gen <= 070 ? 1 : 0));
 
1972
                        break;
 
1973
                case BRW_OPCODE_CONTINUE:
 
1974
                        insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
 
1975
                        insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
 
1976
 
 
1977
                        assert(insn->bits3.break_cont.uip != 0);
 
1978
                        assert(insn->bits3.break_cont.jip != 0);
 
1979
                        break;
 
1980
                }
 
1981
        }
 
1982
}
 
1983
 
 
1984
void brw_ff_sync(struct brw_compile *p,
 
1985
                   struct brw_reg dest,
 
1986
                   unsigned msg_reg_nr,
 
1987
                   struct brw_reg src0,
 
1988
                   bool allocate,
 
1989
                   unsigned response_length,
 
1990
                   bool eot)
 
1991
{
 
1992
        struct brw_instruction *insn;
 
1993
 
 
1994
        gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
1995
 
 
1996
        insn = brw_next_insn(p, BRW_OPCODE_SEND);
 
1997
        brw_set_dest(p, insn, dest);
 
1998
        brw_set_src0(p, insn, src0);
 
1999
        brw_set_src1(p, insn, brw_imm_d(0));
 
2000
 
 
2001
        if (p->gen < 060)
 
2002
                insn->header.destreg__conditionalmod = msg_reg_nr;
 
2003
 
 
2004
        brw_set_ff_sync_message(p,
 
2005
                                insn,
 
2006
                                allocate,
 
2007
                                response_length,
 
2008
                                eot);
 
2009
}