3
* Copyright © 2011-2015 Intel Corporation
5
* Permission is hereby granted, free of charge, to any person obtaining a
6
* copy of this software and associated documentation files (the "Software"),
7
* to deal in the Software without restriction, including without limitation
8
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
* and/or sell copies of the Software, and to permit persons to whom the
10
* Software is furnished to do so, subject to the following conditions:
12
* The above copyright notice and this permission notice (including the next
13
* paragraph) shall be included in all copies or substantial portions of the
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28
#include "brw_shader.h"
/**
 * Source register operand of the vec4 IR (vec4_instruction takes up to three
 * of these).  Derives from backend_reg.
 *
 * NOTE(review): this declaration is a damaged excerpt.  The bare numeric
 * lines are line-number residue, and the opening brace, access specifier,
 * closing "};" and possibly some members are not visible here; they are
 * left untouched rather than guessed.
 */
34
class src_reg : public backend_reg
37
DECLARE_RALLOC_CXX_OPERATORS(src_reg)
41
/* Construct from a register file, a register number and a GLSL type. */
src_reg(enum brw_reg_file file, int nr, const glsl_type *type);
43
/* Construct from a brw_reg. */
src_reg(struct ::brw_reg reg);
45
/* Comparison helpers; neither modifies *this. */
bool equals(const src_reg &r) const;
46
bool negative_equals(const src_reg &r) const;
48
/* Constructors taking a vec4_visitor and a GLSL type, the second one with
 * an additional explicit size.
 */
src_reg(class vec4_visitor *v, const struct glsl_type *type);
49
src_reg(class vec4_visitor *v, const struct glsl_type *type, int size);
51
/* Explicit conversion from a destination register. */
explicit src_reg(const dst_reg &reg);
/**
 * retype() overload for source registers: takes \p reg by value together
 * with a brw_reg_type.
 *
 * NOTE(review): the return-type line and the body are not visible in this
 * excerpt; presumably it returns a copy of \p reg with its type replaced by
 * \p type -- confirm against the full file.
 */
57
retype(src_reg reg, enum brw_reg_type type)
/**
 * Advance the register pointed to by \p reg in place by \p bytes bytes.
 *
 * NOTE(review): damaged excerpt.  The bare numeric lines are line-number
 * residue, and the return-type line, the braces and whatever selects between
 * the three update forms below (presumably a switch on reg->file) are not
 * visible -- confirm against the full file.
 */
66
add_byte_offset(backend_reg *reg, unsigned bytes)
75
/* First visible form: only the tail assertion survives; it requires the
 * resulting reg->offset to be a multiple of 16 bytes.
 */
assert(reg->offset % 16 == 0);
78
/* Second visible form: add \p bytes to reg->offset, carry whole registers
 * (REG_SIZE bytes each) into reg->nr and keep the remainder in reg->offset,
 * which must again be a multiple of 16 bytes.
 */
const unsigned suboffset = reg->offset + bytes;
79
reg->nr += suboffset / REG_SIZE;
80
reg->offset = suboffset % REG_SIZE;
81
assert(reg->offset % 16 == 0);
86
/* Third visible form: same carry scheme, but the sub-register position is
 * tracked in reg->subnr instead of reg->offset.
 */
const unsigned suboffset = reg->subnr + bytes;
87
reg->nr += suboffset / REG_SIZE;
88
reg->subnr = suboffset % REG_SIZE;
89
assert(reg->subnr % 16 == 0);
97
} /* namespace detail */
100
byte_offset(src_reg reg, unsigned bytes)
102
detail::add_byte_offset(®, bytes);
106
static inline src_reg
107
offset(src_reg reg, unsigned width, unsigned delta)
109
const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
110
const unsigned num_components = MAX2(width / 4 * stride, 4);
111
return byte_offset(reg, num_components * type_sz(reg.type) * delta);
114
static inline src_reg
115
horiz_offset(src_reg reg, unsigned delta)
117
return byte_offset(reg, delta * type_sz(reg.type));
121
* Reswizzle a given source register.
/**
 * Apply \p swizzle to the source register \p reg.
 *
 * Two update forms are visible below: the immediate payload reg.ud is
 * rewritten through brw_swizzle_immediate(), and reg.swizzle is combined
 * with the requested swizzle through brw_compose_swizzle().
 *
 * NOTE(review): damaged excerpt.  The bare numeric lines are line-number
 * residue, and the braces, the condition choosing between the two forms
 * (presumably reg.file == IMM for the first) and the return statement are
 * not visible -- confirm against the full file.
 */
124
static inline src_reg
125
swizzle(src_reg reg, unsigned swizzle)
128
reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swizzle);
130
reg.swizzle = brw_compose_swizzle(swizzle, reg.swizzle);
/**
 * Helper returning a src_reg whose visible body asserts that the register
 * is not an immediate and then toggles reg.negate.
 *
 * NOTE(review): damaged excerpt.  The declarator line (function name and
 * parameter list), the braces and the return statement are not visible;
 * presumably this is a by-value "negate" helper that returns the modified
 * copy -- confirm against the full file.
 */
135
static inline src_reg
138
/* Immediates are rejected outright. */
assert(reg.file != IMM);
139
reg.negate = !reg.negate;
144
is_uniform(const src_reg ®)
146
return (reg.file == IMM || reg.file == UNIFORM || reg.is_null()) &&
147
(!reg.reladdr || is_uniform(*reg.reladdr));
/**
 * Destination register operand of the vec4 IR (vec4_instruction takes one as
 * its dst argument).  Derives from backend_reg.
 *
 * NOTE(review): this declaration is a damaged excerpt.  The bare numeric
 * lines are line-number residue; the opening brace, access specifier,
 * closing "};", possibly some members, and the continuation lines of the two
 * multi-line constructors below are not visible here.  They are left
 * untouched rather than guessed.
 */
150
class dst_reg : public backend_reg
153
DECLARE_RALLOC_CXX_OPERATORS(dst_reg)
158
/* Construct from a register file and a register number. */
dst_reg(enum brw_reg_file file, int nr);
159
/* Parameter list continues on a line that is not visible in this excerpt. */
dst_reg(enum brw_reg_file file, int nr, const glsl_type *type,
161
/* Parameter list continues on a line that is not visible in this excerpt. */
dst_reg(enum brw_reg_file file, int nr, brw_reg_type type,
163
/* Construct from a brw_reg. */
dst_reg(struct ::brw_reg reg);
164
/* Construct from a vec4_visitor and a GLSL type. */
dst_reg(class vec4_visitor *v, const struct glsl_type *type);
166
/* Explicit conversion from a source register. */
explicit dst_reg(const src_reg &reg);
168
/* Comparison helper; does not modify *this. */
bool equals(const dst_reg &r) const;
/**
 * retype() overload for destination registers: takes \p reg by value
 * together with a brw_reg_type and returns a dst_reg.
 *
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * returns a copy of \p reg with its type replaced by \p type -- confirm
 * against the full file.
 */
173
static inline dst_reg
174
retype(dst_reg reg, enum brw_reg_type type)
180
static inline dst_reg
181
byte_offset(dst_reg reg, unsigned bytes)
183
detail::add_byte_offset(®, bytes);
187
static inline dst_reg
188
offset(dst_reg reg, unsigned width, unsigned delta)
190
const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
191
const unsigned num_components = MAX2(width / 4 * stride, 4);
192
return byte_offset(reg, num_components * type_sz(reg.type) * delta);
/**
 * Advance the destination register \p reg by \p delta elements of its own
 * type (delta * type_sz(reg.type) bytes), with a separate path for registers
 * that is_uniform() reports as uniform.
 *
 * NOTE(review): damaged excerpt.  The bare numeric lines are line-number
 * residue, and the braces and the statement executed when the register is
 * uniform are not visible; presumably that branch returns \p reg unchanged
 * -- confirm against the full file.
 */
195
static inline dst_reg
196
horiz_offset(const dst_reg &reg, unsigned delta)
198
if (is_uniform(src_reg(reg)))
201
return byte_offset(reg, delta * type_sz(reg.type));
204
static inline dst_reg
205
writemask(dst_reg reg, unsigned mask)
207
assert(reg.file != IMM);
208
assert((reg.writemask & mask) != 0);
209
reg.writemask &= mask;
214
* Return an integer identifying the discrete address space a register is
215
* contained in. A register is by definition fully contained in the single
216
* reg_space it belongs to, so two registers with different reg_space ids are
217
* guaranteed not to overlap. Most register files are a single reg_space of
218
* its own, only the VGRF file is composed of multiple discrete address
219
* spaces, one for each VGRF allocation.
221
static inline uint32_t
222
reg_space(const backend_reg &r)
224
return r.file << 16 | (r.file == VGRF ? r.nr : 0);
228
* Return the base offset in bytes of a register relative to the start of its
231
static inline unsigned
232
reg_offset(const backend_reg &r)
234
return (r.file == VGRF || r.file == IMM ? 0 : r.nr) *
235
(r.file == UNIFORM ? 16 : REG_SIZE) + r.offset +
236
(r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
240
* Return whether the register region starting at \p r and spanning \p dr
241
* bytes could potentially overlap the register region starting at \p s and
242
* spanning \p ds bytes.
/**
 * Overlap test between the region starting at \p r spanning \p dr bytes and
 * the region starting at \p s spanning \p ds bytes.
 *
 * Visible logic, in order:
 *  - If \p r is an MRF with the BRW_MRF_COMPR4 bit set in its number, two
 *    temporaries are tested instead: t0 with the COMPR4 bit cleared and t1
 *    moved 4 * REG_SIZE bytes further on, each against \p s with half of
 *    \p dr.  The result is the OR of both recursive calls.
 *  - Otherwise, if \p s is the COMPR4 MRF, the arguments are swapped and the
 *    function recurses so the case above handles it.
 *  - Otherwise the regions overlap when both registers share the same
 *    reg_space() and neither byte interval [reg_offset, reg_offset + size)
 *    ends at or before the start of the other.
 *
 * NOTE(review): damaged excerpt.  The bare numeric lines are line-number
 * residue, and the return-type line, several braces, the end of the inner
 * comment and the declarations of t0 and t1 are not visible; presumably t0
 * is a copy of \p r and t1 a copy of t0 -- confirm against the full file.
 */
245
regions_overlap(const backend_reg &r, unsigned dr,
246
const backend_reg &s, unsigned ds)
248
if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) {
249
/* COMPR4 regions are translated by the hardware during decompression
250
* into two separate half-regions 4 MRFs apart from each other.
253
t0.nr &= ~BRW_MRF_COMPR4;
255
t1.offset += 4 * REG_SIZE;
256
return regions_overlap(t0, dr / 2, s, ds) ||
257
regions_overlap(t1, dr / 2, s, ds);
259
} else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) {
260
return regions_overlap(s, ds, r, dr);
263
return reg_space(r) == reg_space(s) &&
264
!(reg_offset(r) + dr <= reg_offset(s) ||
265
reg_offset(s) + ds <= reg_offset(r));
/**
 * Instruction of the vec4 IR; derives from backend_instruction and carries
 * one dst_reg plus up to three src_reg operands (see the constructor).
 *
 * NOTE(review): this class is a damaged excerpt.  The bare numeric lines are
 * line-number residue, and the access specifier, several members, the braces
 * and return statements of the inline methods, and the closing "};" are not
 * visible here.  Comments below describe only what the visible lines show.
 */
269
class vec4_instruction : public backend_instruction {
271
DECLARE_RALLOC_CXX_OPERATORS(vec4_instruction)
273
/* Every operand defaults to a default-constructed register, so an
 * instruction can be built from an opcode alone.
 */
vec4_instruction(enum opcode opcode,
274
const dst_reg &dst = dst_reg(),
275
const src_reg &src0 = src_reg(),
276
const src_reg &src1 = src_reg(),
277
const src_reg &src2 = src_reg());
282
enum brw_urb_write_flags urb_write_flags;
284
unsigned sol_binding; /**< gfx6: SOL binding table index */
285
bool sol_final_write; /**< gfx6: send commit message */
286
unsigned sol_vertex; /**< gfx6: used for setting dst index in SVB header */
288
/* Out-of-line queries and transformations; their definitions are not part
 * of this excerpt.
 */
bool is_send_from_grf() const;
289
unsigned size_read(unsigned arg) const;
290
bool can_reswizzle(const struct intel_device_info *devinfo,
292
int swizzle, int swizzle_mask);
293
void reswizzle(int dst_writemask, int swizzle);
294
bool can_do_source_mods(const struct intel_device_info *devinfo);
296
bool can_do_writemask(const struct intel_device_info *devinfo);
297
bool can_change_types() const;
298
bool has_source_and_destination_hazard() const;
299
unsigned implied_mrf_writes() const;
301
/* True only for the two SET_{LOW,HIGH}_32BIT opcodes. */
bool is_align1_partial_write()
303
return opcode == VEC4_OPCODE_SET_LOW_32BIT ||
304
opcode == VEC4_OPCODE_SET_HIGH_32BIT;
307
/* True when the instruction is predicated or is the UNPACK_FLAGS opcode. */
bool reads_flag() const
309
return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2;
312
/* Per-channel variant taking a channel index \p c.
 * NOTE(review): only the opcode test and the case labels are visible; the
 * switch header and the value returned for each predicate mode are not --
 * confirm against the full file.
 */
bool reads_flag(unsigned c)
314
if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2)
318
case BRW_PREDICATE_NONE:
320
case BRW_PREDICATE_ALIGN16_REPLICATE_X:
322
case BRW_PREDICATE_ALIGN16_REPLICATE_Y:
324
case BRW_PREDICATE_ALIGN16_REPLICATE_Z:
326
case BRW_PREDICATE_ALIGN16_REPLICATE_W:
333
/* True when a conditional modifier is set and the opcode is not one of the
 * listed exceptions: CSEL, IF and WHILE never count, and SEL counts only
 * when devinfo->ver <= 5.
 */
bool writes_flag(const intel_device_info *devinfo) const
335
return (conditional_mod && ((opcode != BRW_OPCODE_SEL || devinfo->ver <= 5) &&
336
opcode != BRW_OPCODE_CSEL &&
337
opcode != BRW_OPCODE_IF &&
338
opcode != BRW_OPCODE_WHILE));
341
/* Opcode list for the implicit-g0 query.
 * NOTE(review): the switch header and the values returned for the listed
 * and the remaining opcodes are not visible; presumably the listed opcodes
 * yield true -- confirm against the full file.
 */
bool reads_g0_implicitly() const
344
case SHADER_OPCODE_TEX:
345
case SHADER_OPCODE_TXL:
346
case SHADER_OPCODE_TXD:
347
case SHADER_OPCODE_TXF:
348
case SHADER_OPCODE_TXF_CMS_W:
349
case SHADER_OPCODE_TXF_CMS:
350
case SHADER_OPCODE_TXF_MCS:
351
case SHADER_OPCODE_TXS:
352
case SHADER_OPCODE_TG4:
353
case SHADER_OPCODE_TG4_OFFSET:
354
case SHADER_OPCODE_SAMPLEINFO:
355
case VS_OPCODE_PULL_CONSTANT_LOAD:
356
case GS_OPCODE_SET_PRIMITIVE_ID:
357
case GS_OPCODE_GET_INSTANCE_ID:
358
case SHADER_OPCODE_GFX4_SCRATCH_READ:
359
case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
368
* Make the execution of \p inst dependent on the evaluation of a possibly
369
* inverted predicate.
371
inline vec4_instruction *
372
set_predicate_inv(enum brw_predicate pred, bool inverse,
373
vec4_instruction *inst)
375
inst->predicate = pred;
376
inst->predicate_inverse = inverse;
381
* Make the execution of \p inst dependent on the evaluation of a predicate.
383
inline vec4_instruction *
384
set_predicate(enum brw_predicate pred, vec4_instruction *inst)
386
return set_predicate_inv(pred, false, inst);
390
* Write the result of evaluating the condition given by \p mod to a flag
393
inline vec4_instruction *
394
set_condmod(enum brw_conditional_mod mod, vec4_instruction *inst)
396
inst->conditional_mod = mod;
401
* Clamp the result of \p inst to the saturation range of its destination
404
inline vec4_instruction *
405
set_saturate(bool saturate, vec4_instruction *inst)
407
inst->saturate = saturate;
412
* Return the number of dataflow registers written by the instruction (either
413
* fully or partially) counted from 'floor(reg_offset(inst->dst) /
414
* register_size)'. The somewhat arbitrary register size unit is 16B for the
415
* UNIFORM and IMM files and 32B for all other files.
/**
 * Count the registers written by \p inst, starting from the register that
 * contains the first written byte.
 *
 * The destination may not be in the UNIFORM or IMM file.  The visible part
 * of the computation adds the byte position of the destination inside its
 * REG_SIZE-sized register to inst->size_written and rounds the sum up with
 * DIV_ROUND_UP.
 *
 * NOTE(review): damaged excerpt.  The bare numeric lines are line-number
 * residue, and the return-type line, the braces and the second argument of
 * DIV_ROUND_UP are not visible; presumably the divisor is REG_SIZE --
 * confirm against the full file.
 */
418
regs_written(const vec4_instruction *inst)
420
assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
421
return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE + inst->size_written,
426
* Return the number of dataflow registers read by the instruction (either
427
* fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
428
* register_size)'. The somewhat arbitrary register size unit is 16B for the
429
* UNIFORM and IMM files and 32B for all other files.
/**
 * Count the registers read by source \p i of \p inst, starting from the
 * register that contains the first read byte.
 *
 * The register size unit is 16 bytes for sources in the UNIFORM or IMM file
 * and REG_SIZE bytes otherwise.  The visible part of the computation adds
 * the byte position of the source inside such a unit to inst->size_read(i)
 * and rounds the sum up with DIV_ROUND_UP.
 *
 * NOTE(review): damaged excerpt.  The bare numeric lines are line-number
 * residue, and the return-type line, the braces and the second argument of
 * DIV_ROUND_UP are not visible; presumably the divisor is reg_size --
 * confirm against the full file.
 */
432
regs_read(const vec4_instruction *inst, unsigned i)
434
const unsigned reg_size =
435
inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 16 : REG_SIZE;
436
return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size + inst->size_read(i),
/**
 * Derive the execution type of \p inst from its operands.
 *
 * Visible logic: starting from BRW_REGISTER_TYPE_B, each of the three
 * sources whose file is not BAD_FILE is mapped through the single-type
 * get_exec_type() overload and compared against the running candidate,
 * first on size (larger) and then, at equal size, on being floating point.
 * If no source changed the candidate, the destination type is used.  The
 * final assertions reject a half-float result unless the destination is
 * half-float too, and reject a result that is still BRW_REGISTER_TYPE_B.
 *
 * NOTE(review): damaged excerpt.  The bare numeric lines are line-number
 * residue, and the braces, the statements guarded by the two type
 * comparisons (presumably "exec_type = t;") and the final return are not
 * visible -- confirm against the full file.
 */
440
static inline enum brw_reg_type
441
get_exec_type(const vec4_instruction *inst)
443
enum brw_reg_type exec_type = BRW_REGISTER_TYPE_B;
445
for (int i = 0; i < 3; i++) {
446
if (inst->src[i].file != BAD_FILE) {
447
const brw_reg_type t = get_exec_type(brw_reg_type(inst->src[i].type));
448
if (type_sz(t) > type_sz(exec_type))
450
else if (type_sz(t) == type_sz(exec_type) &&
451
brw_reg_type_is_floating_point(t))
456
if (exec_type == BRW_REGISTER_TYPE_B)
457
exec_type = inst->dst.type;
459
/* TODO: We need to handle half-float conversions. */
460
assert(exec_type != BRW_REGISTER_TYPE_HF ||
461
inst->dst.type == BRW_REGISTER_TYPE_HF);
462
assert(exec_type != BRW_REGISTER_TYPE_B);
467
static inline unsigned
468
get_exec_type_size(const vec4_instruction *inst)
470
return type_sz(get_exec_type(inst));
473
} /* namespace brw */