2
Copyright (C) Intel Corp. 2006. All Rights Reserved.
3
Intel funded Tungsten Graphics to
4
develop this 3D driver.
6
Permission is hereby granted, free of charge, to any person obtaining
7
a copy of this software and associated documentation files (the
8
"Software"), to deal in the Software without restriction, including
9
without limitation the rights to use, copy, modify, merge, publish,
10
distribute, sublicense, and/or sell copies of the Software, and to
11
permit persons to whom the Software is furnished to do so, subject to
12
the following conditions:
14
The above copyright notice and this permission notice (including the
15
next paragraph) shall be included in all copies or substantial
16
portions of the Software.
18
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
**********************************************************************/
29
* Keith Whitwell <keithw@vmware.com>
34
* This file defines struct brw_reg, which is our representation for EU
35
* registers. They're not a hardware specific format, just an abstraction
36
* that intends to capture the full flexibility of the hardware registers.
38
* The brw_eu_emit.c layer's brw_set_dest/brw_set_src[01] functions encode
39
* the abstract brw_reg type into the actual hardware instruction encoding.
46
#include "util/compiler.h"
47
#include "main/macros.h"
48
#include "program/prog_instruction.h"
49
#include "brw_eu_defines.h"
50
#include "brw_reg_type.h"
56
struct intel_device_info;
58
/** Number of general purpose registers (VS, WM, etc) */
59
#define BRW_MAX_GRF 128
62
* First GRF used for the MRF hack.
64
* On gfx7, MRFs are no longer used, and contiguous GRFs are used instead. We
65
* haven't converted our compiler to be aware of this, so it asks for MRFs and
66
* brw_eu_emit.c quietly converts them to be accesses of the top GRFs. The
67
* register allocators have to be careful of this to avoid corrupting the "MRF"s
68
* with actual GRF allocations.
70
#define GFX7_MRF_HACK_START 112
/**
 * Number of message register file registers.
 *
 * Evaluates to 24 when \p gen is 6 and to 16 otherwise.  The argument is
 * parenthesized so that an expression argument is compared as a whole.
 */
#define BRW_MAX_MRF(gen) ((gen) == 6 ? 24 : 16)
/* A swizzle packs four 2-bit channel selectors into 8 bits; selector i
 * occupies bits [2*i+1 : 2*i].
 */
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
/* Extract the 2-bit selector for channel idx from a packed swizzle. */
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)

#define BRW_SWIZZLE_NOOP      BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW      BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_YYYY      BRW_SWIZZLE4(1,1,1,1)
#define BRW_SWIZZLE_ZZZZ      BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW      BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
#define BRW_SWIZZLE_YXYX      BRW_SWIZZLE4(1,0,1,0)
#define BRW_SWIZZLE_XZXZ      BRW_SWIZZLE4(0,2,0,2)
#define BRW_SWIZZLE_YZXW      BRW_SWIZZLE4(1,2,0,3)
#define BRW_SWIZZLE_YWYW      BRW_SWIZZLE4(1,3,1,3)
#define BRW_SWIZZLE_ZXYW      BRW_SWIZZLE4(2,0,1,3)
#define BRW_SWIZZLE_ZWZW      BRW_SWIZZLE4(2,3,2,3)
#define BRW_SWIZZLE_WZWZ      BRW_SWIZZLE4(3,2,3,2)
#define BRW_SWIZZLE_WZYX      BRW_SWIZZLE4(3,2,1,0)
#define BRW_SWIZZLE_XXZZ      BRW_SWIZZLE4(0,0,2,2)
#define BRW_SWIZZLE_YYWW      BRW_SWIZZLE4(1,1,3,3)
#define BRW_SWIZZLE_YXWZ      BRW_SWIZZLE4(1,0,3,2)

/* The identity swizzle shifted right / left by comp selector slots. */
#define BRW_SWZ_COMP_INPUT(comp) (BRW_SWIZZLE_XYZW >> ((comp)*2))
#define BRW_SWZ_COMP_OUTPUT(comp) (BRW_SWIZZLE_XYZW << ((comp)*2))
101
brw_is_single_value_swizzle(unsigned swiz)
103
return (swiz == BRW_SWIZZLE_XXXX ||
104
swiz == BRW_SWIZZLE_YYYY ||
105
swiz == BRW_SWIZZLE_ZZZZ ||
106
swiz == BRW_SWIZZLE_WWWW);
/**
 * Compute the swizzle obtained from the application of \p swz0 on the result
 * of \p swz1.  Channel i of the result is the selector that \p swz1 holds at
 * the position chosen by channel i of \p swz0.
 */
static inline unsigned
brw_compose_swizzle(unsigned swz0, unsigned swz1)
{
   return BRW_SWIZZLE4(
      BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, 0)),
      BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, 1)),
      BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, 2)),
      BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, 3)));
}
/**
 * Return the result of applying swizzle \p swz to shuffle the bits of \p mask.
 *
 * Bit i of the result is set when the channel selected by position i of
 * \p swz is enabled in \p mask.
 */
static inline unsigned
brw_apply_swizzle_to_mask(unsigned swz, unsigned mask)
{
   unsigned result = 0;

   for (unsigned i = 0; i < 4; i++) {
      if (mask & (1u << BRW_GET_SWZ(swz, i)))
         result |= 1u << i;
   }

   return result;
}
/**
 * Return the result of applying the inverse of swizzle \p swz to shuffle the
 * bits of \p mask (AKA preimage).  Useful to find out which components are
 * read from a swizzled source given the instruction writemask.
 *
 * For every position i enabled in \p mask, the bit of the channel selected
 * by position i of \p swz is set in the result.
 */
static inline unsigned
brw_apply_inv_swizzle_to_mask(unsigned swz, unsigned mask)
{
   unsigned result = 0;

   for (unsigned i = 0; i < 4; i++) {
      if (mask & (1u << i))
         result |= 1u << BRW_GET_SWZ(swz, i);
   }

   return result;
}
/**
 * Construct an identity swizzle for the set of enabled channels given by \p
 * mask.  The result will only reference channels enabled in the provided \p
 * mask, assuming that \p mask is non-zero.  The constructed swizzle will
 * satisfy the property that for any instruction OP and any mask:
 *
 *    brw_OP(p, brw_writemask(dst, mask),
 *           brw_swizzle(src, brw_swizzle_for_mask(mask)));
 *
 * will be equivalent to the same instruction without swizzle:
 *
 *    brw_OP(p, brw_writemask(dst, mask), src);
 */
static inline unsigned
brw_swizzle_for_mask(unsigned mask)
{
   /* Start from the lowest enabled channel (or 0 for an empty mask) so that
    * disabled leading channels also point at an enabled one.
    */
   unsigned last = (mask ? ffs(mask) - 1 : 0);
   unsigned swz[4];

   /* Enabled channels select themselves; disabled channels repeat the most
    * recently seen enabled channel.
    */
   for (unsigned i = 0; i < 4; i++)
      last = swz[i] = (mask & (1 << i) ? i : last);

   return BRW_SWIZZLE4(swz[0], swz[1], swz[2], swz[3]);
}
/**
 * Construct an identity swizzle for the first \p n components of a vector.
 * When only a subset of channels of a vec4 are used we don't want to
 * reference the other channels, as that will tell optimization passes that
 * those other channels are used.
 */
static inline unsigned
brw_swizzle_for_size(unsigned n)
{
   /* Mask with the low n bits set, built in unsigned arithmetic. */
   return brw_swizzle_for_mask((1u << n) - 1);
}
/**
 * Converse of brw_swizzle_for_mask().  Returns the mask of components
 * accessed by the specified swizzle \p swz.
 */
static inline unsigned
brw_mask_for_swizzle(unsigned swz)
{
   /* Taking the preimage of the all-ones mask collects every channel that
    * any of the four selectors refers to.
    */
   return brw_apply_inv_swizzle_to_mask(swz, ~0u);
}
206
uint32_t brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz);
208
#define REG_SIZE (8*4)
210
/* These aren't hardware structs, just something useful for us to pass around:
212
* Align1 operation has a lot of control over input ranges. Used in
213
* WM programs to implement shaders decomposed into "channel serial"
214
* or "structure of array" form:
219
enum brw_reg_type type:4;
220
enum brw_reg_file file:3; /* :2 hardware format */
221
unsigned negate:1; /* source only */
222
unsigned abs:1; /* source only */
223
unsigned address_mode:1; /* relative addressing, hopefully! */
225
unsigned subnr:5; /* :1 in align16 */
233
unsigned swizzle:8; /* src only, align16 only */
234
unsigned writemask:4; /* dest only, align16 only */
235
int indirect_offset:10; /* relative addressing offset */
236
unsigned vstride:4; /* source only */
237
unsigned width:3; /* src only, align1 only */
238
unsigned hstride:2; /* align1 only */
252
brw_regs_equal(const struct brw_reg *a, const struct brw_reg *b)
254
return a->bits == b->bits && a->u64 == b->u64;
258
brw_regs_negative_equal(const struct brw_reg *a, const struct brw_reg *b)
260
if (a->file == IMM) {
261
if (a->bits != b->bits)
264
switch ((enum brw_reg_type) a->type) {
265
case BRW_REGISTER_TYPE_UQ:
266
case BRW_REGISTER_TYPE_Q:
267
return a->d64 == -b->d64;
268
case BRW_REGISTER_TYPE_DF:
269
return a->df == -b->df;
270
case BRW_REGISTER_TYPE_UD:
271
case BRW_REGISTER_TYPE_D:
272
return a->d == -b->d;
273
case BRW_REGISTER_TYPE_F:
274
return a->f == -b->f;
275
case BRW_REGISTER_TYPE_VF:
276
/* It is tempting to treat 0 as a negation of 0 (and -0 as a negation
277
* of -0). There are occasions where 0 or -0 is used and the exact
278
* bit pattern is desired. At the very least, changing this to allow
279
* 0 as a negation of 0 causes some fp64 tests to fail on IVB.
281
return a->ud == (b->ud ^ 0x80808080);
282
case BRW_REGISTER_TYPE_UW:
283
case BRW_REGISTER_TYPE_W:
284
case BRW_REGISTER_TYPE_UV:
285
case BRW_REGISTER_TYPE_V:
286
case BRW_REGISTER_TYPE_HF:
287
/* FINISHME: Implement support for these types once there is
288
* something in the compiler that can generate them. Until then,
289
* they cannot be tested.
292
case BRW_REGISTER_TYPE_UB:
293
case BRW_REGISTER_TYPE_B:
294
case BRW_REGISTER_TYPE_NF:
296
unreachable("not reached");
299
struct brw_reg tmp = *a;
301
tmp.negate = !tmp.negate;
303
return brw_regs_equal(&tmp, b);
307
struct brw_indirect {
308
unsigned addr_subnr:4;
314
static inline unsigned
315
type_sz(unsigned type)
318
case BRW_REGISTER_TYPE_UQ:
319
case BRW_REGISTER_TYPE_Q:
320
case BRW_REGISTER_TYPE_DF:
321
case BRW_REGISTER_TYPE_NF:
323
case BRW_REGISTER_TYPE_UD:
324
case BRW_REGISTER_TYPE_D:
325
case BRW_REGISTER_TYPE_F:
326
case BRW_REGISTER_TYPE_VF:
328
case BRW_REGISTER_TYPE_UW:
329
case BRW_REGISTER_TYPE_W:
330
case BRW_REGISTER_TYPE_HF:
331
/* [U]V components are 4-bit, but HW unpacks them to 16-bit (2 bytes) */
332
case BRW_REGISTER_TYPE_UV:
333
case BRW_REGISTER_TYPE_V:
335
case BRW_REGISTER_TYPE_UB:
336
case BRW_REGISTER_TYPE_B:
339
unreachable("not reached");
343
static inline enum brw_reg_type
344
get_exec_type(const enum brw_reg_type type)
347
case BRW_REGISTER_TYPE_B:
348
case BRW_REGISTER_TYPE_V:
349
return BRW_REGISTER_TYPE_W;
350
case BRW_REGISTER_TYPE_UB:
351
case BRW_REGISTER_TYPE_UV:
352
return BRW_REGISTER_TYPE_UW;
353
case BRW_REGISTER_TYPE_VF:
354
return BRW_REGISTER_TYPE_F;
361
* Return an integer type of the requested size and signedness.
363
static inline enum brw_reg_type
364
brw_int_type(unsigned sz, bool is_signed)
368
return (is_signed ? BRW_REGISTER_TYPE_B : BRW_REGISTER_TYPE_UB);
370
return (is_signed ? BRW_REGISTER_TYPE_W : BRW_REGISTER_TYPE_UW);
372
return (is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD);
374
return (is_signed ? BRW_REGISTER_TYPE_Q : BRW_REGISTER_TYPE_UQ);
376
unreachable("Not reached.");
381
* Construct a brw_reg.
382
* \param file one of the BRW_x_REGISTER_FILE values
383
* \param nr register number/index
384
* \param subnr register sub number
385
* \param negate register negate modifier
386
* \param abs register abs modifier
387
* \param type one of BRW_REGISTER_TYPE_x
388
* \param vstride one of BRW_VERTICAL_STRIDE_x
389
* \param width one of BRW_WIDTH_x
390
* \param hstride one of BRW_HORIZONTAL_STRIDE_x
391
* \param swizzle one of BRW_SWIZZLE_x
392
* \param writemask WRITEMASK_X/Y/Z/W bitfield
394
static inline struct brw_reg
395
brw_reg(enum brw_reg_file file,
400
enum brw_reg_type type,
408
if (file == BRW_GENERAL_REGISTER_FILE)
409
assert(nr < BRW_MAX_GRF);
410
else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
411
assert(nr <= BRW_ARF_TIMESTAMP);
412
/* Asserting on the MRF register number requires to know the hardware gen
413
* (gfx6 has 24 MRF registers), which we don't know here, so we assert
414
* for that in the generators and in brw_eu_emit.c
421
reg.address_mode = BRW_ADDRESS_DIRECT;
423
reg.subnr = subnr * type_sz(type);
426
/* Could do better: If the reg is r5.3<0;1,0>, we probably want to
427
* set swizzle and writemask to W, as the lower bits of subnr will
428
* be lost when converted to align16. This is probably too much to
429
* keep track of as you'd want it adjusted by suboffset(), etc.
430
* Perhaps fix up when converting to align16?
432
reg.swizzle = swizzle;
433
reg.writemask = writemask;
434
reg.indirect_offset = 0;
435
reg.vstride = vstride;
437
reg.hstride = hstride;
442
/** Construct float[16] register */
443
static inline struct brw_reg
444
brw_vec16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
452
BRW_VERTICAL_STRIDE_16,
454
BRW_HORIZONTAL_STRIDE_1,
459
/** Construct float[8] register */
460
static inline struct brw_reg
461
brw_vec8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
469
BRW_VERTICAL_STRIDE_8,
471
BRW_HORIZONTAL_STRIDE_1,
476
/** Construct float[4] register */
477
static inline struct brw_reg
478
brw_vec4_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
486
BRW_VERTICAL_STRIDE_4,
488
BRW_HORIZONTAL_STRIDE_1,
493
/** Construct float[2] register */
494
static inline struct brw_reg
495
brw_vec2_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
503
BRW_VERTICAL_STRIDE_2,
505
BRW_HORIZONTAL_STRIDE_1,
510
/** Construct float[1] register */
511
static inline struct brw_reg
512
brw_vec1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
520
BRW_VERTICAL_STRIDE_0,
522
BRW_HORIZONTAL_STRIDE_0,
527
static inline struct brw_reg
528
brw_vecn_reg(unsigned width, enum brw_reg_file file,
529
unsigned nr, unsigned subnr)
533
return brw_vec1_reg(file, nr, subnr);
535
return brw_vec2_reg(file, nr, subnr);
537
return brw_vec4_reg(file, nr, subnr);
539
return brw_vec8_reg(file, nr, subnr);
541
return brw_vec16_reg(file, nr, subnr);
543
unreachable("Invalid register width");
547
static inline struct brw_reg
548
retype(struct brw_reg reg, enum brw_reg_type type)
554
static inline struct brw_reg
555
firsthalf(struct brw_reg reg)
560
static inline struct brw_reg
561
sechalf(struct brw_reg reg)
568
static inline struct brw_reg
569
offset(struct brw_reg reg, unsigned delta)
576
static inline struct brw_reg
577
byte_offset(struct brw_reg reg, unsigned bytes)
579
unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
580
reg.nr = newoffset / REG_SIZE;
581
reg.subnr = newoffset % REG_SIZE;
585
static inline struct brw_reg
586
suboffset(struct brw_reg reg, unsigned delta)
588
return byte_offset(reg, delta * type_sz(reg.type));
591
/** Construct unsigned word[16] register */
592
static inline struct brw_reg
593
brw_uw16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
595
return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
598
/** Construct unsigned word[8] register */
599
static inline struct brw_reg
600
brw_uw8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
602
return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
605
/** Construct unsigned word[1] register */
606
static inline struct brw_reg
607
brw_uw1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
609
return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
612
static inline struct brw_reg
613
brw_ud1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
615
return retype(brw_vec1_reg(file, nr, subnr), BRW_REGISTER_TYPE_UD);
618
static inline struct brw_reg
619
brw_imm_reg(enum brw_reg_type type)
621
return brw_reg(BRW_IMMEDIATE_VALUE,
627
BRW_VERTICAL_STRIDE_0,
629
BRW_HORIZONTAL_STRIDE_0,
634
/** Construct float immediate register */
635
static inline struct brw_reg
636
brw_imm_df(double df)
638
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_DF);
643
static inline struct brw_reg
644
brw_imm_u64(uint64_t u64)
646
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UQ);
651
static inline struct brw_reg
654
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
659
/** Construct int64_t immediate register */
660
static inline struct brw_reg
663
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_Q);
668
/** Construct int64_t immediate register */
669
static inline struct brw_reg
670
brw_imm_uq(uint64_t uq)
672
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UQ);
677
/** Construct integer immediate register */
678
static inline struct brw_reg
681
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
686
/** Construct uint immediate register */
687
static inline struct brw_reg
688
brw_imm_ud(unsigned ud)
690
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
695
/** Construct ushort immediate register */
696
static inline struct brw_reg
697
brw_imm_uw(uint16_t uw)
699
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
700
imm.ud = uw | (uw << 16);
704
/** Construct short immediate register */
705
static inline struct brw_reg
708
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
709
imm.ud = (uint16_t)w | (uint32_t)(uint16_t)w << 16;
713
/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
714
* numbers alias with _V and _VF below:
717
/** Construct vector of eight signed half-byte values */
718
static inline struct brw_reg
719
brw_imm_v(unsigned v)
721
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
726
/** Construct vector of eight unsigned half-byte values */
727
static inline struct brw_reg
728
brw_imm_uv(unsigned uv)
730
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UV);
735
/** Construct vector of four 8-bit float values */
736
static inline struct brw_reg
737
brw_imm_vf(unsigned v)
739
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
744
static inline struct brw_reg
745
brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
747
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
748
imm.vstride = BRW_VERTICAL_STRIDE_0;
749
imm.width = BRW_WIDTH_4;
750
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
751
imm.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24));
756
static inline struct brw_reg
757
brw_address(struct brw_reg reg)
759
return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
762
/** Construct float[1] general-purpose register */
763
static inline struct brw_reg
764
brw_vec1_grf(unsigned nr, unsigned subnr)
766
return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
769
/** Construct float[2] general-purpose register */
770
static inline struct brw_reg
771
brw_vec2_grf(unsigned nr, unsigned subnr)
773
return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
776
/** Construct float[4] general-purpose register */
777
static inline struct brw_reg
778
brw_vec4_grf(unsigned nr, unsigned subnr)
780
return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
783
/** Construct float[8] general-purpose register */
784
static inline struct brw_reg
785
brw_vec8_grf(unsigned nr, unsigned subnr)
787
return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
790
/** Construct float[16] general-purpose register */
791
static inline struct brw_reg
792
brw_vec16_grf(unsigned nr, unsigned subnr)
794
return brw_vec16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
797
static inline struct brw_reg
798
brw_vecn_grf(unsigned width, unsigned nr, unsigned subnr)
800
return brw_vecn_reg(width, BRW_GENERAL_REGISTER_FILE, nr, subnr);
804
static inline struct brw_reg
805
brw_uw8_grf(unsigned nr, unsigned subnr)
807
return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
810
static inline struct brw_reg
811
brw_uw16_grf(unsigned nr, unsigned subnr)
813
return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
817
/** Construct null register (usually used for setting condition codes) */
818
static inline struct brw_reg
821
return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0);
824
static inline struct brw_reg
825
brw_null_vec(unsigned width)
827
return brw_vecn_reg(width, BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0);
830
static inline struct brw_reg
831
brw_address_reg(unsigned subnr)
833
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ADDRESS, subnr);
836
static inline struct brw_reg
839
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_TDR, 0);
842
/* If/else instructions break in align16 mode if writemask & swizzle
843
* aren't xyzw. This goes against the convention for other scalar
846
static inline struct brw_reg
849
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
854
BRW_REGISTER_TYPE_UD,
855
BRW_VERTICAL_STRIDE_4, /* ? */
857
BRW_HORIZONTAL_STRIDE_0,
858
BRW_SWIZZLE_XYZW, /* NOTE! */
859
WRITEMASK_XYZW); /* NOTE! */
862
static inline struct brw_reg
863
brw_notification_reg(void)
865
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
866
BRW_ARF_NOTIFICATION_COUNT,
870
BRW_REGISTER_TYPE_UD,
871
BRW_VERTICAL_STRIDE_0,
873
BRW_HORIZONTAL_STRIDE_0,
878
static inline struct brw_reg
879
brw_cr0_reg(unsigned subnr)
881
return brw_ud1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_CONTROL, subnr);
884
static inline struct brw_reg
885
brw_sr0_reg(unsigned subnr)
887
return brw_ud1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_STATE, subnr);
890
static inline struct brw_reg
891
brw_acc_reg(unsigned width)
893
return brw_vecn_reg(width, BRW_ARCHITECTURE_REGISTER_FILE,
894
BRW_ARF_ACCUMULATOR, 0);
897
static inline struct brw_reg
898
brw_flag_reg(int reg, int subreg)
900
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
901
BRW_ARF_FLAG + reg, subreg);
904
static inline struct brw_reg
905
brw_flag_subreg(unsigned subreg)
907
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
908
BRW_ARF_FLAG + subreg / 2, subreg % 2);
912
* Return the mask register present in Gfx4-5, or the related register present
913
* in Gfx7.5 and later hardware referred to as "channel enable" register in
916
static inline struct brw_reg
917
brw_mask_reg(unsigned subnr)
919
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_MASK, subnr);
922
static inline struct brw_reg
925
return brw_sr0_reg(3);
928
static inline struct brw_reg
931
return brw_sr0_reg(2);
934
static inline struct brw_reg
935
brw_mask_stack_reg(unsigned subnr)
937
return suboffset(retype(brw_vec16_reg(BRW_ARCHITECTURE_REGISTER_FILE,
938
BRW_ARF_MASK_STACK, 0),
939
BRW_REGISTER_TYPE_UB), subnr);
942
static inline struct brw_reg
943
brw_mask_stack_depth_reg(unsigned subnr)
945
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
946
BRW_ARF_MASK_STACK_DEPTH, subnr);
949
static inline struct brw_reg
950
brw_message_reg(unsigned nr)
952
return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
955
static inline struct brw_reg
956
brw_uvec_mrf(unsigned width, unsigned nr, unsigned subnr)
958
return retype(brw_vecn_reg(width, BRW_MESSAGE_REGISTER_FILE, nr, subnr),
959
BRW_REGISTER_TYPE_UD);
962
/* This is almost always called with a numeric constant argument, so
963
* make things easy to evaluate at compile time:
965
static inline unsigned cvt(unsigned val)
979
static inline struct brw_reg
980
stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride)
982
reg.vstride = cvt(vstride);
983
reg.width = cvt(width) - 1;
984
reg.hstride = cvt(hstride);
989
* Multiply the vertical and horizontal stride of a register by the given
992
static inline struct brw_reg
993
spread(struct brw_reg reg, unsigned s)
996
assert(util_is_power_of_two_nonzero(s));
999
reg.hstride += cvt(s) - 1;
1002
reg.vstride += cvt(s) - 1;
1006
return stride(reg, 0, 1, 0);
1011
* Reinterpret each channel of register \p reg as a vector of values of the
1012
* given smaller type and take the i-th subcomponent from each.
1014
static inline struct brw_reg
1015
subscript(struct brw_reg reg, enum brw_reg_type type, unsigned i)
1017
unsigned scale = type_sz(reg.type) / type_sz(type);
1018
assert(scale >= 1 && i < scale);
1020
if (reg.file == IMM) {
1021
unsigned bit_size = type_sz(type) * 8;
1022
reg.u64 >>= i * bit_size;
1023
reg.u64 &= BITFIELD64_MASK(bit_size);
1025
reg.u64 |= reg.u64 << 16;
1026
return retype(reg, type);
1029
return suboffset(retype(spread(reg, scale), type), i);
1032
static inline struct brw_reg
1033
vec16(struct brw_reg reg)
1035
return stride(reg, 16,16,1);
1038
static inline struct brw_reg
1039
vec8(struct brw_reg reg)
1041
return stride(reg, 8,8,1);
1044
static inline struct brw_reg
1045
vec4(struct brw_reg reg)
1047
return stride(reg, 4,4,1);
1050
static inline struct brw_reg
1051
vec2(struct brw_reg reg)
1053
return stride(reg, 2,2,1);
1056
static inline struct brw_reg
1057
vec1(struct brw_reg reg)
1059
return stride(reg, 0,1,0);
1063
static inline struct brw_reg
1064
get_element(struct brw_reg reg, unsigned elt)
1066
return vec1(suboffset(reg, elt));
1069
static inline struct brw_reg
1070
get_element_ud(struct brw_reg reg, unsigned elt)
1072
return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));
1075
static inline struct brw_reg
1076
get_element_d(struct brw_reg reg, unsigned elt)
1078
return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_D), elt));
1081
static inline struct brw_reg
1082
brw_swizzle(struct brw_reg reg, unsigned swz)
1084
if (reg.file == BRW_IMMEDIATE_VALUE)
1085
reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swz);
1087
reg.swizzle = brw_compose_swizzle(swz, reg.swizzle);
1092
static inline struct brw_reg
1093
brw_writemask(struct brw_reg reg, unsigned mask)
1095
assert(reg.file != BRW_IMMEDIATE_VALUE);
1096
reg.writemask &= mask;
1100
static inline struct brw_reg
1101
brw_set_writemask(struct brw_reg reg, unsigned mask)
1103
assert(reg.file != BRW_IMMEDIATE_VALUE);
1104
reg.writemask = mask;
/**
 * Return a writemask with the low \p n bits set.
 */
static inline unsigned
brw_writemask_for_size(unsigned n)
{
   /* Shift an unsigned one so the arithmetic matches the return type. */
   return (1u << n) - 1;
}
/**
 * Return a writemask with \p n consecutive bits set, starting at bit
 * \p first_component.  The run must fit within four channels.
 */
static inline unsigned
brw_writemask_for_component_packing(unsigned n, unsigned first_component)
{
   assert(first_component + n <= 4);
   return (((1u << n) - 1) << first_component);
}
1121
static inline struct brw_reg
1122
negate(struct brw_reg reg)
1128
static inline struct brw_reg
1129
brw_abs(struct brw_reg reg)
1136
/************************************************************************/
1138
static inline struct brw_reg
1139
brw_vec4_indirect(unsigned subnr, int offset)
1141
struct brw_reg reg = brw_vec4_grf(0, 0);
1143
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1144
reg.indirect_offset = offset;
1148
static inline struct brw_reg
1149
brw_vec1_indirect(unsigned subnr, int offset)
1151
struct brw_reg reg = brw_vec1_grf(0, 0);
1153
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1154
reg.indirect_offset = offset;
1158
static inline struct brw_reg
1159
brw_VxH_indirect(unsigned subnr, int offset)
1161
struct brw_reg reg = brw_vec1_grf(0, 0);
1162
reg.vstride = BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL;
1164
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1165
reg.indirect_offset = offset;
1169
static inline struct brw_reg
1170
deref_4f(struct brw_indirect ptr, int offset)
1172
return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
1175
static inline struct brw_reg
1176
deref_1f(struct brw_indirect ptr, int offset)
1178
return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
1181
static inline struct brw_reg
1182
deref_4b(struct brw_indirect ptr, int offset)
1184
return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
1187
static inline struct brw_reg
1188
deref_1uw(struct brw_indirect ptr, int offset)
1190
return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
1193
static inline struct brw_reg
1194
deref_1d(struct brw_indirect ptr, int offset)
1196
return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);
1199
static inline struct brw_reg
1200
deref_1ud(struct brw_indirect ptr, int offset)
1202
return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
1205
static inline struct brw_reg
1206
get_addr_reg(struct brw_indirect ptr)
1208
return brw_address_reg(ptr.addr_subnr);
1211
static inline struct brw_indirect
1212
brw_indirect_offset(struct brw_indirect ptr, int offset)
1214
ptr.addr_offset += offset;
1218
static inline struct brw_indirect
1219
brw_indirect(unsigned addr_subnr, int offset)
1221
struct brw_indirect ptr;
1222
ptr.addr_subnr = addr_subnr;
1223
ptr.addr_offset = offset;
1229
region_matches(struct brw_reg reg, enum brw_vertical_stride v,
1230
enum brw_width w, enum brw_horizontal_stride h)
1232
return reg.vstride == v &&
/* True if reg has the scalar <0;1,0> region. */
#define has_scalar_region(reg)                                  \
   region_matches(reg, BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1,      \
                  BRW_HORIZONTAL_STRIDE_0)
1242
* Return the size in bytes per data element of register \p reg on the
1243
* corresponding register file.
1245
static inline unsigned
1246
element_sz(struct brw_reg reg)
1248
if (reg.file == BRW_IMMEDIATE_VALUE || has_scalar_region(reg)) {
1249
return type_sz(reg.type);
1251
} else if (reg.width == BRW_WIDTH_1 &&
1252
reg.hstride == BRW_HORIZONTAL_STRIDE_0) {
1253
assert(reg.vstride != BRW_VERTICAL_STRIDE_0);
1254
return type_sz(reg.type) << (reg.vstride - 1);
1257
assert(reg.hstride != BRW_HORIZONTAL_STRIDE_0);
1258
assert(reg.vstride == reg.hstride + reg.width);
1259
return type_sz(reg.type) << (reg.hstride - 1);
1263
/* brw_packed_float.c */
1264
int brw_float_to_vf(float f);
1265
float brw_vf_to_float(unsigned char vf);