2
Copyright (C) Intel Corp. 2006. All Rights Reserved.
3
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4
develop this 3D driver.
6
Permission is hereby granted, free of charge, to any person obtaining
7
a copy of this software and associated documentation files (the
8
"Software"), to deal in the Software without restriction, including
9
without limitation the rights to use, copy, modify, merge, publish,
10
distribute, sublicense, and/or sell copies of the Software, and to
11
permit persons to whom the Software is furnished to do so, subject to
12
the following conditions:
14
The above copyright notice and this permission notice (including the
15
next paragraph) shall be included in all copies or substantial
16
portions of the Software.
18
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
**********************************************************************/
29
* Keith Whitwell <keith@tungstengraphics.com>
36
#include "util/u_debug.h"
37
#include "pipe/p_defines.h"
39
#include "brw_structs.h"
40
#include "brw_defines.h"
42
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
43
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
45
#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
46
#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
47
#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
48
#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
50
#define BRW_WRITEMASK_NONE 0x00
51
#define BRW_WRITEMASK_X 0x01
52
#define BRW_WRITEMASK_Y 0x02
53
#define BRW_WRITEMASK_XY 0x03
54
#define BRW_WRITEMASK_Z 0x04
55
#define BRW_WRITEMASK_XZ 0x05
56
#define BRW_WRITEMASK_YZ 0x06
57
#define BRW_WRITEMASK_XYZ 0x07
58
#define BRW_WRITEMASK_W 0x08
59
#define BRW_WRITEMASK_XW 0x09
60
#define BRW_WRITEMASK_YW 0x0A
61
#define BRW_WRITEMASK_XYW 0x0B
62
#define BRW_WRITEMASK_ZW 0x0C
63
#define BRW_WRITEMASK_XZW 0x0D
64
#define BRW_WRITEMASK_YZW 0x0E
65
#define BRW_WRITEMASK_XYZW 0x0F
68
#define REG_SIZE (8*4)
71
/* These aren't hardware structs, just something useful for us to pass around:
73
* Align1 operation has a lot of control over input ranges. Used in
74
* WM programs to implement shaders decomposed into "channel serial"
75
* or "structure of array" form:
82
GLuint subnr:5; /* :1 in align16 */
83
GLuint negate:1; /* source only */
84
GLuint abs:1; /* source only */
85
GLuint vstride:4; /* source only */
86
GLuint width:3; /* src only, align1 only */
87
GLuint hstride:2; /* align1 only */
88
GLuint address_mode:1; /* relative addressing, hopefully! */
93
GLuint swizzle:8; /* src only, align16 only */
94
GLuint writemask:4; /* dest only, align16 only */
95
GLint indirect_offset:10; /* relative addressing offset */
96
GLuint pad1:10; /* two dwords total */
106
struct brw_indirect {
108
GLint addr_offset:10;
118
#define BRW_EU_MAX_INSN_STACK 5
119
#define BRW_EU_MAX_INSN 10000
122
struct brw_instruction store[BRW_EU_MAX_INSN];
125
/* Allow clients to push/pop instruction state:
127
struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
128
struct brw_instruction *current;
131
GLboolean single_program_flow;
132
struct brw_context *brw;
134
struct brw_eu_label *first_label; /**< linked list of labels */
135
struct brw_eu_call *first_call; /**< linked list of CALs */
142
brw_save_label(struct brw_compile *c, unsigned label, GLuint position);
145
brw_save_call(struct brw_compile *c, unsigned label, GLuint call_pos);
148
brw_resolve_cals(struct brw_compile *c);
152
static INLINE int type_sz( GLuint type )
155
case BRW_REGISTER_TYPE_UD:
156
case BRW_REGISTER_TYPE_D:
157
case BRW_REGISTER_TYPE_F:
159
case BRW_REGISTER_TYPE_HF:
160
case BRW_REGISTER_TYPE_UW:
161
case BRW_REGISTER_TYPE_W:
163
case BRW_REGISTER_TYPE_UB:
164
case BRW_REGISTER_TYPE_B:
172
* Construct a brw_reg.
173
* \param file one of the BRW_x_REGISTER_FILE values
174
* \param nr register number/index
175
* \param subnr register sub number
176
* \param type one of BRW_REGISTER_TYPE_x
177
* \param vstride one of BRW_VERTICAL_STRIDE_x
178
* \param width one of BRW_WIDTH_x
179
* \param hstride one of BRW_HORIZONTAL_STRIDE_x
180
* \param swizzle one of BRW_SWIZZLE_x
181
* \param writemask BRW_WRITEMASK_X/Y/Z/W bitfield
183
static INLINE struct brw_reg brw_reg( GLuint file,
194
if (type == BRW_GENERAL_REGISTER_FILE)
195
assert(nr < BRW_MAX_GRF);
196
else if (type == BRW_MESSAGE_REGISTER_FILE)
197
assert(nr < BRW_MAX_MRF);
198
else if (type == BRW_ARCHITECTURE_REGISTER_FILE)
199
assert(nr <= BRW_ARF_IP);
204
reg.subnr = subnr * type_sz(type);
207
reg.vstride = vstride;
209
reg.hstride = hstride;
210
reg.address_mode = BRW_ADDRESS_DIRECT;
213
/* Could do better: If the reg is r5.3<0;1,0>, we probably want to
214
* set swizzle and writemask to W, as the lower bits of subnr will
215
* be lost when converted to align16. This is probably too much to
216
* keep track of as you'd want it adjusted by suboffset(), etc.
217
* Perhaps fix up when converting to align16?
219
reg.dw1.bits.swizzle = swizzle;
220
reg.dw1.bits.writemask = writemask;
221
reg.dw1.bits.indirect_offset = 0;
222
reg.dw1.bits.pad1 = 0;
226
/** Construct float[16] register */
227
static INLINE struct brw_reg brw_vec16_reg( GLuint file,
235
BRW_VERTICAL_STRIDE_16,
237
BRW_HORIZONTAL_STRIDE_1,
242
/** Construct float[8] register */
243
static INLINE struct brw_reg brw_vec8_reg( GLuint file,
251
BRW_VERTICAL_STRIDE_8,
253
BRW_HORIZONTAL_STRIDE_1,
258
/** Construct float[4] register */
259
static INLINE struct brw_reg brw_vec4_reg( GLuint file,
267
BRW_VERTICAL_STRIDE_4,
269
BRW_HORIZONTAL_STRIDE_1,
274
/** Construct float[2] register */
275
static INLINE struct brw_reg brw_vec2_reg( GLuint file,
283
BRW_VERTICAL_STRIDE_2,
285
BRW_HORIZONTAL_STRIDE_1,
290
/** Construct float[1] register */
291
static INLINE struct brw_reg brw_vec1_reg( GLuint file,
299
BRW_VERTICAL_STRIDE_0,
301
BRW_HORIZONTAL_STRIDE_0,
307
static INLINE struct brw_reg retype( struct brw_reg reg,
314
static INLINE struct brw_reg suboffset( struct brw_reg reg,
317
reg.subnr += delta * type_sz(reg.type);
322
static INLINE struct brw_reg offset( struct brw_reg reg,
330
static INLINE struct brw_reg byte_offset( struct brw_reg reg,
333
GLuint newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
334
reg.nr = newoffset / REG_SIZE;
335
reg.subnr = newoffset % REG_SIZE;
340
/** Construct unsigned word[16] register */
341
static INLINE struct brw_reg brw_uw16_reg( GLuint file,
345
return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
348
/** Construct unsigned word[8] register */
349
static INLINE struct brw_reg brw_uw8_reg( GLuint file,
353
return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
356
/** Construct unsigned word[1] register */
357
static INLINE struct brw_reg brw_uw1_reg( GLuint file,
361
return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
364
static INLINE struct brw_reg brw_imm_reg( GLuint type )
366
return brw_reg( BRW_IMMEDIATE_VALUE,
370
BRW_VERTICAL_STRIDE_0,
372
BRW_HORIZONTAL_STRIDE_0,
377
/** Construct float immediate register */
378
static INLINE struct brw_reg brw_imm_f( GLfloat f )
380
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
385
/** Construct integer immediate register */
386
static INLINE struct brw_reg brw_imm_d( GLint d )
388
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
393
/** Construct uint immediate register */
394
static INLINE struct brw_reg brw_imm_ud( GLuint ud )
396
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
401
/** Construct ushort immediate register */
402
static INLINE struct brw_reg brw_imm_uw( GLushort uw )
404
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
405
imm.dw1.ud = uw | (uw << 16);
409
/** Construct short immediate register */
410
static INLINE struct brw_reg brw_imm_w( GLshort w )
412
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
413
imm.dw1.d = w | (w << 16);
417
/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
418
* numbers alias with _V and _VF below:
421
/** Construct vector of eight signed half-byte values */
422
static INLINE struct brw_reg brw_imm_v( GLuint v )
424
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
425
imm.vstride = BRW_VERTICAL_STRIDE_0;
426
imm.width = BRW_WIDTH_8;
427
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
432
/** Construct vector of four 8-bit float values */
433
static INLINE struct brw_reg brw_imm_vf( GLuint v )
435
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
436
imm.vstride = BRW_VERTICAL_STRIDE_0;
437
imm.width = BRW_WIDTH_4;
438
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
445
#define VF_NEG (1<<7)
447
static INLINE struct brw_reg brw_imm_vf4( GLuint v0,
452
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
453
imm.vstride = BRW_VERTICAL_STRIDE_0;
454
imm.width = BRW_WIDTH_4;
455
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
456
imm.dw1.ud = ((v0 << 0) |
464
static INLINE struct brw_reg brw_address( struct brw_reg reg )
466
return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
469
/** Construct float[1] general-purpose register */
470
static INLINE struct brw_reg brw_vec1_grf( GLuint nr, GLuint subnr )
472
return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
475
/** Construct float[2] general-purpose register */
476
static INLINE struct brw_reg brw_vec2_grf( GLuint nr, GLuint subnr )
478
return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
481
/** Construct float[4] general-purpose register */
482
static INLINE struct brw_reg brw_vec4_grf( GLuint nr, GLuint subnr )
484
return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
487
/** Construct float[8] general-purpose register */
488
static INLINE struct brw_reg brw_vec8_grf( GLuint nr, GLuint subnr )
490
return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
494
/**
 * Construct unsigned word[8] general-purpose register.
 * Wrapper around brw_uw8_reg() with the file fixed to
 * BRW_GENERAL_REGISTER_FILE.
 */
static INLINE struct brw_reg brw_uw8_grf( GLuint nr, GLuint subnr )
{
   const struct brw_reg grf =
      brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
   return grf;
}
499
/**
 * Construct unsigned word[16] general-purpose register.
 * Wrapper around brw_uw16_reg() with the file fixed to
 * BRW_GENERAL_REGISTER_FILE.
 */
static INLINE struct brw_reg brw_uw16_grf( GLuint nr, GLuint subnr )
{
   const struct brw_reg grf =
      brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
   return grf;
}
505
/** Construct null register (usually used for setting condition codes) */
506
static INLINE struct brw_reg brw_null_reg( void )
508
return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
513
static INLINE struct brw_reg brw_address_reg( GLuint subnr )
515
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
520
/* If/else instructions break in align16 mode if writemask & swizzle
521
* aren't xyzw. This goes against the convention for other scalar
524
static INLINE struct brw_reg brw_ip_reg( void )
526
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
529
BRW_REGISTER_TYPE_UD,
530
BRW_VERTICAL_STRIDE_4, /* ? */
532
BRW_HORIZONTAL_STRIDE_0,
533
BRW_SWIZZLE_XYZW, /* NOTE! */
534
BRW_WRITEMASK_XYZW); /* NOTE! */
537
static INLINE struct brw_reg brw_acc_reg( void )
539
return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
545
static INLINE struct brw_reg brw_flag_reg( void )
547
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
553
static INLINE struct brw_reg brw_mask_reg( GLuint subnr )
555
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
560
static INLINE struct brw_reg brw_message_reg( GLuint nr )
562
assert(nr < BRW_MAX_MRF);
563
return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE,
571
/* This is almost always called with a numeric constant argument, so
572
* make things easy to evaluate at compile time:
574
static INLINE GLuint cvt( GLuint val )
588
static INLINE struct brw_reg stride( struct brw_reg reg,
593
reg.vstride = cvt(vstride);
594
reg.width = cvt(width) - 1;
595
reg.hstride = cvt(hstride);
600
/** Return a copy of \p reg after applying stride(reg, 16, 16, 1). */
static INLINE struct brw_reg vec16( struct brw_reg reg )
{
   reg = stride(reg, 16, 16, 1);
   return reg;
}
605
/** Return a copy of \p reg after applying stride(reg, 8, 8, 1). */
static INLINE struct brw_reg vec8( struct brw_reg reg )
{
   reg = stride(reg, 8, 8, 1);
   return reg;
}
610
/** Return a copy of \p reg after applying stride(reg, 4, 4, 1). */
static INLINE struct brw_reg vec4( struct brw_reg reg )
{
   reg = stride(reg, 4, 4, 1);
   return reg;
}
615
/** Return a copy of \p reg after applying stride(reg, 2, 2, 1). */
static INLINE struct brw_reg vec2( struct brw_reg reg )
{
   reg = stride(reg, 2, 2, 1);
   return reg;
}
620
/** Return a copy of \p reg after applying stride(reg, 0, 1, 0). */
static INLINE struct brw_reg vec1( struct brw_reg reg )
{
   reg = stride(reg, 0, 1, 0);
   return reg;
}
626
/**
 * Select element \p elt of \p reg: advance with suboffset(reg, elt) and
 * pass the result through vec1().
 */
static INLINE struct brw_reg get_element( struct brw_reg reg, GLuint elt )
{
   const struct brw_reg at_elt = suboffset(reg, elt);
   return vec1(at_elt);
}
631
/**
 * Like get_element(), but \p reg is first retyped to
 * BRW_REGISTER_TYPE_UD, so the suboffset is applied to the retyped
 * register.
 */
static INLINE struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt )
{
   const struct brw_reg as_ud = retype(reg, BRW_REGISTER_TYPE_UD);
   const struct brw_reg at_elt = suboffset(as_ud, elt);
   return vec1(at_elt);
}
637
static INLINE struct brw_reg brw_swizzle( struct brw_reg reg,
643
reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
644
BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
645
BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
646
BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
651
static INLINE struct brw_reg brw_swizzle1( struct brw_reg reg,
654
return brw_swizzle(reg, x, x, x, x);
657
static INLINE struct brw_reg brw_writemask( struct brw_reg reg,
660
reg.dw1.bits.writemask &= mask;
664
static INLINE struct brw_reg brw_set_writemask( struct brw_reg reg,
667
reg.dw1.bits.writemask = mask;
671
static INLINE struct brw_reg negate( struct brw_reg reg )
677
static INLINE struct brw_reg brw_abs( struct brw_reg reg )
683
/***********************************************************************
685
static INLINE struct brw_reg brw_vec4_indirect( GLuint subnr,
688
struct brw_reg reg = brw_vec4_grf(0, 0);
690
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
691
reg.dw1.bits.indirect_offset = offset;
695
static INLINE struct brw_reg brw_vec1_indirect( GLuint subnr,
698
struct brw_reg reg = brw_vec1_grf(0, 0);
700
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
701
reg.dw1.bits.indirect_offset = offset;
705
/**
 * Build a brw_vec4_indirect() register from \p ptr, adding \p offset to
 * the pointer's own addr_offset.
 */
static INLINE struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset)
{
   const GLint total = ptr.addr_offset + offset;
   return brw_vec4_indirect(ptr.addr_subnr, total);
}
710
/**
 * Build a brw_vec1_indirect() register from \p ptr, adding \p offset to
 * the pointer's own addr_offset.
 */
static INLINE struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset)
{
   const GLint total = ptr.addr_offset + offset;
   return brw_vec1_indirect(ptr.addr_subnr, total);
}
715
/** deref_4f() result retyped to BRW_REGISTER_TYPE_B. */
static INLINE struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset)
{
   const struct brw_reg target = deref_4f(ptr, offset);
   return retype(target, BRW_REGISTER_TYPE_B);
}
720
/** deref_1f() result retyped to BRW_REGISTER_TYPE_UW. */
static INLINE struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset)
{
   const struct brw_reg target = deref_1f(ptr, offset);
   return retype(target, BRW_REGISTER_TYPE_UW);
}
725
/** deref_1f() result retyped to BRW_REGISTER_TYPE_D. */
static INLINE struct brw_reg deref_1d(struct brw_indirect ptr, GLint offset)
{
   const struct brw_reg target = deref_1f(ptr, offset);
   return retype(target, BRW_REGISTER_TYPE_D);
}
730
/** deref_1f() result retyped to BRW_REGISTER_TYPE_UD. */
static INLINE struct brw_reg deref_1ud(struct brw_indirect ptr, GLint offset)
{
   const struct brw_reg target = deref_1f(ptr, offset);
   return retype(target, BRW_REGISTER_TYPE_UD);
}
735
static INLINE struct brw_reg get_addr_reg(struct brw_indirect ptr)
737
return brw_address_reg(ptr.addr_subnr);
740
/**
 * Return a copy of \p ptr whose addr_offset has been advanced by
 * \p offset.  The caller's pointer is not modified (passed by value).
 */
static INLINE struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset )
{
   ptr.addr_offset = ptr.addr_offset + offset;
   return ptr;
}
746
static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset )
748
struct brw_indirect ptr;
749
ptr.addr_subnr = addr_subnr;
750
ptr.addr_offset = offset;
755
/** Do two brw_regs refer to the same register? */
756
static INLINE GLboolean
757
brw_same_reg(struct brw_reg r1, struct brw_reg r2)
759
return r1.file == r2.file && r1.nr == r2.nr;
762
static INLINE struct brw_instruction *current_insn( struct brw_compile *p)
764
return &p->store[p->nr_insn];
767
void brw_pop_insn_state( struct brw_compile *p );
768
void brw_push_insn_state( struct brw_compile *p );
769
void brw_set_mask_control( struct brw_compile *p, GLuint value );
770
void brw_set_saturate( struct brw_compile *p, GLuint value );
771
void brw_set_access_mode( struct brw_compile *p, GLuint access_mode );
772
void brw_set_compression_control( struct brw_compile *p, GLboolean control );
773
void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value );
774
void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
775
void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
777
void brw_init_compile( struct brw_context *, struct brw_compile *p );
779
enum pipe_error brw_get_program( struct brw_compile *p,
780
const GLuint **program,
784
/* Helpers for regular instructions:
787
struct brw_instruction *brw_##OP(struct brw_compile *p, \
788
struct brw_reg dest, \
789
struct brw_reg src0);
792
struct brw_instruction *brw_##OP(struct brw_compile *p, \
793
struct brw_reg dest, \
794
struct brw_reg src0, \
795
struct brw_reg src1);
828
/* Helpers for SEND instruction:
830
void brw_urb_WRITE(struct brw_compile *p,
837
GLuint response_length,
839
GLboolean writes_complete,
843
void brw_ff_sync(struct brw_compile *p,
850
GLuint response_length,
852
GLboolean writes_complete,
856
void brw_fb_WRITE(struct brw_compile *p,
860
GLuint binding_table_index,
862
GLuint response_length,
865
void brw_SAMPLE(struct brw_compile *p,
869
GLuint binding_table_index,
873
GLuint response_length,
876
GLuint header_present,
879
void brw_math_16( struct brw_compile *p,
887
void brw_math( struct brw_compile *p,
896
void brw_dp_READ_16( struct brw_compile *p,
898
GLuint scratch_offset );
900
void brw_dp_READ_4( struct brw_compile *p,
904
GLuint bind_table_index );
906
void brw_dp_READ_4_vs( struct brw_compile *p,
910
struct brw_reg addrReg,
912
GLuint bind_table_index );
914
void brw_dp_WRITE_16( struct brw_compile *p,
916
GLuint scratch_offset );
918
/* If/else/endif. Works by manipulating the execution flags on each
921
struct brw_instruction *brw_IF(struct brw_compile *p,
922
GLuint execute_size);
924
struct brw_instruction *brw_ELSE(struct brw_compile *p,
925
struct brw_instruction *if_insn);
927
void brw_ENDIF(struct brw_compile *p,
928
struct brw_instruction *if_or_else_insn);
933
struct brw_instruction *brw_DO(struct brw_compile *p,
934
GLuint execute_size);
936
struct brw_instruction *brw_WHILE(struct brw_compile *p,
937
struct brw_instruction *patch_insn);
939
struct brw_instruction *brw_BREAK(struct brw_compile *p);
940
struct brw_instruction *brw_CONT(struct brw_compile *p);
943
void brw_land_fwd_jump(struct brw_compile *p,
944
struct brw_instruction *jmp_insn);
948
void brw_NOP(struct brw_compile *p);
950
/* Special case: there is never a destination, execution size will be
953
void brw_CMP(struct brw_compile *p,
957
struct brw_reg src1);
959
void brw_print_reg( struct brw_reg reg );
962
/***********************************************************************
966
void brw_copy_indirect_to_indirect(struct brw_compile *p,
967
struct brw_indirect dst_ptr,
968
struct brw_indirect src_ptr,
971
void brw_copy_from_indirect(struct brw_compile *p,
973
struct brw_indirect ptr,
976
void brw_copy4(struct brw_compile *p,
981
void brw_copy8(struct brw_compile *p,
986
void brw_math_invert( struct brw_compile *p,
990
void brw_set_src1( struct brw_instruction *insn,
991
struct brw_reg reg );