2
Copyright (C) Intel Corp. 2006. All Rights Reserved.
3
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4
develop this 3D driver.
6
Permission is hereby granted, free of charge, to any person obtaining
7
a copy of this software and associated documentation files (the
8
"Software"), to deal in the Software without restriction, including
9
without limitation the rights to use, copy, modify, merge, publish,
10
distribute, sublicense, and/or sell copies of the Software, and to
11
permit persons to whom the Software is furnished to do so, subject to
12
the following conditions:
14
The above copyright notice and this permission notice (including the
15
next paragraph) shall be included in all copies or substantial
16
portions of the Software.
18
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
**********************************************************************/
29
* Keith Whitwell <keith@tungstengraphics.com>
36
#include "brw_structs.h"
37
#include "brw_defines.h"
38
#include "shader/prog_instruction.h"
40
/* Pack four 2-bit channel selectors into a single swizzle value:
 * a lands in bits 0-1, b in bits 2-3, c in bits 4-5 and d in bits 6-7.
 * The arguments are not masked, so each one should already be in 0..3.
 */
#define BRW_SWIZZLE4(a,b,c,d) \
   ((a) | ((b) << 2) | ((c) << 4) | ((d) << 6))
41
/* Extract the 2-bit selector for channel idx from a packed swizzle value
 * (idx 0 reads the lowest two bits, idx 1 the next two, and so on).
 */
#define BRW_GET_SWZ(swz, idx) \
   (((swz) >> (2 * (idx))) & 3)
43
/* Identity swizzle: selectors 0,1,2,3 in order (same value as
 * BRW_SWIZZLE_XYZW).
 */
#define BRW_SWIZZLE_NOOP   BRW_SWIZZLE4(0, 1, 2, 3)
44
/* Swizzle selecting x, y, z, w in order, i.e. selectors 0,1,2,3. */
#define BRW_SWIZZLE_XYZW   BRW_SWIZZLE4(0, 1, 2, 3)
45
/* Swizzle that selects channel 0 (x) for all four positions. */
#define BRW_SWIZZLE_XXXX   BRW_SWIZZLE4(0, 0, 0, 0)
46
/* Swizzle that repeats the x,y pair: selectors 0,1,0,1. */
#define BRW_SWIZZLE_XYXY   BRW_SWIZZLE4(0, 1, 0, 1)
49
/* Size of one register in bytes; byte_offset() uses it to convert between a
 * flat byte address and a (nr, subnr) pair.
 * NOTE(review): presumably 8 channels of 4 bytes each -- confirm.
 */
#define REG_SIZE   (8 * 4)
52
/* These aren't hardware structs, just something useful for us to pass around:
54
* Align1 operation has a lot of control over input ranges. Used in
55
* WM programs to implement shaders decomposed into "channel serial"
56
* or "structure of array" form:
63
GLuint subnr:5; /* :1 in align16 */
64
GLuint negate:1; /* source only */
65
GLuint abs:1; /* source only */
66
GLuint vstride:4; /* source only */
67
GLuint width:3; /* src only, align1 only */
68
GLuint hstride:2; /* src only, align1 only */
69
GLuint address_mode:1; /* relative addressing, hopefully! */
74
GLuint swizzle:8; /* src only, align16 only */
75
GLuint writemask:4; /* dest only, align16 only */
76
GLint indirect_offset:10; /* relative addressing offset */
77
GLuint pad1:10; /* two dwords total */
94
/* Number of entries in the "stack" array of instruction state that clients
 * push to and pop from.
 */
#define BRW_EU_MAX_INSN_STACK 5
95
/* Number of entries in the instruction "store" array, i.e. the most
 * instructions it can hold.
 */
#define BRW_EU_MAX_INSN 1200
98
struct brw_instruction store[BRW_EU_MAX_INSN];
101
/* Allow clients to push/pop instruction state:
103
struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
104
struct brw_instruction *current;
107
GLboolean single_program_flow;
108
struct brw_context *brw;
113
static __inline int type_sz( GLuint type )
116
case BRW_REGISTER_TYPE_UD:
117
case BRW_REGISTER_TYPE_D:
118
case BRW_REGISTER_TYPE_F:
120
case BRW_REGISTER_TYPE_HF:
121
case BRW_REGISTER_TYPE_UW:
122
case BRW_REGISTER_TYPE_W:
124
case BRW_REGISTER_TYPE_UB:
125
case BRW_REGISTER_TYPE_B:
132
static __inline struct brw_reg brw_reg( GLuint file,
147
reg.subnr = subnr * type_sz(type);
150
reg.vstride = vstride;
152
reg.hstride = hstride;
153
reg.address_mode = BRW_ADDRESS_DIRECT;
156
/* Could do better: If the reg is r5.3<0;1,0>, we probably want to
157
* set swizzle and writemask to W, as the lower bits of subnr will
158
* be lost when converted to align16. This is probably too much to
159
* keep track of as you'd want it adjusted by suboffset(), etc.
160
* Perhaps fix up when converting to align16?
162
reg.dw1.bits.swizzle = swizzle;
163
reg.dw1.bits.writemask = writemask;
164
reg.dw1.bits.indirect_offset = 0;
165
reg.dw1.bits.pad1 = 0;
169
static __inline struct brw_reg brw_vec16_reg( GLuint file,
177
BRW_VERTICAL_STRIDE_16,
179
BRW_HORIZONTAL_STRIDE_1,
184
static __inline struct brw_reg brw_vec8_reg( GLuint file,
192
BRW_VERTICAL_STRIDE_8,
194
BRW_HORIZONTAL_STRIDE_1,
200
static __inline struct brw_reg brw_vec4_reg( GLuint file,
208
BRW_VERTICAL_STRIDE_4,
210
BRW_HORIZONTAL_STRIDE_1,
216
static __inline struct brw_reg brw_vec2_reg( GLuint file,
224
BRW_VERTICAL_STRIDE_2,
226
BRW_HORIZONTAL_STRIDE_1,
231
static __inline struct brw_reg brw_vec1_reg( GLuint file,
239
BRW_VERTICAL_STRIDE_0,
241
BRW_HORIZONTAL_STRIDE_0,
247
static __inline struct brw_reg retype( struct brw_reg reg,
254
static __inline struct brw_reg suboffset( struct brw_reg reg,
257
reg.subnr += delta * type_sz(reg.type);
262
static __inline struct brw_reg offset( struct brw_reg reg,
270
static __inline struct brw_reg byte_offset( struct brw_reg reg,
273
GLuint newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
274
reg.nr = newoffset / REG_SIZE;
275
reg.subnr = newoffset % REG_SIZE;
280
static __inline struct brw_reg brw_uw16_reg( GLuint file,
284
return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
287
static __inline struct brw_reg brw_uw8_reg( GLuint file,
291
return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
294
static __inline struct brw_reg brw_uw1_reg( GLuint file,
298
return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
301
static __inline struct brw_reg brw_imm_reg( GLuint type )
303
return brw_reg( BRW_IMMEDIATE_VALUE,
307
BRW_VERTICAL_STRIDE_0,
309
BRW_HORIZONTAL_STRIDE_0,
314
static __inline struct brw_reg brw_imm_f( GLfloat f )
316
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
321
static __inline struct brw_reg brw_imm_d( GLint d )
323
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
328
static __inline struct brw_reg brw_imm_ud( GLuint ud )
330
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
335
static __inline struct brw_reg brw_imm_uw( GLushort uw )
337
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
342
static __inline struct brw_reg brw_imm_w( GLshort w )
344
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
349
/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
350
* numbers alias with _V and _VF below:
353
/* Vector of eight signed half-byte values:
355
static __inline struct brw_reg brw_imm_v( GLuint v )
357
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
358
imm.vstride = BRW_VERTICAL_STRIDE_0;
359
imm.width = BRW_WIDTH_8;
360
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
365
/* Vector of four 8-bit float values:
367
static __inline struct brw_reg brw_imm_vf( GLuint v )
369
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
370
imm.vstride = BRW_VERTICAL_STRIDE_0;
371
imm.width = BRW_WIDTH_4;
372
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
379
/* Bit 7 of an 8-bit value.
 * NOTE(review): presumably the sign bit of one packed 8-bit VF float --
 * confirm against the hardware documentation.
 */
#define VF_NEG   (1 << 7)
381
static __inline struct brw_reg brw_imm_vf4( GLuint v0,
386
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
387
imm.vstride = BRW_VERTICAL_STRIDE_0;
388
imm.width = BRW_WIDTH_4;
389
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
390
imm.dw1.ud = ((v0 << 0) |
398
static __inline struct brw_reg brw_address( struct brw_reg reg )
400
return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
404
static __inline struct brw_reg brw_vec1_grf( GLuint nr,
407
return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
410
static __inline struct brw_reg brw_vec8_grf( GLuint nr,
413
return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
416
static __inline struct brw_reg brw_vec4_grf( GLuint nr,
419
return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
423
static __inline struct brw_reg brw_vec2_grf( GLuint nr,
426
return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
429
static __inline struct brw_reg brw_uw8_grf( GLuint nr,
432
return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
435
static __inline struct brw_reg brw_null_reg( void )
437
return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
442
static __inline struct brw_reg brw_address_reg( GLuint subnr )
444
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
449
/* If/else instructions break in align16 mode if writemask & swizzle
450
* aren't xyzw. This goes against the convention for other scalar
453
static __inline struct brw_reg brw_ip_reg( void )
455
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
458
BRW_REGISTER_TYPE_UD,
459
BRW_VERTICAL_STRIDE_4, /* ? */
461
BRW_HORIZONTAL_STRIDE_0,
462
BRW_SWIZZLE_XYZW, /* NOTE! */
463
WRITEMASK_XYZW); /* NOTE! */
466
static __inline struct brw_reg brw_acc_reg( void )
468
return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
474
static __inline struct brw_reg brw_flag_reg( void )
476
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
482
static __inline struct brw_reg brw_mask_reg( GLuint subnr )
484
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
489
static __inline struct brw_reg brw_message_reg( GLuint nr )
491
return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE,
499
/* This is almost always called with a numeric constant argument, so
500
* make things easy to evaluate at compile time:
502
static __inline GLuint cvt( GLuint val )
516
static __inline struct brw_reg stride( struct brw_reg reg,
522
reg.vstride = cvt(vstride);
523
reg.width = cvt(width) - 1;
524
reg.hstride = cvt(hstride);
528
static __inline struct brw_reg vec16( struct brw_reg reg )
530
return stride(reg, 16,16,1);
533
static __inline struct brw_reg vec8( struct brw_reg reg )
535
return stride(reg, 8,8,1);
538
static __inline struct brw_reg vec4( struct brw_reg reg )
540
return stride(reg, 4,4,1);
543
static __inline struct brw_reg vec2( struct brw_reg reg )
545
return stride(reg, 2,2,1);
548
static __inline struct brw_reg vec1( struct brw_reg reg )
550
return stride(reg, 0,1,0);
553
static __inline struct brw_reg get_element( struct brw_reg reg, GLuint elt )
555
return vec1(suboffset(reg, elt));
558
static __inline struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt )
560
return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));
564
static __inline struct brw_reg brw_swizzle( struct brw_reg reg,
570
reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
571
BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
572
BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
573
BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
578
static __inline struct brw_reg brw_swizzle1( struct brw_reg reg,
581
return brw_swizzle(reg, x, x, x, x);
584
static __inline struct brw_reg brw_writemask( struct brw_reg reg,
587
reg.dw1.bits.writemask &= mask;
591
static __inline struct brw_reg brw_set_writemask( struct brw_reg reg,
594
reg.dw1.bits.writemask = mask;
598
static __inline struct brw_reg negate( struct brw_reg reg )
604
static __inline struct brw_reg brw_abs( struct brw_reg reg )
610
/***********************************************************************
612
static __inline struct brw_reg brw_vec4_indirect( GLuint subnr,
615
struct brw_reg reg = brw_vec4_grf(0, 0);
617
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
618
reg.dw1.bits.indirect_offset = offset;
622
static __inline struct brw_reg brw_vec1_indirect( GLuint subnr,
625
struct brw_reg reg = brw_vec1_grf(0, 0);
627
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
628
reg.dw1.bits.indirect_offset = offset;
632
static __inline struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset)
634
return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
637
static __inline struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset)
639
return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
642
static __inline struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset)
644
return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
647
static __inline struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset)
649
return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
652
static __inline struct brw_reg get_addr_reg(struct brw_indirect ptr)
654
return brw_address_reg(ptr.addr_subnr);
657
static __inline struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset )
659
ptr.addr_offset += offset;
663
static __inline struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset )
665
struct brw_indirect ptr;
666
ptr.addr_subnr = addr_subnr;
667
ptr.addr_offset = offset;
674
void brw_pop_insn_state( struct brw_compile *p );
675
void brw_push_insn_state( struct brw_compile *p );
676
void brw_set_mask_control( struct brw_compile *p, GLuint value );
677
void brw_set_saturate( struct brw_compile *p, GLuint value );
678
void brw_set_access_mode( struct brw_compile *p, GLuint access_mode );
679
void brw_set_compression_control( struct brw_compile *p, GLboolean control );
680
void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value );
681
void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
682
void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
684
void brw_init_compile( struct brw_context *, struct brw_compile *p );
685
const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );
688
/* Helpers for regular instructions:
691
struct brw_instruction *brw_##OP(struct brw_compile *p, \
692
struct brw_reg dest, \
693
struct brw_reg src0);
696
struct brw_instruction *brw_##OP(struct brw_compile *p, \
697
struct brw_reg dest, \
698
struct brw_reg src0, \
699
struct brw_reg src1);
731
/* Helpers for SEND instruction:
733
void brw_urb_WRITE(struct brw_compile *p,
740
GLuint response_length,
742
GLboolean writes_complete,
746
void brw_fb_WRITE(struct brw_compile *p,
750
GLuint binding_table_index,
752
GLuint response_length,
755
void brw_SAMPLE(struct brw_compile *p,
759
GLuint binding_table_index,
763
GLuint response_length,
767
void brw_math_16( struct brw_compile *p,
775
void brw_math( struct brw_compile *p,
784
void brw_dp_READ_16( struct brw_compile *p,
787
GLuint scratch_offset );
789
void brw_dp_WRITE_16( struct brw_compile *p,
792
GLuint scratch_offset );
794
/* If/else/endif. Works by manipulating the execution flags on each
797
struct brw_instruction *brw_IF(struct brw_compile *p,
798
GLuint execute_size);
800
struct brw_instruction *brw_ELSE(struct brw_compile *p,
801
struct brw_instruction *if_insn);
803
void brw_ENDIF(struct brw_compile *p,
804
struct brw_instruction *if_or_else_insn);
809
struct brw_instruction *brw_DO(struct brw_compile *p,
810
GLuint execute_size);
812
void brw_WHILE(struct brw_compile *p,
813
struct brw_instruction *patch_insn);
817
void brw_land_fwd_jump(struct brw_compile *p,
818
struct brw_instruction *jmp_insn);
822
void brw_NOP(struct brw_compile *p);
824
/* Special case: there is never a destination, execution size will be
827
void brw_CMP(struct brw_compile *p,
831
struct brw_reg src1);
833
void brw_print_reg( struct brw_reg reg );
836
/***********************************************************************
840
void brw_copy_indirect_to_indirect(struct brw_compile *p,
841
struct brw_indirect dst_ptr,
842
struct brw_indirect src_ptr,
845
void brw_copy_from_indirect(struct brw_compile *p,
847
struct brw_indirect ptr,
850
void brw_copy4(struct brw_compile *p,
855
void brw_copy8(struct brw_compile *p,
860
void brw_math_invert( struct brw_compile *p,