1
/* $XFree86: xc/extras/Mesa/src/X86/x86_vertex.S,v 1.2 2002/03/05 20:27:24 dawes Exp $ */
8
/*#define MAT_SX 0*/ /* accessed by REGIND !! */
16
/* 1065353216 == 0x3F800000, the IEEE-754 single-precision bit pattern
 * of 1.0f, expressed as an integer so it can be used as an immediate.
 */
#define FP_ONE 1065353216
19
/* Operand shorthands: the N-th 32-bit word (byte offset 4*N) relative to a
 * fixed base register.
 *   S0..S3  -> base ESI
 *   D0..D3  -> base EDI
 *   M0..M3  -> base EDX
 * NOTE(review): the letters presumably stand for source / destination /
 * matrix.  These are only valid in routines that load those registers
 * that way; _mesa_v16_x86_general_xform, for instance, keeps its matrix
 * in ESI and its source in EDX and uses raw REGOFF() operands instead.
 */
#define S0 REGOFF(0, ESI)
20
#define S1 REGOFF(4, ESI)
21
#define S2 REGOFF(8, ESI)
22
#define S3 REGOFF(12, ESI)
23
#define D0 REGOFF(0, EDI)
24
#define D1 REGOFF(4, EDI)
25
#define D2 REGOFF(8, EDI)
26
#define D3 REGOFF(12, EDI)
27
#define M0 REGOFF(0, EDX)
28
#define M1 REGOFF(4, EDX)
29
#define M2 REGOFF(8, EDX)
30
#define M3 REGOFF(12, EDX)
35
* void _mesa_v16_x86_general_xform ( GLfloat *dest,
43
/* This is nothing more glamorous than an objdump of one of Josh's
44
* routines hacked to match the above.
47
/* _mesa_v16_x86_general_xform
 *
 * x87 routine that combines the first three floats of a source point with
 * a 16-float (4x4) matrix and stores four floats per vertex into dest,
 * stepping dest by 64 bytes per vertex.
 *
 * Register roles as loaded below:
 *   EAX = dest, ESI = mat, EDX = src, EDI = src_stride, ECX = count
 *
 * NOTE(review): this view does not show the whole routine.  The register
 * saves, the v16x86_loop label, the steps that sum the partial products,
 * the src/count updates and the return are not visible here; the comments
 * below describe only the instructions that are shown.
 */
GLOBL GLNAME( _mesa_v16_x86_general_xform )
48
GLNAME( _mesa_v16_x86_general_xform ):
54
/* Fetch the five stack arguments.  The first one sits at ESP+12, which
 * implies 8 bytes were pushed after the return address - presumably two
 * saved registers (the pushes are not visible here; TODO confirm).
 */
MOV_L ( REGOFF(12, ESP), EAX ) /* dest */
55
MOV_L ( REGOFF(16, ESP), ESI ) /* mat */
56
MOV_L ( REGOFF(20, ESP), EDX ) /* src */
57
MOV_L ( REGOFF(24, ESP), EDI ) /* src_stride */
58
MOV_L ( REGOFF(28, ESP), ECX ) /* count */
62
/* src[0] * mat[0..3]: each FLD_S/FMUL_S pair leaves one product on the
 * x87 stack.
 */
FLD_S ( REGOFF( 0x0, EDX ) )
63
FMUL_S ( REGOFF( 0x0, ESI ) )
64
FLD_S ( REGOFF( 0x0, EDX ) )
65
FMUL_S ( REGOFF( 0x4, ESI ) )
66
FLD_S ( REGOFF( 0x0, EDX ) )
67
FMUL_S ( REGOFF( 0x8, ESI ) )
68
FLD_S ( REGOFF( 0x0, EDX ) )
69
FMUL_S ( REGOFF( 0xc, ESI ) )
70
/* src[1] * mat[4..7] */
FLD_S ( REGOFF( 0x4, EDX ) )
71
FMUL_S ( REGOFF( 0x10, ESI ) )
72
FLD_S ( REGOFF( 0x4, EDX ) )
73
FMUL_S ( REGOFF( 0x14, ESI ) )
74
FLD_S ( REGOFF( 0x4, EDX ) )
75
FMUL_S ( REGOFF( 0x18, ESI ) )
76
FLD_S ( REGOFF( 0x4, EDX ) )
77
FMUL_S ( REGOFF( 0x1c, ESI ) )
84
/* src[2] * mat[8..11]
 * NOTE(review): twelve loads are shown with no intervening pops, which
 * would exceed the eight-entry x87 register stack; the instructions that
 * fold the partial products together are presumably among the lines
 * missing from this view.
 */
FLD_S ( REGOFF( 0x8, EDX ) )
85
FMUL_S ( REGOFF( 0x20, ESI ) )
86
FLD_S ( REGOFF( 0x8, EDX ) )
87
FMUL_S ( REGOFF( 0x24, ESI ) )
88
FLD_S ( REGOFF( 0x8, EDX ) )
89
FMUL_S ( REGOFF( 0x28, ESI ) )
90
FLD_S ( REGOFF( 0x8, EDX ) )
91
FMUL_S ( REGOFF( 0x2c, ESI ) )
99
/* Add mat[12..15] to the value on top of the x87 stack.  No multiply by a
 * fourth source component is shown, so the source w is presumably treated
 * as 1.0 (TODO confirm).  Each FADD_S targets ST(0); the exchanges that
 * rotate a different accumulator to the top between them are not visible.
 */
FADD_S ( REGOFF( 0x30, ESI ) )
101
FADD_S ( REGOFF( 0x34, ESI ) )
103
FADD_S ( REGOFF( 0x38, ESI ) )
105
FADD_S ( REGOFF( 0x3c, ESI ) )
107
/* Pop the four results into dest[0..3]. */
FSTP_S ( REGOFF( 0x0, EAX ) )
108
FSTP_S ( REGOFF( 0x4, EAX ) )
110
FSTP_S ( REGOFF( 0x8, EAX ) )
111
FSTP_S ( REGOFF( 0xc, EAX ) )
112
/* Advance dest by 64 bytes (16 floats) to the next output vertex. */
ADD_L ( CONST(64), EAX )
115
/* Loop back while the preceding flag-setting instruction left ZF clear.
 * NOTE(review): that instruction (presumably a decrement of the count in
 * ECX) and the v16x86_loop label itself are not visible in this view.
 */
JNE ( LLBL(v16x86_loop) )
123
* Table for clip test.
127
* bit4 = abs(S2) > abs(S3)
129
* bit2 = abs(S1) > abs(S3)
131
* bit0 = abs(S0) > abs(S3)
138
/* Clip-mask lookup data: 16 rows of 8 bytes = 128 one-byte entries, i.e.
 * one entry for every value of a 7-bit index.  The cliptest routine later
 * in this file reads a single byte from a table named clip_table using
 * ECX as the index.
 * NOTE(review): the label attached to this data is not visible in this
 * view; presumably it is clip_table.  The surrounding comment documents
 * index bits 0, 2 and 4 as the abs(Sn) > abs(S3) comparisons; the other
 * bits are presumably the sign bits - verify against the index-building
 * code.
 */
D_BYTE 0, 1, 0, 2, 4, 5, 4, 6
139
D_BYTE 0, 1, 0, 2, 8, 9, 8, 10
140
D_BYTE 32, 33, 32, 34, 36, 37, 36, 38
141
D_BYTE 32, 33, 32, 34, 40, 41, 40, 42
142
D_BYTE 0, 1, 0, 2, 4, 5, 4, 6
143
D_BYTE 0, 1, 0, 2, 8, 9, 8, 10
144
D_BYTE 16, 17, 16, 18, 20, 21, 20, 22
145
D_BYTE 16, 17, 16, 18, 24, 25, 24, 26
146
D_BYTE 63, 61, 63, 62, 55, 53, 55, 54
147
D_BYTE 63, 61, 63, 62, 59, 57, 59, 58
148
D_BYTE 47, 45, 47, 46, 39, 37, 39, 38
149
D_BYTE 47, 45, 47, 46, 43, 41, 43, 42
150
D_BYTE 63, 61, 63, 62, 55, 53, 55, 54
151
D_BYTE 63, 61, 63, 62, 59, 57, 59, 58
152
D_BYTE 31, 29, 31, 30, 23, 21, 23, 22
153
D_BYTE 31, 29, 31, 30, 27, 25, 27, 26
159
########################################
161
## _mesa_v16_x86_cliptest_points4
163
## Performs cliptesting equivalent to that done by cliptest_v16()
166
## This is a hacked version of the original above.
168
########################################
173
/* Byte offsets of the five 32-bit arguments of
 * _mesa_v16_x86_cliptest_points4, measured from the return address
 * (offset 0), in declaration order: source, last, or, and, mask.
 */
#define OFFSET_V16_SOURCE 4
174
#define OFFSET_V16_LAST 8
175
#define OFFSET_V16_OR 12
176
#define OFFSET_V16_AND 16
177
#define OFFSET_V16_MASK 20
179
/* ESP-relative operands for those arguments.  V16_FRAME_OFFSET is the
 * number of bytes the routine has pushed below the return address; it is
 * #defined further down, next to the routine's entry point, which works
 * because the preprocessor expands these macros only where they are used.
 */
#define ARG_V16_SOURCE REGOFF(V16_FRAME_OFFSET+OFFSET_V16_SOURCE, ESP)
180
#define ARG_V16_LAST REGOFF(V16_FRAME_OFFSET+OFFSET_V16_LAST, ESP)
181
#define ARG_V16_OR REGOFF(V16_FRAME_OFFSET+OFFSET_V16_OR, ESP)
182
#define ARG_V16_AND REGOFF(V16_FRAME_OFFSET+OFFSET_V16_AND, ESP)
183
#define ARG_V16_MASK REGOFF(V16_FRAME_OFFSET+OFFSET_V16_MASK, ESP)
186
/* _mesa_v16_x86_cliptest_points4
 *
 * Walks 64-byte vertices from ARG_V16_SOURCE up to (not including)
 * ARG_V16_LAST, writes one clip-mask byte per vertex through ARG_V16_MASK,
 * and writes the running masks held in AL / AH back through ARG_V16_OR /
 * ARG_V16_AND at the end.
 *
 * Register roles as loaded below:
 *   ESI = current source vertex, EDX = end pointer, EDI = clipmask output,
 *   AL / AH = masks read from *or / *and, ECX = clip-table index,
 *   EBX / EBP = scratch for the coordinate bit patterns inside the loop.
 *
 * NOTE(review): this view does not show the whole routine.  The register
 * saves and restores, the v16_ctp4_top label, the loads of the vertex
 * coordinates, the instructions that collect carries into ECX, the updates
 * of AL / AH, the return, and the #else / #endif lines of the preprocessor
 * conditionals are not visible; alternative branches therefore appear
 * back to back below.
 */
#if defined(__ELF__) && defined(__PIC__) && !defined(ELFPIC)
190
GLOBL GLNAME(_mesa_v16_x86_cliptest_points4)
193
GLNAME(_mesa_v16_x86_cliptest_points4):
195
/* Two alternative frame sizes; presumably only one survives preprocessing.
 * 20 vs 16 differs by one 4-byte stack slot, which matches the extra
 * clip_table pointer kept on the stack in the position-independent build
 * (see the "store pointer to clip_table on stack" step below).
 */
#define V16_FRAME_OFFSET 20
197
#define V16_FRAME_OFFSET 16
205
/* store pointer to clip_table on stack */
/* Position-independent lookup of clip_table: the call pushes the address
 * of the following ADD_L, the helper copies it into EBX, adding
 * _GLOBAL_OFFSET_TABLE_ turns that into the GOT base, and the GOT entry
 * then yields the table address.
 * NOTE(review): the push of EBX and the helper's return are not visible.
 */
206
CALL( LLBL(v16_ctp4_get_eip) )
207
ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX )
208
MOV_L( REGOFF(clip_table@GOT, EBX), EBX )
210
JMP( LLBL(v16_ctp4_clip_table_ready) )
212
LLBL(v16_ctp4_get_eip):
213
/* store eip in ebx */
214
MOV_L( REGIND(ESP), EBX ) /* [ESP] is the return address pushed by CALL */
217
LLBL(v16_ctp4_clip_table_ready):
220
/* Load the arguments.  EBX is overwritten here, which is why the
 * position-independent build keeps the table pointer on the stack.
 */
MOV_L( ARG_V16_SOURCE, ESI ) /* ptr to first source vertex */
221
MOV_L( ARG_V16_LAST, EDX ) /* ptr to last source vertex */
222
MOV_L( ARG_V16_OR, EBX )
223
MOV_L( ARG_V16_AND, EBP )
224
MOV_L( ARG_V16_MASK, EDI )
228
/* Read the caller's current masks: AL = *or, AH = *and. */
MOV_B( REGIND(EBX), AL )
229
MOV_B( REGIND(EBP), AH )
231
/* Skip the loop when the preceding comparison set ZF.
 * NOTE(review): that comparison is not visible; presumably it checks
 * ESI against EDX for an empty range.
 */
JZ( LLBL(v16_ctp4_finish) )
244
/* Per coordinate: adding a register to itself shifts the float's sign bit
 * out into the carry flag and leaves the magnitude bits times two; the
 * unsigned compare against the doubled abs(S3) then sets carry when the
 * coordinate's magnitude is larger.
 * NOTE(review): the loads into EBP / EBX and the instructions that gather
 * each carry into ECX are not visible in this view.
 */
ADD_L( EBP, EBP ) /* ebp = abs(S3)*2 ; carry = sign of S3 */
247
ADD_L( EBX, EBX ) /* ebx = abs(S2)*2 ; carry = sign of S2 */
250
CMP_L( EBX, EBP ) /* carry = abs(S2)*2 > abs(S3)*2 */
255
ADD_L( EBX, EBX ) /* ebx = abs(S1)*2 ; carry = sign of S1 */
258
CMP_L( EBX, EBP ) /* carry = abs(S1)*2 > abs(S3)*2 */
263
ADD_L( EBX, EBX ) /* ebx = abs(S0)*2 ; carry = sign of S0 */
266
CMP_L( EBX, EBP ) /* carry = abs(S0)*2 > abs(S3)*2 */
271
/* Translate the index in ECX into a clip mask in CL.  Two forms appear:
 * via the table pointer reloaded from the top of the stack, and via the
 * absolute symbol.  Presumably these are the position-independent and
 * non-PIC branches of a conditional whose directives are not visible.
 */
MOV_L( REGIND(ESP), EBP ) /* clip_table */
273
MOV_B( REGBI(EBP, ECX), CL )
275
MOV_B( REGOFF(clip_table,ECX), CL )
281
MOV_B( CL, REGIND(EDI) ) /* save clipmask */
282
INC_L( EDI ) /* next clipmask */
285
/* NOTE(review): S8 is not defined anywhere in this view (only S0..S3
 * are), and the x87 instructions that produce the value stored here are
 * not visible - confirm the intended operand.
 */
FSTP_S( S8 ) /* */ /* GR_VERTEX_OOW_OFFSET */
288
ADD_L( CONST(64), ESI ) /* next fxVertex */
290
CMP_L( EDX, ESI ) /* finished? */
291
JNZ( LLBL(v16_ctp4_top) )
293
/* Write the masks in AL / AH back through the or / and pointers.  The
 * pointers are reloaded from the stack because EBX and EBP were reused as
 * scratch inside the loop.
 */
MOV_L( ARG_V16_OR, ECX )
294
MOV_L( ARG_V16_AND, EDX )
296
MOV_B( AL, REGIND(ECX) )
297
MOV_B( AH, REGIND(EDX) )
299
LLBL(v16_ctp4_finish):
302
/* The popped value is thrown away; ESI is just a convenient target.
 * NOTE(review): the restores of the saved registers that presumably
 * follow are not visible in this view.
 */
POP_L( ESI ) /* discard ptr to clip_table */