1
/* $XFree86: xc/lib/GL/mesa/src/drv/tdfx/X86/fx_3dnow_fasttmp.h,v 1.2 2000/09/26 15:56:51 tsi Exp $ */
3
/* Local-label helper.  Two alternative TAGLLBL definitions follow: one
 * that prefixes the label with ".L" and one that passes it through
 * unchanged; both wrap the result in TAG().
 * NOTE(review): only the opening #if is visible in this part of the
 * file -- confirm the matching #else / #endif are intact. */
#if !defined(NASM_ASSEMBLER) && !defined(MASM_ASSEMBLER)
4
#define TAGLLBL(a) TAG(.L##a)
6
#define TAGLLBL(a) TAG(a)
11
/* Vertex field offsets, used below as REGOFF displacements from the
 * vertex pointer.  Two different layouts are listed back to back.
 * NOTE(review): the conditional that selects between them is not
 * visible here -- confirm which one is active for a given build.
 *
 * First layout: X, Y, Z, R, G, B, OOZ, A, OOW. */
#define GR_VERTEX_X_OFFSET 0
12
#define GR_VERTEX_Y_OFFSET 4
13
#define GR_VERTEX_Z_OFFSET 8
14
#define GR_VERTEX_R_OFFSET 12
15
#define GR_VERTEX_G_OFFSET 16
16
#define GR_VERTEX_B_OFFSET 20
17
#define GR_VERTEX_OOZ_OFFSET 24
18
#define GR_VERTEX_A_OFFSET 28
19
#define GR_VERTEX_OOW_OFFSET 32
23
/* Second layout: X, Y, OOZ, OOW, R, G, B, A, Z. */
#define GR_VERTEX_X_OFFSET 0
24
#define GR_VERTEX_Y_OFFSET 4
25
#define GR_VERTEX_OOZ_OFFSET 8
26
#define GR_VERTEX_OOW_OFFSET 12
27
#define GR_VERTEX_R_OFFSET 16
28
#define GR_VERTEX_G_OFFSET 20
29
#define GR_VERTEX_B_OFFSET 24
30
#define GR_VERTEX_A_OFFSET 28
31
#define GR_VERTEX_Z_OFFSET 32
35
/* Per-texture-unit fields (SOW, TOW, OOW) for TMU0 and then TMU1.  The
 * routines below store an 8-byte pair at each SOW offset, which also
 * covers the TOW slot that follows it. */
#define GR_VERTEX_SOW_TMU0_OFFSET 36
36
#define GR_VERTEX_TOW_TMU0_OFFSET 40
37
#define GR_VERTEX_OOW_TMU0_OFFSET 44
38
#define GR_VERTEX_SOW_TMU1_OFFSET 48
39
#define GR_VERTEX_TOW_TMU1_OFFSET 52
40
#define GR_VERTEX_OOW_TMU1_OFFSET 56
45
/* #define MAT_SX 0 -- left disabled: that element is accessed via REGIND !! */
55
/* Do viewport map, device scale and perspective projection.
57
* void project_verts( GLfloat *first,
63
* Rearrange fxVertices to look like grVertices.
66
/*
 * fx_3dnow_project_vertices
 *
 * Walks the vertices from ECX (first) towards EDX (last) in steps of
 * EAX (stride) and rewrites each vertex in place: the x/y/z values are
 * multiplied by oow = 1/f[3], scaled and translated with the matrix
 * entries read through EBP, and stored back together with oow.  Colour
 * and texture-coordinate setup is compiled in according to TYPE.
 *
 * Register roles, as set up by the code below:
 *   ECX = current vertex     EDX = last_vert
 *   EBP = matrix             EAX = stride
 *   MM6 = ty+snapper | tx+snapper      MM4 = snapper | snapper
 *   MM5 = vsy | vsx                    MM1 = vsz
 *   MM7 = oow | oow (recomputed per vertex); MM0, MM2, MM3 = scratch
 *
 * NOTE(review): no register save/restore, no return instruction, no
 * FXPV_end label and no #endif lines are visible in this part of the
 * file -- confirm they are present in the full source.
 */
GLOBL GLNAME( TAG(fx_3dnow_project_vertices) )
67
GLNAME( TAG(fx_3dnow_project_vertices) ):
71
MOV_L ( REGOFF(8, ESP), ECX ) /* first_vert */
72
MOV_L ( REGOFF(12, ESP), EDX ) /* last_vert */
75
/* NOTE(review): the instruction that sets the flags for this JE is not
 * visible here; presumably it compares first_vert with last_vert. */
JE ( TAGLLBL(FXPV_end) )
79
PREFETCH ( REGIND(ECX) ) /* fetch the first vertex */
81
MOV_L ( REGOFF(16, ESP), EBP ) /* matrix */
82
MOV_L ( REGOFF(20, ESP), EAX ) /* stride */
84
MOVD ( REGOFF(MAT_TX, EBP), MM6 ) /* | tx */
85
PUNPCKLDQ ( REGOFF(MAT_TY, EBP), MM6 ) /* ty | tx */
88
/* Build a 64-bit pair of the constant 0x49400000 (786432.0 as an IEEE
 * single) in the 8 bytes just below ESP, then load it into MM4.
 * NOTE(review): ESP itself is not adjusted, so this scratch area is
 * only safe if nothing asynchronous writes below the stack pointer
 * between the two stores and the MOVQ load. */
MOV_L ( CONST(0x49400000), REGOFF(-8, ESP) ) /* snapper */
89
MOV_L ( CONST(0x49400000), REGOFF(-4, ESP) ) /* snapper */
92
MOVQ ( REGOFF(-8, ESP), MM4 ) /* snapper | snapper */
93
/* The snapper is folded into the translation here and subtracted again
 * after the per-vertex add, presumably to round x and y to a fixed
 * fractional precision. */
PFADD ( MM4, MM6 ) /* ty+snapper | tx+snapper */
95
MOVD ( REGIND(EBP), MM5 ) /* | vsx (matrix element at offset 0) */
96
PUNPCKLDQ ( REGOFF(MAT_SY, EBP), MM5 ) /* vsy | vsx */
98
MOVD ( REGOFF(MAT_SZ, EBP), MM1 ) /* | vsz */
102
/* ---- per-vertex loop; ECX points at the vertex being processed ---- */
TAGLLBL(FXPV_loop_start):
104
PREFETCH ( REGOFF(64, ECX) ) /* fetch the next-ish vertex */
107
/* Reciprocal of f[3]: PFRCP yields the initial estimate in MM0, and the
 * PFRCPIT1 / PFRCPIT2 pair refines it into MM7. */
MOVD ( REGOFF(12, ECX), MM0 ) /* | f[3] */
108
PFRCP ( MM0, MM0 ) /* oow = 1/f[3] */
110
MOVD ( REGOFF(12, ECX), MM7 ) /* | f[3] */
111
PFRCPIT1 ( MM0, MM7 ) /* first refinement step */
112
PFRCPIT2 ( MM0, MM7 ) /* oow | oow */
114
PUNPCKLDQ ( MM7, MM7 ) /* copy the low dword of MM7 into its high half */
117
/* Colour setup: copy the CLIP_R value into the R slot of the vertex. */
#if (TYPE & SETUP_RGBA)
118
MOVD ( REGOFF(CLIP_R, ECX ), MM0 ) /* f[RCOORD] = f[CLIP_R]; */
119
MOVD ( MM0, REGOFF(GR_VERTEX_R_OFFSET, ECX) )
122
/* Texture unit 1: scale the 8-byte pair at CLIP_S1 by oow and store it
 * at the TMU1 SOW offset. */
#if (TYPE & SETUP_TMU1)
123
MOVQ ( REGOFF(CLIP_S1, ECX), MM0 ) /* f[S1COORD] = f[CLIP_S1] * oow */
124
PFMUL ( MM7, MM0 ) /* f[T1COORD] = f[CLIP_T1] * oow */
125
MOVQ ( MM0, REGOFF(GR_VERTEX_SOW_TMU1_OFFSET, ECX) )
129
/* Texture unit 0: same treatment for the pair at CLIP_S0. */
#if (TYPE & SETUP_TMU0)
130
MOVQ ( REGOFF(CLIP_S0, ECX), MM0 ) /* f[S0COORD] = f[CLIP_S0] * oow */
131
PFMUL ( MM7, MM0 ) /* f[T0COORD] = f[CLIP_T0] * oow */
132
MOVQ ( MM0, REGOFF(GR_VERTEX_SOW_TMU0_OFFSET, ECX) )
141
/* Position: divide x, y, z by w (multiply by oow), then apply the
 * scale and translate terms loaded before the loop. */
MOVQ ( REGIND(ECX), MM2 ) /* f[1] | f[0] */
142
PFMUL ( MM7, MM2 ) /* f[1] * oow | f[0] * oow */
144
MOVD ( REGOFF(8, ECX), MM3 ) /* | f[2] */
145
PFMUL ( MM7, MM3 ) /* | f[2] * oow */
147
MOVD ( REGOFF(MAT_TZ, EBP), MM0 ) /* | vtz */
148
PFMUL ( MM1, MM3 ) /* | f[2] *= vsz */
150
PFADD ( MM0, MM3 ) /* | f[2] += vtz */
151
PFMUL ( MM5, MM2 ) /* f[1] *= vsy | f[0] *= vsx */
153
/* MM6 already holds translation + snapper, so this adds both at once. */
PFADD ( MM6, MM2 ) /* f[1] += vty | f[0] += vtx */
156
PFSUB ( MM4, MM2 ) /* f[0,1] -= snapper */
159
MOVQ ( MM2, REGOFF(GR_VERTEX_X_OFFSET, ECX) ) /* store x and y together */
160
MOVD ( MM3, REGOFF(GR_VERTEX_OOZ_OFFSET, ECX) ) /* store the transformed z */
163
/* end of DO_SETUP_XYZ */
165
MOVD ( MM7, REGOFF(GR_VERTEX_OOW_OFFSET, ECX) ) /* f[OOWCOORD] = oow */
166
ADD_L ( EAX, ECX ) /* f += stride */
168
/* Loop again while EDX (last_vert) is still above ECX, compared as
 * unsigned values. */
CMP_L ( ECX, EDX ) /* stall??? */
169
JA ( TAGLLBL(FXPV_loop_start) )
182
/* void project_verts( GLfloat *first,
186
* const GLubyte *mask )
190
/*
 * fx_3dnow_project_clipped_vertices
 *
 * Same per-vertex projection as fx_3dnow_project_vertices, but with an
 * additional byte array walked through ESI (one byte per vertex): a
 * vertex whose byte is non-zero branches to FXPCV_skip instead of being
 * projected.
 *
 * Register roles, as set up by the code below:
 *   ECX = current vertex     EDX = last vertex
 *   EBP = matrix             EAX = stride       ESI = clip-mask cursor
 *   MM6 = ty+snapper | tx+snapper      MM4 = snapper | snapper
 *   MM5 = vsy | vsx                    MM1 = vsz
 *   MM7 = oow | oow (recomputed per vertex); MM0, MM2, MM3 = scratch
 *
 * NOTE(review): no register save/restore, no return instruction, no
 * FXPCV_skip label definition and no #endif lines are visible in this
 * part of the file -- confirm they are present in the full source.
 */
GLOBL GLNAME( TAG(fx_3dnow_project_clipped_vertices) )
191
GLNAME( TAG(fx_3dnow_project_clipped_vertices) ):
195
MOV_L ( REGOFF(8, ESP), ECX ) /* first FXDRIVER(VB)->verts*/
196
MOV_L ( REGOFF(12, ESP), EDX ) /* last FXDRIVER(VB)->last_vert */
203
PREFETCH ( REGIND(ECX) ) /* fetch the first vertex */
205
MOV_L ( REGOFF(24, ESP), EBP ) /* mat ctx->Viewport.WindowMap.M */
206
MOV_L ( REGOFF(28, ESP), EAX ) /* stride */
207
MOV_L ( REGOFF(32, ESP), ESI ) /* VB->ClipMask */
209
MOVD ( REGOFF(MAT_TX, EBP), MM6 ) /* | tx */
210
PUNPCKLDQ ( REGOFF(MAT_TY, EBP), MM6 ) /* ty | tx */
213
/* Build a 64-bit pair of the constant 0x49400000 (786432.0 as an IEEE
 * single) in the 8 bytes just below ESP, then load it into MM4.
 * NOTE(review): ESP itself is not adjusted, so this scratch area is
 * only safe if nothing asynchronous writes below the stack pointer
 * between the two stores and the MOVQ load. */
MOV_L ( CONST(0x49400000), REGOFF(-8, ESP) ) /* snapper */
214
MOV_L ( CONST(0x49400000), REGOFF(-4, ESP) ) /* snapper */
217
MOVQ ( REGOFF(-8, ESP), MM4 ) /* snapper | snapper */
218
/* The snapper is folded into the translation here and subtracted again
 * after the per-vertex add, presumably to round x and y to a fixed
 * fractional precision. */
PFADD ( MM4, MM6 ) /* ty+snapper | tx+snapper */
220
MOVD ( REGIND(EBP), MM5 ) /* | vsx (matrix element at offset 0) */
221
PUNPCKLDQ ( REGOFF(MAT_SY, EBP), MM5 ) /* vsy | vsx */
223
MOVD ( REGOFF(MAT_SZ, EBP), MM1 ) /* | vsz */
228
/* ---- per-vertex loop; ECX = vertex, ESI = its clip-mask byte ---- */
TAGLLBL(FXPCV_loop_start):
230
PREFETCH ( REGOFF(64, ECX) ) /* fetch the next-ish vertex */
232
/* Skip the projection when this vertex's clip-mask byte is non-zero. */
CMP_B ( CONST(0), REGIND(ESI) )
233
JNE ( TAGLLBL(FXPCV_skip) )
235
/* Reciprocal of f[3]: PFRCP yields the initial estimate in MM0, and the
 * PFRCPIT1 / PFRCPIT2 pair refines it into MM7. */
MOVD ( REGOFF(12, ECX), MM0) /* | f[3] */
236
PFRCP ( MM0, MM0 ) /* oow = 1/f[3] */
238
MOVD ( REGOFF(12, ECX), MM7) /* | f[3] */
239
PFRCPIT1 ( MM0, MM7 ) /* first refinement step */
240
PFRCPIT2 ( MM0, MM7 ) /* oow | oow */
242
PUNPCKLDQ ( MM7, MM7 ) /* copy the low dword of MM7 into its high half */
245
/* Colour setup: copy the CLIP_R value into the R slot of the vertex. */
#if (TYPE & SETUP_RGBA)
246
MOVD ( REGOFF(CLIP_R, ECX ), MM0 ) /* f[RCOORD] = f[CLIP_R]; */
247
MOVD ( MM0, REGOFF(GR_VERTEX_R_OFFSET, ECX) )
250
/* Texture unit 1: scale the 8-byte pair at CLIP_S1 by oow and store it
 * at the TMU1 SOW offset. */
#if (TYPE & SETUP_TMU1)
251
MOVQ ( REGOFF(CLIP_S1, ECX), MM0 ) /* f[S1COORD] = f[CLIP_S1] * oow */
252
PFMUL ( MM7, MM0 ) /* f[T1COORD] = f[CLIP_T1] * oow */
253
MOVQ ( MM0, REGOFF(GR_VERTEX_SOW_TMU1_OFFSET, ECX) )
257
/* Texture unit 0: same treatment for the pair at CLIP_S0. */
#if (TYPE & SETUP_TMU0)
258
MOVQ ( REGOFF(CLIP_S0, ECX), MM0 ) /* f[S0COORD] = f[CLIP_S0] * oow */
259
PFMUL ( MM7, MM0 ) /* f[T0COORD] = f[CLIP_T0] * oow */
260
MOVQ ( MM0, REGOFF(GR_VERTEX_SOW_TMU0_OFFSET, ECX) )
268
/* Position: divide x, y, z by w (multiply by oow), then apply the
 * scale and translate terms loaded before the loop. */
MOVQ ( REGIND(ECX), MM2 ) /* f[1] | f[0] */
269
PFMUL ( MM7, MM2 ) /* f[1] * oow | f[0] * oow */
271
MOVD ( REGOFF(8, ECX), MM3 ) /* | f[2] */
272
PFMUL ( MM7, MM3 ) /* | f[2] * oow */
274
MOVD ( REGOFF(MAT_TZ, EBP), MM0 ) /* | vtz */
275
PFMUL ( MM1, MM3 ) /* | f[2] *= vsz */
277
PFADD ( MM0, MM3 ) /* | f[2] += vtz */
278
PFMUL ( MM5, MM2 ) /* f[1] *= vsy | f[0] *= vsx */
280
/* MM6 already holds translation + snapper, so this adds both at once. */
PFADD ( MM6, MM2 ) /* f[1] += vty | f[0] += vtx */
283
PFSUB ( MM4, MM2 ) /* f[0,1] -= snapper */
286
MOVQ ( MM2, REGOFF(GR_VERTEX_X_OFFSET, ECX) ) /* store x and y together */
287
MOVD ( MM3, REGOFF(GR_VERTEX_OOZ_OFFSET, ECX) ) /* store the transformed z */
290
/* end of DO_SETUP_XYZ */
292
MOVD ( MM7, REGOFF(GR_VERTEX_OOW_OFFSET, ECX) ) /* f[OOWCOORD] = oow */
295
ADD_L ( EAX, ECX ) /* f += stride */
297
INC_L ( ESI ) /* next ClipMask */
299
/* NOTE(review): no compare of ECX against EDX is visible between the
 * INC_L above and this JA, unlike the loop tail of
 * fx_3dnow_project_vertices.  Without one, JA would test flags left by
 * ADD_L / INC_L.  Confirm the CMP_L is present in the full source. */
JA ( TAGLLBL(FXPCV_loop_start) )