/* void _mesa_v16_3dnow_general_xform( GLfloat *dest,
 *                                     const GLfloat *m,
 *                                     const GLfloat *src,
 *                                     GLuint src_stride,
 *                                     GLuint count )
 *
 * These transformation functions could disappear if the standard ones
 * took an output stride.
 */
/* Transform `count` 3-component vertices by a general 4x4 matrix.
 *
 * Stack arguments, in order: dest, mat, src, src_stride, count.
 * (The REGOFF offsets below start at 12 because two registers are pushed
 * on top of the return address.)
 *
 * For every input vertex (x0, x1, x2) the loop computes
 *     r[i] = x0*m[i] + x1*m[4+i] + x2*m[8+i] + t[i],   i = 0..3
 * where t = (tx, ty, tz, tw) is read from MAT_TX / MAT_TZ, and stores
 * r0..r3 into the first 16 bytes of the output vertex.  The input pointer
 * advances by src_stride bytes, the output pointer by a fixed 64 bytes.
 *
 * Register roles inside the loop:
 *     EAX = dest    ESI = mat    EDX = src    EDI = src_stride
 *     ECX = vertices remaining
 *     MM7 = ty | tx              MM3 = tw | tz
 */
GLOBL GLNAME( _mesa_v16_3dnow_general_xform )
GLNAME( _mesa_v16_3dnow_general_xform ):

    PUSH_L    ( EDI )                       /* callee-saved */
    PUSH_L    ( ESI )                       /* callee-saved */

    MOV_L     ( REGOFF(12, ESP), EAX )      /* dest */
    MOV_L     ( REGOFF(16, ESP), ESI )      /* mat */
    MOV_L     ( REGOFF(20, ESP), EDX )      /* src */
    MOV_L     ( REGOFF(24, ESP), EDI )      /* src_stride */
    MOV_L     ( REGOFF(28, ESP), ECX )      /* count */

    CMP_L     ( CONST(0), ECX )             /* nothing to do for count == 0; */
    JE        ( LLBL(v16_3dnow_general_done) ) /* DEC/JNE would otherwise wrap */

    FEMMS                                   /* enter MMX/3DNow! state */

    MOVQ      ( REGOFF(MAT_TX, ESI), MM7 )  /* ty | tx */
    MOVQ      ( REGOFF(MAT_TZ, ESI), MM3 )  /* tw | tz */

LLBL( v16_3dnow_general_loop ):

    PREFETCHW ( REGOFF(128, EAX) )          /* write alloc 2 verts ahead */
    PREFETCH  ( REGOFF(32, EDX) )           /* prefetch next cache line */

    MOVQ      ( REGIND(EDX), MM0 )          /* x1 | x0 */
    MOVD      ( REGOFF(8, EDX), MM1 )       /*    | x2 */

    MOVQ      ( REGIND(ESI), MM4 )          /* m1 | m0 */
    PUNPCKHDQ ( MM0, MM2 )                  /* x1 |    */

    MOVQ      ( REGOFF(16, ESI), MM5 )      /* m5 | m4 */
    PUNPCKLDQ ( MM0, MM0 )                  /* x0 | x0 */

    MOVQ      ( REGOFF(32, ESI), MM6 )      /* m9 | m8 */
    PFMUL     ( MM0, MM4 )                  /* x0*m1 | x0*m0 */

    PUNPCKHDQ ( MM2, MM2 )                  /* x1 | x1 */
    PFMUL     ( MM2, MM5 )                  /* x1*m5 | x1*m4 */

    PUNPCKLDQ ( MM1, MM1 )                  /* x2 | x2 */
    PFMUL     ( REGOFF(8, ESI), MM0 )       /* x0*m3 | x0*m2 */

    PFMUL     ( REGOFF(24, ESI), MM2 )      /* x1*m7 | x1*m6 */
    PFMUL     ( MM1, MM6 )                  /* x2*m9 | x2*m8 */

    PFADD     ( MM4, MM5 )                  /* x0*m1+x1*m5 | x0*m0+x1*m4 */
    PFMUL     ( REGOFF(40, ESI), MM1 )      /* x2*m11 | x2*m10 */

    PFADD     ( MM0, MM2 )                  /* x0*m3+x1*m7 | x0*m2+x1*m6 */

    /* Fold the remaining partial sums into the two result registers;
     * without these the x0/x1 terms of r0,r1 and the x2 term of r2,r3
     * would be dropped.
     */
    PFADD     ( MM5, MM6 )                  /* x0*m1+x1*m5+x2*m9 | x0*m0+x1*m4+x2*m8 */
    PFADD     ( MM1, MM2 )                  /* x0*m3+x1*m7+x2*m11 | x0*m2+x1*m6+x2*m10 */

    PFADD     ( MM7, MM6 )                  /* r1 | r0 */
    PFADD     ( MM3, MM2 )                  /* r3 | r2 */

    ADD_L     ( EDI, EDX )                  /* next input vertex */

    MOVQ      ( MM6, REGIND(EAX) )
    MOVQ      ( MM2, REGOFF(8, EAX) )

    ADD_L     ( CONST(64), EAX )            /* next output vertex */

    DEC_L     ( ECX )                       /* one vertex done; sets ZF for JNE */
    JNE       ( LLBL(v16_3dnow_general_loop) )

    FEMMS                                   /* leave MMX state before x87 code runs */

LLBL( v16_3dnow_general_done ):

    POP_L     ( ESI )
    POP_L     ( EDI )
    RET
/* Do viewport map and perspective projection.  Args should look like:
 *
 * _mesa_3dnow_project_vertices( float *first_vertex,
 *                               const float *last_vertex,
 *                               const float *m,
 *                               GLuint stride )
 *
 * This routine assumes a sane vertex layout with clip-space x,y,z,w as
 * the first four elements.  These are projected to x/w, y/w, z/w, 1/w
 * and then transformed according to the matrix to device space.  The
 * device coordinates will overwrite the clip coordinates as the first
 * four elements of the vertex.
 *
 * If projection is required for other elements, such as texcoords,
 * you will have to code a specialized version of this routine.  See
 * FX/X86 for examples.
 *
 * These routines are simplified versions of the FX code (the author
 * attribution is missing from this copy of the file).
 */
/* Perspective-divide and viewport-map every vertex in [first_vert, last_vert).
 *
 * Stack arguments, in order: first_vert, last_vert, matrix, stride.
 * (Offsets start at 8 because EBP is pushed on top of the return address.)
 *
 * For each vertex f (four floats at offsets 0..12) the loop overwrites:
 *     f[0] = f[0]/f[3] * vsx + tx
 *     f[1] = f[1]/f[3] * vsy + ty
 *     f[2] = f[2]/f[3] * vsz + vtz
 *     f[3] = 1/f[3]
 * with the scale/translate terms read from the matrix at offset 0,
 * MAT_SY, MAT_SZ, MAT_TX, MAT_TY and MAT_TZ.
 *
 * Register roles inside the loop:
 *     ECX = current vertex     EDX = bytes remaining (last - current)
 *     EAX = stride             EBP = matrix
 *     MM6 = ty | tx            MM5 = vsy | vsx         MM1 = vsz
 */
GLOBL GLNAME( _mesa_3dnow_project_vertices )
GLNAME( _mesa_3dnow_project_vertices ):

    PUSH_L    ( EBP )                       /* callee-saved */

    MOV_L     ( REGOFF(8, ESP), ECX )       /* first_vert */
    MOV_L     ( REGOFF(12, ESP), EDX )      /* last_vert */
    MOV_L     ( REGOFF(16, ESP), EBP )      /* matrix */
    MOV_L     ( REGOFF(20, ESP), EAX )      /* stride */

    SUB_L     ( ECX, EDX )                  /* last -= first */
    JBE       ( LLBL(v16_3dnow_pv_end) )    /* empty range: nothing to project */

    /* Prefetch through the loaded pointer; prefetching REGOFF(8, ESP)
     * would only touch the stack slot that holds the pointer.
     */
    PREFETCH  ( REGIND(ECX) )               /* fetch the first vertex */

    FEMMS                                   /* enter MMX/3DNow! state */

    MOVD      ( REGOFF(MAT_TX, EBP), MM6 )  /*     | tx  */
    PUNPCKLDQ ( REGOFF(MAT_TY, EBP), MM6 )  /*  ty | tx  */

    MOVD      ( REGIND(EBP), MM5 )          /*     | vsx */
    PUNPCKLDQ ( REGOFF(MAT_SY, EBP), MM5 )  /* vsy | vsx */

    MOVD      ( REGOFF(MAT_SZ, EBP), MM1 )  /*     | vsz */

LLBL( v16_3dnow_pv_loop_start ):

    PREFETCH  ( REGOFF(64, ECX) )           /* fetch one/two verts ahead */

    MOVD      ( REGOFF(12, ECX), MM0 )      /*     | f[3] */
    PFRCP     ( MM0, MM0 )                  /* approximate oow = 1/f[3] */
    MOVD      ( REGOFF(12, ECX), MM7 )      /*     | f[3] */

    PFRCPIT1  ( MM0, MM7 )                  /* refine the reciprocal ... */
    PFRCPIT2  ( MM0, MM7 )                  /* oow | oow */

    PUNPCKLDQ ( MM7, MM7 )

    MOVQ      ( REGIND(ECX), MM2 )          /* f[1] | f[0] */
    PFMUL     ( MM7, MM2 )                  /* f[1] * oow | f[0] * oow */

    MOVD      ( REGOFF(8, ECX), MM3 )       /*     | f[2] */
    PFMUL     ( MM7, MM3 )                  /*     | f[2] * oow */

    MOVD      ( REGOFF(MAT_TZ, EBP), MM0 )  /*     | vtz (MM0 is reused as scratch above) */
    PFMUL     ( MM1, MM3 )                  /*     | f[2] *= vsz */
    PFADD     ( MM0, MM3 )                  /*     | f[2] += vtz */

    PFMUL     ( MM5, MM2 )                  /* f[1] *= vsy | f[0] *= vsx */
    PFADD     ( MM6, MM2 )                  /* f[1] += vty | f[0] += vtx */

    PUNPCKLDQ ( MM7, MM3 )                  /* f[3] = oow | f[2] */

    MOVQ      ( MM2, REGOFF(0, ECX) )
    MOVQ      ( MM3, REGOFF(8, ECX) )

    ADD_L     ( EAX, ECX )                  /* f += stride */
    SUB_L     ( EAX, EDX )                  /* remaining -= stride; sets flags for JA */
    JA        ( LLBL(v16_3dnow_pv_loop_start) ) /* loop while remaining > 0 (unsigned) */

    FEMMS                                   /* leave MMX state before x87 code runs */

LLBL( v16_3dnow_pv_end ):

    POP_L     ( EBP )
    RET
/* _mesa_3dnow_project_clipped_vertices( float *first_vertex,
 *                                       const float *last_vertex,
 *                                       const float *m,
 *                                       GLuint stride,
 *                                       const GLubyte *clip_mask )
 */
/* Perspective-divide and viewport-map the vertices in
 * [first_vert, last_vert) whose clip_mask byte is zero.
 *
 * Stack arguments, in order: first_vert, last_vert, matrix, stride,
 * clip_mask.  (Offsets start at 12 because EBP and ESI are pushed on top
 * of the return address.)
 *
 * clip_mask is read one byte per vertex; a non-zero byte leaves that
 * vertex untouched.  Unmasked vertices are overwritten with:
 *     f[0] = f[0]/f[3] * vsx + tx
 *     f[1] = f[1]/f[3] * vsy + ty
 *     f[2] = f[2]/f[3] * vsz + vtz
 *     f[3] = 1/f[3]
 *
 * Register roles inside the loop:
 *     ECX = current vertex     EDX = bytes remaining (last - current)
 *     EAX = stride             EBP = matrix           ESI = clip_mask cursor
 *     MM6 = ty | tx            MM5 = vsy | vsx        MM1 = vsz
 */
GLOBL GLNAME( _mesa_3dnow_project_clipped_vertices )
GLNAME( _mesa_3dnow_project_clipped_vertices ):

    PUSH_L    ( EBP )                       /* callee-saved */
    PUSH_L    ( ESI )                       /* callee-saved */

    MOV_L     ( REGOFF(12, ESP), ECX )      /* first_vert */
    MOV_L     ( REGOFF(16, ESP), EDX )      /* last_vert */
    MOV_L     ( REGOFF(20, ESP), EBP )      /* matrix */
    MOV_L     ( REGOFF(24, ESP), EAX )      /* stride */
    MOV_L     ( REGOFF(28, ESP), ESI )      /* clip_mask */

    SUB_L     ( ECX, EDX )                  /* last -= first */
    JBE       ( LLBL(v16_3dnow_pcv_end) )   /* empty range: nothing to project */

    /* Prefetch through the loaded pointer; prefetching REGOFF(12, ESP)
     * would only touch the stack slot that holds the pointer.
     */
    PREFETCH  ( REGIND(ECX) )               /* fetch the first vertex */

    FEMMS                                   /* enter MMX/3DNow! state */

    MOVD      ( REGOFF(MAT_TX, EBP), MM6 )  /*     | tx  */
    PUNPCKLDQ ( REGOFF(MAT_TY, EBP), MM6 )  /*  ty | tx  */

    MOVD      ( REGIND(EBP), MM5 )          /*     | vsx */
    PUNPCKLDQ ( REGOFF(MAT_SY, EBP), MM5 )  /* vsy | vsx */

    MOVD      ( REGOFF(MAT_SZ, EBP), MM1 )  /*     | vsz */

LLBL( v16_3dnow_pcv_loop_start ):

    CMP_B     ( CONST(0), REGIND(ESI) )     /* clip_mask byte set? */
    JNE       ( LLBL(v16_3dnow_pcv_skip) )  /* yes: leave this vertex alone */

    MOVD      ( REGOFF(12, ECX), MM0 )      /*     | f[3] */
    PFRCP     ( MM0, MM0 )                  /* approximate oow = 1/f[3] */
    MOVD      ( REGOFF(12, ECX), MM7 )      /*     | f[3] */

    PFRCPIT1  ( MM0, MM7 )                  /* refine the reciprocal ... */
    PFRCPIT2  ( MM0, MM7 )                  /* oow | oow */

    PUNPCKLDQ ( MM7, MM7 )

    MOVQ      ( REGIND(ECX), MM2 )          /* f[1] | f[0] */
    PFMUL     ( MM7, MM2 )                  /* f[1] * oow | f[0] * oow */

    MOVD      ( REGOFF(8, ECX), MM3 )       /*     | f[2] */
    PFMUL     ( MM7, MM3 )                  /*     | f[2] * oow */

    MOVD      ( REGOFF(MAT_TZ, EBP), MM0 )  /*     | vtz (MM0 is reused as scratch above) */
    PFMUL     ( MM1, MM3 )                  /*     | f[2] *= vsz */
    PFADD     ( MM0, MM3 )                  /*     | f[2] += vtz */

    PFMUL     ( MM5, MM2 )                  /* f[1] *= vsy | f[0] *= vsx */
    PFADD     ( MM6, MM2 )                  /* f[1] += vty | f[0] += vtx */

    PUNPCKLDQ ( MM7, MM3 )                  /* f[3] = oow | f[2] */

    MOVQ      ( MM2, REGOFF(0, ECX) )
    MOVQ      ( MM3, REGOFF(8, ECX) )

LLBL( v16_3dnow_pcv_skip ):

    ADD_L     ( EAX, ECX )                  /* f += stride */
    INC_L     ( ESI )                       /* next clip_mask */

    /* The loop condition must come from the remaining byte count, not
     * from the flags INC_L leaves behind.  JA (unsigned >) also stops
     * cleanly when stride does not divide the range evenly.
     */
    SUB_L     ( EAX, EDX )                  /* remaining -= stride */
    JA        ( LLBL(v16_3dnow_pcv_loop_start) )

    FEMMS                                   /* leave MMX state before x87 code runs */

LLBL( v16_3dnow_pcv_end ):

    POP_L     ( ESI )
    POP_L     ( EBP )
    RET