~agrip-hackers/sns/trunk

« back to all changes in this revision

Viewing changes to zq-repo/zquake/source/d_polysa.s

  • Committer: Matthew Tylee Atkinson
  • Date: 2008-03-07 20:15:20 UTC
  • Revision ID: mta@agrip.org.uk-20080307201520-uj9sa2jrytx91b2t
Remove non-SNS components.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*
2
 
        d_polysa.S
3
 
 
4
 
        (description)
5
 
 
6
 
        Copyright (C) 1996-1997  Id Software, Inc.
7
 
 
8
 
        This program is free software; you can redistribute it and/or
9
 
        modify it under the terms of the GNU General Public License
10
 
        as published by the Free Software Foundation; either version 2
11
 
        of the License, or (at your option) any later version.
12
 
 
13
 
        This program is distributed in the hope that it will be useful,
14
 
        but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 
        MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16
 
 
17
 
        See the GNU General Public License for more details.
18
 
 
19
 
        You should have received a copy of the GNU General Public License
20
 
        along with this program; if not, write to:
21
 
 
22
 
                Free Software Foundation, Inc.
23
 
                59 Temple Place - Suite 330
24
 
                Boston, MA  02111-1307, USA
25
 
 
26
 
        $Id: d_polysa.s,v 1.1 2000/08/26 12:26:18 tonik Exp $
27
 
*/
28
 
// d_polysa.s
29
 
// x86 assembly-language polygon model drawing code
30
 
 
31
 
#include "asm_i386.h"
32
 
#include "quakeasm.h"
33
 
#include "asm_draw.h"
34
 
#include "d_ifacea.h"
35
 
 
36
 
#ifdef id386
37
 
 
38
 
// !!! if this is changed, it must be changed in d_polyse.c too !!!
// DPS_MAXSPANS expands to the bare text MAXHEIGHT+1 (no surrounding
// parentheses), so wrap it in parentheses wherever it is used inside a
// larger expression. MAXHEIGHT is not defined in the visible part of this
// file.
39
 
#define DPS_MAXSPANS                    MAXHEIGHT+1     
40
 
                                                                        // 1 extra for spanpackage that marks end
41
 
 
42
 
//#define       SPAN_SIZE       (((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
43
 
// Hard-coded stand-in for the formula commented out above; it evaluates to
// 1027*32 = 32864.
// NOTE(review): presumably 1024 mirrors MAXHEIGHT and 32 mirrors
// spanpackage_t_size -- confirm, and keep in sync if either one changes.
// The expansion has no outer parentheses, so parenthesise it at the use site.
#define SPAN_SIZE (1024+1+1+1)*32 
44
 
 
45
 
 
46
 
        .data
// File-local scratch storage and tables (none of these labels is made
// global in the visible part of the file).
47
 
 
48
 
        .align  4
49
 
// p10_minus_p20 / p01_minus_p21: single-precision edge deltas that
// D_PolysetCalcGradients stores with fstps and then rereads as fmuls
// memory operands for every gradient it computes.
p10_minus_p20:  .single         0
50
 
p01_minus_p21:  .single         0
51
 
// temp0, temp1 and Ltemp are single-precision slots that are not referenced
// in the visible part of the file.
temp0:                  .single         0
52
 
temp1:                  .single         0
53
 
Ltemp:                  .single         0
54
 
 
55
 
// Eight .long entries holding the addresses of labels LDraw8 down to LDraw1.
// The LDraw* labels and every use of this table are outside the visible part
// of the file.
// NOTE(review): presumably a jump table indexed by a pixel count -- confirm
// against the code that reads it.
aff8entryvec_table:     .long   LDraw8, LDraw7, LDraw6, LDraw5
56
 
                                .long   LDraw4, LDraw3, LDraw2, LDraw1
57
 
 
58
 
// One zero-initialised .long; not referenced in the visible part of the file.
lzistepx:               .long   0
59
 
 
60
 
 
61
 
        .text
// Everything from here on is assembled into the code section.
62
 
 
63
 
// Unless NeXT is defined, declare the two routines below as external
// symbols. Neither one is referenced in the visible part of this file.
// NOTE(review): C() is presumably the symbol-decoration macro supplied by
// one of the included headers -- confirm.
#ifndef NeXT
64
 
        .extern C(D_PolysetSetEdgeTable)
65
 
        .extern C(D_RasterizeAliasPolySmooth)
66
 
#endif
67
 
 
68
 
//----------------------------------------------------------------------
69
 
// affine triangle gradient calculation code
70
 
//----------------------------------------------------------------------
71
 
 
72
 
// NOTE(review): looks like a stack-argument offset (4 bytes for the return
// address plus 0 for the first argument); its use is outside the visible
// part of this file -- confirm. The expansion 4+0 is unparenthesised.
#define skinwidth       4+0
73
 
 
74
 
.globl C(D_PolysetCalcGradients)
75
 
C(D_PolysetCalcGradients):
76
 
 
77
 
//      p00_minus_p20 = r_p0[0] - r_p2[0];
78
 
//      p01_minus_p21 = r_p0[1] - r_p2[1];
79
 
//      p10_minus_p20 = r_p1[0] - r_p2[0];
80
 
//      p11_minus_p21 = r_p1[1] - r_p2[1];
81
 
//
82
 
//      xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 -
83
 
//                           p00_minus_p20 * p11_minus_p21);
84
 
//
85
 
//      ystepdenominv = -xstepdenominv;
86
 
 
87
 
        fildl   C(r_p0)+0               // r_p0[0]
88
 
        fildl   C(r_p2)+0               // r_p2[0] | r_p0[0]
89
 
        fildl   C(r_p0)+4               // r_p0[1] | r_p2[0] | r_p0[0]
90
 
        fildl   C(r_p2)+4               // r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
91
 
        fildl   C(r_p1)+0               // r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
92
 
        fildl   C(r_p1)+4               // r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] |
93
 
                                                        //  r_p2[0] | r_p0[0]
94
 
        fxch    %st(3)                  // r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] |
95
 
                                                        //  r_p2[0] | r_p0[0]
96
 
        fsub    %st(2),%st(0)   // p01_minus_p21 | r_p1[0] | r_p2[1] | r_p1[1] |
97
 
                                                        //  r_p2[0] | r_p0[0]
98
 
        fxch    %st(1)                  // r_p1[0] | p01_minus_p21 | r_p2[1] | r_p1[1] |
99
 
                                                        //  r_p2[0] | r_p0[0]
100
 
        fsub    %st(4),%st(0)   // p10_minus_p20 | p01_minus_p21 | r_p2[1] |
101
 
                                                        //  r_p1[1] | r_p2[0] | r_p0[0]
102
 
        fxch    %st(5)                  // r_p0[0] | p01_minus_p21 | r_p2[1] |
103
 
                                                        //  r_p1[1] | r_p2[0] | p10_minus_p20
104
 
        fsubp   %st(0),%st(4)   // p01_minus_p21 | r_p2[1] | r_p1[1] |
105
 
                                                        //  p00_minus_p20 | p10_minus_p20
106
 
        fxch    %st(2)                  // r_p1[1] | r_p2[1] | p01_minus_p21 |
107
 
                                                        //  p00_minus_p20 | p10_minus_p20
108
 
        fsubp   %st(0),%st(1)   // p11_minus_p21 | p01_minus_p21 |
109
 
                                                        //  p00_minus_p20 | p10_minus_p20
110
 
        fxch    %st(1)                  // p01_minus_p21 | p11_minus_p21 |
111
 
                                                        //  p00_minus_p20 | p10_minus_p20
112
 
        flds    C(d_xdenom)             // d_xdenom | p01_minus_p21 | p11_minus_p21 |
113
 
                                                        //  p00_minus_p20 | p10_minus_p20
114
 
        fxch    %st(4)                  // p10_minus_p20 | p01_minus_p21 | p11_minus_p21 |
115
 
                                                        //  p00_minus_p20 | d_xdenom
116
 
        fstps   p10_minus_p20   // p01_minus_p21 | p11_minus_p21 |
117
 
                                                        //  p00_minus_p20 | d_xdenom
118
 
        fstps   p01_minus_p21   // p11_minus_p21 | p00_minus_p20 | xstepdenominv
119
 
        fxch    %st(2)                  // xstepdenominv | p00_minus_p20 | p11_minus_p21
120
 
 
121
 
//// ceil () for light so positive steps are exaggerated, negative steps
122
 
//// diminished,  pushing us away from underflow toward overflow. Underflow is
123
 
//// very visible, overflow is very unlikely, because of ambient lighting
124
 
//      t0 = r_p0[4] - r_p2[4];
125
 
//      t1 = r_p1[4] - r_p2[4];
126
 
 
127
 
        fildl   C(r_p2)+16              // r_p2[4] | xstepdenominv | p00_minus_p20 |
128
 
                                                        //  p11_minus_p21
129
 
        fildl   C(r_p0)+16              // r_p0[4] | r_p2[4] | xstepdenominv |
130
 
                                                        //  p00_minus_p20 | p11_minus_p21
131
 
        fildl   C(r_p1)+16              // r_p1[4] | r_p0[4] | r_p2[4] | xstepdenominv |
132
 
                                                        //  p00_minus_p20 | p11_minus_p21
133
 
        fxch    %st(2)                  // r_p2[4] | r_p0[4] | r_p1[4] | xstepdenominv |
134
 
                                                        //  p00_minus_p20 | p11_minus_p21
135
 
        fld             %st(0)                  // r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] |
136
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
137
 
        fsubrp  %st(0),%st(2)   // r_p2[4] | t0 | r_p1[4] | xstepdenominv |
138
 
                                                        //  p00_minus_p20 | p11_minus_p21
139
 
        fsubrp  %st(0),%st(2)   // t0 | t1 | xstepdenominv | p00_minus_p20 |
140
 
                                                        //  p11_minus_p21
141
 
 
142
 
//      r_lstepx = (int)
143
 
//                      ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
144
 
//      r_lstepy = (int)
145
 
//                      ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);
146
 
 
147
 
        fld             %st(0)                  // t0 | t0 | t1 | xstepdenominv | p00_minus_p20 |
148
 
                                                        //  p11_minus_p21
149
 
        fmul    %st(5),%st(0)   // t0*p11_minus_p21 | t0 | t1 | xstepdenominv |
150
 
                                                        //  p00_minus_p20 | p11_minus_p21
151
 
        fxch    %st(2)                  // t1 | t0 | t0*p11_minus_p21 | xstepdenominv |
152
 
                                                        //  p00_minus_p20 | p11_minus_p21
153
 
        fld             %st(0)                  // t1 | t1 | t0 | t0*p11_minus_p21 |
154
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
155
 
        fmuls   p01_minus_p21   // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
156
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
157
 
        fxch    %st(2)                  // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
158
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
159
 
        fmuls   p10_minus_p20   // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
160
 
                                                        //  t0*p11_minus_p21 | xstepdenominv |
161
 
                                                        //  p00_minus_p20 | p11_minus_p21
162
 
        fxch    %st(1)                  // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
163
 
                                                        //  t0*p11_minus_p21 | xstepdenominv |
164
 
                                                        //  p00_minus_p20 | p11_minus_p21
165
 
        fmul    %st(5),%st(0)   // t1*p00_minus_p20 | t0*p10_minus_p20 |
166
 
                                                        //  t1*p01_minus_p21 | t0*p11_minus_p21 |
167
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
168
 
        fxch    %st(2)                  // t1*p01_minus_p21 | t0*p10_minus_p20 |
169
 
                                                        //  t1*p00_minus_p20 | t0*p11_minus_p21 |
170
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
171
 
        fsubp   %st(0),%st(3)   // t0*p10_minus_p20 | t1*p00_minus_p20 |
172
 
                                                        //  t1*p01_minus_p21 - t0*p11_minus_p21 |
173
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
174
 
        fsubrp  %st(0),%st(1)   // t1*p00_minus_p20 - t0*p10_minus_p20 |
175
 
                                                        //  t1*p01_minus_p21 - t0*p11_minus_p21 |
176
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
177
 
        fld             %st(2)                  // xstepdenominv |
178
 
                                                        //  t1*p00_minus_p20 - t0*p10_minus_p20 |
179
 
                                                        //  t1*p01_minus_p21 - t0*p11_minus_p21 |
180
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
181
 
        fmuls   float_minus_1   // ystepdenominv |
182
 
                                                        //  t1*p00_minus_p20 - t0*p10_minus_p20 |
183
 
                                                        //  t1*p01_minus_p21 - t0*p11_minus_p21 |
184
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
185
 
        fxch    %st(2)                  // t1*p01_minus_p21 - t0*p11_minus_p21 |
186
 
                                                        //  t1*p00_minus_p20 - t0*p10_minus_p20 |
187
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
188
 
                                                        //  p11_minus_p21
189
 
        fmul    %st(3),%st(0)   // (t1*p01_minus_p21 - t0*p11_minus_p21)*
190
 
                                                        //   xstepdenominv |
191
 
                                                        //  t1*p00_minus_p20 - t0*p10_minus_p20 |
192
 
                                                        //   ystepdenominv | xstepdenominv |
193
 
                                                        //   p00_minus_p20 | p11_minus_p21
194
 
        fxch    %st(1)                  // t1*p00_minus_p20 - t0*p10_minus_p20 |
195
 
                                                        //  (t1*p01_minus_p21 - t0*p11_minus_p21)*
196
 
                                                        //   xstepdenominv | ystepdenominv |
197
 
                                                        //   xstepdenominv | p00_minus_p20 | p11_minus_p21
198
 
        fmul    %st(2),%st(0)   // (t1*p00_minus_p20 - t0*p10_minus_p20)*
199
 
                                                        //  ystepdenominv |
200
 
                                                        //  (t1*p01_minus_p21 - t0*p11_minus_p21)*
201
 
                                                        //  xstepdenominv | ystepdenominv |
202
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
203
 
        fldcw   ceil_cw
204
 
        fistpl  C(r_lstepy)             // r_lstepx | ystepdenominv | xstepdenominv |
205
 
                                                        //  p00_minus_p20 | p11_minus_p21
206
 
        fistpl  C(r_lstepx)             // ystepdenominv | xstepdenominv | p00_minus_p20 |
207
 
                                                        //  p11_minus_p21
208
 
        fldcw   single_cw
209
 
 
210
 
//      t0 = r_p0[2] - r_p2[2];
211
 
//      t1 = r_p1[2] - r_p2[2];
212
 
 
213
 
        fildl   C(r_p2)+8               // r_p2[2] | ystepdenominv | xstepdenominv |
214
 
                                                        //  p00_minus_p20 | p11_minus_p21
215
 
        fildl   C(r_p0)+8               // r_p0[2] | r_p2[2] | ystepdenominv |
216
 
                                                        //   xstepdenominv | p00_minus_p20 | p11_minus_p21
217
 
        fildl   C(r_p1)+8               // r_p1[2] | r_p0[2] | r_p2[2] | ystepdenominv |
218
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
219
 
        fxch    %st(2)                  // r_p2[2] | r_p0[2] | r_p1[2] | ystepdenominv |
220
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
221
 
        fld             %st(0)                  // r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] |
222
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
223
 
                                                        //  p11_minus_p21
224
 
        fsubrp  %st(0),%st(2)   // r_p2[2] | t0 | r_p1[2] | ystepdenominv |
225
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
226
 
        fsubrp  %st(0),%st(2)   // t0 | t1 | ystepdenominv | xstepdenominv |
227
 
                                                        //  p00_minus_p20 | p11_minus_p21
228
 
 
229
 
//      r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
230
 
//                      xstepdenominv);
231
 
//      r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
232
 
//                      ystepdenominv);
233
 
 
234
 
        fld             %st(0)                  // t0 | t0 | t1 | ystepdenominv | xstepdenominv
235
 
        fmul    %st(6),%st(0)   // t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
236
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
237
 
        fxch    %st(2)                  // t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
238
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
239
 
        fld             %st(0)                  // t1 | t1 | t0 | t0*p11_minus_p21 |
240
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
241
 
                                                        //  p11_minus_p21
242
 
        fmuls   p01_minus_p21   // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
243
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
244
 
                                                        //  p11_minus_p21
245
 
        fxch    %st(2)                  // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
246
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
247
 
                                                        //  p11_minus_p21
248
 
        fmuls   p10_minus_p20   // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
249
 
                                                        //  t0*p11_minus_p21 | ystepdenominv |
250
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
251
 
        fxch    %st(1)                  // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
252
 
                                                        //  t0*p11_minus_p21 | ystepdenominv |
253
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
254
 
        fmul    %st(6),%st(0)   // t1*p00_minus_p20 | t0*p10_minus_p20 |
255
 
                                                        //  t1*p01_minus_p21 | t0*p11_minus_p21 |
256
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
257
 
                                                        //  p11_minus_p21
258
 
        fxch    %st(2)                  // t1*p01_minus_p21 | t0*p10_minus_p20 |
259
 
                                                        //  t1*p00_minus_p20 | t0*p11_minus_p21 |
260
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
261
 
                                                        //  p11_minus_p21
262
 
        fsubp   %st(0),%st(3)   // t0*p10_minus_p20 | t1*p00_minus_p20 |
263
 
                                                        //  t1*p01_minus_p21 - t0*p11_minus_p21 |
264
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
265
 
                                                        //  p11_minus_p21
266
 
        fsubrp  %st(0),%st(1)   // t1*p00_minus_p20 - t0*p10_minus_p20 |
267
 
                                                        //  t1*p01_minus_p21 - t0*p11_minus_p21 |
268
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
269
 
                                                        //  p11_minus_p21
270
 
        fmul    %st(2),%st(0)   // (t1*p00_minus_p20 - t0*p10_minus_p20)*
271
 
                                                        //   ystepdenominv |
272
 
                                                        //  t1*p01_minus_p21 - t0*p11_minus_p21 |
273
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
274
 
                                                        //  p11_minus_p21
275
 
        fxch    %st(1)                  // t1*p01_minus_p21 - t0*p11_minus_p21 |
276
 
                                                        //  (t1*p00_minus_p20 - t0*p10_minus_p20)*
277
 
                                                        //   ystepdenominv | ystepdenominv |
278
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
279
 
        fmul    %st(3),%st(0)   // (t1*p01_minus_p21 - t0*p11_minus_p21)*
280
 
                                                        //  xstepdenominv |
281
 
                                                        //  (t1*p00_minus_p20 - t0*p10_minus_p20)*
282
 
                                                        //  ystepdenominv | ystepdenominv |
283
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
284
 
        fxch    %st(1)                  // (t1*p00_minus_p20 - t0*p10_minus_p20)*
285
 
                                                        //  ystepdenominv |
286
 
                                                        //  (t1*p01_minus_p21 - t0*p11_minus_p21)*
287
 
                                                        //  xstepdenominv | ystepdenominv |
288
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
289
 
        fistpl  C(r_sstepy)             // r_sstepx | ystepdenominv | xstepdenominv |
290
 
                                                        //  p00_minus_p20 | p11_minus_p21
291
 
        fistpl  C(r_sstepx)             // ystepdenominv | xstepdenominv | p00_minus_p20 |
292
 
                                                        //  p11_minus_p21
293
 
 
294
 
//      t0 = r_p0[3] - r_p2[3];
295
 
//      t1 = r_p1[3] - r_p2[3];
296
 
 
297
 
        fildl   C(r_p2)+12              // r_p2[3] | ystepdenominv | xstepdenominv |
298
 
                                                        //  p00_minus_p20 | p11_minus_p21
299
 
        fildl   C(r_p0)+12              // r_p0[3] | r_p2[3] | ystepdenominv |
300
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
301
 
        fildl   C(r_p1)+12              // r_p1[3] | r_p0[3] | r_p2[3] | ystepdenominv |
302
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
303
 
        fxch    %st(2)                  // r_p2[3] | r_p0[3] | r_p1[3] | ystepdenominv |
304
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
305
 
        fld             %st(0)                  // r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] |
306
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
307
 
                                                        //  p11_minus_p21
308
 
        fsubrp  %st(0),%st(2)   // r_p2[3] | t0 | r_p1[3] | ystepdenominv |
309
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
310
 
        fsubrp  %st(0),%st(2)   // t0 | t1 | ystepdenominv | xstepdenominv |
311
 
                                                        //  p00_minus_p20 | p11_minus_p21
312
 
 
313
 
//      r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
314
 
//                      xstepdenominv);
315
 
//      r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
316
 
//                      ystepdenominv);
317
 
 
318
 
        fld             %st(0)                  // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
319
 
                                                        //  p00_minus_p20 | p11_minus_p21
320
 
        fmul    %st(6),%st(0)   // t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
321
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
322
 
        fxch    %st(2)                  // t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
323
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
324
 
        fld             %st(0)                  // t1 | t1 | t0 | t0*p11_minus_p21 |
325
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
326
 
                                                        //  p11_minus_p21
327
 
        fmuls   p01_minus_p21   // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
328
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
329
 
                                                        //  p11_minus_p21
330
 
        fxch    %st(2)                  // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
331
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
332
 
                                                        //  p11_minus_p21
333
 
        fmuls   p10_minus_p20   // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
334
 
                                                        //  t0*p11_minus_p21 | ystepdenominv |
335
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
336
 
        fxch    %st(1)                  // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
337
 
                                                        //  t0*p11_minus_p21 | ystepdenominv |
338
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
339
 
        fmul    %st(6),%st(0)   // t1*p00_minus_p20 | t0*p10_minus_p20 |
340
 
                                                        //  t1*p01_minus_p21 | t0*p11_minus_p21 |
341
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
342
 
                                                        //  p11_minus_p21
343
 
        fxch    %st(2)                  // t1*p01_minus_p21 | t0*p10_minus_p20 |
344
 
                                                        //  t1*p00_minus_p20 | t0*p11_minus_p21 |
345
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
346
 
                                                        //  p11_minus_p21
347
 
        fsubp   %st(0),%st(3)   // t0*p10_minus_p20 | t1*p00_minus_p20 |
348
 
                                                        //  t1*p01_minus_p21 - t0*p11_minus_p21 |
349
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
350
 
                                                        //  p11_minus_p21
351
 
        fsubrp  %st(0),%st(1)   // t1*p00_minus_p20 - t0*p10_minus_p20 |
352
 
                                                        //  t1*p01_minus_p21 - t0*p11_minus_p21 |
353
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
354
 
                                                        //  p11_minus_p21
355
 
        fmul    %st(2),%st(0)   // (t1*p00_minus_p20 - t0*p10_minus_p20)*
356
 
                                                        //   ystepdenominv |
357
 
                                                        //  t1*p01_minus_p21 - t0*p11_minus_p21 |
358
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
359
 
                                                        //  p11_minus_p21
360
 
        fxch    %st(1)                  // t1*p01_minus_p21 - t0*p11_minus_p21 |
361
 
                                                        //  (t1*p00_minus_p20 - t0*p10_minus_p20)*
362
 
                                                        //  ystepdenominv | ystepdenominv |
363
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
364
 
        fmul    %st(3),%st(0)   // (t1*p01_minus_p21 - t0*p11_minus_p21)*
365
 
                                                        //  xstepdenominv |
366
 
                                                        //  (t1*p00_minus_p20 - t0*p10_minus_p20)*
367
 
                                                        //  ystepdenominv | ystepdenominv |
368
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
369
 
        fxch    %st(1)                  // (t1*p00_minus_p20 - t0*p10_minus_p20)*
370
 
                                                        //  ystepdenominv |
371
 
                                                        //  (t1*p01_minus_p21 - t0*p11_minus_p21)*
372
 
                                                        //  xstepdenominv | ystepdenominv |
373
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
374
 
        fistpl  C(r_tstepy)             // r_tstepx | ystepdenominv | xstepdenominv |
375
 
                                                        //  p00_minus_p20 | p11_minus_p21
376
 
        fistpl  C(r_tstepx)             // ystepdenominv | xstepdenominv | p00_minus_p20 |
377
 
                                                        //  p11_minus_p21
378
 
 
379
 
//      t0 = r_p0[5] - r_p2[5];
380
 
//      t1 = r_p1[5] - r_p2[5];
381
 
 
382
 
        fildl   C(r_p2)+20              // r_p2[5] | ystepdenominv | xstepdenominv |
383
 
                                                        //  p00_minus_p20 | p11_minus_p21
384
 
        fildl   C(r_p0)+20              // r_p0[5] | r_p2[5] | ystepdenominv |
385
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
386
 
        fildl   C(r_p1)+20              // r_p1[5] | r_p0[5] | r_p2[5] | ystepdenominv |
387
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
388
 
        fxch    %st(2)                  // r_p2[5] | r_p0[5] | r_p1[5] | ystepdenominv |
389
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
390
 
        fld             %st(0)                  // r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] |
391
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
392
 
                                                        //  p11_minus_p21
393
 
        fsubrp  %st(0),%st(2)   // r_p2[5] | t0 | r_p1[5] | ystepdenominv |
394
 
                                                        //  xstepdenominv | p00_minus_p20 | p11_minus_p21
395
 
        fsubrp  %st(0),%st(2)   // t0 | t1 | ystepdenominv | xstepdenominv |
396
 
                                                        //  p00_minus_p20 | p11_minus_p21
397
 
 
398
 
//      r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
399
 
//                      xstepdenominv);
400
 
//      r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
401
 
//                      ystepdenominv);
402
 
 
403
 
        fld             %st(0)                  // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
404
 
                                                        //  p00_minus_p20 | p11_minus_p21
405
 
        fmulp   %st(0),%st(6)   // t0 | t1 | ystepdenominv | xstepdenominv |
406
 
                                                        //  p00_minus_p20 | t0*p11_minus_p21
407
 
        fxch    %st(1)                  // t1 | t0 | ystepdenominv | xstepdenominv |
408
 
                                                        //  p00_minus_p20 | t0*p11_minus_p21
409
 
        fld             %st(0)                  // t1 | t1 | t0 | ystepdenominv | xstepdenominv |
410
 
                                                        //  p00_minus_p20 | t0*p11_minus_p21
411
 
        fmuls   p01_minus_p21   // t1*p01_minus_p21 | t1 | t0 | ystepdenominv |
412
 
                                                        //  xstepdenominv | p00_minus_p20 |
413
 
                                                        //  t0*p11_minus_p21
414
 
        fxch    %st(2)                  // t0 | t1 | t1*p01_minus_p21 | ystepdenominv |
415
 
                                                        //  xstepdenominv | p00_minus_p20 |
416
 
                                                        //  t0*p11_minus_p21
417
 
        fmuls   p10_minus_p20   // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
418
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
419
 
                                                        //  t0*p11_minus_p21
420
 
        fxch    %st(1)                  // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
421
 
                                                        //  ystepdenominv | xstepdenominv | p00_minus_p20 |
422
 
                                                        //  t0*p11_minus_p21
423
 
        fmulp   %st(0),%st(5)   // t0*p10_minus_p20 | t1*p01_minus_p21 |
424
 
                                                        //  ystepdenominv | xstepdenominv |
425
 
                                                        //  t1*p00_minus_p20 | t0*p11_minus_p21
426
 
        fxch    %st(5)                  // t0*p11_minus_p21 | t1*p01_minus_p21 |
427
 
                                                        //  ystepdenominv | xstepdenominv |
428
 
                                                        //  t1*p00_minus_p20 | t0*p10_minus_p20
429
 
        fsubrp  %st(0),%st(1)   // t1*p01_minus_p21 - t0*p11_minus_p21 |
430
 
                                                        //  ystepdenominv | xstepdenominv |
431
 
                                                        //  t1*p00_minus_p20 | t0*p10_minus_p20
432
 
        fxch    %st(3)                  // t1*p00_minus_p20 | ystepdenominv |
433
 
                                                        //  xstepdenominv |
434
 
                                                        //  t1*p01_minus_p21 - t0*p11_minus_p21 |
435
 
                                                        //  t0*p10_minus_p20
436
 
        fsubp   %st(0),%st(4)   // ystepdenominv | xstepdenominv |
437
 
                                                        //  t1*p01_minus_p21 - t0*p11_minus_p21 |
438
 
                                                        //  t1*p00_minus_p20 - t0*p10_minus_p20
439
 
        fxch    %st(1)                  // xstepdenominv | ystepdenominv |
440
 
                                                        //  t1*p01_minus_p21 - t0*p11_minus_p21 |
441
 
                                                        //  t1*p00_minus_p20 - t0*p10_minus_p20
442
 
        fmulp   %st(0),%st(2)   // ystepdenominv |
443
 
                                                        //  (t1*p01_minus_p21 - t0*p11_minus_p21) *
444
 
                                                        //  xstepdenominv |
445
 
                                                        //  t1*p00_minus_p20 - t0*p10_minus_p20
446
 
        fmulp   %st(0),%st(2)   // (t1*p01_minus_p21 - t0*p11_minus_p21) *
447
 
                                                        //  xstepdenominv |
448
 
                                                        //  (t1*p00_minus_p20 - t0*p10_minus_p20) *
449
 
                                                        //  ystepdenominv
450
 
        fistpl  C(r_zistepx)    // (t1*p00_minus_p20 - t0*p10_minus_p20) *
451
 
                                                        //  ystepdenominv
452
 
        fistpl  C(r_zistepy)
453
 
 
454
 
//      a_sstepxfrac = r_sstepx << 16;
455
 
//      a_tstepxfrac = r_tstepx << 16;
456
 
//
457
 
//      a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) +
458
 
//                      (r_sstepx >> 16);
459
 
 
460
 
        movl    C(r_sstepx),%eax
461
 
        movl    C(r_tstepx),%edx
462
 
        shll    $16,%eax
463
 
        shll    $16,%edx
464
 
        movl    %eax,C(a_sstepxfrac)
465
 
        movl    %edx,C(a_tstepxfrac)
466
 
 
467
 
        movl    C(r_sstepx),%ecx
468
 
        movl    C(r_tstepx),%eax
469
 
        sarl    $16,%ecx
470
 
        sarl    $16,%eax
471
 
        imull   skinwidth(%esp)
472
 
        addl    %ecx,%eax
473
 
        movl    %eax,C(a_ststepxwhole)
474
 
 
475
 
        ret
476
 
 
477
 
 
478
 
//----------------------------------------------------------------------
479
 
// recursive subdivision affine triangle drawing code
480
 
//
481
 
// not C-callable because of stdcall return
// (the routine removes its three pointer arguments itself with "ret $12")
//
// in:    three stack arguments lp1, lp2, lp3, each the address of a vertex
//        accessed as an array of 32-bit ints
// saves: %ebp, %esi, %edi, %ebx (pushed on entry, popped at LDone)
// uses:  %eax, %ecx, %edx as scratch without saving them
482
 
//----------------------------------------------------------------------
483
 
 
484
 
// %esp-relative offsets of the arguments: 4 bytes of return address plus
// 16 bytes for the four registers pushed in the prologue. They are used
// only by the three loads that directly follow the prologue.
#define lp1     4+16
485
 
#define lp2     8+16
486
 
#define lp3     12+16
487
 
 
488
 
.globl C(D_PolysetRecursiveTriangle)
489
 
C(D_PolysetRecursiveTriangle):
490
 
        pushl   %ebp                            // preserve caller stack frame pointer
491
 
        pushl   %esi                            // preserve register variables
492
 
        pushl   %edi
493
 
        pushl   %ebx
494
 
 
495
 
//      int             *temp;
496
 
//      int             d;
497
 
//      int             new[6];
498
 
//      int             i;
499
 
//      int             z;
500
 
//      short   *zbuf;
501
 
        movl    lp2(%esp),%esi                  // %esi = lp2
502
 
        movl    lp1(%esp),%ebx                  // %ebx = lp1
503
 
        movl    lp3(%esp),%edi                  // %edi = lp3
504
 
 
505
 
//      d = lp2[0] - lp1[0];
506
 
//      if (d < -1 || d > 1)
507
 
//              goto split;
// The range test is done with one unsigned compare: d is in [-1, 1]
// exactly when (unsigned)(d + 1) <= 2. The y difference for the next
// test is computed in %ebp in between the x instructions.
508
 
        movl    0(%esi),%eax                    // lp2[0]
509
 
 
510
 
        movl    0(%ebx),%edx                    // lp1[0]
511
 
        movl    4(%esi),%ebp                    // lp2[1], for the y test that follows
512
 
 
513
 
        subl    %edx,%eax                       // d = lp2[0] - lp1[0]
514
 
        movl    4(%ebx),%ecx                    // lp1[1]
515
 
 
516
 
        subl    %ecx,%ebp                       // lp2[1] - lp1[1]
517
 
        incl    %eax                            // d + 1
518
 
 
519
 
        cmpl    $2,%eax
520
 
        ja              LSplit                  // unsigned (d + 1) > 2: d < -1 || d > 1
521
 
 
522
 
//      d = lp2[1] - lp1[1];
523
 
//      if (d < -1 || d > 1)
524
 
//              goto split;
525
 
        movl    0(%edi),%eax
526
 
        incl    %ebp
527
 
 
528
 
        cmpl    $2,%ebp
529
 
        ja              LSplit
530
 
 
531
 
//      d = lp3[0] - lp2[0];
532
 
//      if (d < -1 || d > 1)
533
 
//              goto split2;
534
 
        movl    0(%esi),%edx
535
 
        movl    4(%edi),%ebp
536
 
 
537
 
        subl    %edx,%eax
538
 
        movl    4(%esi),%ecx
539
 
 
540
 
        subl    %ecx,%ebp
541
 
        incl    %eax
542
 
 
543
 
        cmpl    $2,%eax
544
 
        ja              LSplit2
545
 
 
546
 
//      d = lp3[1] - lp2[1];
547
 
//      if (d < -1 || d > 1)
548
 
//              goto split2;
549
 
        movl    0(%ebx),%eax
550
 
        incl    %ebp
551
 
 
552
 
        cmpl    $2,%ebp
553
 
        ja              LSplit2
554
 
 
555
 
//      d = lp1[0] - lp3[0];
556
 
//      if (d < -1 || d > 1)
557
 
//              goto split3;
558
 
        movl    0(%edi),%edx
559
 
        movl    4(%ebx),%ebp
560
 
 
561
 
        subl    %edx,%eax
562
 
        movl    4(%edi),%ecx
563
 
 
564
 
        subl    %ecx,%ebp
565
 
        incl    %eax
566
 
 
567
 
        incl    %ebp
568
 
        movl    %ebx,%edx
569
 
 
570
 
        cmpl    $2,%eax
571
 
        ja              LSplit3
572
 
 
573
 
//      d = lp1[1] - lp3[1];
574
 
//      if (d < -1 || d > 1)
575
 
//      {
576
 
//split3:
577
 
//              temp = lp1;
578
 
//              lp1 = lp3;
579
 
//              lp3 = lp2;
580
 
//              lp2 = temp;
581
 
//              goto split;
582
 
//      }
583
 
//
584
 
//      return;                 // entire tri is filled
585
 
//
586
 
        cmpl    $2,%ebp                         // %ebp = d + 1 (incremented above)
587
 
        jna             LDone                   // unsigned (d + 1) <= 2: no edge left to split
588
 
 
589
 
LSplit3:
590
 
        movl    %edi,%ebx                       // lp1 = lp3
591
 
        movl    %esi,%edi                       // lp3 = lp2
592
 
        movl    %edx,%esi                       // lp2 = temp (%edx was loaded with lp1 before the x test)
593
 
        jmp             LSplit
594
 
 
595
 
//split2:
596
 
LSplit2:
597
 
 
598
 
//      temp = lp1;
599
 
//      lp1 = lp2;
600
 
//      lp2 = lp3;
601
 
//      lp3 = temp;
602
 
        movl    %ebx,%eax
603
 
        movl    %esi,%ebx
604
 
        movl    %edi,%esi
605
 
        movl    %eax,%edi
606
 
 
607
 
//split:
608
 
LSplit:
609
 
 
610
 
        subl    $24,%esp                // allocate space for a new vertex
611
 
 
612
 
//// split this edge
613
 
//      new[0] = (lp1[0] + lp2[0]) >> 1;
614
 
//      new[1] = (lp1[1] + lp2[1]) >> 1;
615
 
//      new[2] = (lp1[2] + lp2[2]) >> 1;
616
 
//      new[3] = (lp1[3] + lp2[3]) >> 1;
617
 
//      new[5] = (lp1[5] + lp2[5]) >> 1;
618
 
        movl    8(%ebx),%eax
619
 
 
620
 
        movl    8(%esi),%edx
621
 
        movl    12(%ebx),%ecx
622
 
 
623
 
        addl    %edx,%eax
624
 
        movl    12(%esi),%edx
625
 
 
626
 
        sarl    $1,%eax
627
 
        addl    %edx,%ecx
628
 
 
629
 
        movl    %eax,8(%esp)
630
 
        movl    20(%ebx),%eax
631
 
 
632
 
        sarl    $1,%ecx
633
 
        movl    20(%esi),%edx
634
 
 
635
 
        movl    %ecx,12(%esp)
636
 
        addl    %edx,%eax
637
 
 
638
 
        movl    0(%ebx),%ecx
639
 
        movl    0(%esi),%edx
640
 
 
641
 
        sarl    $1,%eax
642
 
        addl    %ecx,%edx
643
 
 
644
 
        movl    %eax,20(%esp)
645
 
        movl    4(%ebx),%eax
646
 
 
647
 
        sarl    $1,%edx
648
 
        movl    4(%esi),%ebp
649
 
 
650
 
        movl    %edx,0(%esp)
651
 
        addl    %eax,%ebp
652
 
 
653
 
        sarl    $1,%ebp
654
 
        movl    %ebp,4(%esp)
655
 
 
656
 
//// draw the point if splitting a leading edge
657
 
//      if (lp2[1] > lp1[1])
658
 
//              goto nodraw;
659
 
        cmpl    %eax,4(%esi)
660
 
        jg              LNoDraw
661
 
 
662
 
//      if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0]))
663
 
//              goto nodraw;
664
 
        movl    0(%esi),%edx
665
 
        jnz             LDraw
666
 
 
667
 
        cmpl    %ecx,%edx
668
 
        jl              LNoDraw
669
 
 
670
 
LDraw:
671
 
 
672
 
// z = new[5] >> 16;
673
 
        movl    20(%esp),%edx
674
 
        movl    4(%esp),%ecx
675
 
 
676
 
        sarl    $16,%edx
677
 
        movl    0(%esp),%ebp
678
 
 
679
 
//      zbuf = zspantable[new[1]] + new[0];
680
 
        movl    C(zspantable)(,%ecx,4),%eax
681
 
 
682
 
//      if (z >= *zbuf)
683
 
//      {
684
 
        cmpw    (%eax,%ebp,2),%dx
685
 
        jnge    LNoDraw
686
 
 
687
 
//              int             pix;
688
 
//              
689
 
//              *zbuf = z;
690
 
        movw    %dx,(%eax,%ebp,2)
691
 
 
692
 
//              pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]];
693
 
        movl    12(%esp),%eax
694
 
 
695
 
        sarl    $16,%eax
696
 
        movl    8(%esp),%edx
697
 
 
698
 
        sarl    $16,%edx
699
 
        subl    %ecx,%ecx
700
 
 
701
 
        movl    C(skintable)(,%eax,4),%eax
702
 
        movl    4(%esp),%ebp
703
 
 
704
 
        movb    (%eax,%edx,),%cl
705
 
        movl    C(d_pcolormap),%edx
706
 
 
707
 
        movb    (%edx,%ecx,),%dl
708
 
        movl    0(%esp),%ecx
709
 
 
710
 
//              d_viewbuffer[d_scantable[new[1]] + new[0]] = pix;
711
 
        movl    C(d_scantable)(,%ebp,4),%eax
712
 
        addl    %eax,%ecx
713
 
        movl    C(d_viewbuffer),%eax
714
 
        movb    %dl,(%eax,%ecx,1)
715
 
 
716
 
//      }
717
 
//
718
 
//nodraw:
719
 
LNoDraw:
720
 
 
721
 
//// recursively continue
722
 
//      D_PolysetRecursiveTriangle (lp3, lp1, new);
723
 
        pushl   %esp
724
 
        pushl   %ebx
725
 
        pushl   %edi
726
 
        call    C(D_PolysetRecursiveTriangle)
727
 
 
728
 
//      D_PolysetRecursiveTriangle (lp3, new, lp2);
729
 
        movl    %esp,%ebx
730
 
        pushl   %esi
731
 
        pushl   %ebx
732
 
        pushl   %edi
733
 
        call    C(D_PolysetRecursiveTriangle)
734
 
        addl    $24,%esp
735
 
 
736
 
LDone:
737
 
        popl    %ebx                            // restore register variables
738
 
        popl    %edi
739
 
        popl    %esi
740
 
        popl    %ebp                            // restore caller stack frame pointer
741
 
        ret             $12
742
 
 
743
 
 
744
 
//----------------------------------------------------------------------
745
 
// 8-bpp horizontal span drawing code for affine polygons, with smooth
746
 
// shading and no transparency
747
 
//----------------------------------------------------------------------
748
 
 
749
 
// %esp-relative offset of the single stack argument: 4 bytes of return
// address plus 8 bytes for the %esi and %ebx pushes. It is only correct
// for the load directly below, which runs before %ebp and %edi are
// pushed as well.
#define pspans  4+8
750
 
 
751
 
// Shares its address with D_PolysetDrawSpans8; together with
// D_PolysetAff8End it brackets the code containing the LPatch1..LPatch9
// sites that D_Aff8Patch rewrites.
// NOTE(review): presumably exported so the patched range can be located
// from outside this file - confirm against the callers.
.globl C(D_PolysetAff8Start)
752
 
C(D_PolysetAff8Start):
753
 
 
754
 
.globl C(D_PolysetDrawSpans8)
755
 
C(D_PolysetDrawSpans8):
756
 
        pushl   %esi                            // preserve register variables
757
 
        pushl   %ebx
758
 
 
759
 
        movl    pspans(%esp),%esi       // point to the first span descriptor
760
 
        movl    C(r_zistepx),%ecx
761
 
 
762
 
        pushl   %ebp                            // preserve caller's stack frame
763
 
        pushl   %edi
764
 
 
765
 
        rorl    $16,%ecx                        // put high 16 bits of 1/z step in low word
766
 
        movl    spanpackage_t_count(%esi),%edx
767
 
 
768
 
        movl    %ecx,lzistepx
769
 
 
770
 
LSpanLoop:
771
 
 
772
 
//              lcount = d_aspancount - pspanpackage->count;
773
 
//
774
 
//              errorterm += erroradjustup;
775
 
//              if (errorterm >= 0)
776
 
//              {
777
 
//                      d_aspancount += d_countextrastep;
778
 
//                      errorterm -= erroradjustdown;
779
 
//              }
780
 
//              else
781
 
//              {
782
 
//                      d_aspancount += ubasestep;
783
 
//              }
784
 
        movl    C(d_aspancount),%eax
785
 
        subl    %edx,%eax
786
 
 
787
 
        movl    C(erroradjustup),%edx
788
 
        movl    C(errorterm),%ebx
789
 
        addl    %edx,%ebx
790
 
        js              LNoTurnover
791
 
 
792
 
        movl    C(erroradjustdown),%edx
793
 
        movl    C(d_countextrastep),%edi
794
 
        subl    %edx,%ebx
795
 
        movl    C(d_aspancount),%ebp
796
 
        movl    %ebx,C(errorterm)
797
 
        addl    %edi,%ebp
798
 
        movl    %ebp,C(d_aspancount)
799
 
        jmp             LRightEdgeStepped
800
 
 
801
 
LNoTurnover:
802
 
        movl    C(d_aspancount),%edi
803
 
        movl    C(ubasestep),%edx
804
 
        movl    %ebx,C(errorterm)
805
 
        addl    %edx,%edi
806
 
        movl    %edi,C(d_aspancount)
807
 
 
808
 
LRightEdgeStepped:
809
 
        cmpl    $1,%eax
810
 
 
811
 
        jl              LNextSpan
812
 
        jz              LExactlyOneLong
813
 
 
814
 
//
815
 
// set up advancetable
816
 
//
817
 
        movl    C(a_ststepxwhole),%ecx
818
 
        movl    C(r_affinetridesc)+atd_skinwidth,%edx
819
 
 
820
 
        movl    %ecx,advancetable+4     // advance base in t
821
 
        addl    %edx,%ecx
822
 
 
823
 
        movl    %ecx,advancetable       // advance extra in t
824
 
        movl    C(a_tstepxfrac),%ecx
825
 
 
826
 
        movw    C(r_lstepx),%cx
827
 
        movl    %eax,%edx                       // count
828
 
 
829
 
        movl    %ecx,tstep
830
 
        addl    $7,%edx
831
 
 
832
 
        shrl    $3,%edx                         // count of full and partial loops
833
 
        movl    spanpackage_t_sfrac(%esi),%ebx
834
 
 
835
 
        movw    %dx,%bx
836
 
        movl    spanpackage_t_pz(%esi),%ecx
837
 
 
838
 
        negl    %eax
839
 
 
840
 
        movl    spanpackage_t_pdest(%esi),%edi
841
 
        andl    $7,%eax         // 0->0, 1->7, 2->6, ... , 7->1
842
 
 
843
 
        subl    %eax,%edi       // compensate for hardwired offsets
844
 
        subl    %eax,%ecx
845
 
 
846
 
        subl    %eax,%ecx
847
 
        movl    spanpackage_t_tfrac(%esi),%edx
848
 
 
849
 
        movw    spanpackage_t_light(%esi),%dx
850
 
        movl    spanpackage_t_zi(%esi),%ebp
851
 
 
852
 
        rorl    $16,%ebp        // put high 16 bits of 1/z in low word
853
 
        pushl   %esi
854
 
 
855
 
        movl    spanpackage_t_ptex(%esi),%esi
856
 
        jmp             aff8entryvec_table(,%eax,4)
857
 
 
858
 
// %bx = count of full and partial loops
859
 
// %ebx high word = sfrac
860
 
// %ecx = pz
861
 
// %dx = light
862
 
// %edx high word = tfrac
863
 
// %esi = ptex
864
 
// %edi = pdest
865
 
// %ebp = 1/z
866
 
// tstep low word = C(r_lstepx)
867
 
// tstep high word = C(a_tstepxfrac)
868
 
// C(a_sstepxfrac) low word = 0
869
 
// C(a_sstepxfrac) high word = C(a_sstepxfrac)
870
 
 
871
 
LDrawLoop:
872
 
 
873
 
// FIXME: do we need to clamp light? We may need at least a buffer bit to
874
 
// keep it from poking into tfrac and causing problems
875
 
 
876
 
LDraw8:
877
 
        cmpw    (%ecx),%bp
878
 
        jl              Lp1
879
 
        xorl    %eax,%eax
880
 
        movb    %dh,%ah
881
 
        movb    (%esi),%al
882
 
        movw    %bp,(%ecx)
883
 
        movb    0x12345678(%eax),%al
884
 
LPatch8:
885
 
        movb    %al,(%edi)
886
 
Lp1:
887
 
        addl    tstep,%edx
888
 
        sbbl    %eax,%eax
889
 
        addl    lzistepx,%ebp
890
 
        adcl    $0,%ebp
891
 
        addl    C(a_sstepxfrac),%ebx
892
 
        adcl    advancetable+4(,%eax,4),%esi
893
 
 
894
 
LDraw7:
895
 
        cmpw    2(%ecx),%bp
896
 
        jl              Lp2
897
 
        xorl    %eax,%eax
898
 
        movb    %dh,%ah
899
 
        movb    (%esi),%al
900
 
        movw    %bp,2(%ecx)
901
 
        movb    0x12345678(%eax),%al
902
 
LPatch7:
903
 
        movb    %al,1(%edi)
904
 
Lp2:
905
 
        addl    tstep,%edx
906
 
        sbbl    %eax,%eax
907
 
        addl    lzistepx,%ebp
908
 
        adcl    $0,%ebp
909
 
        addl    C(a_sstepxfrac),%ebx
910
 
        adcl    advancetable+4(,%eax,4),%esi
911
 
 
912
 
LDraw6:
913
 
        cmpw    4(%ecx),%bp
914
 
        jl              Lp3
915
 
        xorl    %eax,%eax
916
 
        movb    %dh,%ah
917
 
        movb    (%esi),%al
918
 
        movw    %bp,4(%ecx)
919
 
        movb    0x12345678(%eax),%al
920
 
LPatch6:
921
 
        movb    %al,2(%edi)
922
 
Lp3:
923
 
        addl    tstep,%edx
924
 
        sbbl    %eax,%eax
925
 
        addl    lzistepx,%ebp
926
 
        adcl    $0,%ebp
927
 
        addl    C(a_sstepxfrac),%ebx
928
 
        adcl    advancetable+4(,%eax,4),%esi
929
 
 
930
 
LDraw5:
931
 
        cmpw    6(%ecx),%bp
932
 
        jl              Lp4
933
 
        xorl    %eax,%eax
934
 
        movb    %dh,%ah
935
 
        movb    (%esi),%al
936
 
        movw    %bp,6(%ecx)
937
 
        movb    0x12345678(%eax),%al
938
 
LPatch5:
939
 
        movb    %al,3(%edi)
940
 
Lp4:
941
 
        addl    tstep,%edx
942
 
        sbbl    %eax,%eax
943
 
        addl    lzistepx,%ebp
944
 
        adcl    $0,%ebp
945
 
        addl    C(a_sstepxfrac),%ebx
946
 
        adcl    advancetable+4(,%eax,4),%esi
947
 
 
948
 
LDraw4:
949
 
        cmpw    8(%ecx),%bp
950
 
        jl              Lp5
951
 
        xorl    %eax,%eax
952
 
        movb    %dh,%ah
953
 
        movb    (%esi),%al
954
 
        movw    %bp,8(%ecx)
955
 
        movb    0x12345678(%eax),%al
956
 
LPatch4:
957
 
        movb    %al,4(%edi)
958
 
Lp5:
959
 
        addl    tstep,%edx
960
 
        sbbl    %eax,%eax
961
 
        addl    lzistepx,%ebp
962
 
        adcl    $0,%ebp
963
 
        addl    C(a_sstepxfrac),%ebx
964
 
        adcl    advancetable+4(,%eax,4),%esi
965
 
 
966
 
LDraw3:
967
 
        cmpw    10(%ecx),%bp
968
 
        jl              Lp6
969
 
        xorl    %eax,%eax
970
 
        movb    %dh,%ah
971
 
        movb    (%esi),%al
972
 
        movw    %bp,10(%ecx)
973
 
        movb    0x12345678(%eax),%al
974
 
LPatch3:
975
 
        movb    %al,5(%edi)
976
 
Lp6:
977
 
        addl    tstep,%edx
978
 
        sbbl    %eax,%eax
979
 
        addl    lzistepx,%ebp
980
 
        adcl    $0,%ebp
981
 
        addl    C(a_sstepxfrac),%ebx
982
 
        adcl    advancetable+4(,%eax,4),%esi
983
 
 
984
 
LDraw2:
985
 
        cmpw    12(%ecx),%bp
986
 
        jl              Lp7
987
 
        xorl    %eax,%eax
988
 
        movb    %dh,%ah
989
 
        movb    (%esi),%al
990
 
        movw    %bp,12(%ecx)
991
 
        movb    0x12345678(%eax),%al
992
 
LPatch2:
993
 
        movb    %al,6(%edi)
994
 
Lp7:
995
 
        addl    tstep,%edx
996
 
        sbbl    %eax,%eax
997
 
        addl    lzistepx,%ebp
998
 
        adcl    $0,%ebp
999
 
        addl    C(a_sstepxfrac),%ebx
1000
 
        adcl    advancetable+4(,%eax,4),%esi
1001
 
 
1002
 
LDraw1:
1003
 
        cmpw    14(%ecx),%bp
1004
 
        jl              Lp8
1005
 
        xorl    %eax,%eax
1006
 
        movb    %dh,%ah
1007
 
        movb    (%esi),%al
1008
 
        movw    %bp,14(%ecx)
1009
 
        movb    0x12345678(%eax),%al
1010
 
LPatch1:
1011
 
        movb    %al,7(%edi)
1012
 
Lp8:
1013
 
        addl    tstep,%edx
1014
 
        sbbl    %eax,%eax
1015
 
        addl    lzistepx,%ebp
1016
 
        adcl    $0,%ebp
1017
 
        addl    C(a_sstepxfrac),%ebx
1018
 
        adcl    advancetable+4(,%eax,4),%esi
1019
 
 
1020
 
        addl    $8,%edi
1021
 
        addl    $16,%ecx
1022
 
 
1023
 
        decw    %bx
1024
 
        jnz             LDrawLoop
1025
 
 
1026
 
        popl    %esi                            // restore spans pointer
1027
 
LNextSpan:
1028
 
        addl    $(spanpackage_t_size),%esi      // point to next span
1029
 
LNextSpanESISet:
1030
 
        movl    spanpackage_t_count(%esi),%edx
1031
 
        cmpl    $-999999,%edx           // any more spans?
1032
 
        jnz             LSpanLoop                       // yes
1033
 
 
1034
 
        popl    %edi
1035
 
        popl    %ebp                            // restore the caller's stack frame
1036
 
        popl    %ebx                            // restore register variables
1037
 
        popl    %esi
1038
 
        ret
1039
 
 
1040
 
 
1041
 
// draw a one-long span
1042
 
 
1043
 
LExactlyOneLong:
1044
 
 
1045
 
        movl    spanpackage_t_pz(%esi),%ecx
1046
 
        movl    spanpackage_t_zi(%esi),%ebp
1047
 
 
1048
 
        rorl    $16,%ebp        // put high 16 bits of 1/z in low word
1049
 
        movl    spanpackage_t_ptex(%esi),%ebx
1050
 
 
1051
 
        cmpw    (%ecx),%bp
1052
 
        jl              LNextSpan
1053
 
        xorl    %eax,%eax
1054
 
        movl    spanpackage_t_pdest(%esi),%edi
1055
 
        movb    spanpackage_t_light+1(%esi),%ah
1056
 
        addl    $(spanpackage_t_size),%esi      // point to next span
1057
 
        movb    (%ebx),%al
1058
 
        movw    %bp,(%ecx)
1059
 
        movb    0x12345678(%eax),%al
1060
 
LPatch9:
1061
 
        movb    %al,(%edi)
1062
 
 
1063
 
        jmp             LNextSpanESISet
1064
 
 
1065
 
.globl C(D_PolysetAff8End)
1066
 
C(D_PolysetAff8End):
1067
 
 
1068
 
 
1069
 
//----------------------------------------------------------------------
// D_Aff8Patch
//
// Self-modifying-code helper for the span drawer above. It stores its
// single stack argument into the four bytes that end at each of the
// labels LPatch1..LPatch9. Every one of those labels directly follows a
// "movb 0x12345678(%eax),%al" instruction, so the store replaces that
// instruction's trailing 32-bit displacement (the 0x12345678
// placeholder) with the argument.
//
// in:   one 32-bit stack argument at pcolormap(%esp)
//       NOTE(review): presumably the base address of the colormap the
//       span loops index with %eax - confirm against the caller.
// uses: %eax; no registers are saved because no others are touched
// NOTE(review): writing into the instruction stream requires the code
// to be writable at run time - confirm how the platform code arranges it.
//----------------------------------------------------------------------
// offset of the argument: just above the return address (nothing is pushed)
#define pcolormap               4
1070
 
 
1071
 
.globl C(D_Aff8Patch)
1072
 
C(D_Aff8Patch):
1073
 
        movl    pcolormap(%esp),%eax            // value to patch in
1074
 
        movl    %eax,LPatch1-4                  // overwrite the 4 bytes ending at LPatch1
1075
 
        movl    %eax,LPatch2-4
1076
 
        movl    %eax,LPatch3-4
1077
 
        movl    %eax,LPatch4-4
1078
 
        movl    %eax,LPatch5-4
1079
 
        movl    %eax,LPatch6-4
1080
 
        movl    %eax,LPatch7-4
1081
 
        movl    %eax,LPatch8-4
1082
 
        movl    %eax,LPatch9-4                  // the one-pixel span path (LExactlyOneLong)
1083
 
 
1084
 
        ret
1085
 
 
1086
 
 
1087
 
//----------------------------------------------------------------------
1088
 
// Alias model polygon dispatching code, combined with subdivided affine
1089
 
// triangle drawing code
1090
 
//----------------------------------------------------------------------
1091
 
 
1092
 
.globl C(D_PolysetDraw)
1093
 
C(D_PolysetDraw):
1094
 
 
1095
 
//      spanpackage_t   spans[DPS_MAXSPANS + 1 +
1096
 
//                      ((CACHE_SIZE - 1) / sizeof(spanpackage_t)) + 1];
1097
 
//                                              // one extra because of cache line pretouching
1098
 
//
1099
 
//      a_spans = (spanpackage_t *)
1100
 
//                      (((long)&spans[0] + CACHE_SIZE - 1) & ~(CACHE_SIZE - 1));
1101
 
        subl    $(SPAN_SIZE),%esp
1102
 
        movl    %esp,%eax
1103
 
        addl    $(CACHE_SIZE - 1),%eax
1104
 
        andl    $(~(CACHE_SIZE - 1)),%eax
1105
 
        movl    %eax,C(a_spans)
1106
 
 
1107
 
//      if (r_affinetridesc.drawtype)
1108
 
//              D_DrawSubdiv ();
1109
 
//      else
1110
 
//              D_DrawNonSubdiv ();
1111
 
        movl    C(r_affinetridesc)+atd_drawtype,%eax
1112
 
        testl   %eax,%eax
1113
 
        jz              C(D_DrawNonSubdiv)
1114
 
 
1115
 
        pushl   %ebp                            // preserve caller stack frame pointer
1116
 
 
1117
 
//      lnumtriangles = r_affinetridesc.numtriangles;
1118
 
        movl    C(r_affinetridesc)+atd_numtriangles,%ebp
1119
 
 
1120
 
        pushl   %esi                            // preserve register variables
1121
 
        shll    $4,%ebp
1122
 
 
1123
 
        pushl   %ebx
1124
 
//      ptri = r_affinetridesc.ptriangles;
1125
 
        movl    C(r_affinetridesc)+atd_ptriangles,%ebx
1126
 
 
1127
 
        pushl   %edi
1128
 
 
1129
 
//      mtriangle_t             *ptri;
1130
 
//      finalvert_t             *pfv, *index0, *index1, *index2;
1131
 
//      int                             i;
1132
 
//      int                             lnumtriangles;
1133
 
//      int                             s0, s1, s2;
1134
 
 
1135
 
//      pfv = r_affinetridesc.pfinalverts;
1136
 
        movl    C(r_affinetridesc)+atd_pfinalverts,%edi
1137
 
 
1138
 
//      for (i=0 ; i<lnumtriangles ; i++)
1139
 
//      {
1140
 
 
1141
 
Llooptop:
1142
 
 
1143
 
//              index0 = pfv + ptri[i].vertindex[0];
1144
 
//              index1 = pfv + ptri[i].vertindex[1];
1145
 
//              index2 = pfv + ptri[i].vertindex[2];
1146
 
        movl    mtri_vertindex-16+0(%ebx,%ebp,),%ecx
1147
 
        movl    mtri_vertindex-16+4(%ebx,%ebp,),%esi
1148
 
 
1149
 
        shll    $(fv_shift),%ecx
1150
 
        movl    mtri_vertindex-16+8(%ebx,%ebp,),%edx
1151
 
 
1152
 
        shll    $(fv_shift),%esi
1153
 
        addl    %edi,%ecx
1154
 
 
1155
 
        shll    $(fv_shift),%edx
1156
 
        addl    %edi,%esi
1157
 
 
1158
 
        addl    %edi,%edx
1159
 
 
1160
 
//              if (((index0->v[1]-index1->v[1]) *
1161
 
//                              (index0->v[0]-index2->v[0]) -
1162
 
//                              (index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1])) >= 0)
1163
 
//              {
1164
 
//                      continue;
1165
 
//              }
1166
 
//
1167
 
//              d_pcolormap = &((byte *)acolormap)[index0->v[4] & 0xFF00];
1168
 
        fildl   fv_v+4(%ecx)    // i0v1
1169
 
        fildl   fv_v+4(%esi)    // i1v1 | i0v1
1170
 
        fildl   fv_v+0(%ecx)    // i0v0 | i1v1 | i0v1
1171
 
        fildl   fv_v+0(%edx)    // i2v0 | i0v0 | i1v1 | i0v1
1172
 
        fxch    %st(2)                  // i1v1 | i0v0 | i2v0 | i0v1
1173
 
        fsubr   %st(3),%st(0)   // i0v1-i1v1 | i0v0 | i2v0 | i0v1
1174
 
        fildl   fv_v+0(%esi)    // i1v0 | i0v1-i1v1 | i0v0 | i2v0 | i0v1
1175
 
        fxch    %st(2)                  // i0v0 | i0v1-i1v1 | i1v0 | i2v0 | i0v1
1176
 
        fsub    %st(0),%st(3)   // i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0 | i0v1
1177
 
        fildl   fv_v+4(%edx)    // i2v1 | i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
1178
 
        fxch    %st(1)                  // i0v0 | i2v1 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
1179
 
        fsubp   %st(0),%st(3)   // i2v1 | i0v1-i1v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
1180
 
        fxch    %st(1)                  // i0v1-i1v1 | i2v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
1181
 
        fmulp   %st(0),%st(3)   // i2v1 | i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1
1182
 
        fsubrp  %st(0),%st(3)   // i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1-i2v1
1183
 
        movl    fv_v+16(%ecx),%eax
1184
 
        andl    $0xFF00,%eax
1185
 
        fmulp   %st(0),%st(2)   // i0v1-i1v1*i0v0-i2v0 | i0v0-i1v0*i0v1-i2v1
1186
 
        addl    C(acolormap),%eax
1187
 
        fsubp   %st(0),%st(1)   // (i0v1-i1v1)*(i0v0-i2v0)-(i0v0-i1v0)*(i0v1-i2v1)
1188
 
        movl    %eax,C(d_pcolormap)
1189
 
// Sign test on the raw bits of the single-precision result, done in the
// integer unit. The subtraction borrows (sets CF) for every bit pattern
// below 0x80000001, i.e. for any value with the sign bit clear and for
// -0.0; only strictly negative values fall through and get drawn. This
// is the "... >= 0) continue;" of the C code above.
        fstps   Ltemp                           // store the result as a 32-bit float
1190
 
        movl    Ltemp,%eax                      // reload its bit pattern as an integer
1191
 
        subl    $0x80000001,%eax                // CF = (bits < 0x80000001), unsigned
1192
 
        jc              Lskip                   // result >= 0: skip this triangle
1193
 
 
1194
 
//              if (ptri[i].facesfront)
1195
 
//              {
1196
 
//                      D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
1197
 
        movl    mtri_facesfront-16(%ebx,%ebp,),%eax
1198
 
        testl   %eax,%eax
1199
 
        jz              Lfacesback
1200
 
 
1201
 
        pushl   %edx
1202
 
        pushl   %esi
1203
 
        pushl   %ecx
1204
 
        call    C(D_PolysetRecursiveTriangle)
1205
 
 
1206
 
        subl    $16,%ebp
1207
 
        jnz             Llooptop
1208
 
        jmp             Ldone2
1209
 
 
1210
 
//              }
1211
 
//              else
1212
 
//              {
1213
 
Lfacesback:
1214
 
 
1215
 
//                      s0 = index0->v[2];
1216
 
//                      s1 = index1->v[2];
1217
 
//                      s2 = index2->v[2];
1218
 
        movl    fv_v+8(%ecx),%eax
1219
 
        pushl   %eax
1220
 
        movl    fv_v+8(%esi),%eax
1221
 
        pushl   %eax
1222
 
        movl    fv_v+8(%edx),%eax
1223
 
        pushl   %eax
1224
 
        pushl   %ecx
1225
 
        pushl   %edx
1226
 
 
1227
 
//                      if (index0->flags & ALIAS_ONSEAM)
1228
 
//                              index0->v[2] += r_affinetridesc.seamfixupX16;
1229
 
        movl    C(r_affinetridesc)+atd_seamfixupX16,%eax
1230
 
        testl   $(ALIAS_ONSEAM),fv_flags(%ecx)
1231
 
        jz              Lp11
1232
 
        addl    %eax,fv_v+8(%ecx)
1233
 
Lp11:
1234
 
 
1235
 
//                      if (index1->flags & ALIAS_ONSEAM)
1236
 
//                              index1->v[2] += r_affinetridesc.seamfixupX16;
1237
 
        testl   $(ALIAS_ONSEAM),fv_flags(%esi)
1238
 
        jz              Lp12
1239
 
        addl    %eax,fv_v+8(%esi)
1240
 
Lp12:
1241
 
 
1242
 
//                      if (index2->flags & ALIAS_ONSEAM)
1243
 
//                              index2->v[2] += r_affinetridesc.seamfixupX16;
1244
 
        testl   $(ALIAS_ONSEAM),fv_flags(%edx)
1245
 
        jz              Lp13
1246
 
        addl    %eax,fv_v+8(%edx)
1247
 
Lp13:
1248
 
 
1249
 
//                      D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
1250
 
        pushl   %edx
1251
 
        pushl   %esi
1252
 
        pushl   %ecx
1253
 
        call    C(D_PolysetRecursiveTriangle)
1254
 
 
1255
 
//                      index0->v[2] = s0;
1256
 
//                      index1->v[2] = s1;
1257
 
//                      index2->v[2] = s2;
1258
 
        popl    %edx
1259
 
        popl    %ecx
1260
 
        popl    %eax
1261
 
        movl    %eax,fv_v+8(%edx)
1262
 
        popl    %eax
1263
 
        movl    %eax,fv_v+8(%esi)
1264
 
        popl    %eax
1265
 
        movl    %eax,fv_v+8(%ecx)
1266
 
 
1267
 
//              }
1268
 
//      }
1269
 
Lskip:
1270
 
        subl    $16,%ebp
1271
 
        jnz             Llooptop
1272
 
 
1273
 
Ldone2:
1274
 
        popl    %edi                            // restore the caller's stack frame
1275
 
        popl    %ebx
1276
 
        popl    %esi                            // restore register variables
1277
 
        popl    %ebp
1278
 
 
1279
 
        addl    $(SPAN_SIZE),%esp
1280
 
 
1281
 
        ret
1282
 
 
1283
 
 
1284
 
//----------------------------------------------------------------------
// Alias model triangle left-edge scanning code
//
// D_PolysetScanLeftEdge (height)
//
// Emits one spanpackage per scanline for `height` scanlines, stepping
// the edge state by either the "base" or the "extra" step set depending
// on the sign of the running error term.
//
// In:   height      single stack argument (see the offset define below);
//                   only its low 16 bits are used as the loop count
//       globals     d_sfrac, d_tfrac, d_ptex, d_light, d_zi,
//                   d_pedgespanpackage, d_pdest, d_pz, d_aspancount,
//                   errorterm and the *basestep / *extrastep values
// Out:  d_pedgespanpackage, d_pdest, d_pz, d_aspancount and errorterm
//       are updated in memory on every iteration.  The values kept in
//       registers (ptex, sfrac, tfrac, light, zi) are NOT stored back
//       to their globals by this routine.
// Regs: %ebp, %esi, %edi, %ebx are saved and restored; %eax, %ecx, %edx
//       are clobbered.
//
// NOTE(review): the count shares %ecx with sfrac, and the sfrac/tfrac
// steps are added as full 32-bit values whose carry-out drives the ptex
// update.  That presumably relies on the fractions and their steps
// living in the high 16 bits -- confirm against the C caller.
//----------------------------------------------------------------------

// offset of the argument from %esp after the four register pushes below
// (4 bytes of return address + 16 bytes of saved registers)
#define height  4+16

.globl C(D_PolysetScanLeftEdge)
C(D_PolysetScanLeftEdge):
        pushl   %ebp                            // preserve caller stack frame pointer
        pushl   %esi                            // preserve register variables
        pushl   %edi
        pushl   %ebx

        movl    height(%esp),%eax
        movl    C(d_sfrac),%ecx
        andl    $0xFFFF,%eax                    // keep only the low 16 bits of the count
        movl    C(d_ptex),%ebx
        orl     %eax,%ecx                       // pack the count under sfrac
        movl    C(d_pedgespanpackage),%esi
        movl    C(d_tfrac),%edx
        movl    C(d_light),%edi
        movl    C(d_zi),%ebp

// %eax: scratch
// %ebx: d_ptex
// %ecx: d_sfrac in high word, count in low word
// %edx: d_tfrac
// %esi: d_pedgespanpackage, errorterm, scratch alternately
// %edi: d_light
// %ebp: d_zi

//      do
//      {

LScanLoop:

//              d_pedgespanpackage->ptex = ptex;
//              d_pedgespanpackage->pdest = d_pdest;
//              d_pedgespanpackage->pz = d_pz;
//              d_pedgespanpackage->count = d_aspancount;
//              d_pedgespanpackage->light = d_light;
//              d_pedgespanpackage->zi = d_zi;
//              d_pedgespanpackage->sfrac = d_sfrac << 16;
//              d_pedgespanpackage->tfrac = d_tfrac << 16;
        movl    %ebx,spanpackage_t_ptex(%esi)
        movl    C(d_pdest),%eax
        movl    %eax,spanpackage_t_pdest(%esi)
        movl    C(d_pz),%eax
        movl    %eax,spanpackage_t_pz(%esi)
        movl    C(d_aspancount),%eax
        movl    %eax,spanpackage_t_count(%esi)
        movl    %edi,spanpackage_t_light(%esi)
        movl    %ebp,spanpackage_t_zi(%esi)
        movl    %ecx,spanpackage_t_sfrac(%esi)  // low word still carries the loop count
        movl    %edx,spanpackage_t_tfrac(%esi)

// pretouch the next cache line
// (reads one byte of the following spanpackage; the value is discarded)
        movb    spanpackage_t_size(%esi),%al

//              d_pedgespanpackage++;
        addl    $(spanpackage_t_size),%esi
        movl    C(erroradjustup),%eax
        movl    %esi,C(d_pedgespanpackage)      // %esi is reused below, so park the pointer

//              errorterm += erroradjustup;
        movl    C(errorterm),%esi
        addl    %eax,%esi                       // sets SF for the js below
        movl    C(d_pdest),%eax                 // movl leaves the flags untouched

//              if (errorterm >= 0)
//              {
        js      LNoLeftEdgeTurnover

//                      errorterm -= erroradjustdown;
//                      d_pdest += d_pdestextrastep;
        subl    C(erroradjustdown),%esi
        addl    C(d_pdestextrastep),%eax
        movl    %esi,C(errorterm)
        movl    %eax,C(d_pdest)

//                      d_pz += d_pzextrastep;
//                      d_aspancount += d_countextrastep;
//                      d_ptex += d_ptexextrastep;
//                      d_sfrac += d_sfracextrastep;
//                      d_ptex += d_sfrac >> 16;
//                      d_sfrac &= 0xFFFF;
//                      d_tfrac += d_tfracextrastep;
        movl    C(d_pz),%eax
        movl    C(d_aspancount),%esi
        addl    C(d_pzextrastep),%eax
        addl    C(d_sfracextrastep),%ecx
        adcl    C(d_ptexextrastep),%ebx         // carry out of the sfrac add bumps ptex
        addl    C(d_countextrastep),%esi
        movl    %eax,C(d_pz)
        movl    C(d_tfracextrastep),%eax
        movl    %esi,C(d_aspancount)
        addl    %eax,%edx                       // CF is tested by the jnc below

//                      if (d_tfrac & 0x10000)
//                      {
        jnc     LSkip1

//                              d_ptex += r_affinetridesc.skinwidth;
//                              d_tfrac &= 0xFFFF;
        addl    C(r_affinetridesc)+atd_skinwidth,%ebx

//                      }

LSkip1:

//                      d_light += d_lightextrastep;
//                      d_zi += d_ziextrastep;
        addl    C(d_lightextrastep),%edi
        addl    C(d_ziextrastep),%ebp

//              }
        movl    C(d_pedgespanpackage),%esi      // reload the pointer for the next pass
        decl    %ecx
        testl   $0xFFFF,%ecx                    // loop until the packed count hits zero
        jnz     LScanLoop

        popl    %ebx
        popl    %edi
        popl    %esi
        popl    %ebp
        ret

//              else
//              {

LNoLeftEdgeTurnover:
        movl    %esi,C(errorterm)

//                      d_pdest += d_pdestbasestep;
        addl    C(d_pdestbasestep),%eax
        movl    %eax,C(d_pdest)

//                      d_pz += d_pzbasestep;
//                      d_aspancount += ubasestep;
//                      d_ptex += d_ptexbasestep;
//                      d_sfrac += d_sfracbasestep;
//                      d_ptex += d_sfrac >> 16;
//                      d_sfrac &= 0xFFFF;
        movl    C(d_pz),%eax
        movl    C(d_aspancount),%esi
        addl    C(d_pzbasestep),%eax
        addl    C(d_sfracbasestep),%ecx
        adcl    C(d_ptexbasestep),%ebx          // carry out of the sfrac add bumps ptex
        addl    C(ubasestep),%esi
        movl    %eax,C(d_pz)
        movl    %esi,C(d_aspancount)

//                      d_tfrac += d_tfracbasestep;
        movl    C(d_tfracbasestep),%esi
        addl    %esi,%edx                       // CF is tested by the jnc below

//                      if (d_tfrac & 0x10000)
//                      {
        jnc     LSkip2

//                              d_ptex += r_affinetridesc.skinwidth;
//                              d_tfrac &= 0xFFFF;
        addl    C(r_affinetridesc)+atd_skinwidth,%ebx

//                      }

LSkip2:

//                      d_light += d_lightbasestep;
//                      d_zi += d_zibasestep;
        addl    C(d_lightbasestep),%edi
        addl    C(d_zibasestep),%ebp

//              }
//      } while (--height);
        movl    C(d_pedgespanpackage),%esi      // reload the pointer for the next pass
        decl    %ecx
        testl   $0xFFFF,%ecx                    // loop until the packed count hits zero
        jnz     LScanLoop

        popl    %ebx
        popl    %edi
        popl    %esi
        popl    %ebp
        ret
1469
 
 
1470
 
 
1471
 
//----------------------------------------------------------------------
// Alias model vertex drawing code
//
// D_PolysetDrawFinalVerts (fv, numverts)
//
// Plots each final vertex as a single z-buffered, lit pixel.
//
// In:   fv          pointer to the first vertex record (fv_size bytes
//                   each, fields accessed through fv_v+N)
//       numverts    number of records to process; values <= 0 draw
//                   nothing, matching the C reference loop below
//       globals     r_refdef (rd_vrectright / rd_vrectbottom),
//                   zspantable, skintable, acolormap, d_scantable,
//                   d_viewbuffer
// Out:  writes the z-buffer entry and the frame-buffer byte for every
//       vertex that passes the clip and depth tests
// Regs: %ebp, %ebx, %esi, %edi are saved and restored; %eax, %ecx, %edx
//       are clobbered.
//----------------------------------------------------------------------

// argument offsets from %esp as seen after the first two pushes below
// (4 bytes of return address + 8 bytes of saved registers)
#define fv                      4+8
#define numverts        8+8

.globl C(D_PolysetDrawFinalVerts)
C(D_PolysetDrawFinalVerts):
        pushl   %ebp                            // preserve caller stack frame pointer
        pushl   %ebx

//      int             i, z;
//      short   *zbuf;

        movl    numverts(%esp),%ecx             // %ecx: remaining vertex count
        movl    fv(%esp),%ebx                   // %ebx: current vertex

        pushl   %esi                            // preserve register variables
        pushl   %edi

// The loop below is bottom-tested; without this guard a count of zero
// would run the body once and then wrap the counter.
        testl   %ecx,%ecx
        jle     LFVExit

LFVLoop:

//      for (i=0 ; i<numverts ; i++, fv++)
//      {
//      // valid triangle coordinates for filling can include the bottom and
//      // right clip edges, due to the fill rule; these shouldn't be drawn
//              if ((fv->v[0] < r_refdef.vrectright) &&
//                      (fv->v[1] < r_refdef.vrectbottom))
//              {
        movl    fv_v+0(%ebx),%eax               // %eax: x, kept live to the final store
        movl    C(r_refdef)+rd_vrectright,%edx
        cmpl    %edx,%eax
        jge     LNextVert
        movl    fv_v+4(%ebx),%esi               // %esi: y, kept live to the scantable lookup
        movl    C(r_refdef)+rd_vrectbottom,%edx
        cmpl    %edx,%esi
        jge     LNextVert

//                      zbuf = zspantable[fv->v[1]] + fv->v[0];
        movl    C(zspantable)(,%esi,4),%edi

//                      z = fv->v[5]>>16;
        movl    fv_v+20(%ebx),%edx
        shrl    $16,%edx

//                      if (z >= *zbuf)
//                      {
//                              int             pix;
        cmpw    (%edi,%eax,2),%dx               // signed 16-bit compare against the z-buffer
        jl      LNextVert

//                              *zbuf = z;
        movw    %dx,(%edi,%eax,2)

//                              pix = skintable[fv->v[3]>>16][fv->v[2]>>16];
        movl    fv_v+12(%ebx),%edi
        shrl    $16,%edi
        movl    C(skintable)(,%edi,4),%edi      // row pointer
        movl    fv_v+8(%ebx),%edx
        shrl    $16,%edx
        movb    (%edi,%edx),%dl                 // texel; upper bits of %edx are masked below

//                              pix = ((byte *)acolormap)[pix + (fv->v[4] & 0xFF00)];
        movl    fv_v+16(%ebx),%edi
        andl    $0xFF00,%edi
        andl    $0x00FF,%edx                    // isolate the texel byte just loaded
        addl    %edx,%edi
        movl    C(acolormap),%edx
        movb    (%edx,%edi,1),%dl

//                              d_viewbuffer[d_scantable[fv->v[1]] + fv->v[0]] = pix;
        movl    C(d_scantable)(,%esi,4),%edi
        movl    C(d_viewbuffer),%esi
        addl    %eax,%edi
        movb    %dl,(%esi,%edi)

//                      }
//              }
//      }
LNextVert:
        addl    $(fv_size),%ebx
        decl    %ecx
        jnz     LFVLoop

LFVExit:
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
1561
 
 
1562
 
 
1563
 
//----------------------------------------------------------------------
// Alias model non-subdivided polygon dispatching code
//
// not C-callable because of stack buffer cleanup
//
// Walks r_affinetridesc.ptriangles from the last triangle to the first.
// For every triangle whose screen-space d_xdenom is negative it copies
// the three vertices into r_p0 / r_p1 / r_p2, applies the seam fixup to
// the s coordinate when the triangle is not front-facing, stores
// float_1 / d_xdenom back into d_xdenom as a single-precision float,
// and calls D_PolysetSetEdgeTable and D_RasterizeAliasPolySmooth.
//
// In:   r_affinetridesc (numtriangles, ptriangles, pfinalverts,
//       seamfixupX16); a numtriangles <= 0 draws nothing, matching the
//       C reference loop below
// Regs: %ebp, %ebx, %esi, %edi are saved and restored
//
// The epilogue adds SPAN_SIZE to %esp before returning, so this routine
// must be entered with that much extra stack already reserved below the
// return address.
// NOTE(review): presumably the reservation is made by the code that
// jumps here -- confirm at the jump site.
// NOTE(review): %ebp is live across both calls, so the callees are
// assumed to preserve it -- confirm.
//----------------------------------------------------------------------

.globl C(D_DrawNonSubdiv)
C(D_DrawNonSubdiv):
        pushl   %ebp                            // preserve caller stack frame pointer
        movl    C(r_affinetridesc)+atd_numtriangles,%ebp
        pushl   %ebx
        shll    $(mtri_shift),%ebp              // %ebp: byte offset just past the current triangle
        pushl   %esi                            // preserve register variables
        movl    C(r_affinetridesc)+atd_ptriangles,%esi
        pushl   %edi

//      mtriangle_t             *ptri;
//      finalvert_t             *pfv, *index0, *index1, *index2;
//      int                             i;
//      int                             lnumtriangles;

//      pfv = r_affinetridesc.pfinalverts;
//      ptri = r_affinetridesc.ptriangles;
//      lnumtriangles = r_affinetridesc.numtriangles;

// The loop below is bottom-tested; without this guard an empty triangle
// list would index before the array and then wrap the offset.
        testl   %ebp,%ebp
        jle     LNDExit

LNDLoop:

//      for (i=0 ; i<lnumtriangles ; i++, ptri++)
//      {
//              index0 = pfv + ptri->vertindex[0];
//              index1 = pfv + ptri->vertindex[1];
//              index2 = pfv + ptri->vertindex[2];
        movl    C(r_affinetridesc)+atd_pfinalverts,%edi
        movl    mtri_vertindex+0-mtri_size(%esi,%ebp,1),%ecx
        shll    $(fv_shift),%ecx
        movl    mtri_vertindex+4-mtri_size(%esi,%ebp,1),%edx
        shll    $(fv_shift),%edx
        movl    mtri_vertindex+8-mtri_size(%esi,%ebp,1),%ebx
        shll    $(fv_shift),%ebx
        addl    %edi,%ecx                       // %ecx: index0
        addl    %edi,%edx                       // %edx: index1
        addl    %edi,%ebx                       // %ebx: index2

//              d_xdenom = (index0->v[1]-index1->v[1]) *
//                              (index0->v[0]-index2->v[0]) -
//                              (index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1]);
        movl    fv_v+4(%ecx),%eax
        movl    fv_v+0(%ecx),%esi
        subl    fv_v+4(%edx),%eax
        subl    fv_v+0(%ebx),%esi
        imull   %esi,%eax
        movl    fv_v+0(%ecx),%esi
        movl    fv_v+4(%ecx),%edi
        subl    fv_v+0(%edx),%esi
        subl    fv_v+4(%ebx),%edi
        imull   %esi,%edi
        subl    %edi,%eax                       // sets SF for the jns below

//              if (d_xdenom >= 0)
//              {
//                      continue;
        jns     LNextTri

//              }

        movl    %eax,C(d_xdenom)
        fildl   C(d_xdenom)                     // integer d_xdenom -> st(0)

//              r_p0[0] = index0->v[0];         // u
//              r_p0[1] = index0->v[1];         // v
//              r_p0[2] = index0->v[2];         // s
//              r_p0[3] = index0->v[3];         // t
//              r_p0[4] = index0->v[4];         // light
//              r_p0[5] = index0->v[5];         // iz
        movl    fv_v+0(%ecx),%eax
        movl    fv_v+4(%ecx),%esi
        movl    %eax,C(r_p0)+0
        movl    %esi,C(r_p0)+4
        movl    fv_v+8(%ecx),%eax
        movl    fv_v+12(%ecx),%esi
        movl    %eax,C(r_p0)+8
        movl    %esi,C(r_p0)+12
        movl    fv_v+16(%ecx),%eax
        movl    fv_v+20(%ecx),%esi
        movl    %eax,C(r_p0)+16
        movl    %esi,C(r_p0)+20

        fdivrs  float_1                         // st(0) = float_1 / d_xdenom

//              r_p1[0] = index1->v[0];
//              r_p1[1] = index1->v[1];
//              r_p1[2] = index1->v[2];
//              r_p1[3] = index1->v[3];
//              r_p1[4] = index1->v[4];
//              r_p1[5] = index1->v[5];
        movl    fv_v+0(%edx),%eax
        movl    fv_v+4(%edx),%esi
        movl    %eax,C(r_p1)+0
        movl    %esi,C(r_p1)+4
        movl    fv_v+8(%edx),%eax
        movl    fv_v+12(%edx),%esi
        movl    %eax,C(r_p1)+8
        movl    %esi,C(r_p1)+12
        movl    fv_v+16(%edx),%eax
        movl    fv_v+20(%edx),%esi
        movl    %eax,C(r_p1)+16
        movl    %esi,C(r_p1)+20

//              r_p2[0] = index2->v[0];
//              r_p2[1] = index2->v[1];
//              r_p2[2] = index2->v[2];
//              r_p2[3] = index2->v[3];
//              r_p2[4] = index2->v[4];
//              r_p2[5] = index2->v[5];
        movl    fv_v+0(%ebx),%eax
        movl    fv_v+4(%ebx),%esi
        movl    %eax,C(r_p2)+0
        movl    %esi,C(r_p2)+4
        movl    fv_v+8(%ebx),%eax
        movl    fv_v+12(%ebx),%esi
        movl    %eax,C(r_p2)+8
        movl    %esi,C(r_p2)+12
        movl    fv_v+16(%ebx),%eax
        movl    fv_v+20(%ebx),%esi
        movl    %eax,C(r_p2)+16
        movl    C(r_affinetridesc)+atd_ptriangles,%edi
        movl    %esi,C(r_p2)+20
        movl    mtri_facesfront-mtri_size(%edi,%ebp,1),%eax

//              if (!ptri->facesfront)
//              {
        testl   %eax,%eax
        jnz     LFacesFront

//                      if (index0->flags & ALIAS_ONSEAM)
//                              r_p0[2] += r_affinetridesc.seamfixupX16;
        movl    fv_flags(%ecx),%eax
        movl    fv_flags(%edx),%esi
        movl    fv_flags(%ebx),%edi
        testl   $(ALIAS_ONSEAM),%eax
        movl    C(r_affinetridesc)+atd_seamfixupX16,%eax        // movl keeps ZF for the jz
        jz      LOnseamDone0
        addl    %eax,C(r_p0)+8
LOnseamDone0:

//                      if (index1->flags & ALIAS_ONSEAM)
//                              r_p1[2] += r_affinetridesc.seamfixupX16;
        testl   $(ALIAS_ONSEAM),%esi
        jz      LOnseamDone1
        addl    %eax,C(r_p1)+8
LOnseamDone1:

//                      if (index2->flags & ALIAS_ONSEAM)
//                              r_p2[2] += r_affinetridesc.seamfixupX16;
        testl   $(ALIAS_ONSEAM),%edi
        jz      LOnseamDone2
        addl    %eax,C(r_p2)+8
LOnseamDone2:

//              }

LFacesFront:

        fstps   C(d_xdenom)                     // store the reciprocal as a float and pop the FPU stack

//              D_PolysetSetEdgeTable ();
//              D_RasterizeAliasPolySmooth ();
        call    C(D_PolysetSetEdgeTable)
        call    C(D_RasterizeAliasPolySmooth)

LNextTri:
        movl    C(r_affinetridesc)+atd_ptriangles,%esi  // %esi was used as scratch above
        subl    $(mtri_size),%ebp               // step back one triangle
        jnz     LNDLoop
//      }

LNDExit:
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp

        addl    $(SPAN_SIZE),%esp               // release the span buffer reserved before entry

        ret
1747
 
 
1748
 
 
1749
 
#endif  // id386
1750