~ubuntu-branches/ubuntu/precise/openarena/precise

« back to all changes in this revision

Viewing changes to code/unix/matha.s

  • Committer: Bazaar Package Importer
  • Author(s): Bruno "Fuddl" Kleinert
  • Date: 2007-01-20 12:28:09 UTC
  • Revision ID: james.westby@ubuntu.com-20070120122809-2yza5ojt7nqiyiam
Tags: upstream-0.6.0
Import upstream version 0.6.0

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
===========================================================================
 
3
Copyright (C) 1999-2005 Id Software, Inc.
 
4
 
 
5
This file is part of Quake III Arena source code.
 
6
 
 
7
Quake III Arena source code is free software; you can redistribute it
 
8
and/or modify it under the terms of the GNU General Public License as
 
9
published by the Free Software Foundation; either version 2 of the License,
 
10
or (at your option) any later version.
 
11
 
 
12
Quake III Arena source code is distributed in the hope that it will be
 
13
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 
14
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
15
GNU General Public License for more details.
 
16
 
 
17
You should have received a copy of the GNU General Public License
 
18
along with Quake III Arena source code; if not, write to the Free Software
 
19
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
20
===========================================================================
 
21
*/
 
22
//
 
23
// matha.s
 
24
// x86 assembly-language math routines.
 
25
 
 
26
#include "qasm.h"
 
27
 
 
28
 
 
29
#if     id386
 
30
 
 
31
        .data
 
32
 
 
33
        .align  4
 
34
Ljmptab:        .long   Lcase0, Lcase1, Lcase2, Lcase3
 
35
                        .long   Lcase4, Lcase5, Lcase6, Lcase7
 
36
 
 
37
        .text
 
38
 
 
39
// TODO: rounding needed?

// Offset of the single stack argument from %esp at entry
// (the return address occupies the first 4 bytes).
#define val     4

//
// Invert24To16
//
// Returns, in %eax, the 32-bit unsigned quotient of 0x10000000000
// divided by the argument. When the argument is <= 0x100 (signed
// compare) the routine returns 0xFFFFFFFF instead of dividing; that
// guard also keeps the divisor strictly above the high dword of the
// dividend, so divl cannot raise a divide error.
//
// Clobbers: %eax, %ecx, %edx, flags.
//
.globl C(Invert24To16)
C(Invert24To16):
        xorl    %eax,%eax               // low dword of the dividend = 0
        movl    $0x100,%edx             // high dword: %edx:%eax = 0x10000000000
        movl    val(%esp),%ecx          // divisor
        cmpl    %edx,%ecx
        jle     LOutOfRange             // signed: val <= 0x100

        divl    %ecx                    // %eax = quotient (remainder in %edx is ignored)
        ret

LOutOfRange:
        movl    $0xFFFFFFFF,%eax        // saturated result for out-of-range input
        ret
 
59
 
 
60
#if 0

// Stack offsets of the two pointer arguments at entry.
#define in      4
#define out     8

//
// TransformVector  (compiled out by the surrounding #if 0)
//
//   out[0] = in[0]*vright[0] + in[1]*vright[1] + in[2]*vright[2]
//   out[1] = in[0]*vup[0]    + in[1]*vup[1]    + in[2]*vup[2]
//   out[2] = in[0]*vpn[0]    + in[1]*vpn[1]    + in[2]*vpn[2]
//
// All values are single-precision floats. FPU stack comments list
// st(0) first; r/u/p are the running sums for vright/vup/vpn and
// rK/uK/pK the products for component K.
//
        .align 2
.globl C(TransformVector)
C(TransformVector):
        movl    in(%esp),%eax           // %eax = in
        movl    out(%esp),%edx          // %edx = out

        // component 0 seeds the three accumulators
        flds    (%eax)                  // in[0]
        fmuls   C(vright)               // r
        flds    (%eax)                  // in[0] | r
        fmuls   C(vup)                  // u | r
        flds    (%eax)                  // in[0] | u | r
        fmuls   C(vpn)                  // p | u | r

        // component 1
        flds    4(%eax)                 // in[1] | p | u | r
        fmuls   C(vright)+4             // r1 | p | u | r
        flds    4(%eax)                 // in[1] | r1 | p | u | r
        fmuls   C(vup)+4                // u1 | r1 | p | u | r
        flds    4(%eax)                 // in[1] | u1 | r1 | p | u | r
        fmuls   C(vpn)+4                // p1 | u1 | r1 | p | u | r
        fxch    %st(2)                  // r1 | u1 | p1 | p | u | r

        faddp   %st(0),%st(5)           // u1 | p1 | p | u | r+r1
        faddp   %st(0),%st(3)           // p1 | p | u+u1 | r
        faddp   %st(0),%st(1)           // p | u | r

        // component 2
        flds    8(%eax)                 // in[2] | p | u | r
        fmuls   C(vright)+8             // r2 | p | u | r
        flds    8(%eax)                 // in[2] | r2 | p | u | r
        fmuls   C(vup)+8                // u2 | r2 | p | u | r
        flds    8(%eax)                 // in[2] | u2 | r2 | p | u | r
        fmuls   C(vpn)+8                // p2 | u2 | r2 | p | u | r
        fxch    %st(2)                  // r2 | u2 | p2 | p | u | r

        faddp   %st(0),%st(5)           // u2 | p2 | p | u | r+r2
        faddp   %st(0),%st(3)           // p2 | p | u+u2 | r
        faddp   %st(0),%st(1)           // p | u | r

        fstps   8(%edx)                 // out[2] = p
        fstps   4(%edx)                 // out[1] = u
        fstps   (%edx)                  // out[0] = r

        ret

#endif
 
109
 
 
110
// Stack offsets of the three pointer arguments as seen AFTER the
// `pushl %ebx` in the prologue: 4 bytes of saved %ebx plus 4 bytes of
// return address precede the first argument.
#define EMINS   4+4
#define EMAXS   4+8
#define P               4+12

//
// BoxOnPlaneSide
//
// Arguments (on the stack, in order): emins, emaxs, p.
// Computes two dot products of p's normal with box corners chosen by
// the plane's signbits byte and returns in %eax:
//   bit 0 set when dist1 >= p->dist, bit 1 set when dist2 < p->dist.
//
// This entry sequence loads the arguments, validates signbits and
// dispatches through Ljmptab to one of Lcase0..Lcase7 with:
//   %ebx = emaxs, %ecx = emins, %edx = p,
//   FPU stack = p->normal[0] | p->normal[0]
// pl_signbits / pl_normal are structure offsets supplied by an
// included header (presumably qasm.h).
//
        .align 2
.globl C(BoxOnPlaneSide)
C(BoxOnPlaneSide):
        pushl   %ebx                    // %ebx is used below; restored before returning

        movl    P(%esp),%edx
        movl    EMINS(%esp),%ecx
        xorl    %eax,%eax               // clear upper bits so %eax can index the table
        movl    EMAXS(%esp),%ebx
        movb    pl_signbits(%edx),%al
        cmpb    $8,%al
        // Unsigned test: a signed jge would let bytes 0x80-0xFF through
        // (they compare as negative) and index far past the 8-entry table.
        jae     Lerror
        flds    pl_normal(%edx)         // p->normal[0]
        fld     %st(0)                  // p->normal[0] | p->normal[0]
        // bk000422 - warning: missing prefix `*' in absolute indirect address, maybe misassembled!
        // bk001129 - fix from Andrew Henderson, was: Ljmptab(,%eax,4)
        jmp     *Ljmptab(,%eax,4)
 
131
 
 
132
 
 
133
//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
//
// Entered with %ebx = emaxs, %ecx = emins, %edx = p and the FPU stack
// holding n0 | n0. Stack comments list st(0) first; nK = p->normal[K],
// aK = the normal[K] term of dist1, bK = the normal[K] term of dist2.
// The fxch instructions only rearrange the stack; the order is kept
// exactly as originally scheduled.
Lcase0:
        fmuls   (%ebx)                  // a0 = n0*emaxs[0]:  a0 | n0
        flds    pl_normal+4(%edx)       // n1 | a0 | n0
        fxch    %st(2)                  // n0 | a0 | n1
        fmuls   (%ecx)                  // b0 = n0*emins[0]:  b0 | a0 | n1
        fxch    %st(2)                  // n1 | a0 | b0
        fld     %st(0)                  // n1 | n1 | a0 | b0
        fmuls   4(%ebx)                 // a1 = n1*emaxs[1]:  a1 | n1 | a0 | b0
        flds    pl_normal+8(%edx)       // n2 | a1 | n1 | a0 | b0
        fxch    %st(2)                  // n1 | a1 | n2 | a0 | b0
        fmuls   4(%ecx)                 // b1 = n1*emins[1]:  b1 | a1 | n2 | a0 | b0
        fxch    %st(2)                  // n2 | a1 | b1 | a0 | b0
        fld     %st(0)                  // n2 | n2 | a1 | b1 | a0 | b0
        fmuls   8(%ebx)                 // a2 = n2*emaxs[2]:  a2 | n2 | a1 | b1 | a0 | b0
        fxch    %st(5)                  // b0 | n2 | a1 | b1 | a0 | a2
        faddp   %st(0),%st(3)           // n2 | a1 | b0+b1 | a0 | a2
        fmuls   8(%ecx)                 // b2 = n2*emins[2]:  b2 | a1 | b0+b1 | a0 | a2
        fxch    %st(1)                  // a1 | b2 | b0+b1 | a0 | a2
        faddp   %st(0),%st(3)           // b2 | b0+b1 | a0+a1 | a2
        fxch    %st(3)                  // a2 | b0+b1 | a0+a1 | b2
        faddp   %st(0),%st(2)           // b0+b1 | dist1 | b2

        jmp     LSetSides               // LSetSides adds b2 to finish dist2
 
209
 
 
210
//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
//
// Entered with %ebx = emaxs, %ecx = emins, %edx = p and the FPU stack
// holding n0 | n0. Stack comments list st(0) first; nK = p->normal[K],
// aK = the normal[K] term of dist1, bK = the normal[K] term of dist2.
Lcase1:
        fmuls   (%ecx)                  // a0 = n0*emins[0]:  a0 | n0
        flds    pl_normal+4(%edx)       // n1 | a0 | n0
        fxch    %st(2)                  // n0 | a0 | n1
        fmuls   (%ebx)                  // b0 = n0*emaxs[0]:  b0 | a0 | n1
        fxch    %st(2)                  // n1 | a0 | b0
        fld     %st(0)                  // n1 | n1 | a0 | b0
        fmuls   4(%ebx)                 // a1 = n1*emaxs[1]:  a1 | n1 | a0 | b0
        flds    pl_normal+8(%edx)       // n2 | a1 | n1 | a0 | b0
        fxch    %st(2)                  // n1 | a1 | n2 | a0 | b0
        fmuls   4(%ecx)                 // b1 = n1*emins[1]:  b1 | a1 | n2 | a0 | b0
        fxch    %st(2)                  // n2 | a1 | b1 | a0 | b0
        fld     %st(0)                  // n2 | n2 | a1 | b1 | a0 | b0
        fmuls   8(%ebx)                 // a2 = n2*emaxs[2]:  a2 | n2 | a1 | b1 | a0 | b0
        fxch    %st(5)                  // b0 | n2 | a1 | b1 | a0 | a2
        faddp   %st(0),%st(3)           // n2 | a1 | b0+b1 | a0 | a2
        fmuls   8(%ecx)                 // b2 = n2*emins[2]:  b2 | a1 | b0+b1 | a0 | a2
        fxch    %st(1)                  // a1 | b2 | b0+b1 | a0 | a2
        faddp   %st(0),%st(3)           // b2 | b0+b1 | a0+a1 | a2
        fxch    %st(3)                  // a2 | b0+b1 | a0+a1 | b2
        faddp   %st(0),%st(2)           // b0+b1 | dist1 | b2

        jmp     LSetSides               // LSetSides adds b2 to finish dist2
 
235
 
 
236
//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
//
// Entered with %ebx = emaxs, %ecx = emins, %edx = p and the FPU stack
// holding n0 | n0. Stack comments list st(0) first; nK = p->normal[K],
// aK = the normal[K] term of dist1, bK = the normal[K] term of dist2.
Lcase2:
        fmuls   (%ebx)                  // a0 = n0*emaxs[0]:  a0 | n0
        flds    pl_normal+4(%edx)       // n1 | a0 | n0
        fxch    %st(2)                  // n0 | a0 | n1
        fmuls   (%ecx)                  // b0 = n0*emins[0]:  b0 | a0 | n1
        fxch    %st(2)                  // n1 | a0 | b0
        fld     %st(0)                  // n1 | n1 | a0 | b0
        fmuls   4(%ecx)                 // a1 = n1*emins[1]:  a1 | n1 | a0 | b0
        flds    pl_normal+8(%edx)       // n2 | a1 | n1 | a0 | b0
        fxch    %st(2)                  // n1 | a1 | n2 | a0 | b0
        fmuls   4(%ebx)                 // b1 = n1*emaxs[1]:  b1 | a1 | n2 | a0 | b0
        fxch    %st(2)                  // n2 | a1 | b1 | a0 | b0
        fld     %st(0)                  // n2 | n2 | a1 | b1 | a0 | b0
        fmuls   8(%ebx)                 // a2 = n2*emaxs[2]:  a2 | n2 | a1 | b1 | a0 | b0
        fxch    %st(5)                  // b0 | n2 | a1 | b1 | a0 | a2
        faddp   %st(0),%st(3)           // n2 | a1 | b0+b1 | a0 | a2
        fmuls   8(%ecx)                 // b2 = n2*emins[2]:  b2 | a1 | b0+b1 | a0 | a2
        fxch    %st(1)                  // a1 | b2 | b0+b1 | a0 | a2
        faddp   %st(0),%st(3)           // b2 | b0+b1 | a0+a1 | a2
        fxch    %st(3)                  // a2 | b0+b1 | a0+a1 | b2
        faddp   %st(0),%st(2)           // b0+b1 | dist1 | b2

        jmp     LSetSides               // LSetSides adds b2 to finish dist2
 
261
 
 
262
//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
//
// Entered with %ebx = emaxs, %ecx = emins, %edx = p and the FPU stack
// holding n0 | n0. Stack comments list st(0) first; nK = p->normal[K],
// aK = the normal[K] term of dist1, bK = the normal[K] term of dist2.
Lcase3:
        fmuls   (%ecx)                  // a0 = n0*emins[0]:  a0 | n0
        flds    pl_normal+4(%edx)       // n1 | a0 | n0
        fxch    %st(2)                  // n0 | a0 | n1
        fmuls   (%ebx)                  // b0 = n0*emaxs[0]:  b0 | a0 | n1
        fxch    %st(2)                  // n1 | a0 | b0
        fld     %st(0)                  // n1 | n1 | a0 | b0
        fmuls   4(%ecx)                 // a1 = n1*emins[1]:  a1 | n1 | a0 | b0
        flds    pl_normal+8(%edx)       // n2 | a1 | n1 | a0 | b0
        fxch    %st(2)                  // n1 | a1 | n2 | a0 | b0
        fmuls   4(%ebx)                 // b1 = n1*emaxs[1]:  b1 | a1 | n2 | a0 | b0
        fxch    %st(2)                  // n2 | a1 | b1 | a0 | b0
        fld     %st(0)                  // n2 | n2 | a1 | b1 | a0 | b0
        fmuls   8(%ebx)                 // a2 = n2*emaxs[2]:  a2 | n2 | a1 | b1 | a0 | b0
        fxch    %st(5)                  // b0 | n2 | a1 | b1 | a0 | a2
        faddp   %st(0),%st(3)           // n2 | a1 | b0+b1 | a0 | a2
        fmuls   8(%ecx)                 // b2 = n2*emins[2]:  b2 | a1 | b0+b1 | a0 | a2
        fxch    %st(1)                  // a1 | b2 | b0+b1 | a0 | a2
        faddp   %st(0),%st(3)           // b2 | b0+b1 | a0+a1 | a2
        fxch    %st(3)                  // a2 | b0+b1 | a0+a1 | b2
        faddp   %st(0),%st(2)           // b0+b1 | dist1 | b2

        jmp     LSetSides               // LSetSides adds b2 to finish dist2
 
287
 
 
288
//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
//
// Entered with %ebx = emaxs, %ecx = emins, %edx = p and the FPU stack
// holding n0 | n0. Stack comments list st(0) first; nK = p->normal[K],
// aK = the normal[K] term of dist1, bK = the normal[K] term of dist2.
Lcase4:
        fmuls   (%ebx)                  // a0 = n0*emaxs[0]:  a0 | n0
        flds    pl_normal+4(%edx)       // n1 | a0 | n0
        fxch    %st(2)                  // n0 | a0 | n1
        fmuls   (%ecx)                  // b0 = n0*emins[0]:  b0 | a0 | n1
        fxch    %st(2)                  // n1 | a0 | b0
        fld     %st(0)                  // n1 | n1 | a0 | b0
        fmuls   4(%ebx)                 // a1 = n1*emaxs[1]:  a1 | n1 | a0 | b0
        flds    pl_normal+8(%edx)       // n2 | a1 | n1 | a0 | b0
        fxch    %st(2)                  // n1 | a1 | n2 | a0 | b0
        fmuls   4(%ecx)                 // b1 = n1*emins[1]:  b1 | a1 | n2 | a0 | b0
        fxch    %st(2)                  // n2 | a1 | b1 | a0 | b0
        fld     %st(0)                  // n2 | n2 | a1 | b1 | a0 | b0
        fmuls   8(%ecx)                 // a2 = n2*emins[2]:  a2 | n2 | a1 | b1 | a0 | b0
        fxch    %st(5)                  // b0 | n2 | a1 | b1 | a0 | a2
        faddp   %st(0),%st(3)           // n2 | a1 | b0+b1 | a0 | a2
        fmuls   8(%ebx)                 // b2 = n2*emaxs[2]:  b2 | a1 | b0+b1 | a0 | a2
        fxch    %st(1)                  // a1 | b2 | b0+b1 | a0 | a2
        faddp   %st(0),%st(3)           // b2 | b0+b1 | a0+a1 | a2
        fxch    %st(3)                  // a2 | b0+b1 | a0+a1 | b2
        faddp   %st(0),%st(2)           // b0+b1 | dist1 | b2

        jmp     LSetSides               // LSetSides adds b2 to finish dist2
 
313
 
 
314
//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
//
// Entered with %ebx = emaxs, %ecx = emins, %edx = p and the FPU stack
// holding n0 | n0. Stack comments list st(0) first; nK = p->normal[K],
// aK = the normal[K] term of dist1, bK = the normal[K] term of dist2.
Lcase5:
        fmuls   (%ecx)                  // a0 = n0*emins[0]:  a0 | n0
        flds    pl_normal+4(%edx)       // n1 | a0 | n0
        fxch    %st(2)                  // n0 | a0 | n1
        fmuls   (%ebx)                  // b0 = n0*emaxs[0]:  b0 | a0 | n1
        fxch    %st(2)                  // n1 | a0 | b0
        fld     %st(0)                  // n1 | n1 | a0 | b0
        fmuls   4(%ebx)                 // a1 = n1*emaxs[1]:  a1 | n1 | a0 | b0
        flds    pl_normal+8(%edx)       // n2 | a1 | n1 | a0 | b0
        fxch    %st(2)                  // n1 | a1 | n2 | a0 | b0
        fmuls   4(%ecx)                 // b1 = n1*emins[1]:  b1 | a1 | n2 | a0 | b0
        fxch    %st(2)                  // n2 | a1 | b1 | a0 | b0
        fld     %st(0)                  // n2 | n2 | a1 | b1 | a0 | b0
        fmuls   8(%ecx)                 // a2 = n2*emins[2]:  a2 | n2 | a1 | b1 | a0 | b0
        fxch    %st(5)                  // b0 | n2 | a1 | b1 | a0 | a2
        faddp   %st(0),%st(3)           // n2 | a1 | b0+b1 | a0 | a2
        fmuls   8(%ebx)                 // b2 = n2*emaxs[2]:  b2 | a1 | b0+b1 | a0 | a2
        fxch    %st(1)                  // a1 | b2 | b0+b1 | a0 | a2
        faddp   %st(0),%st(3)           // b2 | b0+b1 | a0+a1 | a2
        fxch    %st(3)                  // a2 | b0+b1 | a0+a1 | b2
        faddp   %st(0),%st(2)           // b0+b1 | dist1 | b2

        jmp     LSetSides               // LSetSides adds b2 to finish dist2
 
339
 
 
340
//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
//
// Entered with %ebx = emaxs, %ecx = emins, %edx = p and the FPU stack
// holding n0 | n0. Stack comments list st(0) first; nK = p->normal[K],
// aK = the normal[K] term of dist1, bK = the normal[K] term of dist2.
Lcase6:
        fmuls   (%ebx)                  // a0 = n0*emaxs[0]:  a0 | n0
        flds    pl_normal+4(%edx)       // n1 | a0 | n0
        fxch    %st(2)                  // n0 | a0 | n1
        fmuls   (%ecx)                  // b0 = n0*emins[0]:  b0 | a0 | n1
        fxch    %st(2)                  // n1 | a0 | b0
        fld     %st(0)                  // n1 | n1 | a0 | b0
        fmuls   4(%ecx)                 // a1 = n1*emins[1]:  a1 | n1 | a0 | b0
        flds    pl_normal+8(%edx)       // n2 | a1 | n1 | a0 | b0
        fxch    %st(2)                  // n1 | a1 | n2 | a0 | b0
        fmuls   4(%ebx)                 // b1 = n1*emaxs[1]:  b1 | a1 | n2 | a0 | b0
        fxch    %st(2)                  // n2 | a1 | b1 | a0 | b0
        fld     %st(0)                  // n2 | n2 | a1 | b1 | a0 | b0
        fmuls   8(%ecx)                 // a2 = n2*emins[2]:  a2 | n2 | a1 | b1 | a0 | b0
        fxch    %st(5)                  // b0 | n2 | a1 | b1 | a0 | a2
        faddp   %st(0),%st(3)           // n2 | a1 | b0+b1 | a0 | a2
        fmuls   8(%ebx)                 // b2 = n2*emaxs[2]:  b2 | a1 | b0+b1 | a0 | a2
        fxch    %st(1)                  // a1 | b2 | b0+b1 | a0 | a2
        faddp   %st(0),%st(3)           // b2 | b0+b1 | a0+a1 | a2
        fxch    %st(3)                  // a2 | b0+b1 | a0+a1 | b2
        faddp   %st(0),%st(2)           // b0+b1 | dist1 | b2

        jmp     LSetSides               // LSetSides adds b2 to finish dist2
 
365
 
 
366
//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
//
// Entered with %ebx = emaxs, %ecx = emins, %edx = p and the FPU stack
// holding n0 | n0. Stack comments list st(0) first; nK = p->normal[K],
// aK = the normal[K] term of dist1, bK = the normal[K] term of dist2.
// Unlike the other cases there is no jmp at the end: execution falls
// through into LSetSides, which must stay directly after this block.
Lcase7:
        fmuls   (%ecx)                  // a0 = n0*emins[0]:  a0 | n0
        flds    pl_normal+4(%edx)       // n1 | a0 | n0
        fxch    %st(2)                  // n0 | a0 | n1
        fmuls   (%ebx)                  // b0 = n0*emaxs[0]:  b0 | a0 | n1
        fxch    %st(2)                  // n1 | a0 | b0
        fld     %st(0)                  // n1 | n1 | a0 | b0
        fmuls   4(%ecx)                 // a1 = n1*emins[1]:  a1 | n1 | a0 | b0
        flds    pl_normal+8(%edx)       // n2 | a1 | n1 | a0 | b0
        fxch    %st(2)                  // n1 | a1 | n2 | a0 | b0
        fmuls   4(%ebx)                 // b1 = n1*emaxs[1]:  b1 | a1 | n2 | a0 | b0
        fxch    %st(2)                  // n2 | a1 | b1 | a0 | b0
        fld     %st(0)                  // n2 | n2 | a1 | b1 | a0 | b0
        fmuls   8(%ecx)                 // a2 = n2*emins[2]:  a2 | n2 | a1 | b1 | a0 | b0
        fxch    %st(5)                  // b0 | n2 | a1 | b1 | a0 | a2
        faddp   %st(0),%st(3)           // n2 | a1 | b0+b1 | a0 | a2
        fmuls   8(%ebx)                 // b2 = n2*emaxs[2]:  b2 | a1 | b0+b1 | a0 | a2
        fxch    %st(1)                  // a1 | b2 | b0+b1 | a0 | a2
        faddp   %st(0),%st(3)           // b2 | b0+b1 | a0+a1 | a2
        fxch    %st(3)                  // a2 | b0+b1 | a0+a1 | b2
        faddp   %st(0),%st(2)           // b0+b1 | dist1 | b2
 
389
 
 
390
LSetSides:

// Common tail of BoxOnPlaneSide. Every LcaseN arrives here with
// %edx = p and the FPU stack holding
//      (partial dist2) | dist1 | (last term of dist2)
// and with the caller's %ebx still saved on the CPU stack.
//
//      sides = 0;
//      if (dist1 >= p->dist)
//              sides = 1;
//      if (dist2 < p->dist)
//              sides |= 2;
//      return sides;

        xorl    %ecx,%ecx               // sides = 0 (accumulated in %cl)
        faddp   %st(0),%st(2)           // dist1 | dist2
        fcomps  pl_dist(%edx)           // compare dist1 with p->dist, pop -> dist2
        fnstsw  %ax                     // capture the first compare before it is overwritten
        fcomps  pl_dist(%edx)           // compare dist2 with p->dist, pop
        andb    $1,%ah                  // keep C0 (status-word bit 8): set when dist1 < p->dist
        xorb    $1,%ah                  // invert -> 1 when dist1 is not below p->dist
        addb    %ah,%cl

        fnstsw  %ax                     // result of the second compare
        andb    $1,%ah                  // C0: set when dist2 < p->dist
        addb    %ah,%ah                 // shift into bit 1
        addb    %ah,%cl

        movl    %ecx,%eax               // return status
        popl    %ebx                    // restore the register saved at function entry

        ret
 
418
 
 
419
 
 
420
// Reached from BoxOnPlaneSide when the plane's signbits byte is not a
// valid jump-table index. The branch here is taken after the prologue's
// `pushl %ebx` but before anything is loaded onto the FPU stack, so the
// only cleanup needed is restoring %ebx.
Lerror:
        popl    %ebx                    // without this, ret would jump to the saved %ebx value
        movl    $1,%eax                 // return the constant 1 (was `movl 1,%eax`, a load
                                        // from absolute address 1 that faults)
                                        // NOTE(review): 1 assumed to be the intended
                                        // error result - confirm against the C fallback
        ret
 
423
 
 
424
#endif  // id386