~ubuntu-branches/ubuntu/quantal/mesa-glw/quantal

« back to all changes in this revision

Viewing changes to src/mesa/drivers/dri/r300/r300_fragprog.c

  • Committer: Bazaar Package Importer
  • Author(s): Morten Kjeldgaard
  • Date: 2008-05-06 16:19:15 UTC
  • Revision ID: james.westby@ubuntu.com-20080506161915-uynz7nftmfixu6bq
Tags: upstream-7.0.3
Import upstream version 7.0.3

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 * Copyright (C) 2005 Ben Skeggs.
 
3
 *
 
4
 * All Rights Reserved.
 
5
 *
 
6
 * Permission is hereby granted, free of charge, to any person obtaining
 
7
 * a copy of this software and associated documentation files (the
 
8
 * "Software"), to deal in the Software without restriction, including
 
9
 * without limitation the rights to use, copy, modify, merge, publish,
 
10
 * distribute, sublicense, and/or sell copies of the Software, and to
 
11
 * permit persons to whom the Software is furnished to do so, subject to
 
12
 * the following conditions:
 
13
 *
 
14
 * The above copyright notice and this permission notice (including the
 
15
 * next paragraph) shall be included in all copies or substantial
 
16
 * portions of the Software.
 
17
 *
 
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 
19
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 
20
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 
21
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 
22
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 
23
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 
24
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
25
 *
 
26
 */
 
27
 
 
28
/**
 
29
 * \file
 
30
 *
 
31
 * \author Ben Skeggs <darktama@iinet.net.au>
 
32
 *
 
33
 * \author Jerome Glisse <j.glisse@gmail.com>
 
34
 *
 
35
 * \todo Depth write, WPOS/FOGC inputs
 
36
 *
 
37
 * \todo FogOption
 
38
 *
 
39
 * \todo Verify results of opcodes for accuracy, I've only checked them in
 
40
 * specific cases.
 
41
 */
 
42
 
 
43
#include "glheader.h"
 
44
#include "macros.h"
 
45
#include "enums.h"
 
46
#include "shader/prog_instruction.h"
 
47
#include "shader/prog_parameter.h"
 
48
#include "shader/prog_print.h"
 
49
 
 
50
#include "r300_context.h"
 
51
#include "r300_fragprog.h"
 
52
#include "r300_reg.h"
 
53
#include "r300_state.h"
 
54
 
 
55
/*
 
56
 * Useful macros and values
 
57
 */
 
58
/*
 * Report a compile error: prints "<file>::<function>(): <message>" plus a
 * trailing newline to stderr and flags the program as failed by setting
 * fp->error.  A variable named `fp` must be in scope at the call site.
 */
#define ERROR(fmt, ...)							\
	do {								\
		fprintf(stderr, "%s::%s(): " fmt "\n",			\
			__FILE__, __FUNCTION__, ##__VA_ARGS__);		\
		fp->error = GL_TRUE;					\
	} while (0)
 
63
 
 
64
/* Sentinel for "no valid value"; also used as the terminating entry of the
 * v_swiz[] and s_mask[] tables and as the "no such opcode" marker in
 * r300_fpop[]. */
#define PFS_INVAL 0xFFFFFFFF
 
65
/* Declares a local `cs` initialized from fp->cs; requires `fp` in scope. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs
 
66
 
 
67
/* Vector swizzle selectors.  These values are stored in the VSWZ field of a
 * packed register word and used as indices into v_swiz[], so their order
 * must stay in step with the entries of that table. */
#define SWIZZLE_XYZ             0
 
68
#define SWIZZLE_XXX             1
 
69
#define SWIZZLE_YYY             2
 
70
#define SWIZZLE_ZZZ             3
 
71
#define SWIZZLE_WWW             4
 
72
#define SWIZZLE_YZX             5
 
73
#define SWIZZLE_ZXY             6
 
74
#define SWIZZLE_WZY             7
 
75
#define SWIZZLE_111             8
 
76
#define SWIZZLE_000             9
 
77
#define SWIZZLE_HHH             10
 
78
 
 
79
/* Shorthand for do_swizzle() without negation: each of x, y, z, w is pasted
 * onto SWIZZLE_ and the four selectors are packed 3 bits apiece into the
 * swizzle argument; the negate argument is 0.  Requires `fp` in scope. */
#define swizzle(r, x, y, z, w) do_swizzle(fp, r,                \
 
80
                                          ((SWIZZLE_##x<<0)|    \
 
81
                                           (SWIZZLE_##y<<3)|    \
 
82
                                           (SWIZZLE_##z<<6)|    \
 
83
                                           (SWIZZLE_##w<<9)),   \
 
84
                                          0)
 
85
 
 
86
/* Register kinds stored in the TYPE field of a packed register word. */
#define REG_TYPE_INPUT		0
#define REG_TYPE_OUTPUT		1
#define REG_TYPE_TEMP		2
#define REG_TYPE_CONST		3

/* Bit offset of each field inside a packed register word. */
#define REG_TYPE_SHIFT		0
#define REG_INDEX_SHIFT		2
#define REG_VSWZ_SHIFT		8
#define REG_SSWZ_SHIFT		13
#define REG_NEGV_SHIFT		18
#define REG_NEGS_SHIFT		19
#define REG_ABS_SHIFT		20
#define REG_NO_USE_SHIFT	21	// Hack for refcounting
#define REG_VALID_SHIFT		22	// Does the register contain a defined value?
#define REG_BUILTIN_SHIFT	23	// Is it a builtin (like all zero/all one)?

/* In-place bit masks of the fields above. */
#define REG_TYPE_MASK		(0x03 << REG_TYPE_SHIFT)
#define REG_INDEX_MASK		(0x3F << REG_INDEX_SHIFT)
#define REG_VSWZ_MASK		(0x1F << REG_VSWZ_SHIFT)
#define REG_SSWZ_MASK		(0x1F << REG_SSWZ_SHIFT)
#define REG_NEGV_MASK		(0x01 << REG_NEGV_SHIFT)
#define REG_NEGS_MASK		(0x01 << REG_NEGS_SHIFT)
#define REG_ABS_MASK		(0x01 << REG_ABS_SHIFT)
#define REG_NO_USE_MASK		(0x01 << REG_NO_USE_SHIFT)
#define REG_VALID_MASK		(0x01 << REG_VALID_SHIFT)
#define REG_BUILTIN_MASK	(0x01 << REG_BUILTIN_SHIFT)

/*
 * Build a packed register word from its fields.  The negate and abs bits
 * are left clear.  Every argument is parenthesized so that compound
 * expressions (e.g. `a | b`) are shifted as a whole.
 */
#define REG(type, index, vswz, sswz, nouse, valid, builtin)		\
	((((type) << REG_TYPE_SHIFT) & REG_TYPE_MASK) |			\
	 (((index) << REG_INDEX_SHIFT) & REG_INDEX_MASK) |		\
	 (((nouse) << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) |		\
	 (((valid) << REG_VALID_SHIFT) & REG_VALID_MASK) |		\
	 (((builtin) << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK) |	\
	 (((vswz) << REG_VSWZ_SHIFT) & REG_VSWZ_MASK) |			\
	 (((sswz) << REG_SSWZ_SHIFT) & REG_SSWZ_MASK))

/* Field extractors: return the field value shifted down to bit 0. */
#define REG_GET_TYPE(reg)						\
	(((reg) & REG_TYPE_MASK) >> REG_TYPE_SHIFT)
#define REG_GET_INDEX(reg)						\
	(((reg) & REG_INDEX_MASK) >> REG_INDEX_SHIFT)
#define REG_GET_VSWZ(reg)						\
	(((reg) & REG_VSWZ_MASK) >> REG_VSWZ_SHIFT)
#define REG_GET_SSWZ(reg)						\
	(((reg) & REG_SSWZ_MASK) >> REG_SSWZ_SHIFT)
#define REG_GET_NO_USE(reg)						\
	(((reg) & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT)
#define REG_GET_VALID(reg)						\
	(((reg) & REG_VALID_MASK) >> REG_VALID_SHIFT)
#define REG_GET_BUILTIN(reg)						\
	(((reg) & REG_BUILTIN_MASK) >> REG_BUILTIN_SHIFT)

/*
 * Field setters: `reg` must be a modifiable lvalue; it is assigned the old
 * word with the named field replaced.  Values wider than the field are
 * truncated by the mask.
 */
#define REG_SET_TYPE(reg, type)						\
	((reg) = (((reg) & ~REG_TYPE_MASK) |				\
		  (((type) << REG_TYPE_SHIFT) & REG_TYPE_MASK)))
#define REG_SET_INDEX(reg, index)					\
	((reg) = (((reg) & ~REG_INDEX_MASK) |				\
		  (((index) << REG_INDEX_SHIFT) & REG_INDEX_MASK)))
#define REG_SET_VSWZ(reg, vswz)						\
	((reg) = (((reg) & ~REG_VSWZ_MASK) |				\
		  (((vswz) << REG_VSWZ_SHIFT) & REG_VSWZ_MASK)))
#define REG_SET_SSWZ(reg, sswz)						\
	((reg) = (((reg) & ~REG_SSWZ_MASK) |				\
		  (((sswz) << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)))
#define REG_SET_NO_USE(reg, nouse)					\
	((reg) = (((reg) & ~REG_NO_USE_MASK) |				\
		  (((nouse) << REG_NO_USE_SHIFT) & REG_NO_USE_MASK)))
#define REG_SET_VALID(reg, valid)					\
	((reg) = (((reg) & ~REG_VALID_MASK) |				\
		  (((valid) << REG_VALID_SHIFT) & REG_VALID_MASK)))
#define REG_SET_BUILTIN(reg, builtin)					\
	((reg) = (((reg) & ~REG_BUILTIN_MASK) |				\
		  (((builtin) << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK)))

/* Flag setters: `reg` must be a modifiable lvalue; the bit is only ever
 * turned on, never cleared. */
#define REG_ABS(reg)							\
	((reg) = ((reg) | REG_ABS_MASK))
#define REG_NEGV(reg)							\
	((reg) = ((reg) | REG_NEGV_MASK))
#define REG_NEGS(reg)							\
	((reg) = ((reg) | REG_NEGS_MASK))
 
162
 
 
163
/*
 
164
 * Data structures for fragment program generation
 
165
 */
 
166
 
 
167
/* description of r300 native hw instructions */
 
168
static const struct {
 
169
        const char *name;
 
170
        int argc;
 
171
        int v_op;
 
172
        int s_op;
 
173
} r300_fpop[] = {
 
174
        /* *INDENT-OFF* */
 
175
        {"MAD", 3, R300_FPI0_OUTC_MAD, R300_FPI2_OUTA_MAD},
 
176
        {"DP3", 2, R300_FPI0_OUTC_DP3, R300_FPI2_OUTA_DP4},
 
177
        {"DP4", 2, R300_FPI0_OUTC_DP4, R300_FPI2_OUTA_DP4},
 
178
        {"MIN", 2, R300_FPI0_OUTC_MIN, R300_FPI2_OUTA_MIN},
 
179
        {"MAX", 2, R300_FPI0_OUTC_MAX, R300_FPI2_OUTA_MAX},
 
180
        {"CMP", 3, R300_FPI0_OUTC_CMP, R300_FPI2_OUTA_CMP},
 
181
        {"FRC", 1, R300_FPI0_OUTC_FRC, R300_FPI2_OUTA_FRC},
 
182
        {"EX2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_EX2},
 
183
        {"LG2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_LG2},
 
184
        {"RCP", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RCP},
 
185
        {"RSQ", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RSQ},
 
186
        {"REPL_ALPHA", 1, R300_FPI0_OUTC_REPL_ALPHA, PFS_INVAL},
 
187
        {"CMPH", 3, R300_FPI0_OUTC_CMPH, PFS_INVAL},
 
188
        /* *INDENT-ON* */
 
189
};
 
190
 
 
191
/* vector swizzles r300 can support natively, with a couple of
 
192
 * cases we handle specially
 
193
 *
 
194
 * REG_VSWZ/REG_SSWZ is an index into this table
 
195
 */
 
196
 
 
197
/* mapping from SWIZZLE_* to r300 native values for scalar insns */
 
198
/* Scalar swizzle selector for the constant 0.5; the value 6 is the position
 * of the R300_FPI2_ARGA_HALF entry in s_swiz[]. */
#define SWIZZLE_HALF 6
 
199
 
 
200
/* Pack three component names (pasted onto SWIZZLE_) into a swizzle value via
 * MAKE_SWIZZLE4; the fourth component is always SWIZZLE_ZERO. */
#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \
 
201
                                          SWIZZLE_##y, \
 
202
                                          SWIZZLE_##z, \
 
203
                                          SWIZZLE_ZERO))
 
204
/* native swizzles */
 
205
static const struct r300_pfs_swizzle {
 
206
        GLuint hash;            /* swizzle value this matches */
 
207
        GLuint base;            /* base value for hw swizzle */
 
208
        GLuint stride;          /* difference in base between arg0/1/2 */
 
209
        GLuint flags;
 
210
} v_swiz[] = {
 
211
        /* *INDENT-OFF* */
 
212
        {MAKE_SWZ3(X, Y, Z), R300_FPI0_ARGC_SRC0C_XYZ, 4, SLOT_SRC_VECTOR},
 
213
        {MAKE_SWZ3(X, X, X), R300_FPI0_ARGC_SRC0C_XXX, 4, SLOT_SRC_VECTOR},
 
214
        {MAKE_SWZ3(Y, Y, Y), R300_FPI0_ARGC_SRC0C_YYY, 4, SLOT_SRC_VECTOR},
 
215
        {MAKE_SWZ3(Z, Z, Z), R300_FPI0_ARGC_SRC0C_ZZZ, 4, SLOT_SRC_VECTOR},
 
216
        {MAKE_SWZ3(W, W, W), R300_FPI0_ARGC_SRC0A, 1, SLOT_SRC_SCALAR},
 
217
        {MAKE_SWZ3(Y, Z, X), R300_FPI0_ARGC_SRC0C_YZX, 1, SLOT_SRC_VECTOR},
 
218
        {MAKE_SWZ3(Z, X, Y), R300_FPI0_ARGC_SRC0C_ZXY, 1, SLOT_SRC_VECTOR},
 
219
        {MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_SRC_BOTH},
 
220
        {MAKE_SWZ3(ONE, ONE, ONE), R300_FPI0_ARGC_ONE, 0, 0},
 
221
        {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_FPI0_ARGC_ZERO, 0, 0},
 
222
        {MAKE_SWZ3(HALF, HALF, HALF), R300_FPI0_ARGC_HALF, 0, 0},
 
223
        {PFS_INVAL, 0, 0, 0},
 
224
        /* *INDENT-ON* */
 
225
};
 
226
 
 
227
/* used during matching of non-native swizzles */
 
228
#define SWZ_X_MASK (7 << 0)
 
229
#define SWZ_Y_MASK (7 << 3)
 
230
#define SWZ_Z_MASK (7 << 6)
 
231
#define SWZ_W_MASK (7 << 9)
 
232
static const struct {
 
233
        GLuint hash;            /* used to mask matching swizzle components */
 
234
        int mask;               /* actual outmask */
 
235
        int count;              /* count of components matched */
 
236
} s_mask[] = {
 
237
        /* *INDENT-OFF* */
 
238
        {SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK, 1 | 2 | 4, 3},
 
239
        {SWZ_X_MASK | SWZ_Y_MASK, 1 | 2, 2},
 
240
        {SWZ_X_MASK | SWZ_Z_MASK, 1 | 4, 2},
 
241
        {SWZ_Y_MASK | SWZ_Z_MASK, 2 | 4, 2},
 
242
        {SWZ_X_MASK, 1, 1},
 
243
        {SWZ_Y_MASK, 2, 1},
 
244
        {SWZ_Z_MASK, 4, 1},
 
245
        {PFS_INVAL, PFS_INVAL, PFS_INVAL}
 
246
        /* *INDENT-ON* */
 
247
};
 
248
 
 
249
static const struct {
 
250
        int base;               /* hw value of swizzle */
 
251
        int stride;             /* difference between SRC0/1/2 */
 
252
        GLuint flags;
 
253
} s_swiz[] = {
 
254
        /* *INDENT-OFF* */
 
255
        {R300_FPI2_ARGA_SRC0C_X, 3, SLOT_SRC_VECTOR},
 
256
        {R300_FPI2_ARGA_SRC0C_Y, 3, SLOT_SRC_VECTOR},
 
257
        {R300_FPI2_ARGA_SRC0C_Z, 3, SLOT_SRC_VECTOR},
 
258
        {R300_FPI2_ARGA_SRC0A, 1, SLOT_SRC_SCALAR},
 
259
        {R300_FPI2_ARGA_ZERO, 0, 0},
 
260
        {R300_FPI2_ARGA_ONE, 0, 0},
 
261
        {R300_FPI2_ARGA_HALF, 0, 0}
 
262
        /* *INDENT-ON* */
 
263
};
 
264
 
 
265
/* boiler-plate reg, for convenience */
 
266
static const GLuint undef = REG(REG_TYPE_TEMP,
 
267
                                0,
 
268
                                SWIZZLE_XYZ,
 
269
                                SWIZZLE_W,
 
270
                                GL_FALSE,
 
271
                                GL_FALSE,
 
272
                                GL_FALSE);
 
273
 
 
274
/* constant one source */
 
275
static const GLuint pfs_one = REG(REG_TYPE_CONST,
 
276
                                  0,
 
277
                                  SWIZZLE_111,
 
278
                                  SWIZZLE_ONE,
 
279
                                  GL_FALSE,
 
280
                                  GL_TRUE,
 
281
                                  GL_TRUE);
 
282
 
 
283
/* constant half source */
 
284
static const GLuint pfs_half = REG(REG_TYPE_CONST,
 
285
                                   0,
 
286
                                   SWIZZLE_HHH,
 
287
                                   SWIZZLE_HALF,
 
288
                                   GL_FALSE,
 
289
                                   GL_TRUE,
 
290
                                   GL_TRUE);
 
291
 
 
292
/* constant zero source */
 
293
static const GLuint pfs_zero = REG(REG_TYPE_CONST,
 
294
                                   0,
 
295
                                   SWIZZLE_000,
 
296
                                   SWIZZLE_ZERO,
 
297
                                   GL_FALSE,
 
298
                                   GL_TRUE,
 
299
                                   GL_TRUE);
 
300
 
 
301
/*
 
302
 * Common function prototypes
 
303
 */
 
304
/* Forward declaration; the definition is not part of this section. */
static void dump_program(struct r300_fragment_program *fp);
 
305
/* Forward declaration: the swizzle helpers below call emit_arith() before
 * its definition. */
static void emit_arith(struct r300_fragment_program *fp, int op,
 
306
                       GLuint dest, int mask,
 
307
                       GLuint src0, GLuint src1, GLuint src2, int flags);
 
308
 
 
309
/**
 
310
 * Get an R300 temporary that can be written to in the given slot.
 
311
 */
 
312
static int get_hw_temp(struct r300_fragment_program *fp, int slot)
 
313
{
 
314
        COMPILE_STATE;
 
315
        int r;
 
316
 
 
317
        for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) {
 
318
                if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= slot)
 
319
                        break;
 
320
        }
 
321
 
 
322
        if (r >= PFS_NUM_TEMP_REGS) {
 
323
                ERROR("Out of hardware temps\n");
 
324
                return 0;
 
325
        }
 
326
        // Reserved is used to avoid the following scenario:
 
327
        //  R300 temporary X is first assigned to Mesa temporary Y during vector ops
 
328
        //  R300 temporary X is then assigned to Mesa temporary Z for further vector ops
 
329
        //  Then scalar ops on Mesa temporary Z are emitted and move back in time
 
330
        //  to overwrite the value of temporary Y.
 
331
        // End scenario.
 
332
        cs->hwtemps[r].reserved = cs->hwtemps[r].free;
 
333
        cs->hwtemps[r].free = -1;
 
334
 
 
335
        // Reset to some value that won't mess things up when the user
 
336
        // tries to read from a temporary that hasn't been assigned a value yet.
 
337
        // In the normal case, vector_valid and scalar_valid should be set to
 
338
        // a sane value by the first emit that writes to this temporary.
 
339
        cs->hwtemps[r].vector_valid = 0;
 
340
        cs->hwtemps[r].scalar_valid = 0;
 
341
 
 
342
        if (r > fp->max_temp_idx)
 
343
                fp->max_temp_idx = r;
 
344
 
 
345
        return r;
 
346
}
 
347
 
 
348
/**
 
349
 * Get an R300 temporary that will act as a TEX destination register.
 
350
 */
 
351
static int get_hw_temp_tex(struct r300_fragment_program *fp)
 
352
{
 
353
        COMPILE_STATE;
 
354
        int r;
 
355
 
 
356
        for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) {
 
357
                if (cs->used_in_node & (1 << r))
 
358
                        continue;
 
359
 
 
360
                // Note: Be very careful here
 
361
                if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= 0)
 
362
                        break;
 
363
        }
 
364
 
 
365
        if (r >= PFS_NUM_TEMP_REGS)
 
366
                return get_hw_temp(fp, 0);      /* Will cause an indirection */
 
367
 
 
368
        cs->hwtemps[r].reserved = cs->hwtemps[r].free;
 
369
        cs->hwtemps[r].free = -1;
 
370
 
 
371
        // Reset to some value that won't mess things up when the user
 
372
        // tries to read from a temporary that hasn't been assigned a value yet.
 
373
        // In the normal case, vector_valid and scalar_valid should be set to
 
374
        // a sane value by the first emit that writes to this temporary.
 
375
        cs->hwtemps[r].vector_valid = cs->nrslots;
 
376
        cs->hwtemps[r].scalar_valid = cs->nrslots;
 
377
 
 
378
        if (r > fp->max_temp_idx)
 
379
                fp->max_temp_idx = r;
 
380
 
 
381
        return r;
 
382
}
 
383
 
 
384
/**
 
385
 * Mark the given hardware register as free.
 
386
 */
 
387
static void free_hw_temp(struct r300_fragment_program *fp, int idx)
 
388
{
 
389
        COMPILE_STATE;
 
390
 
 
391
        // Be very careful here. Consider sequences like
 
392
        //  MAD r0, r1,r2,r3
 
393
        //  TEX r4, ...
 
394
        // The TEX instruction may be moved in front of the MAD instruction
 
395
        // due to the way nodes work. We don't want to alias r1 and r4 in
 
396
        // this case.
 
397
        // I'm certain the register allocation could be further sanitized,
 
398
        // but it's tricky because of stuff that can happen inside emit_tex
 
399
        // and emit_arith.
 
400
        cs->hwtemps[idx].free = cs->nrslots + 1;
 
401
}
 
402
 
 
403
/**
 
404
 * Create a new Mesa temporary register.
 
405
 */
 
406
static GLuint get_temp_reg(struct r300_fragment_program *fp)
 
407
{
 
408
        COMPILE_STATE;
 
409
        GLuint r = undef;
 
410
        GLuint index;
 
411
 
 
412
        index = ffs(~cs->temp_in_use);
 
413
        if (!index) {
 
414
                ERROR("Out of program temps\n");
 
415
                return r;
 
416
        }
 
417
 
 
418
        cs->temp_in_use |= (1 << --index);
 
419
        cs->temps[index].refcount = 0xFFFFFFFF;
 
420
        cs->temps[index].reg = -1;
 
421
 
 
422
        REG_SET_TYPE(r, REG_TYPE_TEMP);
 
423
        REG_SET_INDEX(r, index);
 
424
        REG_SET_VALID(r, GL_TRUE);
 
425
        return r;
 
426
}
 
427
 
 
428
/**
 
429
 * Create a new Mesa temporary register that will act as the destination
 
430
 * register for a texture read.
 
431
 */
 
432
static GLuint get_temp_reg_tex(struct r300_fragment_program *fp)
 
433
{
 
434
        COMPILE_STATE;
 
435
        GLuint r = undef;
 
436
        GLuint index;
 
437
 
 
438
        index = ffs(~cs->temp_in_use);
 
439
        if (!index) {
 
440
                ERROR("Out of program temps\n");
 
441
                return r;
 
442
        }
 
443
 
 
444
        cs->temp_in_use |= (1 << --index);
 
445
        cs->temps[index].refcount = 0xFFFFFFFF;
 
446
        cs->temps[index].reg = get_hw_temp_tex(fp);
 
447
 
 
448
        REG_SET_TYPE(r, REG_TYPE_TEMP);
 
449
        REG_SET_INDEX(r, index);
 
450
        REG_SET_VALID(r, GL_TRUE);
 
451
        return r;
 
452
}
 
453
 
 
454
/**
 
455
 * Free a Mesa temporary and the associated R300 temporary.
 
456
 */
 
457
static void free_temp(struct r300_fragment_program *fp, GLuint r)
 
458
{
 
459
        COMPILE_STATE;
 
460
        GLuint index = REG_GET_INDEX(r);
 
461
 
 
462
        if (!(cs->temp_in_use & (1 << index)))
 
463
                return;
 
464
 
 
465
        if (REG_GET_TYPE(r) == REG_TYPE_TEMP) {
 
466
                free_hw_temp(fp, cs->temps[index].reg);
 
467
                cs->temps[index].reg = -1;
 
468
                cs->temp_in_use &= ~(1 << index);
 
469
        } else if (REG_GET_TYPE(r) == REG_TYPE_INPUT) {
 
470
                free_hw_temp(fp, cs->inputs[index].reg);
 
471
                cs->inputs[index].reg = -1;
 
472
        }
 
473
}
 
474
 
 
475
/**
 
476
 * Emit a hardware constant/parameter.
 
477
 *
 
478
 * \p cp Stable pointer to an array of 4 floats.
 
479
 *  The pointer must be stable in the sense that it remains to be valid
 
480
 *  and hold the contents of the constant/parameter throughout the lifetime
 
481
 *  of the fragment program (actually, up until the next time the fragment
 
482
 *  program is translated).
 
483
 */
 
484
static GLuint emit_const4fv(struct r300_fragment_program *fp,
 
485
                            const GLfloat * cp)
 
486
{
 
487
        GLuint reg = undef;
 
488
        int index;
 
489
 
 
490
        for (index = 0; index < fp->const_nr; ++index) {
 
491
                if (fp->constant[index] == cp)
 
492
                        break;
 
493
        }
 
494
 
 
495
        if (index >= fp->const_nr) {
 
496
                if (index >= PFS_NUM_CONST_REGS) {
 
497
                        ERROR("Out of hw constants!\n");
 
498
                        return reg;
 
499
                }
 
500
 
 
501
                fp->const_nr++;
 
502
                fp->constant[index] = cp;
 
503
        }
 
504
 
 
505
        REG_SET_TYPE(reg, REG_TYPE_CONST);
 
506
        REG_SET_INDEX(reg, index);
 
507
        REG_SET_VALID(reg, GL_TRUE);
 
508
        return reg;
 
509
}
 
510
 
 
511
/* Return \p r with both the scalar and the vector negate bits set. */
static inline GLuint negate(GLuint r)
{
	return r | REG_NEGS_MASK | REG_NEGV_MASK;
}
 
517
 
 
518
/* Hack, to prevent clobbering sources used multiple times when
 
519
 * emulating non-native instructions
 
520
 */
 
521
static inline GLuint keep(GLuint r)
 
522
{
 
523
        REG_SET_NO_USE(r, GL_TRUE);
 
524
        return r;
 
525
}
 
526
 
 
527
/* Return \p r with the ABS flag set. */
static inline GLuint absolute(GLuint r)
{
	return r | REG_ABS_MASK;
}
 
532
 
 
533
/*
 * Apply the negate mask \p arbneg to a source whose swizzle is natively
 * supported.  Bit 3 of \p arbneg drives the scalar negate bit; bits 0-2
 * select which of the X/Y/Z components are negated.
 *
 * When none or all of X/Y/Z are negated the result is just \p src with the
 * vector negate bit cleared or set, stored in \p *r.  For a mixed pattern
 * two MADs are emitted into \p *r (a fresh temporary if \p *r is not
 * valid): one writing the negated components, one writing the remaining
 * components plus W.
 *
 * Always returns 3.
 */
static int swz_native(struct r300_fragment_program *fp,
		      GLuint src, GLuint * r, GLuint arbneg)
{
	/* Native swizzle, handle negation */
	src &= ~REG_NEGS_MASK;
	src |= ((arbneg >> 3) & 1) << REG_NEGS_SHIFT;

	switch (arbneg & 0x7) {
	case 0x0:
		/* nothing negated */
		*r = src & ~REG_NEGV_MASK;
		break;
	case 0x7:
		/* X, Y and Z all negated */
		*r = src | REG_NEGV_MASK;
		break;
	default:
		if (!REG_GET_VALID(*r))
			*r = get_temp_reg(fp);

		/* negated components first; keep() so that src survives
		 * for the second instruction */
		emit_arith(fp, PFS_OP_MAD, *r, arbneg & 0x7,
			   keep(src | REG_NEGV_MASK), pfs_one, pfs_zero, 0);

		/* then the remaining components, together with W */
		emit_arith(fp, PFS_OP_MAD, *r, (arbneg ^ 0x7) | WRITEMASK_W,
			   src & ~REG_NEGV_MASK, pfs_one, pfs_zero, 0);
		break;
	}

	return 3;
}
 
562
 
 
563
static int swz_emit_partial(struct r300_fragment_program *fp,
 
564
                            GLuint src,
 
565
                            GLuint * r, int mask, int mc, GLuint arbneg)
 
566
{
 
567
        GLuint tmp;
 
568
        GLuint wmask = 0;
 
569
 
 
570
        if (!REG_GET_VALID(*r))
 
571
                *r = get_temp_reg(fp);
 
572
 
 
573
        /* A partial match, VSWZ/mask define what parts of the
 
574
         * desired swizzle we match
 
575
         */
 
576
        if (mc + s_mask[mask].count == 3) {
 
577
                wmask = WRITEMASK_W;
 
578
                src |= ((arbneg >> 3) & 1) << REG_NEGS_SHIFT;
 
579
        }
 
580
 
 
581
        tmp = arbneg & s_mask[mask].mask;
 
582
        if (tmp) {
 
583
                tmp = tmp ^ s_mask[mask].mask;
 
584
                if (tmp) {
 
585
                        emit_arith(fp,
 
586
                                   PFS_OP_MAD,
 
587
                                   *r,
 
588
                                   arbneg & s_mask[mask].mask,
 
589
                                   keep(src) | REG_NEGV_MASK,
 
590
                                   pfs_one, pfs_zero, 0);
 
591
                        if (!wmask) {
 
592
                                REG_SET_NO_USE(src, GL_TRUE);
 
593
                        } else {
 
594
                                REG_SET_NO_USE(src, GL_FALSE);
 
595
                        }
 
596
                        emit_arith(fp,
 
597
                                   PFS_OP_MAD,
 
598
                                   *r, tmp | wmask, src, pfs_one, pfs_zero, 0);
 
599
                } else {
 
600
                        if (!wmask) {
 
601
                                REG_SET_NO_USE(src, GL_TRUE);
 
602
                        } else {
 
603
                                REG_SET_NO_USE(src, GL_FALSE);
 
604
                        }
 
605
                        emit_arith(fp,
 
606
                                   PFS_OP_MAD,
 
607
                                   *r,
 
608
                                   (arbneg & s_mask[mask].mask) | wmask,
 
609
                                   src | REG_NEGV_MASK, pfs_one, pfs_zero, 0);
 
610
                }
 
611
        } else {
 
612
                if (!wmask) {
 
613
                        REG_SET_NO_USE(src, GL_TRUE);
 
614
                } else {
 
615
                        REG_SET_NO_USE(src, GL_FALSE);
 
616
                }
 
617
                emit_arith(fp, PFS_OP_MAD,
 
618
                           *r,
 
619
                           s_mask[mask].mask | wmask,
 
620
                           src, pfs_one, pfs_zero, 0);
 
621
        }
 
622
 
 
623
        return s_mask[mask].count;
 
624
}
 
625
 
 
626
/**
 * Build a register reference that reads \p src through the swizzle
 * \p arbswz with the negation mask \p arbneg.
 *
 * If \p src already carries a swizzle other than XYZ/W, the requested
 * swizzle is first composed with it.  Afterwards the s_mask and v_swiz
 * tables (both terminated by an entry whose hash is PFS_INVAL) are walked;
 * every table swizzle that matches the still-unresolved part of the request
 * is handed to swz_native() (mask entries covering 3 components) or to
 * swz_emit_partial(), until 3 vector components have been matched.
 *
 * \return the resulting register.  If the tables are exhausted first, an
 *         error is reported and whatever was built so far is returned.
 */
static GLuint do_swizzle(struct r300_fragment_program *fp,
			 GLuint src, GLuint arbswz, GLuint arbneg)
{
	const GLuint xyz_bits = SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK;
	GLuint result = undef;
	GLuint vswz;
	int mask_idx = 0;
	int matched = 0;

	if (REG_GET_VSWZ(src) == SWIZZLE_XYZ && REG_GET_SSWZ(src) == SWIZZLE_W) {
		/* Source is unswizzled: only the scalar selector needs setting. */
		REG_SET_SSWZ(src, GET_SWZ(arbswz, 3));
	} else {
		/* Source is already swizzled: express its current swizzle as
		 * four 3-bit selectors and route the request through it.
		 */
		GLuint vsrcswz =
		    (v_swiz[REG_GET_VSWZ(src)].hash & xyz_bits) |
		    (REG_GET_SSWZ(src) << 9);
		GLuint combined = 0;
		GLint chan;

		for (chan = 0; chan < 4; ++chan) {
			GLuint sel = GET_SWZ(arbswz, chan);

			/* Selectors 0..3 pick a source component and are
			 * redirected; larger selectors are kept unchanged.
			 */
			if (sel <= 3)
				sel = GET_SWZ(vsrcswz, sel);

			combined |= sel << (chan * 3);
		}

		arbswz = combined & xyz_bits;
		REG_SET_SSWZ(src, GET_SWZ(combined, 3));
	}

	do {
		vswz = REG_GET_VSWZ(src);
		do {
			int chash;

			REG_SET_VSWZ(src, vswz);
			chash = v_swiz[REG_GET_VSWZ(src)].hash &
			    s_mask[mask_idx].hash;

			/* This table swizzle does not provide the components
			 * selected by the current mask; try the next one.
			 */
			if (chash != (arbswz & s_mask[mask_idx].hash))
				continue;

			if (s_mask[mask_idx].count == 3)
				matched += swz_native(fp, src, &result, arbneg);
			else
				matched += swz_emit_partial(fp, src, &result,
							    mask_idx, matched,
							    arbneg);

			if (matched == 3)
				return result;

			/* Mark the handled components with an invalid
			 * selector.  All 0's would match SWIZZLE_X, so all
			 * 1's are used instead.
			 */
			arbswz |= (PFS_INVAL & s_mask[mask_idx].hash);
		} while (v_swiz[++vswz].hash != PFS_INVAL);
		REG_SET_VSWZ(src, SWIZZLE_XYZ);
	} while (s_mask[++mask_idx].hash != PFS_INVAL);

	ERROR("should NEVER get here\n");
	return result;
}
 
710
 
 
711
static GLuint t_src(struct r300_fragment_program *fp,
 
712
                    struct prog_src_register fpsrc)
 
713
{
 
714
        GLuint r = undef;
 
715
 
 
716
        switch (fpsrc.File) {
 
717
        case PROGRAM_TEMPORARY:
 
718
                REG_SET_INDEX(r, fpsrc.Index);
 
719
                REG_SET_VALID(r, GL_TRUE);
 
720
                REG_SET_TYPE(r, REG_TYPE_TEMP);
 
721
                break;
 
722
        case PROGRAM_INPUT:
 
723
                REG_SET_INDEX(r, fpsrc.Index);
 
724
                REG_SET_VALID(r, GL_TRUE);
 
725
                REG_SET_TYPE(r, REG_TYPE_INPUT);
 
726
                break;
 
727
        case PROGRAM_LOCAL_PARAM:
 
728
                r = emit_const4fv(fp,
 
729
                                  fp->mesa_program.Base.LocalParams[fpsrc.
 
730
                                                                    Index]);
 
731
                break;
 
732
        case PROGRAM_ENV_PARAM:
 
733
                r = emit_const4fv(fp,
 
734
                                  fp->ctx->FragmentProgram.Parameters[fpsrc.
 
735
                                                                      Index]);
 
736
                break;
 
737
        case PROGRAM_STATE_VAR:
 
738
        case PROGRAM_NAMED_PARAM:
 
739
                r = emit_const4fv(fp,
 
740
                                  fp->mesa_program.Base.Parameters->
 
741
                                  ParameterValues[fpsrc.Index]);
 
742
                break;
 
743
        default:
 
744
                ERROR("unknown SrcReg->File %x\n", fpsrc.File);
 
745
                return r;
 
746
        }
 
747
 
 
748
        /* no point swizzling ONE/ZERO/HALF constants... */
 
749
        if (REG_GET_VSWZ(r) < SWIZZLE_111 || REG_GET_SSWZ(r) < SWIZZLE_ZERO)
 
750
                r = do_swizzle(fp, r, fpsrc.Swizzle, fpsrc.NegateBase);
 
751
        return r;
 
752
}
 
753
 
 
754
static GLuint t_scalar_src(struct r300_fragment_program *fp,
 
755
                           struct prog_src_register fpsrc)
 
756
{
 
757
        struct prog_src_register src = fpsrc;
 
758
        int sc = GET_SWZ(fpsrc.Swizzle, 0);     /* X */
 
759
 
 
760
        src.Swizzle = ((sc << 0) | (sc << 3) | (sc << 6) | (sc << 9));
 
761
 
 
762
        return t_src(fp, src);
 
763
}
 
764
 
 
765
static GLuint t_dst(struct r300_fragment_program *fp,
 
766
                    struct prog_dst_register dest)
 
767
{
 
768
        GLuint r = undef;
 
769
 
 
770
        switch (dest.File) {
 
771
        case PROGRAM_TEMPORARY:
 
772
                REG_SET_INDEX(r, dest.Index);
 
773
                REG_SET_VALID(r, GL_TRUE);
 
774
                REG_SET_TYPE(r, REG_TYPE_TEMP);
 
775
                return r;
 
776
        case PROGRAM_OUTPUT:
 
777
                REG_SET_TYPE(r, REG_TYPE_OUTPUT);
 
778
                switch (dest.Index) {
 
779
                case FRAG_RESULT_COLR:
 
780
                case FRAG_RESULT_DEPR:
 
781
                        REG_SET_INDEX(r, dest.Index);
 
782
                        REG_SET_VALID(r, GL_TRUE);
 
783
                        return r;
 
784
                default:
 
785
                        ERROR("Bad DstReg->Index 0x%x\n", dest.Index);
 
786
                        return r;
 
787
                }
 
788
        default:
 
789
                ERROR("Bad DstReg->File 0x%x\n", dest.File);
 
790
                return r;
 
791
        }
 
792
}
 
793
 
 
794
/**
 * Resolve a source register to the value used in the hardware source field.
 *
 * - Temporaries: a hardware temp is allocated on demand, then the reference
 *   count is decremented (unless the register carries the NO_USE flag) and
 *   the temp is released once the count reaches zero.
 * - Inputs: same reference counting, applied to the input's hardware reg.
 * - Constants: returns the constant index tagged with SRC_CONST.
 *
 * For temporaries and inputs the hardware register is also recorded in
 * used_in_node, unless \p tex is set.
 */
static int t_hw_src(struct r300_fragment_program *fp, GLuint src, GLboolean tex)
{
	COMPILE_STATE;
	int hwreg;
	int regno = REG_GET_INDEX(src);

	switch (REG_GET_TYPE(src)) {
	case REG_TYPE_CONST:
		return (regno | SRC_CONST);

	case REG_TYPE_TEMP:
		/* NOTE: if reg==-1 here, a source is being read that
		 *       hasn't been written to. Undefined results.
		 */
		if (cs->temps[regno].reg == -1)
			cs->temps[regno].reg = get_hw_temp(fp, cs->nrslots);

		hwreg = cs->temps[regno].reg;

		if (!REG_GET_NO_USE(src) && (--cs->temps[regno].refcount == 0))
			free_temp(fp, src);
		break;

	case REG_TYPE_INPUT:
		hwreg = cs->inputs[regno].reg;

		if (!REG_GET_NO_USE(src) && (--cs->inputs[regno].refcount == 0))
			free_hw_temp(fp, cs->inputs[regno].reg);
		break;

	default:
		ERROR("Invalid type for source reg\n");
		return (0 | SRC_CONST);
	}

	if (!tex)
		cs->used_in_node |= (1 << hwreg);

	return hwreg;
}
 
831
 
 
832
static int t_hw_dst(struct r300_fragment_program *fp,
 
833
                    GLuint dest, GLboolean tex, int slot)
 
834
{
 
835
        COMPILE_STATE;
 
836
        int idx;
 
837
        GLuint index = REG_GET_INDEX(dest);
 
838
        assert(REG_GET_VALID(dest));
 
839
 
 
840
        switch (REG_GET_TYPE(dest)) {
 
841
        case REG_TYPE_TEMP:
 
842
                if (cs->temps[REG_GET_INDEX(dest)].reg == -1) {
 
843
                        if (!tex) {
 
844
                                cs->temps[index].reg = get_hw_temp(fp, slot);
 
845
                        } else {
 
846
                                cs->temps[index].reg = get_hw_temp_tex(fp);
 
847
                        }
 
848
                }
 
849
                idx = cs->temps[index].reg;
 
850
 
 
851
                if (!REG_GET_NO_USE(dest) && (--cs->temps[index].refcount == 0))
 
852
                        free_temp(fp, dest);
 
853
 
 
854
                cs->dest_in_node |= (1 << idx);
 
855
                cs->used_in_node |= (1 << idx);
 
856
                break;
 
857
        case REG_TYPE_OUTPUT:
 
858
                switch (index) {
 
859
                case FRAG_RESULT_COLR:
 
860
                        fp->node[fp->cur_node].flags |=
 
861
                            R300_PFS_NODE_OUTPUT_COLOR;
 
862
                        break;
 
863
                case FRAG_RESULT_DEPR:
 
864
                        fp->node[fp->cur_node].flags |=
 
865
                            R300_PFS_NODE_OUTPUT_DEPTH;
 
866
                        break;
 
867
                }
 
868
                return index;
 
869
                break;
 
870
        default:
 
871
                ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest));
 
872
                return 0;
 
873
        }
 
874
 
 
875
        return idx;
 
876
}
 
877
 
 
878
static void emit_nop(struct r300_fragment_program *fp)
 
879
{
 
880
        COMPILE_STATE;
 
881
 
 
882
        if (cs->nrslots >= PFS_MAX_ALU_INST) {
 
883
                ERROR("Out of ALU instruction slots\n");
 
884
                return;
 
885
        }
 
886
 
 
887
        fp->alu.inst[cs->nrslots].inst0 = NOP_INST0;
 
888
        fp->alu.inst[cs->nrslots].inst1 = NOP_INST1;
 
889
        fp->alu.inst[cs->nrslots].inst2 = NOP_INST2;
 
890
        fp->alu.inst[cs->nrslots].inst3 = NOP_INST3;
 
891
        cs->nrslots++;
 
892
}
 
893
 
 
894
static void emit_tex(struct r300_fragment_program *fp,
 
895
                     struct prog_instruction *fpi, int opcode)
 
896
{
 
897
        COMPILE_STATE;
 
898
        GLuint coord = t_src(fp, fpi->SrcReg[0]);
 
899
        GLuint dest = undef, rdest = undef;
 
900
        GLuint din, uin;
 
901
        int unit = fpi->TexSrcUnit;
 
902
        int hwsrc, hwdest;
 
903
        GLuint tempreg = 0;
 
904
 
 
905
        uin = cs->used_in_node;
 
906
        din = cs->dest_in_node;
 
907
 
 
908
        /* Resolve source/dest to hardware registers */
 
909
        if (opcode != R300_FPITX_OP_KIL) {
 
910
                if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) {
 
911
                        /**
 
912
                         * Hardware uses [0..1]x[0..1] range for rectangle textures
 
913
                         * instead of [0..Width]x[0..Height].
 
914
                         * Add a scaling instruction.
 
915
                         *
 
916
                         * \todo Refactor this once we have proper rewriting/optimization
 
917
                         * support for programs.
 
918
                         */
 
919
                        gl_state_index tokens[STATE_LENGTH] = {
 
920
                                STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
 
921
                                0
 
922
                        };
 
923
                        int factor_index;
 
924
                        GLuint factorreg;
 
925
 
 
926
                        tokens[2] = unit;
 
927
                        factor_index =
 
928
                            _mesa_add_state_reference(fp->mesa_program.Base.
 
929
                                                      Parameters, tokens);
 
930
                        factorreg =
 
931
                            emit_const4fv(fp,
 
932
                                          fp->mesa_program.Base.Parameters->
 
933
                                          ParameterValues[factor_index]);
 
934
                        tempreg = keep(get_temp_reg(fp));
 
935
 
 
936
                        emit_arith(fp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW,
 
937
                                   coord, factorreg, pfs_zero, 0);
 
938
 
 
939
                        /* Ensure correct node indirection */
 
940
                        uin = cs->used_in_node;
 
941
                        din = cs->dest_in_node;
 
942
 
 
943
                        hwsrc = t_hw_src(fp, tempreg, GL_TRUE);
 
944
                } else {
 
945
                        hwsrc = t_hw_src(fp, coord, GL_TRUE);
 
946
                }
 
947
 
 
948
                dest = t_dst(fp, fpi->DstReg);
 
949
 
 
950
                /* r300 doesn't seem to be able to do TEX->output reg */
 
951
                if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
 
952
                        rdest = dest;
 
953
                        dest = get_temp_reg_tex(fp);
 
954
                }
 
955
                hwdest =
 
956
                    t_hw_dst(fp, dest, GL_TRUE,
 
957
                             fp->node[fp->cur_node].alu_offset);
 
958
 
 
959
                /* Use a temp that hasn't been used in this node, rather
 
960
                 * than causing an indirection
 
961
                 */
 
962
                if (uin & (1 << hwdest)) {
 
963
                        free_hw_temp(fp, hwdest);
 
964
                        hwdest = get_hw_temp_tex(fp);
 
965
                        cs->temps[REG_GET_INDEX(dest)].reg = hwdest;
 
966
                }
 
967
        } else {
 
968
                hwdest = 0;
 
969
                unit = 0;
 
970
                hwsrc = t_hw_src(fp, coord, GL_TRUE);
 
971
        }
 
972
 
 
973
        /* Indirection if source has been written in this node, or if the
 
974
         * dest has been read/written in this node
 
975
         */
 
976
        if ((REG_GET_TYPE(coord) != REG_TYPE_CONST &&
 
977
             (din & (1 << hwsrc))) || (uin & (1 << hwdest))) {
 
978
 
 
979
                /* Finish off current node */
 
980
                if (fp->node[fp->cur_node].alu_offset == cs->nrslots)
 
981
                        emit_nop(fp);
 
982
 
 
983
                fp->node[fp->cur_node].alu_end =
 
984
                    cs->nrslots - fp->node[fp->cur_node].alu_offset - 1;
 
985
                assert(fp->node[fp->cur_node].alu_end >= 0);
 
986
 
 
987
                if (++fp->cur_node >= PFS_MAX_TEX_INDIRECT) {
 
988
                        ERROR("too many levels of texture indirection\n");
 
989
                        return;
 
990
                }
 
991
 
 
992
                /* Start new node */
 
993
                fp->node[fp->cur_node].tex_offset = fp->tex.length;
 
994
                fp->node[fp->cur_node].alu_offset = cs->nrslots;
 
995
                fp->node[fp->cur_node].tex_end = -1;
 
996
                fp->node[fp->cur_node].alu_end = -1;
 
997
                fp->node[fp->cur_node].flags = 0;
 
998
                cs->used_in_node = 0;
 
999
                cs->dest_in_node = 0;
 
1000
        }
 
1001
 
 
1002
        if (fp->cur_node == 0)
 
1003
                fp->first_node_has_tex = 1;
 
1004
 
 
1005
        fp->tex.inst[fp->tex.length++] = 0 | (hwsrc << R300_FPITX_SRC_SHIFT)
 
1006
            | (hwdest << R300_FPITX_DST_SHIFT)
 
1007
            | (unit << R300_FPITX_IMAGE_SHIFT)
 
1008
            /* not entirely sure about this */
 
1009
            | (opcode << R300_FPITX_OPCODE_SHIFT);
 
1010
 
 
1011
        cs->dest_in_node |= (1 << hwdest);
 
1012
        if (REG_GET_TYPE(coord) != REG_TYPE_CONST)
 
1013
                cs->used_in_node |= (1 << hwsrc);
 
1014
 
 
1015
        fp->node[fp->cur_node].tex_end++;
 
1016
 
 
1017
        /* Copy from temp to output if needed */
 
1018
        if (REG_GET_VALID(rdest)) {
 
1019
                emit_arith(fp, PFS_OP_MAD, rdest, WRITEMASK_XYZW, dest,
 
1020
                           pfs_one, pfs_zero, 0);
 
1021
                free_temp(fp, dest);
 
1022
        }
 
1023
 
 
1024
        /* Free temp register */
 
1025
        if (tempreg != 0)
 
1026
                free_temp(fp, tempreg);
 
1027
}
 
1028
 
 
1029
/**
 
1030
 * Returns the first slot where we could possibly allow writing to dest,
 
1031
 * according to register allocation.
 
1032
 */
 
1033
static int get_earliest_allowed_write(struct r300_fragment_program *fp,
 
1034
                                      GLuint dest, int mask)
 
1035
{
 
1036
        COMPILE_STATE;
 
1037
        int idx;
 
1038
        int pos;
 
1039
        GLuint index = REG_GET_INDEX(dest);
 
1040
        assert(REG_GET_VALID(dest));
 
1041
 
 
1042
        switch (REG_GET_TYPE(dest)) {
 
1043
        case REG_TYPE_TEMP:
 
1044
                if (cs->temps[index].reg == -1)
 
1045
                        return 0;
 
1046
 
 
1047
                idx = cs->temps[index].reg;
 
1048
                break;
 
1049
        case REG_TYPE_OUTPUT:
 
1050
                return 0;
 
1051
        default:
 
1052
                ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest));
 
1053
                return 0;
 
1054
        }
 
1055
 
 
1056
        pos = cs->hwtemps[idx].reserved;
 
1057
        if (mask & WRITEMASK_XYZ) {
 
1058
                if (pos < cs->hwtemps[idx].vector_lastread)
 
1059
                        pos = cs->hwtemps[idx].vector_lastread;
 
1060
        }
 
1061
        if (mask & WRITEMASK_W) {
 
1062
                if (pos < cs->hwtemps[idx].scalar_lastread)
 
1063
                        pos = cs->hwtemps[idx].scalar_lastread;
 
1064
        }
 
1065
 
 
1066
        return pos;
 
1067
}
 
1068
 
 
1069
/**
 
1070
 * Allocates a slot for an ALU instruction that can consist of
 
1071
 * a vertex part or a scalar part or both.
 
1072
 *
 
1073
 * Sources from src (src[0] to src[argc-1]) are added to the slot in the
 
1074
 * appropriate position (vector and/or scalar), and their positions are
 
1075
 * recorded in the srcpos array.
 
1076
 *
 
1077
 * This function emits instruction code for the source fetch and the
 
1078
 * argument selection. It does not emit instruction code for the
 
1079
 * opcode or the destination selection.
 
1080
 *
 
1081
 * @return the index of the slot
 
1082
 */
 
1083
static int find_and_prepare_slot(struct r300_fragment_program *fp,
 
1084
                                 GLboolean emit_vop,
 
1085
                                 GLboolean emit_sop,
 
1086
                                 int argc, GLuint * src, GLuint dest, int mask)
 
1087
{
 
1088
        COMPILE_STATE;
 
1089
        int hwsrc[3];
 
1090
        int srcpos[3];
 
1091
        unsigned int used;
 
1092
        int tempused;
 
1093
        int tempvsrc[3];
 
1094
        int tempssrc[3];
 
1095
        int pos;
 
1096
        int regnr;
 
1097
        int i, j;
 
1098
 
 
1099
        // Determine instruction slots, whether sources are required on
 
1100
        // vector or scalar side, and the smallest slot number where
 
1101
        // all source registers are available
 
1102
        used = 0;
 
1103
        if (emit_vop)
 
1104
                used |= SLOT_OP_VECTOR;
 
1105
        if (emit_sop)
 
1106
                used |= SLOT_OP_SCALAR;
 
1107
 
 
1108
        pos = get_earliest_allowed_write(fp, dest, mask);
 
1109
 
 
1110
        if (fp->node[fp->cur_node].alu_offset > pos)
 
1111
                pos = fp->node[fp->cur_node].alu_offset;
 
1112
        for (i = 0; i < argc; ++i) {
 
1113
                if (!REG_GET_BUILTIN(src[i])) {
 
1114
                        if (emit_vop)
 
1115
                                used |= v_swiz[REG_GET_VSWZ(src[i])].flags << i;
 
1116
                        if (emit_sop)
 
1117
                                used |= s_swiz[REG_GET_SSWZ(src[i])].flags << i;
 
1118
                }
 
1119
 
 
1120
                hwsrc[i] = t_hw_src(fp, src[i], GL_FALSE);      /* Note: sideeffects wrt refcounting! */
 
1121
                regnr = hwsrc[i] & 31;
 
1122
 
 
1123
                if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) {
 
1124
                        if (used & (SLOT_SRC_VECTOR << i)) {
 
1125
                                if (cs->hwtemps[regnr].vector_valid > pos)
 
1126
                                        pos = cs->hwtemps[regnr].vector_valid;
 
1127
                        }
 
1128
                        if (used & (SLOT_SRC_SCALAR << i)) {
 
1129
                                if (cs->hwtemps[regnr].scalar_valid > pos)
 
1130
                                        pos = cs->hwtemps[regnr].scalar_valid;
 
1131
                        }
 
1132
                }
 
1133
        }
 
1134
 
 
1135
        // Find a slot that fits
 
1136
        for (;; ++pos) {
 
1137
                if (cs->slot[pos].used & used & SLOT_OP_BOTH)
 
1138
                        continue;
 
1139
 
 
1140
                if (pos >= cs->nrslots) {
 
1141
                        if (cs->nrslots >= PFS_MAX_ALU_INST) {
 
1142
                                ERROR("Out of ALU instruction slots\n");
 
1143
                                return -1;
 
1144
                        }
 
1145
 
 
1146
                        fp->alu.inst[pos].inst0 = NOP_INST0;
 
1147
                        fp->alu.inst[pos].inst1 = NOP_INST1;
 
1148
                        fp->alu.inst[pos].inst2 = NOP_INST2;
 
1149
                        fp->alu.inst[pos].inst3 = NOP_INST3;
 
1150
 
 
1151
                        cs->nrslots++;
 
1152
                }
 
1153
                // Note: When we need both parts (vector and scalar) of a source,
 
1154
                // we always try to put them into the same position. This makes the
 
1155
                // code easier to read, and it is optimal (i.e. one doesn't gain
 
1156
                // anything by splitting the parts).
 
1157
                // It also avoids headaches with swizzles that access both parts (i.e WXY)
 
1158
                tempused = cs->slot[pos].used;
 
1159
                for (i = 0; i < 3; ++i) {
 
1160
                        tempvsrc[i] = cs->slot[pos].vsrc[i];
 
1161
                        tempssrc[i] = cs->slot[pos].ssrc[i];
 
1162
                }
 
1163
 
 
1164
                for (i = 0; i < argc; ++i) {
 
1165
                        int flags = (used >> i) & SLOT_SRC_BOTH;
 
1166
 
 
1167
                        if (!flags) {
 
1168
                                srcpos[i] = 0;
 
1169
                                continue;
 
1170
                        }
 
1171
 
 
1172
                        for (j = 0; j < 3; ++j) {
 
1173
                                if ((tempused >> j) & flags & SLOT_SRC_VECTOR) {
 
1174
                                        if (tempvsrc[j] != hwsrc[i])
 
1175
                                                continue;
 
1176
                                }
 
1177
 
 
1178
                                if ((tempused >> j) & flags & SLOT_SRC_SCALAR) {
 
1179
                                        if (tempssrc[j] != hwsrc[i])
 
1180
                                                continue;
 
1181
                                }
 
1182
 
 
1183
                                break;
 
1184
                        }
 
1185
 
 
1186
                        if (j == 3)
 
1187
                                break;
 
1188
 
 
1189
                        srcpos[i] = j;
 
1190
                        tempused |= flags << j;
 
1191
                        if (flags & SLOT_SRC_VECTOR)
 
1192
                                tempvsrc[j] = hwsrc[i];
 
1193
                        if (flags & SLOT_SRC_SCALAR)
 
1194
                                tempssrc[j] = hwsrc[i];
 
1195
                }
 
1196
 
 
1197
                if (i == argc)
 
1198
                        break;
 
1199
        }
 
1200
 
 
1201
        // Found a slot, reserve it
 
1202
        cs->slot[pos].used = tempused | (used & SLOT_OP_BOTH);
 
1203
        for (i = 0; i < 3; ++i) {
 
1204
                cs->slot[pos].vsrc[i] = tempvsrc[i];
 
1205
                cs->slot[pos].ssrc[i] = tempssrc[i];
 
1206
        }
 
1207
 
 
1208
        for (i = 0; i < argc; ++i) {
 
1209
                if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) {
 
1210
                        int regnr = hwsrc[i] & 31;
 
1211
 
 
1212
                        if (used & (SLOT_SRC_VECTOR << i)) {
 
1213
                                if (cs->hwtemps[regnr].vector_lastread < pos)
 
1214
                                        cs->hwtemps[regnr].vector_lastread =
 
1215
                                            pos;
 
1216
                        }
 
1217
                        if (used & (SLOT_SRC_SCALAR << i)) {
 
1218
                                if (cs->hwtemps[regnr].scalar_lastread < pos)
 
1219
                                        cs->hwtemps[regnr].scalar_lastread =
 
1220
                                            pos;
 
1221
                        }
 
1222
                }
 
1223
        }
 
1224
 
 
1225
        // Emit the source fetch code
 
1226
        fp->alu.inst[pos].inst1 &= ~R300_FPI1_SRC_MASK;
 
1227
        fp->alu.inst[pos].inst1 |=
 
1228
            ((cs->slot[pos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) |
 
1229
             (cs->slot[pos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) |
 
1230
             (cs->slot[pos].vsrc[2] << R300_FPI1_SRC2C_SHIFT));
 
1231
 
 
1232
        fp->alu.inst[pos].inst3 &= ~R300_FPI3_SRC_MASK;
 
1233
        fp->alu.inst[pos].inst3 |=
 
1234
            ((cs->slot[pos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) |
 
1235
             (cs->slot[pos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) |
 
1236
             (cs->slot[pos].ssrc[2] << R300_FPI3_SRC2A_SHIFT));
 
1237
 
 
1238
        // Emit the argument selection code
 
1239
        if (emit_vop) {
 
1240
                int swz[3];
 
1241
 
 
1242
                for (i = 0; i < 3; ++i) {
 
1243
                        if (i < argc) {
 
1244
                                swz[i] = (v_swiz[REG_GET_VSWZ(src[i])].base +
 
1245
                                          (srcpos[i] *
 
1246
                                           v_swiz[REG_GET_VSWZ(src[i])].
 
1247
                                           stride)) | ((src[i] & REG_NEGV_MASK)
 
1248
                                                       ? ARG_NEG : 0) | ((src[i]
 
1249
                                                                          &
 
1250
                                                                          REG_ABS_MASK)
 
1251
                                                                         ?
 
1252
                                                                         ARG_ABS
 
1253
                                                                         : 0);
 
1254
                        } else {
 
1255
                                swz[i] = R300_FPI0_ARGC_ZERO;
 
1256
                        }
 
1257
                }
 
1258
 
 
1259
                fp->alu.inst[pos].inst0 &=
 
1260
                    ~(R300_FPI0_ARG0C_MASK | R300_FPI0_ARG1C_MASK |
 
1261
                      R300_FPI0_ARG2C_MASK);
 
1262
                fp->alu.inst[pos].inst0 |=
 
1263
                    (swz[0] << R300_FPI0_ARG0C_SHIFT) | (swz[1] <<
 
1264
                                                         R300_FPI0_ARG1C_SHIFT)
 
1265
                    | (swz[2] << R300_FPI0_ARG2C_SHIFT);
 
1266
        }
 
1267
 
 
1268
        if (emit_sop) {
 
1269
                int swz[3];
 
1270
 
 
1271
                for (i = 0; i < 3; ++i) {
 
1272
                        if (i < argc) {
 
1273
                                swz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base +
 
1274
                                          (srcpos[i] *
 
1275
                                           s_swiz[REG_GET_SSWZ(src[i])].
 
1276
                                           stride)) | ((src[i] & REG_NEGV_MASK)
 
1277
                                                       ? ARG_NEG : 0) | ((src[i]
 
1278
                                                                          &
 
1279
                                                                          REG_ABS_MASK)
 
1280
                                                                         ?
 
1281
                                                                         ARG_ABS
 
1282
                                                                         : 0);
 
1283
                        } else {
 
1284
                                swz[i] = R300_FPI2_ARGA_ZERO;
 
1285
                        }
 
1286
                }
 
1287
 
 
1288
                fp->alu.inst[pos].inst2 &=
 
1289
                    ~(R300_FPI2_ARG0A_MASK | R300_FPI2_ARG1A_MASK |
 
1290
                      R300_FPI2_ARG2A_MASK);
 
1291
                fp->alu.inst[pos].inst2 |=
 
1292
                    (swz[0] << R300_FPI2_ARG0A_SHIFT) | (swz[1] <<
 
1293
                                                         R300_FPI2_ARG1A_SHIFT)
 
1294
                    | (swz[2] << R300_FPI2_ARG2A_SHIFT);
 
1295
        }
 
1296
 
 
1297
        return pos;
 
1298
}
 
1299
 
 
1300
/**
 
1301
 * Append an ALU instruction to the instruction list.
 
1302
 */
 
1303
static void emit_arith(struct r300_fragment_program *fp,
 
1304
                       int op,
 
1305
                       GLuint dest,
 
1306
                       int mask,
 
1307
                       GLuint src0, GLuint src1, GLuint src2, int flags)
 
1308
{
 
1309
        COMPILE_STATE;
 
1310
        GLuint src[3] = { src0, src1, src2 };
 
1311
        int hwdest;
 
1312
        GLboolean emit_vop, emit_sop;
 
1313
        int vop, sop, argc;
 
1314
        int pos;
 
1315
 
 
1316
        vop = r300_fpop[op].v_op;
 
1317
        sop = r300_fpop[op].s_op;
 
1318
        argc = r300_fpop[op].argc;
 
1319
 
 
1320
        if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT &&
 
1321
            REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) {
 
1322
                if (mask & WRITEMASK_Z) {
 
1323
                        mask = WRITEMASK_W;
 
1324
                } else {
 
1325
                        return;
 
1326
                }
 
1327
        }
 
1328
 
 
1329
        emit_vop = GL_FALSE;
 
1330
        emit_sop = GL_FALSE;
 
1331
        if ((mask & WRITEMASK_XYZ) || vop == R300_FPI0_OUTC_DP3)
 
1332
                emit_vop = GL_TRUE;
 
1333
        if ((mask & WRITEMASK_W) || vop == R300_FPI0_OUTC_REPL_ALPHA)
 
1334
                emit_sop = GL_TRUE;
 
1335
 
 
1336
        pos =
 
1337
            find_and_prepare_slot(fp, emit_vop, emit_sop, argc, src, dest,
 
1338
                                  mask);
 
1339
        if (pos < 0)
 
1340
                return;
 
1341
 
 
1342
        hwdest = t_hw_dst(fp, dest, GL_FALSE, pos);     /* Note: Side effects wrt register allocation */
 
1343
 
 
1344
        if (flags & PFS_FLAG_SAT) {
 
1345
                vop |= R300_FPI0_OUTC_SAT;
 
1346
                sop |= R300_FPI2_OUTA_SAT;
 
1347
        }
 
1348
 
 
1349
        /* Throw the pieces together and get FPI0/1 */
 
1350
        if (emit_vop) {
 
1351
                fp->alu.inst[pos].inst0 |= vop;
 
1352
 
 
1353
                fp->alu.inst[pos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT;
 
1354
 
 
1355
                if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
 
1356
                        if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) {
 
1357
                                fp->alu.inst[pos].inst1 |=
 
1358
                                    (mask & WRITEMASK_XYZ) <<
 
1359
                                    R300_FPI1_DSTC_OUTPUT_MASK_SHIFT;
 
1360
                        } else
 
1361
                                assert(0);
 
1362
                } else {
 
1363
                        fp->alu.inst[pos].inst1 |=
 
1364
                            (mask & WRITEMASK_XYZ) <<
 
1365
                            R300_FPI1_DSTC_REG_MASK_SHIFT;
 
1366
 
 
1367
                        cs->hwtemps[hwdest].vector_valid = pos + 1;
 
1368
                }
 
1369
        }
 
1370
 
 
1371
        /* And now FPI2/3 */
 
1372
        if (emit_sop) {
 
1373
                fp->alu.inst[pos].inst2 |= sop;
 
1374
 
 
1375
                if (mask & WRITEMASK_W) {
 
1376
                        if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
 
1377
                                if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) {
 
1378
                                        fp->alu.inst[pos].inst3 |=
 
1379
                                            (hwdest << R300_FPI3_DSTA_SHIFT) |
 
1380
                                            R300_FPI3_DSTA_OUTPUT;
 
1381
                                } else if (REG_GET_INDEX(dest) ==
 
1382
                                           FRAG_RESULT_DEPR) {
 
1383
                                        fp->alu.inst[pos].inst3 |=
 
1384
                                            R300_FPI3_DSTA_DEPTH;
 
1385
                                } else
 
1386
                                        assert(0);
 
1387
                        } else {
 
1388
                                fp->alu.inst[pos].inst3 |=
 
1389
                                    (hwdest << R300_FPI3_DSTA_SHIFT) |
 
1390
                                    R300_FPI3_DSTA_REG;
 
1391
 
 
1392
                                cs->hwtemps[hwdest].scalar_valid = pos + 1;
 
1393
                        }
 
1394
                }
 
1395
        }
 
1396
 
 
1397
        return;
 
1398
}
 
1399
 
 
1400
#if 0
/**
 * Build a register descriptor that refers to fragment input \p attr.
 *
 * Reports an error and returns \c undef when the Mesa program does not
 * list \p attr in its InputsRead bitmask.
 *
 * (Currently compiled out.)
 */
static GLuint get_attrib(struct r300_fragment_program *fp, GLuint attr)
{
	struct gl_fragment_program *mp = &fp->mesa_program;
	GLuint reg;

	/* The attribute must be flagged as read by the program. */
	if ((mp->Base.InputsRead & (1 << attr)) == 0) {
		ERROR("Attribute %d was not provided!\n", attr);
		return undef;
	}

	/* Start from the undefined descriptor and fill in the fields. */
	reg = undef;
	REG_SET_TYPE(reg, REG_TYPE_INPUT);
	REG_SET_INDEX(reg, attr);
	REG_SET_VALID(reg, GL_TRUE);

	return reg;
}
#endif
 
1417
 
 
1418
/**
 * Constants used by the parabola-based SIN/COS/SCS expansions.
 *
 * Each row is uploaded as one four-component program constant through
 * emit_const4fv(); the expansions then pick individual components with
 * swizzles.  The table is only ever read, hence \c const (this matches
 * LitConst, which is passed to the same function).
 */
static const GLfloat SinCosConsts[2][4] = {
	{
	 1.273239545,		/* x: 4/PI                        */
	 -0.405284735,		/* y: -4/(PI*PI)                  */
	 3.141592654,		/* z: PI                          */
	 0.2225			/* w: weight of the squared term  */
	 },
	{
	 0.75,			/* x: phase offset used by COS    */
	 0.0,			/* y: unused                      */
	 0.159154943,		/* z: 1/(2*PI)                    */
	 6.283185307		/* w: 2*PI                        */
	 }
};
 
1432
 
 
1433
/**
 
1434
 * Emit a LIT instruction.
 
1435
 * \p flags may be PFS_FLAG_SAT
 
1436
 *
 
1437
 * Definition of LIT (from ARB_fragment_program):
 
1438
 * tmp = VectorLoad(op0);
 
1439
 * if (tmp.x < 0) tmp.x = 0;
 
1440
 * if (tmp.y < 0) tmp.y = 0;
 
1441
 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
 
1442
 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
 
1443
 * result.x = 1.0;
 
1444
 * result.y = tmp.x;
 
1445
 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
 
1446
 * result.w = 1.0;
 
1447
 *
 
1448
 * The longest path of computation is the one leading to result.z,
 
1449
 * consisting of 5 operations. This implementation of LIT takes
 
1450
 * 5 slots. So unless there's some special undocumented opcode,
 
1451
 * this implementation is potentially optimal. Unfortunately,
 
1452
 * emit_arith is a bit too conservative because it doesn't understand
 
1453
 * partial writes to the vector component.
 
1454
 */
 
1455
static const GLfloat LitConst[4] =
 
1456
    { 127.999999, 127.999999, 127.999999, -127.999999 };
 
1457
 
 
1458
static void emit_lit(struct r300_fragment_program *fp,
 
1459
                     GLuint dest, int mask, GLuint src, int flags)
 
1460
{
 
1461
        COMPILE_STATE;
 
1462
        GLuint cnst;
 
1463
        int needTemporary;
 
1464
        GLuint temp;
 
1465
 
 
1466
        cnst = emit_const4fv(fp, LitConst);
 
1467
 
 
1468
        needTemporary = 0;
 
1469
        if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) {
 
1470
                needTemporary = 1;
 
1471
        } else if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
 
1472
                // LIT is typically followed by DP3/DP4, so there's no point
 
1473
                // in creating special code for this case
 
1474
                needTemporary = 1;
 
1475
        }
 
1476
 
 
1477
        if (needTemporary) {
 
1478
                temp = keep(get_temp_reg(fp));
 
1479
        } else {
 
1480
                temp = keep(dest);
 
1481
        }
 
1482
 
 
1483
        // Note: The order of emit_arith inside the slots is relevant,
 
1484
        // because emit_arith only looks at scalar vs. vector when resolving
 
1485
        // dependencies, and it does not consider individual vector components,
 
1486
        // so swizzling between the two parts can create fake dependencies.
 
1487
 
 
1488
        // First slot
 
1489
        emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_XY,
 
1490
                   keep(src), pfs_zero, undef, 0);
 
1491
        emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_W, src, cnst, undef, 0);
 
1492
 
 
1493
        // Second slot
 
1494
        emit_arith(fp, PFS_OP_MIN, temp, WRITEMASK_Z,
 
1495
                   swizzle(temp, W, W, W, W), cnst, undef, 0);
 
1496
        emit_arith(fp, PFS_OP_LG2, temp, WRITEMASK_W,
 
1497
                   swizzle(temp, Y, Y, Y, Y), undef, undef, 0);
 
1498
 
 
1499
        // Third slot
 
1500
        // If desired, we saturate the y result here.
 
1501
        // This does not affect the use as a condition variable in the CMP later
 
1502
        emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W,
 
1503
                   temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0);
 
1504
        emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_Y,
 
1505
                   swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags);
 
1506
 
 
1507
        // Fourth slot
 
1508
        emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_X,
 
1509
                   pfs_one, pfs_one, pfs_zero, 0);
 
1510
        emit_arith(fp, PFS_OP_EX2, temp, WRITEMASK_W, temp, undef, undef, 0);
 
1511
 
 
1512
        // Fifth slot
 
1513
        emit_arith(fp, PFS_OP_CMP, temp, WRITEMASK_Z,
 
1514
                   pfs_zero, swizzle(temp, W, W, W, W),
 
1515
                   negate(swizzle(temp, Y, Y, Y, Y)), flags);
 
1516
        emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one,
 
1517
                   pfs_zero, 0);
 
1518
 
 
1519
        if (needTemporary) {
 
1520
                emit_arith(fp, PFS_OP_MAD, dest, mask,
 
1521
                           temp, pfs_one, pfs_zero, flags);
 
1522
                free_temp(fp, temp);
 
1523
        } else {
 
1524
                // Decrease refcount of the destination
 
1525
                t_hw_dst(fp, dest, GL_FALSE, cs->nrslots);
 
1526
        }
 
1527
}
 
1528
 
 
1529
static GLboolean parse_program(struct r300_fragment_program *fp)
 
1530
{
 
1531
        struct gl_fragment_program *mp = &fp->mesa_program;
 
1532
        const struct prog_instruction *inst = mp->Base.Instructions;
 
1533
        struct prog_instruction *fpi;
 
1534
        GLuint src[3], dest, temp[2];
 
1535
        int flags, mask = 0;
 
1536
        int const_sin[2];
 
1537
 
 
1538
        if (!inst || inst[0].Opcode == OPCODE_END) {
 
1539
                ERROR("empty program?\n");
 
1540
                return GL_FALSE;
 
1541
        }
 
1542
 
 
1543
        for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
 
1544
                if (fpi->SaturateMode == SATURATE_ZERO_ONE)
 
1545
                        flags = PFS_FLAG_SAT;
 
1546
                else
 
1547
                        flags = 0;
 
1548
 
 
1549
                if (fpi->Opcode != OPCODE_KIL) {
 
1550
                        dest = t_dst(fp, fpi->DstReg);
 
1551
                        mask = fpi->DstReg.WriteMask;
 
1552
                }
 
1553
 
 
1554
                switch (fpi->Opcode) {
 
1555
                case OPCODE_ABS:
 
1556
                        src[0] = t_src(fp, fpi->SrcReg[0]);
 
1557
                        emit_arith(fp, PFS_OP_MAD, dest, mask,
 
1558
                                   absolute(src[0]), pfs_one, pfs_zero, flags);
 
1559
                        break;
 
1560
                case OPCODE_ADD:
 
1561
                        src[0] = t_src(fp, fpi->SrcReg[0]);
 
1562
                        src[1] = t_src(fp, fpi->SrcReg[1]);
 
1563
                        emit_arith(fp, PFS_OP_MAD, dest, mask,
 
1564
                                   src[0], pfs_one, src[1], flags);
 
1565
                        break;
 
1566
                case OPCODE_CMP:
 
1567
                        src[0] = t_src(fp, fpi->SrcReg[0]);
 
1568
                        src[1] = t_src(fp, fpi->SrcReg[1]);
 
1569
                        src[2] = t_src(fp, fpi->SrcReg[2]);
 
1570
                        /* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c
 
1571
                         *    r300 - if src2.c < 0.0 ? src1.c : src0.c
 
1572
                         */
 
1573
                        emit_arith(fp, PFS_OP_CMP, dest, mask,
 
1574
                                   src[2], src[1], src[0], flags);
 
1575
                        break;
 
1576
                case OPCODE_COS:
 
1577
                        /*
 
1578
                         * cos using a parabola (see SIN):
 
1579
                         * cos(x):
 
1580
                         *   x = (x/(2*PI))+0.75
 
1581
                         *   x = frac(x)
 
1582
                         *   x = (x*2*PI)-PI
 
1583
                         *   result = sin(x)
 
1584
                         */
 
1585
                        temp[0] = get_temp_reg(fp);
 
1586
                        const_sin[0] = emit_const4fv(fp, SinCosConsts[0]);
 
1587
                        const_sin[1] = emit_const4fv(fp, SinCosConsts[1]);
 
1588
                        src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
 
1589
 
 
1590
                        /* add 0.5*PI and do range reduction */
 
1591
 
 
1592
                        emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X,
 
1593
                                   swizzle(src[0], X, X, X, X),
 
1594
                                   swizzle(const_sin[1], Z, Z, Z, Z),
 
1595
                                   swizzle(const_sin[1], X, X, X, X), 0);
 
1596
 
 
1597
                        emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X,
 
1598
                                   swizzle(temp[0], X, X, X, X),
 
1599
                                   undef, undef, 0);
 
1600
 
 
1601
                        emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W),       //2*PI
 
1602
                                   negate(swizzle(const_sin[0], Z, Z, Z, Z)),   //-PI
 
1603
                                   0);
 
1604
 
 
1605
                        /* SIN */
 
1606
 
 
1607
                        emit_arith(fp, PFS_OP_MAD, temp[0],
 
1608
                                   WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0],
 
1609
                                                                      Z, Z, Z,
 
1610
                                                                      Z),
 
1611
                                   const_sin[0], pfs_zero, 0);
 
1612
 
 
1613
                        emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X,
 
1614
                                   swizzle(temp[0], Y, Y, Y, Y),
 
1615
                                   absolute(swizzle(temp[0], Z, Z, Z, Z)),
 
1616
                                   swizzle(temp[0], X, X, X, X), 0);
 
1617
 
 
1618
                        emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y,
 
1619
                                   swizzle(temp[0], X, X, X, X),
 
1620
                                   absolute(swizzle(temp[0], X, X, X, X)),
 
1621
                                   negate(swizzle(temp[0], X, X, X, X)), 0);
 
1622
 
 
1623
                        emit_arith(fp, PFS_OP_MAD, dest, mask,
 
1624
                                   swizzle(temp[0], Y, Y, Y, Y),
 
1625
                                   swizzle(const_sin[0], W, W, W, W),
 
1626
                                   swizzle(temp[0], X, X, X, X), flags);
 
1627
 
 
1628
                        free_temp(fp, temp[0]);
 
1629
                        break;
 
1630
                case OPCODE_DP3:
 
1631
                        src[0] = t_src(fp, fpi->SrcReg[0]);
 
1632
                        src[1] = t_src(fp, fpi->SrcReg[1]);
 
1633
                        emit_arith(fp, PFS_OP_DP3, dest, mask,
 
1634
                                   src[0], src[1], undef, flags);
 
1635
                        break;
 
1636
                case OPCODE_DP4:
 
1637
                        src[0] = t_src(fp, fpi->SrcReg[0]);
 
1638
                        src[1] = t_src(fp, fpi->SrcReg[1]);
 
1639
                        emit_arith(fp, PFS_OP_DP4, dest, mask,
 
1640
                                   src[0], src[1], undef, flags);
 
1641
                        break;
 
1642
                case OPCODE_DPH:
 
1643
                        src[0] = t_src(fp, fpi->SrcReg[0]);
 
1644
                        src[1] = t_src(fp, fpi->SrcReg[1]);
 
1645
                        /* src0.xyz1 -> temp
 
1646
                         * DP4 dest, temp, src1
 
1647
                         */
 
1648
#if 0
 
1649
                        temp[0] = get_temp_reg(fp);
 
1650
                        src[0].s_swz = SWIZZLE_ONE;
 
1651
                        emit_arith(fp, PFS_OP_MAD, temp[0], mask,
 
1652
                                   src[0], pfs_one, pfs_zero, 0);
 
1653
                        emit_arith(fp, PFS_OP_DP4, dest, mask,
 
1654
                                   temp[0], src[1], undef, flags);
 
1655
                        free_temp(fp, temp[0]);
 
1656
#else
 
1657
                        emit_arith(fp, PFS_OP_DP4, dest, mask,
 
1658
                                   swizzle(src[0], X, Y, Z, ONE), src[1],
 
1659
                                   undef, flags);
 
1660
#endif
 
1661
                        break;
 
1662
                case OPCODE_DST:
 
1663
                        src[0] = t_src(fp, fpi->SrcReg[0]);
 
1664
                        src[1] = t_src(fp, fpi->SrcReg[1]);
 
1665
                        /* dest.y = src0.y * src1.y */
 
1666
                        if (mask & WRITEMASK_Y)
 
1667
                                emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Y,
 
1668
                                           keep(src[0]), keep(src[1]),
 
1669
                                           pfs_zero, flags);
 
1670
                        /* dest.z = src0.z */
 
1671
                        if (mask & WRITEMASK_Z)
 
1672
                                emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Z,
 
1673
                                           src[0], pfs_one, pfs_zero, flags);
 
1674
                        /* result.x = 1.0
 
1675
                         * result.w = src1.w */
 
1676
                        if (mask & WRITEMASK_XW) {
 
1677
                                REG_SET_VSWZ(src[1], SWIZZLE_111);      /*Cheat */
 
1678
                                emit_arith(fp, PFS_OP_MAD, dest,
 
1679
                                           mask & WRITEMASK_XW,
 
1680
                                           src[1], pfs_one, pfs_zero, flags);
 
1681
                        }
 
1682
                        break;
 
1683
                case OPCODE_EX2:
 
1684
                        src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
 
1685
                        emit_arith(fp, PFS_OP_EX2, dest, mask,
 
1686
                                   src[0], undef, undef, flags);
 
1687
                        break;
 
1688
                case OPCODE_FLR:
 
1689
                        src[0] = t_src(fp, fpi->SrcReg[0]);
 
1690
                        temp[0] = get_temp_reg(fp);
 
1691
                        /* FRC temp, src0
 
1692
                         * MAD dest, src0, 1.0, -temp
 
1693
                         */
 
1694
                        emit_arith(fp, PFS_OP_FRC, temp[0], mask,
 
1695
                                   keep(src[0]), undef, undef, 0);
 
1696
                        emit_arith(fp, PFS_OP_MAD, dest, mask,
 
1697
                                   src[0], pfs_one, negate(temp[0]), flags);
 
1698
                        free_temp(fp, temp[0]);
 
1699
                        break;
 
1700
                case OPCODE_FRC:
 
1701
                        src[0] = t_src(fp, fpi->SrcReg[0]);
 
1702
                        emit_arith(fp, PFS_OP_FRC, dest, mask,
 
1703
                                   src[0], undef, undef, flags);
 
1704
                        break;
 
1705
                case OPCODE_KIL:
 
1706
                        emit_tex(fp, fpi, R300_FPITX_OP_KIL);
 
1707
                        break;
 
1708
                case OPCODE_LG2:
 
1709
                        src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
 
1710
                        emit_arith(fp, PFS_OP_LG2, dest, mask,
 
1711
                                   src[0], undef, undef, flags);
 
1712
                        break;
 
1713
                case OPCODE_LIT:
 
1714
                        src[0] = t_src(fp, fpi->SrcReg[0]);
 
1715
                        emit_lit(fp, dest, mask, src[0], flags);
 
1716
                        break;
 
1717
                case OPCODE_LRP:
 
1718
                        src[0] = t_src(fp, fpi->SrcReg[0]);
 
1719
                        src[1] = t_src(fp, fpi->SrcReg[1]);
 
1720
                        src[2] = t_src(fp, fpi->SrcReg[2]);
 
1721
                        /* result = tmp0tmp1 + (1 - tmp0)tmp2
 
1722
                         *        = tmp0tmp1 + tmp2 + (-tmp0)tmp2
 
1723
                         *     MAD temp, -tmp0, tmp2, tmp2
 
1724
                         *     MAD result, tmp0, tmp1, temp
 
1725
                         */
 
1726
                        temp[0] = get_temp_reg(fp);
 
1727
                        emit_arith(fp, PFS_OP_MAD, temp[0], mask,
 
1728
                                   negate(keep(src[0])), keep(src[2]), src[2],
 
1729
                                   0);
 
1730
                        emit_arith(fp, PFS_OP_MAD, dest, mask,
 
1731
                                   src[0], src[1], temp[0], flags);
 
1732
                        free_temp(fp, temp[0]);
 
1733
                        break;
 
1734
                case OPCODE_MAD:
 
1735
                        src[0] = t_src(fp, fpi->SrcReg[0]);
 
1736
                        src[1] = t_src(fp, fpi->SrcReg[1]);
 
1737
                        src[2] = t_src(fp, fpi->SrcReg[2]);
 
1738
                        emit_arith(fp, PFS_OP_MAD, dest, mask,
 
1739
                                   src[0], src[1], src[2], flags);
 
1740
                        break;
 
1741
                case OPCODE_MAX:
 
1742
                        src[0] = t_src(fp, fpi->SrcReg[0]);
 
1743
                        src[1] = t_src(fp, fpi->SrcReg[1]);
 
1744
                        emit_arith(fp, PFS_OP_MAX, dest, mask,
 
1745
                                   src[0], src[1], undef, flags);
 
1746
                        break;
 
1747
                case OPCODE_MIN:
 
1748
                        src[0] = t_src(fp, fpi->SrcReg[0]);
 
1749
                        src[1] = t_src(fp, fpi->SrcReg[1]);
 
1750
                        emit_arith(fp, PFS_OP_MIN, dest, mask,
 
1751
                                   src[0], src[1], undef, flags);
 
1752
                        break;
 
1753
                case OPCODE_MOV:
 
1754
                case OPCODE_SWZ:
 
1755
                        src[0] = t_src(fp, fpi->SrcReg[0]);
 
1756
                        emit_arith(fp, PFS_OP_MAD, dest, mask,
 
1757
                                   src[0], pfs_one, pfs_zero, flags);
 
1758
                        break;
 
1759
                case OPCODE_MUL:
 
1760
                        src[0] = t_src(fp, fpi->SrcReg[0]);
 
1761
                        src[1] = t_src(fp, fpi->SrcReg[1]);
 
1762
                        emit_arith(fp, PFS_OP_MAD, dest, mask,
 
1763
                                   src[0], src[1], pfs_zero, flags);
 
1764
                        break;
 
1765
                case OPCODE_POW:
 
1766
                        src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
 
1767
                        src[1] = t_scalar_src(fp, fpi->SrcReg[1]);
 
1768
                        temp[0] = get_temp_reg(fp);
 
1769
                        emit_arith(fp, PFS_OP_LG2, temp[0], WRITEMASK_W,
 
1770
                                   src[0], undef, undef, 0);
 
1771
                        emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W,
 
1772
                                   temp[0], src[1], pfs_zero, 0);
 
1773
                        emit_arith(fp, PFS_OP_EX2, dest, fpi->DstReg.WriteMask,
 
1774
                                   temp[0], undef, undef, 0);
 
1775
                        free_temp(fp, temp[0]);
 
1776
                        break;
 
1777
                case OPCODE_RCP:
 
1778
                        src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
 
1779
                        emit_arith(fp, PFS_OP_RCP, dest, mask,
 
1780
                                   src[0], undef, undef, flags);
 
1781
                        break;
 
1782
                case OPCODE_RSQ:
 
1783
                        src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
 
1784
                        emit_arith(fp, PFS_OP_RSQ, dest, mask,
 
1785
                                   absolute(src[0]), pfs_zero, pfs_zero, flags);
 
1786
                        break;
 
1787
                case OPCODE_SCS:
 
1788
                        /*
 
1789
                         * scs using a parabola :
 
1790
                         * scs(x):
 
1791
                         *   result.x = sin(-abs(x)+0.5*PI)  (cos)
 
1792
                         *   result.y = sin(x)               (sin)
 
1793
                         *
 
1794
                         */
 
1795
                        temp[0] = get_temp_reg(fp);
 
1796
                        temp[1] = get_temp_reg(fp);
 
1797
                        const_sin[0] = emit_const4fv(fp, SinCosConsts[0]);
 
1798
                        const_sin[1] = emit_const4fv(fp, SinCosConsts[1]);
 
1799
                        src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
 
1800
 
 
1801
                        /* x = -abs(x)+0.5*PI */
 
1802
                        emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(const_sin[0], Z, Z, Z, Z),     //PI
 
1803
                                   pfs_half,
 
1804
                                   negate(abs
 
1805
                                          (swizzle(keep(src[0]), X, X, X, X))),
 
1806
                                   0);
 
1807
 
 
1808
                        /* C*x (sin) */
 
1809
                        emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W,
 
1810
                                   swizzle(const_sin[0], Y, Y, Y, Y),
 
1811
                                   swizzle(keep(src[0]), X, X, X, X),
 
1812
                                   pfs_zero, 0);
 
1813
 
 
1814
                        /* B*x, C*x (cos) */
 
1815
                        emit_arith(fp, PFS_OP_MAD, temp[0],
 
1816
                                   WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0],
 
1817
                                                                      Z, Z, Z,
 
1818
                                                                      Z),
 
1819
                                   const_sin[0], pfs_zero, 0);
 
1820
 
 
1821
                        /* B*x (sin) */
 
1822
                        emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W,
 
1823
                                   swizzle(const_sin[0], X, X, X, X),
 
1824
                                   keep(src[0]), pfs_zero, 0);
 
1825
 
 
1826
                        /* y = B*x + C*x*abs(x) (sin) */
 
1827
                        emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_Z,
 
1828
                                   absolute(src[0]),
 
1829
                                   swizzle(temp[0], W, W, W, W),
 
1830
                                   swizzle(temp[1], W, W, W, W), 0);
 
1831
 
 
1832
                        /* y = B*x + C*x*abs(x) (cos) */
 
1833
                        emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W,
 
1834
                                   swizzle(temp[0], Y, Y, Y, Y),
 
1835
                                   absolute(swizzle(temp[0], Z, Z, Z, Z)),
 
1836
                                   swizzle(temp[0], X, X, X, X), 0);
 
1837
 
 
1838
                        /* y*abs(y) - y (cos), y*abs(y) - y (sin) */
 
1839
                        emit_arith(fp, PFS_OP_MAD, temp[0],
 
1840
                                   WRITEMASK_X | WRITEMASK_Y, swizzle(temp[1],
 
1841
                                                                      W, Z, Y,
 
1842
                                                                      X),
 
1843
                                   absolute(swizzle(temp[1], W, Z, Y, X)),
 
1844
                                   negate(swizzle(temp[1], W, Z, Y, X)), 0);
 
1845
 
 
1846
                        /* dest.xy = mad(temp.xy, P, temp2.wz) */
 
1847
                        emit_arith(fp, PFS_OP_MAD, dest,
 
1848
                                   mask & (WRITEMASK_X | WRITEMASK_Y), temp[0],
 
1849
                                   swizzle(const_sin[0], W, W, W, W),
 
1850
                                   swizzle(temp[1], W, Z, Y, X), flags);
 
1851
 
 
1852
                        free_temp(fp, temp[0]);
 
1853
                        free_temp(fp, temp[1]);
 
1854
                        break;
 
1855
                case OPCODE_SGE:
 
1856
                        src[0] = t_src(fp, fpi->SrcReg[0]);
 
1857
                        src[1] = t_src(fp, fpi->SrcReg[1]);
 
1858
                        temp[0] = get_temp_reg(fp);
 
1859
                        /* temp = src0 - src1
 
1860
                         * dest.c = (temp.c < 0.0) ? 0 : 1
 
1861
                         */
 
1862
                        emit_arith(fp, PFS_OP_MAD, temp[0], mask,
 
1863
                                   src[0], pfs_one, negate(src[1]), 0);
 
1864
                        emit_arith(fp, PFS_OP_CMP, dest, mask,
 
1865
                                   pfs_one, pfs_zero, temp[0], 0);
 
1866
                        free_temp(fp, temp[0]);
 
1867
                        break;
 
1868
                case OPCODE_SIN:
 
1869
                        /*
 
1870
                         *  using a parabola:
 
1871
                         * sin(x) = 4/pi * x + -4/(pi*pi) * x * abs(x)
 
1872
                         * extra precision is obtained by weighting against
 
1873
                         * itself squared.
 
1874
                         */
 
1875
 
 
1876
                        temp[0] = get_temp_reg(fp);
 
1877
                        const_sin[0] = emit_const4fv(fp, SinCosConsts[0]);
 
1878
                        const_sin[1] = emit_const4fv(fp, SinCosConsts[1]);
 
1879
                        src[0] = t_scalar_src(fp, fpi->SrcReg[0]);
 
1880
 
 
1881
                        /* do range reduction */
 
1882
 
 
1883
                        emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X,
 
1884
                                   swizzle(keep(src[0]), X, X, X, X),
 
1885
                                   swizzle(const_sin[1], Z, Z, Z, Z),
 
1886
                                   pfs_half, 0);
 
1887
 
 
1888
                        emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X,
 
1889
                                   swizzle(temp[0], X, X, X, X),
 
1890
                                   undef, undef, 0);
 
1891
 
 
1892
                        emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W),       //2*PI
 
1893
                                   negate(swizzle(const_sin[0], Z, Z, Z, Z)),   //PI
 
1894
                                   0);
 
1895
 
 
1896
                        /* SIN */
 
1897
 
 
1898
                        emit_arith(fp, PFS_OP_MAD, temp[0],
 
1899
                                   WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0],
 
1900
                                                                      Z, Z, Z,
 
1901
                                                                      Z),
 
1902
                                   const_sin[0], pfs_zero, 0);
 
1903
 
 
1904
                        emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X,
 
1905
                                   swizzle(temp[0], Y, Y, Y, Y),
 
1906
                                   absolute(swizzle(temp[0], Z, Z, Z, Z)),
 
1907
                                   swizzle(temp[0], X, X, X, X), 0);
 
1908
 
 
1909
                        emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y,
 
1910
                                   swizzle(temp[0], X, X, X, X),
 
1911
                                   absolute(swizzle(temp[0], X, X, X, X)),
 
1912
                                   negate(swizzle(temp[0], X, X, X, X)), 0);
 
1913
 
 
1914
                        emit_arith(fp, PFS_OP_MAD, dest, mask,
 
1915
                                   swizzle(temp[0], Y, Y, Y, Y),
 
1916
                                   swizzle(const_sin[0], W, W, W, W),
 
1917
                                   swizzle(temp[0], X, X, X, X), flags);
 
1918
 
 
1919
                        free_temp(fp, temp[0]);
 
1920
                        break;
 
1921
                case OPCODE_SLT:
 
1922
                        src[0] = t_src(fp, fpi->SrcReg[0]);
 
1923
                        src[1] = t_src(fp, fpi->SrcReg[1]);
 
1924
                        temp[0] = get_temp_reg(fp);
 
1925
                        /* temp = src0 - src1
 
1926
                         * dest.c = (temp.c < 0.0) ? 1 : 0
 
1927
                         */
 
1928
                        emit_arith(fp, PFS_OP_MAD, temp[0], mask,
 
1929
                                   src[0], pfs_one, negate(src[1]), 0);
 
1930
                        emit_arith(fp, PFS_OP_CMP, dest, mask,
 
1931
                                   pfs_zero, pfs_one, temp[0], 0);
 
1932
                        free_temp(fp, temp[0]);
 
1933
                        break;
 
1934
                case OPCODE_SUB:
 
1935
                        src[0] = t_src(fp, fpi->SrcReg[0]);
 
1936
                        src[1] = t_src(fp, fpi->SrcReg[1]);
 
1937
                        emit_arith(fp, PFS_OP_MAD, dest, mask,
 
1938
                                   src[0], pfs_one, negate(src[1]), flags);
 
1939
                        break;
 
1940
                case OPCODE_TEX:
 
1941
                        emit_tex(fp, fpi, R300_FPITX_OP_TEX);
 
1942
                        break;
 
1943
                case OPCODE_TXB:
 
1944
                        emit_tex(fp, fpi, R300_FPITX_OP_TXB);
 
1945
                        break;
 
1946
                case OPCODE_TXP:
 
1947
                        emit_tex(fp, fpi, R300_FPITX_OP_TXP);
 
1948
                        break;
 
1949
                case OPCODE_XPD:{
 
1950
                                src[0] = t_src(fp, fpi->SrcReg[0]);
 
1951
                                src[1] = t_src(fp, fpi->SrcReg[1]);
 
1952
                                temp[0] = get_temp_reg(fp);
 
1953
                                /* temp = src0.zxy * src1.yzx */
 
1954
                                emit_arith(fp, PFS_OP_MAD, temp[0],
 
1955
                                           WRITEMASK_XYZ, swizzle(keep(src[0]),
 
1956
                                                                  Z, X, Y, W),
 
1957
                                           swizzle(keep(src[1]), Y, Z, X, W),
 
1958
                                           pfs_zero, 0);
 
1959
                                /* dest.xyz = src0.yzx * src1.zxy - temp
 
1960
                                 * dest.w       = undefined
 
1961
                                 * */
 
1962
                                emit_arith(fp, PFS_OP_MAD, dest,
 
1963
                                           mask & WRITEMASK_XYZ, swizzle(src[0],
 
1964
                                                                         Y, Z,
 
1965
                                                                         X, W),
 
1966
                                           swizzle(src[1], Z, X, Y, W),
 
1967
                                           negate(temp[0]), flags);
 
1968
                                /* cleanup */
 
1969
                                free_temp(fp, temp[0]);
 
1970
                                break;
 
1971
                        }
 
1972
                default:
 
1973
                        ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
 
1974
                        break;
 
1975
                }
 
1976
 
 
1977
                if (fp->error)
 
1978
                        return GL_FALSE;
 
1979
 
 
1980
        }
 
1981
 
 
1982
        return GL_TRUE;
 
1983
}
 
1984
 
 
1985
static void insert_wpos(struct gl_program *prog)
 
1986
{
 
1987
        static gl_state_index tokens[STATE_LENGTH] = {
 
1988
                STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
 
1989
        };
 
1990
        struct prog_instruction *fpi;
 
1991
        GLuint window_index;
 
1992
        int i = 0;
 
1993
        GLuint tempregi = prog->NumTemporaries;
 
1994
        /* should do something else if no temps left... */
 
1995
        prog->NumTemporaries++;
 
1996
 
 
1997
        fpi = _mesa_alloc_instructions(prog->NumInstructions + 3);
 
1998
        _mesa_init_instructions(fpi, prog->NumInstructions + 3);
 
1999
 
 
2000
        /* perspective divide */
 
2001
        fpi[i].Opcode = OPCODE_RCP;
 
2002
 
 
2003
        fpi[i].DstReg.File = PROGRAM_TEMPORARY;
 
2004
        fpi[i].DstReg.Index = tempregi;
 
2005
        fpi[i].DstReg.WriteMask = WRITEMASK_W;
 
2006
        fpi[i].DstReg.CondMask = COND_TR;
 
2007
 
 
2008
        fpi[i].SrcReg[0].File = PROGRAM_INPUT;
 
2009
        fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
 
2010
        fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
 
2011
        i++;
 
2012
 
 
2013
        fpi[i].Opcode = OPCODE_MUL;
 
2014
 
 
2015
        fpi[i].DstReg.File = PROGRAM_TEMPORARY;
 
2016
        fpi[i].DstReg.Index = tempregi;
 
2017
        fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
 
2018
        fpi[i].DstReg.CondMask = COND_TR;
 
2019
 
 
2020
        fpi[i].SrcReg[0].File = PROGRAM_INPUT;
 
2021
        fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
 
2022
        fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
 
2023
 
 
2024
        fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
 
2025
        fpi[i].SrcReg[1].Index = tempregi;
 
2026
        fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW;
 
2027
        i++;
 
2028
 
 
2029
        /* viewport transformation */
 
2030
        window_index = _mesa_add_state_reference(prog->Parameters, tokens);
 
2031
 
 
2032
        fpi[i].Opcode = OPCODE_MAD;
 
2033
 
 
2034
        fpi[i].DstReg.File = PROGRAM_TEMPORARY;
 
2035
        fpi[i].DstReg.Index = tempregi;
 
2036
        fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
 
2037
        fpi[i].DstReg.CondMask = COND_TR;
 
2038
 
 
2039
        fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY;
 
2040
        fpi[i].SrcReg[0].Index = tempregi;
 
2041
        fpi[i].SrcReg[0].Swizzle =
 
2042
            MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
 
2043
 
 
2044
        fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR;
 
2045
        fpi[i].SrcReg[1].Index = window_index;
 
2046
        fpi[i].SrcReg[1].Swizzle =
 
2047
            MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
 
2048
 
 
2049
        fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR;
 
2050
        fpi[i].SrcReg[2].Index = window_index;
 
2051
        fpi[i].SrcReg[2].Swizzle =
 
2052
            MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
 
2053
        i++;
 
2054
 
 
2055
        _mesa_copy_instructions(&fpi[i], prog->Instructions,
 
2056
                                prog->NumInstructions);
 
2057
 
 
2058
        free(prog->Instructions);
 
2059
 
 
2060
        prog->Instructions = fpi;
 
2061
 
 
2062
        prog->NumInstructions += i;
 
2063
        fpi = &prog->Instructions[prog->NumInstructions - 1];
 
2064
 
 
2065
        assert(fpi->Opcode == OPCODE_END);
 
2066
 
 
2067
        for (fpi = &prog->Instructions[3]; fpi->Opcode != OPCODE_END; fpi++) {
 
2068
                for (i = 0; i < 3; i++)
 
2069
                        if (fpi->SrcReg[i].File == PROGRAM_INPUT &&
 
2070
                            fpi->SrcReg[i].Index == FRAG_ATTRIB_WPOS) {
 
2071
                                fpi->SrcReg[i].File = PROGRAM_TEMPORARY;
 
2072
                                fpi->SrcReg[i].Index = tempregi;
 
2073
                        }
 
2074
        }
 
2075
}
 
2076
 
 
2077
/* - Init structures
 
2078
 * - Determine what hwregs each input corresponds to
 
2079
 */
 
2080
static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp)
 
2081
{
 
2082
        struct r300_pfs_compile_state *cs = NULL;
 
2083
        struct gl_fragment_program *mp = &fp->mesa_program;
 
2084
        struct prog_instruction *fpi;
 
2085
        GLuint InputsRead = mp->Base.InputsRead;
 
2086
        GLuint temps_used = 0;  /* for fp->temps[] */
 
2087
        int i, j;
 
2088
 
 
2089
        /* New compile, reset tracking data */
 
2090
        fp->optimization =
 
2091
            driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
 
2092
        fp->translated = GL_FALSE;
 
2093
        fp->error = GL_FALSE;
 
2094
        fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
 
2095
        fp->tex.length = 0;
 
2096
        fp->cur_node = 0;
 
2097
        fp->first_node_has_tex = 0;
 
2098
        fp->const_nr = 0;
 
2099
        fp->max_temp_idx = 0;
 
2100
        fp->node[0].alu_end = -1;
 
2101
        fp->node[0].tex_end = -1;
 
2102
 
 
2103
        _mesa_memset(cs, 0, sizeof(*fp->cs));
 
2104
        for (i = 0; i < PFS_MAX_ALU_INST; i++) {
 
2105
                for (j = 0; j < 3; j++) {
 
2106
                        cs->slot[i].vsrc[j] = SRC_CONST;
 
2107
                        cs->slot[i].ssrc[j] = SRC_CONST;
 
2108
                }
 
2109
        }
 
2110
 
 
2111
        /* Work out what temps the Mesa inputs correspond to, this must match
 
2112
         * what setup_rs_unit does, which shouldn't be a problem as rs_unit
 
2113
         * configures itself based on the fragprog's InputsRead
 
2114
         *
 
2115
         * NOTE: this depends on get_hw_temp() allocating registers in order,
 
2116
         * starting from register 0.
 
2117
         */
 
2118
 
 
2119
        /* Texcoords come first */
 
2120
        for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
 
2121
                if (InputsRead & (FRAG_BIT_TEX0 << i)) {
 
2122
                        cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
 
2123
                        cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
 
2124
                            get_hw_temp(fp, 0);
 
2125
                }
 
2126
        }
 
2127
        InputsRead &= ~FRAG_BITS_TEX_ANY;
 
2128
 
 
2129
        /* fragment position treated as a texcoord */
 
2130
        if (InputsRead & FRAG_BIT_WPOS) {
 
2131
                cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
 
2132
                cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(fp, 0);
 
2133
                insert_wpos(&mp->Base);
 
2134
        }
 
2135
        InputsRead &= ~FRAG_BIT_WPOS;
 
2136
 
 
2137
        /* Then primary colour */
 
2138
        if (InputsRead & FRAG_BIT_COL0) {
 
2139
                cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
 
2140
                cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(fp, 0);
 
2141
        }
 
2142
        InputsRead &= ~FRAG_BIT_COL0;
 
2143
 
 
2144
        /* Secondary color */
 
2145
        if (InputsRead & FRAG_BIT_COL1) {
 
2146
                cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
 
2147
                cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(fp, 0);
 
2148
        }
 
2149
        InputsRead &= ~FRAG_BIT_COL1;
 
2150
 
 
2151
        /* Anything else */
 
2152
        if (InputsRead) {
 
2153
                WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
 
2154
                /* force read from hwreg 0 for now */
 
2155
                for (i = 0; i < 32; i++)
 
2156
                        if (InputsRead & (1 << i))
 
2157
                                cs->inputs[i].reg = 0;
 
2158
        }
 
2159
 
 
2160
        /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
 
2161
         * That way, we can free up the reg when it's no longer needed
 
2162
         */
 
2163
        if (!mp->Base.Instructions) {
 
2164
                ERROR("No instructions found in program\n");
 
2165
                return;
 
2166
        }
 
2167
 
 
2168
        for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
 
2169
                int idx;
 
2170
 
 
2171
                for (i = 0; i < 3; i++) {
 
2172
                        idx = fpi->SrcReg[i].Index;
 
2173
                        switch (fpi->SrcReg[i].File) {
 
2174
                        case PROGRAM_TEMPORARY:
 
2175
                                if (!(temps_used & (1 << idx))) {
 
2176
                                        cs->temps[idx].reg = -1;
 
2177
                                        cs->temps[idx].refcount = 1;
 
2178
                                        temps_used |= (1 << idx);
 
2179
                                } else
 
2180
                                        cs->temps[idx].refcount++;
 
2181
                                break;
 
2182
                        case PROGRAM_INPUT:
 
2183
                                cs->inputs[idx].refcount++;
 
2184
                                break;
 
2185
                        default:
 
2186
                                break;
 
2187
                        }
 
2188
                }
 
2189
 
 
2190
                idx = fpi->DstReg.Index;
 
2191
                if (fpi->DstReg.File == PROGRAM_TEMPORARY) {
 
2192
                        if (!(temps_used & (1 << idx))) {
 
2193
                                cs->temps[idx].reg = -1;
 
2194
                                cs->temps[idx].refcount = 1;
 
2195
                                temps_used |= (1 << idx);
 
2196
                        } else
 
2197
                                cs->temps[idx].refcount++;
 
2198
                }
 
2199
        }
 
2200
        cs->temp_in_use = temps_used;
 
2201
}
 
2202
 
 
2203
static void update_params(struct r300_fragment_program *fp)
 
2204
{
 
2205
        struct gl_fragment_program *mp = &fp->mesa_program;
 
2206
 
 
2207
        /* Ask Mesa nicely to fill in ParameterValues for us */
 
2208
        if (mp->Base.Parameters)
 
2209
                _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
 
2210
}
 
2211
 
 
2212
/**
 * Translate the Mesa fragment program in \p fp into the hardware program,
 * unless that has already been done, then refresh its state parameters.
 *
 * If parsing fails the program is dumped and the function returns without
 * marking \p fp as translated and without refreshing parameters.
 *
 * \param r300  driver context.
 * \param fp    fragment program to translate.
 */
void r300TranslateFragmentShader(r300ContextPtr r300,
                                 struct r300_fragment_program *fp)
{
        struct r300_pfs_compile_state *state = NULL;

        /* Already compiled: only the parameter values need refreshing. */
        if (fp->translated) {
                update_params(fp);
                return;
        }

        init_program(r300, fp);
        state = fp->cs;

        if (parse_program(fp) == GL_FALSE) {
                dump_program(fp);
                return;
        }

        /* Finish off: close the last node ... */
        fp->node[fp->cur_node].alu_end =
            state->nrslots - fp->node[fp->cur_node].alu_offset - 1;
        if (fp->node[fp->cur_node].tex_end < 0)
                fp->node[fp->cur_node].tex_end = 0;

        /* ... and record the extent of the whole program. */
        fp->alu_offset = 0;
        fp->alu_end = state->nrslots - 1;
        fp->tex_offset = 0;
        if (fp->tex.length)
                fp->tex_end = fp->tex.length - 1;
        else
                fp->tex_end = 0;

        assert(fp->node[fp->cur_node].alu_end >= 0);
        assert(fp->alu_end >= 0);

        fp->translated = GL_TRUE;
        if (RADEON_DEBUG & DEBUG_PIXEL)
                dump_program(fp);
        r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);

        update_params(fp);
}
 
2247
 
 
2248
/* just some random things... */
 
2249
static void dump_program(struct r300_fragment_program *fp)
 
2250
{
 
2251
        int n, i, j;
 
2252
        static int pc = 0;
 
2253
 
 
2254
        fprintf(stderr, "pc=%d*************************************\n", pc++);
 
2255
 
 
2256
        fprintf(stderr, "Mesa program:\n");
 
2257
        fprintf(stderr, "-------------\n");
 
2258
        _mesa_print_program(&fp->mesa_program.Base);
 
2259
        fflush(stdout);
 
2260
 
 
2261
        fprintf(stderr, "Hardware program\n");
 
2262
        fprintf(stderr, "----------------\n");
 
2263
 
 
2264
        for (n = 0; n < (fp->cur_node + 1); n++) {
 
2265
                fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
 
2266
                        "alu_end: %d, tex_end: %d\n", n,
 
2267
                        fp->node[n].alu_offset,
 
2268
                        fp->node[n].tex_offset,
 
2269
                        fp->node[n].alu_end, fp->node[n].tex_end);
 
2270
 
 
2271
                if (fp->tex.length) {
 
2272
                        fprintf(stderr, "  TEX:\n");
 
2273
                        for (i = fp->node[n].tex_offset;
 
2274
                             i <= fp->node[n].tex_offset + fp->node[n].tex_end;
 
2275
                             ++i) {
 
2276
                                const char *instr;
 
2277
 
 
2278
                                switch ((fp->tex.
 
2279
                                         inst[i] >> R300_FPITX_OPCODE_SHIFT) &
 
2280
                                        15) {
 
2281
                                case R300_FPITX_OP_TEX:
 
2282
                                        instr = "TEX";
 
2283
                                        break;
 
2284
                                case R300_FPITX_OP_KIL:
 
2285
                                        instr = "KIL";
 
2286
                                        break;
 
2287
                                case R300_FPITX_OP_TXP:
 
2288
                                        instr = "TXP";
 
2289
                                        break;
 
2290
                                case R300_FPITX_OP_TXB:
 
2291
                                        instr = "TXB";
 
2292
                                        break;
 
2293
                                default:
 
2294
                                        instr = "UNKNOWN";
 
2295
                                }
 
2296
 
 
2297
                                fprintf(stderr,
 
2298
                                        "    %s t%i, %c%i, texture[%i]   (%08x)\n",
 
2299
                                        instr,
 
2300
                                        (fp->tex.
 
2301
                                         inst[i] >> R300_FPITX_DST_SHIFT) & 31,
 
2302
                                        (fp->tex.
 
2303
                                         inst[i] & R300_FPITX_SRC_CONST) ? 'c' :
 
2304
                                        't',
 
2305
                                        (fp->tex.
 
2306
                                         inst[i] >> R300_FPITX_SRC_SHIFT) & 31,
 
2307
                                        (fp->tex.
 
2308
                                         inst[i] & R300_FPITX_IMAGE_MASK) >>
 
2309
                                        R300_FPITX_IMAGE_SHIFT,
 
2310
                                        fp->tex.inst[i]);
 
2311
                        }
 
2312
                }
 
2313
 
 
2314
                for (i = fp->node[n].alu_offset;
 
2315
                     i <= fp->node[n].alu_offset + fp->node[n].alu_end; ++i) {
 
2316
                        char srcc[3][10], dstc[20];
 
2317
                        char srca[3][10], dsta[20];
 
2318
                        char argc[3][20];
 
2319
                        char arga[3][20];
 
2320
                        char flags[5], tmp[10];
 
2321
 
 
2322
                        for (j = 0; j < 3; ++j) {
 
2323
                                int regc = fp->alu.inst[i].inst1 >> (j * 6);
 
2324
                                int rega = fp->alu.inst[i].inst3 >> (j * 6);
 
2325
 
 
2326
                                sprintf(srcc[j], "%c%i",
 
2327
                                        (regc & 32) ? 'c' : 't', regc & 31);
 
2328
                                sprintf(srca[j], "%c%i",
 
2329
                                        (rega & 32) ? 'c' : 't', rega & 31);
 
2330
                        }
 
2331
 
 
2332
                        dstc[0] = 0;
 
2333
                        sprintf(flags, "%s%s%s",
 
2334
                                (fp->alu.inst[i].
 
2335
                                 inst1 & R300_FPI1_DSTC_REG_X) ? "x" : "",
 
2336
                                (fp->alu.inst[i].
 
2337
                                 inst1 & R300_FPI1_DSTC_REG_Y) ? "y" : "",
 
2338
                                (fp->alu.inst[i].
 
2339
                                 inst1 & R300_FPI1_DSTC_REG_Z) ? "z" : "");
 
2340
                        if (flags[0] != 0) {
 
2341
                                sprintf(dstc, "t%i.%s ",
 
2342
                                        (fp->alu.inst[i].
 
2343
                                         inst1 >> R300_FPI1_DSTC_SHIFT) & 31,
 
2344
                                        flags);
 
2345
                        }
 
2346
                        sprintf(flags, "%s%s%s",
 
2347
                                (fp->alu.inst[i].
 
2348
                                 inst1 & R300_FPI1_DSTC_OUTPUT_X) ? "x" : "",
 
2349
                                (fp->alu.inst[i].
 
2350
                                 inst1 & R300_FPI1_DSTC_OUTPUT_Y) ? "y" : "",
 
2351
                                (fp->alu.inst[i].
 
2352
                                 inst1 & R300_FPI1_DSTC_OUTPUT_Z) ? "z" : "");
 
2353
                        if (flags[0] != 0) {
 
2354
                                sprintf(tmp, "o%i.%s",
 
2355
                                        (fp->alu.inst[i].
 
2356
                                         inst1 >> R300_FPI1_DSTC_SHIFT) & 31,
 
2357
                                        flags);
 
2358
                                strcat(dstc, tmp);
 
2359
                        }
 
2360
 
 
2361
                        dsta[0] = 0;
 
2362
                        if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_REG) {
 
2363
                                sprintf(dsta, "t%i.w ",
 
2364
                                        (fp->alu.inst[i].
 
2365
                                         inst3 >> R300_FPI3_DSTA_SHIFT) & 31);
 
2366
                        }
 
2367
                        if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_OUTPUT) {
 
2368
                                sprintf(tmp, "o%i.w ",
 
2369
                                        (fp->alu.inst[i].
 
2370
                                         inst3 >> R300_FPI3_DSTA_SHIFT) & 31);
 
2371
                                strcat(dsta, tmp);
 
2372
                        }
 
2373
                        if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_DEPTH) {
 
2374
                                strcat(dsta, "Z");
 
2375
                        }
 
2376
 
 
2377
                        fprintf(stderr,
 
2378
                                "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
 
2379
                                "       w: %3s %3s %3s -> %-20s (%08x)\n", i,
 
2380
                                srcc[0], srcc[1], srcc[2], dstc,
 
2381
                                fp->alu.inst[i].inst1, srca[0], srca[1],
 
2382
                                srca[2], dsta, fp->alu.inst[i].inst3);
 
2383
 
 
2384
                        for (j = 0; j < 3; ++j) {
 
2385
                                int regc = fp->alu.inst[i].inst0 >> (j * 7);
 
2386
                                int rega = fp->alu.inst[i].inst2 >> (j * 7);
 
2387
                                int d;
 
2388
                                char buf[20];
 
2389
 
 
2390
                                d = regc & 31;
 
2391
                                if (d < 12) {
 
2392
                                        switch (d % 4) {
 
2393
                                        case R300_FPI0_ARGC_SRC0C_XYZ:
 
2394
                                                sprintf(buf, "%s.xyz",
 
2395
                                                        srcc[d / 4]);
 
2396
                                                break;
 
2397
                                        case R300_FPI0_ARGC_SRC0C_XXX:
 
2398
                                                sprintf(buf, "%s.xxx",
 
2399
                                                        srcc[d / 4]);
 
2400
                                                break;
 
2401
                                        case R300_FPI0_ARGC_SRC0C_YYY:
 
2402
                                                sprintf(buf, "%s.yyy",
 
2403
                                                        srcc[d / 4]);
 
2404
                                                break;
 
2405
                                        case R300_FPI0_ARGC_SRC0C_ZZZ:
 
2406
                                                sprintf(buf, "%s.zzz",
 
2407
                                                        srcc[d / 4]);
 
2408
                                                break;
 
2409
                                        }
 
2410
                                } else if (d < 15) {
 
2411
                                        sprintf(buf, "%s.www", srca[d - 12]);
 
2412
                                } else if (d == 20) {
 
2413
                                        sprintf(buf, "0.0");
 
2414
                                } else if (d == 21) {
 
2415
                                        sprintf(buf, "1.0");
 
2416
                                } else if (d == 22) {
 
2417
                                        sprintf(buf, "0.5");
 
2418
                                } else if (d >= 23 && d < 32) {
 
2419
                                        d -= 23;
 
2420
                                        switch (d / 3) {
 
2421
                                        case 0:
 
2422
                                                sprintf(buf, "%s.yzx",
 
2423
                                                        srcc[d % 3]);
 
2424
                                                break;
 
2425
                                        case 1:
 
2426
                                                sprintf(buf, "%s.zxy",
 
2427
                                                        srcc[d % 3]);
 
2428
                                                break;
 
2429
                                        case 2:
 
2430
                                                sprintf(buf, "%s.Wzy",
 
2431
                                                        srcc[d % 3]);
 
2432
                                                break;
 
2433
                                        }
 
2434
                                } else {
 
2435
                                        sprintf(buf, "%i", d);
 
2436
                                }
 
2437
 
 
2438
                                sprintf(argc[j], "%s%s%s%s",
 
2439
                                        (regc & 32) ? "-" : "",
 
2440
                                        (regc & 64) ? "|" : "",
 
2441
                                        buf, (regc & 64) ? "|" : "");
 
2442
 
 
2443
                                d = rega & 31;
 
2444
                                if (d < 9) {
 
2445
                                        sprintf(buf, "%s.%c", srcc[d / 3],
 
2446
                                                'x' + (char)(d % 3));
 
2447
                                } else if (d < 12) {
 
2448
                                        sprintf(buf, "%s.w", srca[d - 9]);
 
2449
                                } else if (d == 16) {
 
2450
                                        sprintf(buf, "0.0");
 
2451
                                } else if (d == 17) {
 
2452
                                        sprintf(buf, "1.0");
 
2453
                                } else if (d == 18) {
 
2454
                                        sprintf(buf, "0.5");
 
2455
                                } else {
 
2456
                                        sprintf(buf, "%i", d);
 
2457
                                }
 
2458
 
 
2459
                                sprintf(arga[j], "%s%s%s%s",
 
2460
                                        (rega & 32) ? "-" : "",
 
2461
                                        (rega & 64) ? "|" : "",
 
2462
                                        buf, (rega & 64) ? "|" : "");
 
2463
                        }
 
2464
 
 
2465
                        fprintf(stderr, "     xyz: %8s %8s %8s    op: %08x\n"
 
2466
                                "       w: %8s %8s %8s    op: %08x\n",
 
2467
                                argc[0], argc[1], argc[2],
 
2468
                                fp->alu.inst[i].inst0, arga[0], arga[1],
 
2469
                                arga[2], fp->alu.inst[i].inst2);
 
2470
                }
 
2471
        }
 
2472
}