~ubuntu-branches/ubuntu/precise/mesa/precise-updates

« back to all changes in this revision

Viewing changes to src/gallium/drivers/r300/compiler/r3xx_vertprog.c

  • Committer: Package Import Robot
  • Author(s): Robert Hooker
  • Date: 2012-02-02 12:05:48 UTC
  • mfrom: (1.7.1) (3.3.27 sid)
  • Revision ID: package-import@ubuntu.com-20120202120548-nvkma85jq0h4coix
Tags: 8.0~rc2-0ubuntu4
Drop drisearchdir handling, it is no longer needed with multiarch
and dri-alternates being removed.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
 
3
 *
 
4
 * Permission is hereby granted, free of charge, to any person obtaining a
 
5
 * copy of this software and associated documentation files (the "Software"),
 
6
 * to deal in the Software without restriction, including without limitation
 
7
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 
8
 * license, and/or sell copies of the Software, and to permit persons to whom
 
9
 * the Software is furnished to do so, subject to the following conditions:
 
10
 *
 
11
 * The above copyright notice and this permission notice (including the next
 
12
 * paragraph) shall be included in all copies or substantial portions of the
 
13
 * Software.
 
14
 *
 
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 
18
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 
19
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 
20
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 
21
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
22
 
 
23
#include "radeon_compiler.h"
 
24
 
 
25
#include <stdio.h>
 
26
 
 
27
#include "../r300_reg.h"
 
28
 
 
29
#include "radeon_compiler_util.h"
 
30
#include "radeon_dataflow.h"
 
31
#include "radeon_program_alu.h"
 
32
#include "radeon_swizzle.h"
 
33
#include "radeon_emulate_branches.h"
 
34
#include "radeon_emulate_loops.h"
 
35
#include "radeon_remove_constants.h"
 
36
 
 
37
/* Bookkeeping for one BGNLOOP that is still open during translation. */
struct loop {
	/* Recorded at BGNLOOP as (code length / 4). */
	int BgnLoop;
};
 
41
 
 
42
/*
 * Build a source operand that yields a constant (e.g. ZERO or ONE).
 *
 * The register index, register class and RelAddr flag are borrowed from
 * source operand x of `vpi`, which must already be set up and valid; all
 * four components are swizzled to y and no component is negated.
 *
 * Relies on `vp` and `vpi` being in scope where the macro is expanded.
 */
#define __CONST(x, y) \
	(PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \
			 t_swizzle(y), t_swizzle(y), \
			 t_swizzle(y), t_swizzle(y), \
			 t_src_class(vpi->SrcReg[x].File), \
			 RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4))
 
54
 
 
55
 
 
56
static unsigned long t_dst_mask(unsigned int mask)
 
57
{
 
58
        /* RC_MASK_* is equivalent to VSF_FLAG_* */
 
59
        return mask & RC_MASK_XYZW;
 
60
}
 
61
 
 
62
/*
 * Map a compiler register file to the PVS destination register class.
 *
 * Files other than TEMPORARY, OUTPUT and ADDRESS are reported on stderr
 * and then treated as temporaries.
 */
static unsigned long t_dst_class(rc_register_file file)
{
	if (file == RC_FILE_OUTPUT)
		return PVS_DST_REG_OUT;

	if (file == RC_FILE_ADDRESS)
		return PVS_DST_REG_A0;

	/* Unexpected file: complain, then fall back to the temporary class. */
	if (file != RC_FILE_TEMPORARY)
		fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);

	return PVS_DST_REG_TEMPORARY;
}
 
76
 
 
77
static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
 
78
                                 struct rc_dst_register *dst)
 
79
{
 
80
        if (dst->File == RC_FILE_OUTPUT)
 
81
                return vp->outputs[dst->Index];
 
82
 
 
83
        return dst->Index;
 
84
}
 
85
 
 
86
/*
 * Map a compiler register file to the PVS source register class.
 *
 * RC_FILE_NONE and RC_FILE_TEMPORARY both map to the temporary class.
 * Any file other than NONE, TEMPORARY, INPUT or CONSTANT is reported on
 * stderr and then treated as a temporary as well.
 */
static unsigned long t_src_class(rc_register_file file)
{
	if (file == RC_FILE_INPUT)
		return PVS_SRC_REG_INPUT;

	if (file == RC_FILE_CONSTANT)
		return PVS_SRC_REG_CONSTANT;

	if (file != RC_FILE_NONE && file != RC_FILE_TEMPORARY)
		fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);

	return PVS_SRC_REG_TEMPORARY;
}
 
101
 
 
102
static int t_src_conflict(struct rc_src_register a, struct rc_src_register b)
 
103
{
 
104
        unsigned long aclass = t_src_class(a.File);
 
105
        unsigned long bclass = t_src_class(b.File);
 
106
 
 
107
        if (aclass != bclass)
 
108
                return 0;
 
109
        if (aclass == PVS_SRC_REG_TEMPORARY)
 
110
                return 0;
 
111
 
 
112
        if (a.RelAddr || b.RelAddr)
 
113
                return 1;
 
114
        if (a.Index != b.Index)
 
115
                return 1;
 
116
 
 
117
        return 0;
 
118
}
 
119
 
 
120
/*
 * Translate a swizzle selector to its hardware value.
 *
 * This is an identity mapping because the Mesa RC_SWIZZLE_* values are all
 * identical to VSF_IN_COMPONENT_*.
 */
static inline unsigned long t_swizzle(unsigned int swizzle)
{
	const unsigned long hw_swizzle = swizzle;

	return hw_swizzle;
}
 
125
 
 
126
static unsigned long t_src_index(struct r300_vertex_program_code *vp,
 
127
                                 struct rc_src_register *src)
 
128
{
 
129
        if (src->File == RC_FILE_INPUT) {
 
130
                assert(vp->inputs[src->Index] != -1);
 
131
                return vp->inputs[src->Index];
 
132
        } else {
 
133
                if (src->Index < 0) {
 
134
                        fprintf(stderr,
 
135
                                "negative offsets for indirect addressing do not work.\n");
 
136
                        return 0;
 
137
                }
 
138
                return src->Index;
 
139
        }
 
140
}
 
141
 
 
142
/* these two functions should probably be merged... */
 
143
 
 
144
static unsigned long t_src(struct r300_vertex_program_code *vp,
 
145
                           struct rc_src_register *src)
 
146
{
 
147
        /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
 
148
         * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
 
149
         */
 
150
        return PVS_SRC_OPERAND(t_src_index(vp, src),
 
151
                               t_swizzle(GET_SWZ(src->Swizzle, 0)),
 
152
                               t_swizzle(GET_SWZ(src->Swizzle, 1)),
 
153
                               t_swizzle(GET_SWZ(src->Swizzle, 2)),
 
154
                               t_swizzle(GET_SWZ(src->Swizzle, 3)),
 
155
                               t_src_class(src->File),
 
156
                               src->Negate) |
 
157
               (src->RelAddr << 4) | (src->Abs << 3);
 
158
}
 
159
 
 
160
static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
 
161
                                  struct rc_src_register *src)
 
162
{
 
163
        /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
 
164
         * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
 
165
         */
 
166
        unsigned int swz = rc_get_scalar_src_swz(src->Swizzle);
 
167
 
 
168
        return PVS_SRC_OPERAND(t_src_index(vp, src),
 
169
                               t_swizzle(swz),
 
170
                               t_swizzle(swz),
 
171
                               t_swizzle(swz),
 
172
                               t_swizzle(swz),
 
173
                               t_src_class(src->File),
 
174
                               src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
 
175
               (src->RelAddr << 4) | (src->Abs << 3);
 
176
}
 
177
 
 
178
static int valid_dst(struct r300_vertex_program_code *vp,
 
179
                           struct rc_dst_register *dst)
 
180
{
 
181
        if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) {
 
182
                return 0;
 
183
        } else if (dst->File == RC_FILE_ADDRESS) {
 
184
                assert(dst->Index == 0);
 
185
        }
 
186
 
 
187
        return 1;
 
188
}
 
189
 
 
190
static void ei_vector1(struct r300_vertex_program_code *vp,
 
191
                                unsigned int hw_opcode,
 
192
                                struct rc_sub_instruction *vpi,
 
193
                                unsigned int * inst)
 
194
{
 
195
        inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
 
196
                                     0,
 
197
                                     0,
 
198
                                     t_dst_index(vp, &vpi->DstReg),
 
199
                                     t_dst_mask(vpi->DstReg.WriteMask),
 
200
                                     t_dst_class(vpi->DstReg.File));
 
201
        inst[1] = t_src(vp, &vpi->SrcReg[0]);
 
202
        inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
 
203
        inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
 
204
}
 
205
 
 
206
static void ei_vector2(struct r300_vertex_program_code *vp,
 
207
                                unsigned int hw_opcode,
 
208
                                struct rc_sub_instruction *vpi,
 
209
                                unsigned int * inst)
 
210
{
 
211
        inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
 
212
                                     0,
 
213
                                     0,
 
214
                                     t_dst_index(vp, &vpi->DstReg),
 
215
                                     t_dst_mask(vpi->DstReg.WriteMask),
 
216
                                     t_dst_class(vpi->DstReg.File));
 
217
        inst[1] = t_src(vp, &vpi->SrcReg[0]);
 
218
        inst[2] = t_src(vp, &vpi->SrcReg[1]);
 
219
        inst[3] = __CONST(1, RC_SWIZZLE_ZERO);
 
220
}
 
221
 
 
222
static void ei_math1(struct r300_vertex_program_code *vp,
 
223
                                unsigned int hw_opcode,
 
224
                                struct rc_sub_instruction *vpi,
 
225
                                unsigned int * inst)
 
226
{
 
227
        inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
 
228
                                     1,
 
229
                                     0,
 
230
                                     t_dst_index(vp, &vpi->DstReg),
 
231
                                     t_dst_mask(vpi->DstReg.WriteMask),
 
232
                                     t_dst_class(vpi->DstReg.File));
 
233
        inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
 
234
        inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
 
235
        inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
 
236
}
 
237
 
 
238
static void ei_lit(struct r300_vertex_program_code *vp,
 
239
                                      struct rc_sub_instruction *vpi,
 
240
                                      unsigned int * inst)
 
241
{
 
242
        //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
 
243
 
 
244
        inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
 
245
                                     1,
 
246
                                     0,
 
247
                                     t_dst_index(vp, &vpi->DstReg),
 
248
                                     t_dst_mask(vpi->DstReg.WriteMask),
 
249
                                     t_dst_class(vpi->DstReg.File));
 
250
        /* NOTE: Users swizzling might not work. */
 
251
        inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)),      // X
 
252
                                  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)),        // W
 
253
                                  PVS_SRC_SELECT_FORCE_0,       // Z
 
254
                                  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)),        // Y
 
255
                                  t_src_class(vpi->SrcReg[0].File),
 
256
                                  vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
 
257
            (vpi->SrcReg[0].RelAddr << 4);
 
258
        inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)),      // Y
 
259
                                  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)),        // W
 
260
                                  PVS_SRC_SELECT_FORCE_0,       // Z
 
261
                                  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)),        // X
 
262
                                  t_src_class(vpi->SrcReg[0].File),
 
263
                                  vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
 
264
            (vpi->SrcReg[0].RelAddr << 4);
 
265
        inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)),      // Y
 
266
                                  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)),        // X
 
267
                                  PVS_SRC_SELECT_FORCE_0,       // Z
 
268
                                  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)),        // W
 
269
                                  t_src_class(vpi->SrcReg[0].File),
 
270
                                  vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
 
271
            (vpi->SrcReg[0].RelAddr << 4);
 
272
}
 
273
 
 
274
static void ei_mad(struct r300_vertex_program_code *vp,
 
275
                                      struct rc_sub_instruction *vpi,
 
276
                                      unsigned int * inst)
 
277
{
 
278
        unsigned int i;
 
279
        /* Remarks about hardware limitations of MAD
 
280
         * (please preserve this comment, as this information is _NOT_
 
281
         * in the documentation provided by AMD).
 
282
         *
 
283
         * As described in the documentation, MAD with three unique temporary
 
284
         * source registers requires the use of the macro version.
 
285
         *
 
286
         * However (and this is not mentioned in the documentation), apparently
 
287
         * the macro version is _NOT_ a full superset of the normal version.
 
288
         * In particular, the macro version does not always work when relative
 
289
         * addressing is used in the source operands.
 
290
         *
 
291
         * This limitation caused incorrect rendering in Sauerbraten's OpenGL
 
292
         * assembly shader path when using medium quality animations
 
293
         * (i.e. animations with matrix blending instead of quaternion blending).
 
294
         *
 
295
         * Unfortunately, I (nha) have been unable to extract a Piglit regression
 
296
         * test for this issue - for some reason, it is possible to have vertex
 
297
         * programs whose prefix is *exactly* the same as the prefix of the
 
298
         * offending program in Sauerbraten up to the offending instruction
 
299
         * without causing any trouble.
 
300
         *
 
301
         * Bottom line: Only use the macro version only when really necessary;
 
302
         * according to AMD docs, this should improve performance by one clock
 
303
         * as a nice side bonus.
 
304
         */
 
305
        if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY &&
 
306
            vpi->SrcReg[1].File == RC_FILE_TEMPORARY &&
 
307
            vpi->SrcReg[2].File == RC_FILE_TEMPORARY &&
 
308
            vpi->SrcReg[0].Index != vpi->SrcReg[1].Index &&
 
309
            vpi->SrcReg[0].Index != vpi->SrcReg[2].Index &&
 
310
            vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) {
 
311
                inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
 
312
                                0,
 
313
                                1,
 
314
                                t_dst_index(vp, &vpi->DstReg),
 
315
                                t_dst_mask(vpi->DstReg.WriteMask),
 
316
                                t_dst_class(vpi->DstReg.File));
 
317
        } else {
 
318
                inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
 
319
                                0,
 
320
                                0,
 
321
                                t_dst_index(vp, &vpi->DstReg),
 
322
                                t_dst_mask(vpi->DstReg.WriteMask),
 
323
                                t_dst_class(vpi->DstReg.File));
 
324
 
 
325
                /* Arguments with constant swizzles still count as a unique
 
326
                 * temporary, so we should make sure these arguments share a
 
327
                 * register index with one of the other arguments. */
 
328
                for (i = 0; i < 3; i++) {
 
329
                        unsigned int j;
 
330
                        if (vpi->SrcReg[i].File != RC_FILE_NONE)
 
331
                                continue;
 
332
 
 
333
                        for (j = 0; j < 3; j++) {
 
334
                                if (i != j) {
 
335
                                        vpi->SrcReg[i].Index =
 
336
                                                vpi->SrcReg[j].Index;
 
337
                                        break;
 
338
                                }
 
339
                        }
 
340
                }
 
341
        }
 
342
        inst[1] = t_src(vp, &vpi->SrcReg[0]);
 
343
        inst[2] = t_src(vp, &vpi->SrcReg[1]);
 
344
        inst[3] = t_src(vp, &vpi->SrcReg[2]);
 
345
}
 
346
 
 
347
static void ei_pow(struct r300_vertex_program_code *vp,
 
348
                                      struct rc_sub_instruction *vpi,
 
349
                                      unsigned int * inst)
 
350
{
 
351
        inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
 
352
                                     1,
 
353
                                     0,
 
354
                                     t_dst_index(vp, &vpi->DstReg),
 
355
                                     t_dst_mask(vpi->DstReg.WriteMask),
 
356
                                     t_dst_class(vpi->DstReg.File));
 
357
        inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
 
358
        inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
 
359
        inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
 
360
}
 
361
 
 
362
static void mark_write(void * userdata, struct rc_instruction * inst,
 
363
                rc_register_file file,  unsigned int index, unsigned int mask)
 
364
{
 
365
        unsigned int * writemasks = userdata;
 
366
 
 
367
        if (file != RC_FILE_TEMPORARY)
 
368
                return;
 
369
 
 
370
        if (index >= R300_VS_MAX_TEMPS)
 
371
                return;
 
372
 
 
373
        writemasks[index] |= mask;
 
374
}
 
375
 
 
376
static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler)
 
377
{
 
378
        return PVS_SRC_OPERAND(compiler->PredicateIndex,
 
379
                t_swizzle(RC_SWIZZLE_ZERO),
 
380
                t_swizzle(RC_SWIZZLE_ZERO),
 
381
                t_swizzle(RC_SWIZZLE_ZERO),
 
382
                t_swizzle(RC_SWIZZLE_W),
 
383
                t_src_class(RC_FILE_TEMPORARY),
 
384
                0);
 
385
}
 
386
 
 
387
static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler,
 
388
                                        unsigned int hw_opcode, int is_math)
 
389
{
 
390
        return PVS_OP_DST_OPERAND(hw_opcode,
 
391
             is_math,
 
392
             0,
 
393
             compiler->PredicateIndex,
 
394
             RC_MASK_W,
 
395
             t_dst_class(RC_FILE_TEMPORARY));
 
396
 
 
397
}
 
398
 
 
399
static void ei_if(struct r300_vertex_program_compiler * compiler,
 
400
                                        struct rc_instruction *rci,
 
401
                                        unsigned int * inst,
 
402
                                        unsigned int branch_depth)
 
403
{
 
404
        unsigned int predicate_opcode;
 
405
        int is_math = 0;
 
406
 
 
407
        if (!compiler->Base.is_r500) {
 
408
                rc_error(&compiler->Base,"Opcode IF not supported\n");
 
409
                return;
 
410
        }
 
411
 
 
412
        /* Reserve a temporary to use as our predicate stack counter, if we
 
413
         * don't already have one. */
 
414
        if (!compiler->PredicateMask) {
 
415
                unsigned int writemasks[RC_REGISTER_MAX_INDEX];
 
416
                struct rc_instruction * inst;
 
417
                unsigned int i;
 
418
                memset(writemasks, 0, sizeof(writemasks));
 
419
                for(inst = compiler->Base.Program.Instructions.Next;
 
420
                                inst != &compiler->Base.Program.Instructions;
 
421
                                                        inst = inst->Next) {
 
422
                        rc_for_all_writes_mask(inst, mark_write, writemasks);
 
423
                }
 
424
                for(i = 0; i < compiler->Base.max_temp_regs; i++) {
 
425
                        unsigned int mask = ~writemasks[i] & RC_MASK_XYZW;
 
426
                        /* Only the W component can be used fo the predicate
 
427
                         * stack counter. */
 
428
                        if (mask & RC_MASK_W) {
 
429
                                compiler->PredicateMask = RC_MASK_W;
 
430
                                compiler->PredicateIndex = i;
 
431
                                break;
 
432
                        }
 
433
                }
 
434
                if (i == compiler->Base.max_temp_regs) {
 
435
                        rc_error(&compiler->Base, "No free temporary to use for"
 
436
                                        " predicate stack counter.\n");
 
437
                        return;
 
438
                }
 
439
        }
 
440
        predicate_opcode =
 
441
                        branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ;
 
442
 
 
443
        rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0));
 
444
        if (branch_depth == 0) {
 
445
                is_math = 1;
 
446
                predicate_opcode = ME_PRED_SET_NEQ;
 
447
                inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
 
448
                inst[2] = 0;
 
449
        } else {
 
450
                predicate_opcode = VE_PRED_SET_NEQ_PUSH;
 
451
                inst[1] = t_pred_src(compiler);
 
452
                inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
 
453
        }
 
454
 
 
455
        inst[0] = t_pred_dst(compiler, predicate_opcode, is_math);
 
456
        inst[3] = 0;
 
457
 
 
458
}
 
459
 
 
460
static void ei_else(struct r300_vertex_program_compiler * compiler,
 
461
                                                        unsigned int * inst)
 
462
{
 
463
        if (!compiler->Base.is_r500) {
 
464
                rc_error(&compiler->Base,"Opcode ELSE not supported\n");
 
465
                return;
 
466
        }
 
467
        inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1);
 
468
        inst[1] = t_pred_src(compiler);
 
469
        inst[2] = 0;
 
470
        inst[3] = 0;
 
471
}
 
472
 
 
473
static void ei_endif(struct r300_vertex_program_compiler *compiler,
 
474
                                                        unsigned int * inst)
 
475
{
 
476
        if (!compiler->Base.is_r500) {
 
477
                rc_error(&compiler->Base,"Opcode ENDIF not supported\n");
 
478
                return;
 
479
        }
 
480
        inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1);
 
481
        inst[1] = t_pred_src(compiler);
 
482
        inst[2] = 0;
 
483
        inst[3] = 0;
 
484
}
 
485
 
 
486
static void translate_vertex_program(struct radeon_compiler *c, void *user)
 
487
{
 
488
        struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
 
489
        struct rc_instruction *rci;
 
490
 
 
491
        struct loop * loops = NULL;
 
492
        int current_loop_depth = 0;
 
493
        int loops_reserved = 0;
 
494
 
 
495
        unsigned int branch_depth = 0;
 
496
 
 
497
        compiler->code->pos_end = 0;    /* Not supported yet */
 
498
        compiler->code->length = 0;
 
499
        compiler->code->num_temporaries = 0;
 
500
 
 
501
        compiler->SetHwInputOutput(compiler);
 
502
 
 
503
        for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
 
504
                struct rc_sub_instruction *vpi = &rci->U.I;
 
505
                unsigned int *inst = compiler->code->body.d + compiler->code->length;
 
506
                const struct rc_opcode_info *info = rc_get_opcode_info(vpi->Opcode);
 
507
 
 
508
                /* Skip instructions writing to non-existing destination */
 
509
                if (!valid_dst(compiler->code, &vpi->DstReg))
 
510
                        continue;
 
511
 
 
512
                if (info->HasDstReg) {
 
513
                        /* Neither is Saturate. */
 
514
                        if (vpi->SaturateMode != RC_SATURATE_NONE) {
 
515
                                rc_error(&compiler->Base, "Vertex program does not support the Saturate "
 
516
                                         "modifier (yet).\n");
 
517
                        }
 
518
                }
 
519
 
 
520
                if (compiler->code->length >= c->max_alu_insts * 4) {
 
521
                        rc_error(&compiler->Base, "Vertex program has too many instructions\n");
 
522
                        return;
 
523
                }
 
524
 
 
525
                assert(compiler->Base.is_r500 ||
 
526
                       (vpi->Opcode != RC_OPCODE_SEQ &&
 
527
                        vpi->Opcode != RC_OPCODE_SNE));
 
528
 
 
529
                switch (vpi->Opcode) {
 
530
                case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
 
531
                case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
 
532
                case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
 
533
                case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
 
534
                case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
 
535
                case RC_OPCODE_ELSE: ei_else(compiler, inst); break;
 
536
                case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break;
 
537
                case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
 
538
                case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
 
539
                case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
 
540
                case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break;
 
541
                case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
 
542
                case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
 
543
                case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
 
544
                case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
 
545
                case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
 
546
                case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
 
547
                case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
 
548
                case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
 
549
                case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
 
550
                case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
 
551
                case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
 
552
                case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break;
 
553
                case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
 
554
                case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
 
555
                case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
 
556
                case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
 
557
                case RC_OPCODE_BGNLOOP:
 
558
                {
 
559
                        struct loop * l;
 
560
 
 
561
                        if ((!compiler->Base.is_r500
 
562
                                && loops_reserved >= R300_VS_MAX_LOOP_DEPTH)
 
563
                                || loops_reserved >= R500_VS_MAX_FC_DEPTH) {
 
564
                                rc_error(&compiler->Base,
 
565
                                                "Loops are nested too deep.");
 
566
                                return;
 
567
                        }
 
568
                        memory_pool_array_reserve(&compiler->Base.Pool,
 
569
                                        struct loop, loops, current_loop_depth,
 
570
                                        loops_reserved, 1);
 
571
                        l = &loops[current_loop_depth++];
 
572
                        memset(l , 0, sizeof(struct loop));
 
573
                        l->BgnLoop = (compiler->code->length / 4);
 
574
                        continue;
 
575
                }
 
576
                case RC_OPCODE_ENDLOOP:
 
577
                {
 
578
                        struct loop * l;
 
579
                        unsigned int act_addr;
 
580
                        unsigned int last_addr;
 
581
                        unsigned int ret_addr;
 
582
 
 
583
                        assert(loops);
 
584
                        l = &loops[current_loop_depth - 1];
 
585
                        act_addr = l->BgnLoop - 1;
 
586
                        last_addr = (compiler->code->length / 4) - 1;
 
587
                        ret_addr = l->BgnLoop;
 
588
 
 
589
                        if (loops_reserved >= R300_VS_MAX_FC_OPS) {
 
590
                                rc_error(&compiler->Base,
 
591
                                        "Too many flow control instructions.");
 
592
                                return;
 
593
                        }
 
594
                        if (compiler->Base.is_r500) {
 
595
                                compiler->code->fc_op_addrs.r500
 
596
                                        [compiler->code->num_fc_ops].lw =
 
597
                                        R500_PVS_FC_ACT_ADRS(act_addr)
 
598
                                        | R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff)
 
599
                                        ;
 
600
                                compiler->code->fc_op_addrs.r500
 
601
                                        [compiler->code->num_fc_ops].uw =
 
602
                                        R500_PVS_FC_LAST_INST(last_addr)
 
603
                                        | R500_PVS_FC_RTN_INST(ret_addr)
 
604
                                        ;
 
605
                        } else {
 
606
                                compiler->code->fc_op_addrs.r300
 
607
                                        [compiler->code->num_fc_ops] =
 
608
                                        R300_PVS_FC_ACT_ADRS(act_addr)
 
609
                                        | R300_PVS_FC_LOOP_CNT_JMP_INST(0xff)
 
610
                                        | R300_PVS_FC_LAST_INST(last_addr)
 
611
                                        | R300_PVS_FC_RTN_INST(ret_addr)
 
612
                                        ;
 
613
                        }
 
614
                        compiler->code->fc_loop_index[compiler->code->num_fc_ops] =
 
615
                                R300_PVS_FC_LOOP_INIT_VAL(0x0)
 
616
                                | R300_PVS_FC_LOOP_STEP_VAL(0x1)
 
617
                                ;
 
618
                        compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP(
 
619
                                                compiler->code->num_fc_ops);
 
620
                        compiler->code->num_fc_ops++;
 
621
                        current_loop_depth--;
 
622
                        continue;
 
623
                }
 
624
 
 
625
                default:
 
626
                        rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name);
 
627
                        return;
 
628
                }
 
629
 
 
630
                /* Non-flow control instructions that are inside an if statement
 
631
                 * need to pay attention to the predicate bit. */
 
632
                if (branch_depth
 
633
                        && vpi->Opcode != RC_OPCODE_IF
 
634
                        && vpi->Opcode != RC_OPCODE_ELSE
 
635
                        && vpi->Opcode != RC_OPCODE_ENDIF) {
 
636
 
 
637
                        inst[0] |= (PVS_DST_PRED_ENABLE_MASK
 
638
                                                << PVS_DST_PRED_ENABLE_SHIFT);
 
639
                        inst[0] |= (PVS_DST_PRED_SENSE_MASK
 
640
                                                << PVS_DST_PRED_SENSE_SHIFT);
 
641
                }
 
642
 
 
643
                /* Update the number of temporaries. */
 
644
                if (info->HasDstReg && vpi->DstReg.File == RC_FILE_TEMPORARY &&
 
645
                    vpi->DstReg.Index >= compiler->code->num_temporaries)
 
646
                        compiler->code->num_temporaries = vpi->DstReg.Index + 1;
 
647
 
 
648
                for (unsigned i = 0; i < info->NumSrcRegs; i++)
 
649
                        if (vpi->SrcReg[i].File == RC_FILE_TEMPORARY &&
 
650
                            vpi->SrcReg[i].Index >= compiler->code->num_temporaries)
 
651
                                compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1;
 
652
 
 
653
                if (compiler->PredicateMask)
 
654
                        if (compiler->PredicateIndex >= compiler->code->num_temporaries)
 
655
                                compiler->code->num_temporaries = compiler->PredicateIndex + 1;
 
656
 
 
657
                if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) {
 
658
                        rc_error(&compiler->Base, "Too many temporaries.\n");
 
659
                        return;
 
660
                }
 
661
 
 
662
                compiler->code->length += 4;
 
663
 
 
664
                if (compiler->Base.Error)
 
665
                        return;
 
666
        }
 
667
}
 
668
 
 
669
/**
 * Book-keeping record for one original (pre-allocation) temporary register,
 * as used by allocate_temporary_registers().
 */
struct temporary_allocation {
	/* Set once a hardware register has been picked for this temporary. */
	unsigned int Allocated : 1;

	/* Index of the hardware register assigned to this temporary. */
	unsigned int HwTemp : 15;

	/* Instruction recorded as the last reader of this temporary. */
	struct rc_instruction *LastRead;
};
 
674
 
 
675
static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
 
676
{
 
677
        struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
 
678
        struct rc_instruction *inst;
 
679
        struct rc_instruction *end_loop = NULL;
 
680
        unsigned int num_orig_temps = 0;
 
681
        char hwtemps[RC_REGISTER_MAX_INDEX];
 
682
        struct temporary_allocation * ta;
 
683
        unsigned int i, j;
 
684
 
 
685
        memset(hwtemps, 0, sizeof(hwtemps));
 
686
 
 
687
        rc_recompute_ips(c);
 
688
 
 
689
        /* Pass 1: Count original temporaries. */
 
690
        for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
 
691
                const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
 
692
 
 
693
                for (i = 0; i < opcode->NumSrcRegs; ++i) {
 
694
                        if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
 
695
                                if (inst->U.I.SrcReg[i].Index >= num_orig_temps)
 
696
                                        num_orig_temps = inst->U.I.SrcReg[i].Index + 1;
 
697
                        }
 
698
                }
 
699
 
 
700
                if (opcode->HasDstReg) {
 
701
                        if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
 
702
                                if (inst->U.I.DstReg.Index >= num_orig_temps)
 
703
                                        num_orig_temps = inst->U.I.DstReg.Index + 1;
 
704
                        }
 
705
                }
 
706
        }
 
707
 
 
708
        ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
 
709
                        sizeof(struct temporary_allocation) * num_orig_temps);
 
710
        memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
 
711
 
 
712
        /* Pass 2: Determine original temporary lifetimes */
 
713
        for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
 
714
                const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
 
715
                /* Instructions inside of loops need to use the ENDLOOP
 
716
                 * instruction as their LastRead. */
 
717
                if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
 
718
                        int endloops = 1;
 
719
                        struct rc_instruction * ptr;
 
720
                        for(ptr = inst->Next;
 
721
                                ptr != &compiler->Base.Program.Instructions;
 
722
                                                        ptr = ptr->Next){
 
723
                                if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
 
724
                                        endloops++;
 
725
                                } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
 
726
                                        endloops--;
 
727
                                        if (endloops <= 0) {
 
728
                                                end_loop = ptr;
 
729
                                                break;
 
730
                                        }
 
731
                                }
 
732
                        }
 
733
                }
 
734
 
 
735
                if (inst == end_loop) {
 
736
                        end_loop = NULL;
 
737
                        continue;
 
738
                }
 
739
 
 
740
                for (i = 0; i < opcode->NumSrcRegs; ++i) {
 
741
                        if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
 
742
                                ta[inst->U.I.SrcReg[i].Index].LastRead = end_loop ? end_loop : inst;
 
743
                        }
 
744
                }
 
745
        }
 
746
 
 
747
        /* Pass 3: Register allocation */
 
748
        for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
 
749
                const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
 
750
 
 
751
                for (i = 0; i < opcode->NumSrcRegs; ++i) {
 
752
                        if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
 
753
                                unsigned int orig = inst->U.I.SrcReg[i].Index;
 
754
                                inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
 
755
 
 
756
                                if (ta[orig].Allocated && inst == ta[orig].LastRead)
 
757
                                        hwtemps[ta[orig].HwTemp] = 0;
 
758
                        }
 
759
                }
 
760
 
 
761
                if (opcode->HasDstReg) {
 
762
                        if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
 
763
                                unsigned int orig = inst->U.I.DstReg.Index;
 
764
 
 
765
                                if (!ta[orig].Allocated) {
 
766
                                        for(j = 0; j < c->max_temp_regs; ++j) {
 
767
                                                if (!hwtemps[j])
 
768
                                                        break;
 
769
                                        }
 
770
                                        ta[orig].Allocated = 1;
 
771
                                        ta[orig].HwTemp = j;
 
772
                                        hwtemps[ta[orig].HwTemp] = 1;
 
773
                                }
 
774
 
 
775
                                inst->U.I.DstReg.Index = ta[orig].HwTemp;
 
776
                        }
 
777
                }
 
778
        }
 
779
}
 
780
 
 
781
/**
 
782
 * R3xx-R4xx vertex engine does not support the Absolute source operand modifier
 
783
 * and the Saturate opcode modifier. Only Absolute is currently transformed.
 
784
 */
 
785
static int transform_nonnative_modifiers(
 
786
        struct radeon_compiler *c,
 
787
        struct rc_instruction *inst,
 
788
        void* unused)
 
789
{
 
790
        const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
 
791
        unsigned i;
 
792
 
 
793
        /* Transform ABS(a) to MAX(a, -a). */
 
794
        for (i = 0; i < opcode->NumSrcRegs; i++) {
 
795
                if (inst->U.I.SrcReg[i].Abs) {
 
796
                        struct rc_instruction *new_inst;
 
797
                        unsigned temp;
 
798
 
 
799
                        inst->U.I.SrcReg[i].Abs = 0;
 
800
 
 
801
                        temp = rc_find_free_temporary(c);
 
802
 
 
803
                        new_inst = rc_insert_new_instruction(c, inst->Prev);
 
804
                        new_inst->U.I.Opcode = RC_OPCODE_MAX;
 
805
                        new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
 
806
                        new_inst->U.I.DstReg.Index = temp;
 
807
                        new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i];
 
808
                        new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i];
 
809
                        new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
 
810
 
 
811
                        memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i]));
 
812
                        inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
 
813
                        inst->U.I.SrcReg[i].Index = temp;
 
814
                        inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW;
 
815
                }
 
816
        }
 
817
        return 1;
 
818
}
 
819
 
 
820
/**
 
821
 * Vertex engine cannot read two inputs or two constants at the same time.
 
822
 * Introduce intermediate MOVs to temporary registers to account for this.
 
823
 */
 
824
static int transform_source_conflicts(
 
825
        struct radeon_compiler *c,
 
826
        struct rc_instruction* inst,
 
827
        void* unused)
 
828
{
 
829
        const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
 
830
 
 
831
        if (opcode->NumSrcRegs == 3) {
 
832
                if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])
 
833
                    || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) {
 
834
                        int tmpreg = rc_find_free_temporary(c);
 
835
                        struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
 
836
                        inst_mov->U.I.Opcode = RC_OPCODE_MOV;
 
837
                        inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
 
838
                        inst_mov->U.I.DstReg.Index = tmpreg;
 
839
                        inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
 
840
 
 
841
                        reset_srcreg(&inst->U.I.SrcReg[2]);
 
842
                        inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
 
843
                        inst->U.I.SrcReg[2].Index = tmpreg;
 
844
                }
 
845
        }
 
846
 
 
847
        if (opcode->NumSrcRegs >= 2) {
 
848
                if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) {
 
849
                        int tmpreg = rc_find_free_temporary(c);
 
850
                        struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
 
851
                        inst_mov->U.I.Opcode = RC_OPCODE_MOV;
 
852
                        inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
 
853
                        inst_mov->U.I.DstReg.Index = tmpreg;
 
854
                        inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
 
855
 
 
856
                        reset_srcreg(&inst->U.I.SrcReg[1]);
 
857
                        inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
 
858
                        inst->U.I.SrcReg[1].Index = tmpreg;
 
859
                }
 
860
        }
 
861
 
 
862
        return 1;
 
863
}
 
864
 
 
865
static void rc_vs_add_artificial_outputs(struct radeon_compiler *c, void *user)
 
866
{
 
867
        struct r300_vertex_program_compiler * compiler = (struct r300_vertex_program_compiler*)c;
 
868
        int i;
 
869
 
 
870
        for(i = 0; i < 32; ++i) {
 
871
                if ((compiler->RequiredOutputs & (1 << i)) &&
 
872
                    !(compiler->Base.Program.OutputsWritten & (1 << i))) {
 
873
                        struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
 
874
                        inst->U.I.Opcode = RC_OPCODE_MOV;
 
875
 
 
876
                        inst->U.I.DstReg.File = RC_FILE_OUTPUT;
 
877
                        inst->U.I.DstReg.Index = i;
 
878
                        inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
 
879
 
 
880
                        inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT;
 
881
                        inst->U.I.SrcReg[0].Index = 0;
 
882
                        inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
 
883
 
 
884
                        compiler->Base.Program.OutputsWritten |= 1 << i;
 
885
                }
 
886
        }
 
887
}
 
888
 
 
889
static void dataflow_outputs_mark_used(void * userdata, void * data,
 
890
                void (*callback)(void *, unsigned int, unsigned int))
 
891
{
 
892
        struct r300_vertex_program_compiler * c = userdata;
 
893
        int i;
 
894
 
 
895
        for(i = 0; i < 32; ++i) {
 
896
                if (c->RequiredOutputs & (1 << i))
 
897
                        callback(data, i, RC_MASK_XYZW);
 
898
        }
 
899
}
 
900
 
 
901
/**
 * Swizzle capability callback: reports every swizzle as native.
 * Both arguments are ignored.
 *
 * \return always 1
 */
static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
	(void) reg;
	(void) opcode;
	return 1;
}
 
908
 
 
909
static void transform_negative_addressing(struct r300_vertex_program_compiler *c,
 
910
                                          struct rc_instruction *arl,
 
911
                                          struct rc_instruction *end,
 
912
                                          int min_offset)
 
913
{
 
914
        struct rc_instruction *inst, *add;
 
915
        unsigned const_swizzle;
 
916
 
 
917
        /* Transform ARL */
 
918
        add = rc_insert_new_instruction(&c->Base, arl->Prev);
 
919
        add->U.I.Opcode = RC_OPCODE_ADD;
 
920
        add->U.I.DstReg.File = RC_FILE_TEMPORARY;
 
921
        add->U.I.DstReg.Index = rc_find_free_temporary(&c->Base);
 
922
        add->U.I.DstReg.WriteMask = RC_MASK_X;
 
923
        add->U.I.SrcReg[0] = arl->U.I.SrcReg[0];
 
924
        add->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
 
925
        add->U.I.SrcReg[1].Index = rc_constants_add_immediate_scalar(&c->Base.Program.Constants,
 
926
                                                                     min_offset, &const_swizzle);
 
927
        add->U.I.SrcReg[1].Swizzle = const_swizzle;
 
928
 
 
929
        arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
 
930
        arl->U.I.SrcReg[0].Index = add->U.I.DstReg.Index;
 
931
        arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX;
 
932
 
 
933
        /* Rewrite offsets up to and excluding inst. */
 
934
        for (inst = arl->Next; inst != end; inst = inst->Next) {
 
935
                const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
 
936
 
 
937
                for (unsigned i = 0; i < opcode->NumSrcRegs; i++)
 
938
                        if (inst->U.I.SrcReg[i].RelAddr)
 
939
                                inst->U.I.SrcReg[i].Index -= min_offset;
 
940
        }
 
941
}
 
942
 
 
943
static void rc_emulate_negative_addressing(struct radeon_compiler *compiler, void *user)
 
944
{
 
945
        struct r300_vertex_program_compiler * c = (struct r300_vertex_program_compiler*)compiler;
 
946
        struct rc_instruction *inst, *lastARL = NULL;
 
947
        int min_offset = 0;
 
948
 
 
949
        for (inst = c->Base.Program.Instructions.Next; inst != &c->Base.Program.Instructions; inst = inst->Next) {
 
950
                const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
 
951
 
 
952
                if (inst->U.I.Opcode == RC_OPCODE_ARL) {
 
953
                        if (lastARL != NULL && min_offset < 0)
 
954
                                transform_negative_addressing(c, lastARL, inst, min_offset);
 
955
 
 
956
                        lastARL = inst;
 
957
                        min_offset = 0;
 
958
                        continue;
 
959
                }
 
960
 
 
961
                for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
 
962
                        if (inst->U.I.SrcReg[i].RelAddr &&
 
963
                            inst->U.I.SrcReg[i].Index < 0) {
 
964
                                /* ARL must precede any indirect addressing. */
 
965
                                if (lastARL == NULL) {
 
966
                                        rc_error(&c->Base, "Vertex shader: Found relative addressing without ARL.");
 
967
                                        return;
 
968
                                }
 
969
 
 
970
                                if (inst->U.I.SrcReg[i].Index < min_offset)
 
971
                                        min_offset = inst->U.I.SrcReg[i].Index;
 
972
                        }
 
973
                }
 
974
        }
 
975
 
 
976
        if (lastARL != NULL && min_offset < 0)
 
977
                transform_negative_addressing(c, lastARL, inst, min_offset);
 
978
}
 
979
 
 
980
static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
 
981
        .IsNative = &swizzle_is_native,
 
982
        .Split = 0 /* should never be called */
 
983
};
 
984
 
 
985
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
 
986
{
 
987
        int is_r500 = c->Base.is_r500;
 
988
        int opt = !c->Base.disable_optimizations;
 
989
 
 
990
        /* Lists of instruction transformations. */
 
991
        struct radeon_program_transformation alu_rewrite_r500[] = {
 
992
                { &r300_transform_vertex_alu, 0 },
 
993
                { &r300_transform_trig_scale_vertex, 0 },
 
994
                { 0, 0 }
 
995
        };
 
996
 
 
997
        struct radeon_program_transformation alu_rewrite_r300[] = {
 
998
                { &r300_transform_vertex_alu, 0 },
 
999
                { &r300_transform_trig_simple, 0 },
 
1000
                { 0, 0 }
 
1001
        };
 
1002
 
 
1003
        /* Note: These passes have to be done seperately from ALU rewrite,
 
1004
         * otherwise non-native ALU instructions with source conflits
 
1005
         * or non-native modifiers will not be treated properly.
 
1006
         */
 
1007
        struct radeon_program_transformation emulate_modifiers[] = {
 
1008
                { &transform_nonnative_modifiers, 0 },
 
1009
                { 0, 0 }
 
1010
        };
 
1011
 
 
1012
        struct radeon_program_transformation resolve_src_conflicts[] = {
 
1013
                { &transform_source_conflicts, 0 },
 
1014
                { 0, 0 }
 
1015
        };
 
1016
 
 
1017
        /* List of compiler passes. */
 
1018
        struct radeon_compiler_pass vs_list[] = {
 
1019
                /* NAME                         DUMP PREDICATE  FUNCTION                        PARAM */
 
1020
                {"add artificial outputs",      0, 1,           rc_vs_add_artificial_outputs,   NULL},
 
1021
                {"transform loops",             1, 1,           rc_transform_loops,             NULL},
 
1022
                {"emulate branches",            1, !is_r500,    rc_emulate_branches,            NULL},
 
1023
                {"emulate negative addressing", 1, 1,           rc_emulate_negative_addressing, NULL},
 
1024
                {"native rewrite",              1, is_r500,     rc_local_transform,             alu_rewrite_r500},
 
1025
                {"native rewrite",              1, !is_r500,    rc_local_transform,             alu_rewrite_r300},
 
1026
                {"emulate modifiers",           1, !is_r500,    rc_local_transform,             emulate_modifiers},
 
1027
                {"deadcode",                    1, opt,         rc_dataflow_deadcode,           dataflow_outputs_mark_used},
 
1028
                {"dataflow optimize",           1, opt,         rc_optimize,                    NULL},
 
1029
                /* This pass must be done after optimizations. */
 
1030
                {"source conflict resolve",     1, 1,           rc_local_transform,             resolve_src_conflicts},
 
1031
                {"register allocation",         1, opt,         allocate_temporary_registers,   NULL},
 
1032
                {"dead constants",              1, 1,           rc_remove_unused_constants,     &c->code->constants_remap_table},
 
1033
                {"final code validation",       0, 1,           rc_validate_final_shader,       NULL},
 
1034
                {"machine code generation",     0, 1,           translate_vertex_program,       NULL},
 
1035
                {"dump machine code",           0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump,        NULL},
 
1036
                {NULL, 0, 0, NULL, NULL}
 
1037
        };
 
1038
 
 
1039
        c->Base.type = RC_VERTEX_PROGRAM;
 
1040
        c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
 
1041
 
 
1042
        rc_run_compiler(&c->Base, vs_list);
 
1043
 
 
1044
        c->code->InputsRead = c->Base.Program.InputsRead;
 
1045
        c->code->OutputsWritten = c->Base.Program.OutputsWritten;
 
1046
        rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
 
1047
}