~mmach/netext73/mesa-haswell

« back to all changes in this revision

Viewing changes to src/gallium/drivers/r300/compiler/radeon_pair_translate.c

  • Committer: mmach
  • Date: 2022-09-22 19:56:13 UTC
  • Revision ID: netbit73@gmail.com-20220922195613-wtik9mmy20tmor0i
2022-09-22 21:17:09

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*
2
 
 * Copyright (C) 2009 Nicolai Haehnle.
3
 
 *
4
 
 * All Rights Reserved.
5
 
 *
6
 
 * Permission is hereby granted, free of charge, to any person obtaining
7
 
 * a copy of this software and associated documentation files (the
8
 
 * "Software"), to deal in the Software without restriction, including
9
 
 * without limitation the rights to use, copy, modify, merge, publish,
10
 
 * distribute, sublicense, and/or sell copies of the Software, and to
11
 
 * permit persons to whom the Software is furnished to do so, subject to
12
 
 * the following conditions:
13
 
 *
14
 
 * The above copyright notice and this permission notice (including the
15
 
 * next paragraph) shall be included in all copies or substantial
16
 
 * portions of the Software.
17
 
 *
18
 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
 
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
 
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21
 
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22
 
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
 
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
 
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
 
 *
26
 
 */
27
 
 
28
 
#include "radeon_program_pair.h"
29
 
 
30
 
#include "radeon_compiler.h"
31
 
#include "radeon_compiler_util.h"
32
 
 
33
 
#include "util/compiler.h"
34
 
 
35
 
 
36
 
/**
37
 
 * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
38
 
 * and reverse the order of arguments for CMP.
39
 
 */
40
 
static void final_rewrite(struct rc_sub_instruction *inst)
41
 
{
42
 
        struct rc_src_register tmp;
43
 
 
44
 
        switch(inst->Opcode) {
45
 
        case RC_OPCODE_ADD:
46
 
                inst->SrcReg[2] = inst->SrcReg[1];
47
 
                inst->SrcReg[1].File = RC_FILE_NONE;
48
 
                inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
49
 
                inst->SrcReg[1].Negate = RC_MASK_NONE;
50
 
                inst->Opcode = RC_OPCODE_MAD;
51
 
                break;
52
 
        case RC_OPCODE_CMP:
53
 
                tmp = inst->SrcReg[2];
54
 
                inst->SrcReg[2] = inst->SrcReg[0];
55
 
                inst->SrcReg[0] = tmp;
56
 
                break;
57
 
        case RC_OPCODE_MOV:
58
 
                /* AMD say we should use CMP.
59
 
                 * However, when we transform
60
 
                 *  KIL -r0;
61
 
                 * into
62
 
                 *  CMP tmp, -r0, -r0, 0;
63
 
                 *  KIL tmp;
64
 
                 * we get incorrect behaviour on R500 when r0 == 0.0.
65
 
                 * It appears that the R500 KIL hardware treats -0.0 as less
66
 
                 * than zero.
67
 
                 */
68
 
                inst->SrcReg[1].File = RC_FILE_NONE;
69
 
                inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
70
 
                inst->SrcReg[2].File = RC_FILE_NONE;
71
 
                inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
72
 
                inst->Opcode = RC_OPCODE_MAD;
73
 
                break;
74
 
        case RC_OPCODE_MUL:
75
 
                inst->SrcReg[2].File = RC_FILE_NONE;
76
 
                inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
77
 
                inst->Opcode = RC_OPCODE_MAD;
78
 
                break;
79
 
        default:
80
 
                /* nothing to do */
81
 
                break;
82
 
        }
83
 
}
84
 
 
85
 
 
86
 
/**
87
 
 * Classify an instruction according to which ALUs etc. it needs
88
 
 */
89
 
static void classify_instruction(struct rc_sub_instruction * inst,
90
 
        int * needrgb, int * needalpha, int * istranscendent)
91
 
{
92
 
        *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
93
 
        *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
94
 
        *istranscendent = 0;
95
 
 
96
 
        if (inst->WriteALUResult == RC_ALURESULT_X)
97
 
                *needrgb = 1;
98
 
        else if (inst->WriteALUResult == RC_ALURESULT_W)
99
 
                *needalpha = 1;
100
 
 
101
 
        switch(inst->Opcode) {
102
 
        case RC_OPCODE_ADD:
103
 
        case RC_OPCODE_CMP:
104
 
        case RC_OPCODE_CND:
105
 
        case RC_OPCODE_DDX:
106
 
        case RC_OPCODE_DDY:
107
 
        case RC_OPCODE_FRC:
108
 
        case RC_OPCODE_MAD:
109
 
        case RC_OPCODE_MAX:
110
 
        case RC_OPCODE_MIN:
111
 
        case RC_OPCODE_MOV:
112
 
        case RC_OPCODE_MUL:
113
 
                break;
114
 
        case RC_OPCODE_COS:
115
 
        case RC_OPCODE_EX2:
116
 
        case RC_OPCODE_LG2:
117
 
        case RC_OPCODE_RCP:
118
 
        case RC_OPCODE_RSQ:
119
 
        case RC_OPCODE_SIN:
120
 
                *istranscendent = 1;
121
 
                *needalpha = 1;
122
 
                break;
123
 
        case RC_OPCODE_DP4:
124
 
                *needalpha = 1;
125
 
                FALLTHROUGH;
126
 
        case RC_OPCODE_DP3:
127
 
                *needrgb = 1;
128
 
                break;
129
 
        default:
130
 
                break;
131
 
        }
132
 
}
133
 
 
134
 
static void src_uses(struct rc_src_register src, unsigned int * rgb,
135
 
                                                        unsigned int * alpha)
136
 
{
137
 
        int j;
138
 
        for(j = 0; j < 4; ++j) {
139
 
                unsigned int swz = GET_SWZ(src.Swizzle, j);
140
 
                if (swz < 3)
141
 
                        *rgb = 1;
142
 
                else if (swz < 4)
143
 
                        *alpha = 1;
144
 
        }
145
 
}
146
 
 
147
 
/**
148
 
 * Fill the given ALU instruction's opcodes and source operands into the given pair,
149
 
 * if possible.
150
 
 */
151
 
static void set_pair_instruction(struct r300_fragment_program_compiler *c,
152
 
        struct rc_pair_instruction * pair,
153
 
        struct rc_sub_instruction * inst)
154
 
{
155
 
        int needrgb, needalpha, istranscendent;
156
 
        const struct rc_opcode_info * opcode;
157
 
        int i;
158
 
 
159
 
        memset(pair, 0, sizeof(struct rc_pair_instruction));
160
 
 
161
 
        classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
162
 
 
163
 
        if (needrgb) {
164
 
                if (istranscendent)
165
 
                        pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
166
 
                else
167
 
                        pair->RGB.Opcode = inst->Opcode;
168
 
                if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
169
 
                        pair->RGB.Saturate = 1;
170
 
        }
171
 
        if (needalpha) {
172
 
                pair->Alpha.Opcode = inst->Opcode;
173
 
                if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
174
 
                        pair->Alpha.Saturate = 1;
175
 
        }
176
 
 
177
 
        opcode = rc_get_opcode_info(inst->Opcode);
178
 
 
179
 
        /* Presubtract handling:
180
 
         * We need to make sure that the values used by the presubtract
181
 
         * operation end up in src0 or src1. */
182
 
        if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
183
 
                /* rc_pair_alloc_source() will fill in data for
184
 
                 * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
185
 
                int j;
186
 
                for(j = 0; j < 3; j++) {
187
 
                        int src_regs;
188
 
                        if(inst->SrcReg[j].File != RC_FILE_PRESUB)
189
 
                                continue;
190
 
 
191
 
                        src_regs = rc_presubtract_src_reg_count(
192
 
                                                        inst->PreSub.Opcode);
193
 
                        for(i = 0; i < src_regs; i++) {
194
 
                                unsigned int rgb = 0;
195
 
                                unsigned int alpha = 0;
196
 
                                src_uses(inst->SrcReg[j], &rgb, &alpha);
197
 
                                if(rgb) {
198
 
                                        pair->RGB.Src[i].File =
199
 
                                                inst->PreSub.SrcReg[i].File;
200
 
                                        pair->RGB.Src[i].Index =
201
 
                                                inst->PreSub.SrcReg[i].Index;
202
 
                                        pair->RGB.Src[i].Used = 1;
203
 
                                }
204
 
                                if(alpha) {
205
 
                                        pair->Alpha.Src[i].File =
206
 
                                                inst->PreSub.SrcReg[i].File;
207
 
                                        pair->Alpha.Src[i].Index =
208
 
                                                inst->PreSub.SrcReg[i].Index;
209
 
                                        pair->Alpha.Src[i].Used = 1;
210
 
                                }
211
 
                        }
212
 
                }
213
 
        }
214
 
 
215
 
        for(i = 0; i < opcode->NumSrcRegs; ++i) {
216
 
                int source;
217
 
                if (needrgb && !istranscendent) {
218
 
                        unsigned int srcrgb = 0;
219
 
                        unsigned int srcalpha = 0;
220
 
                        unsigned int srcmask = 0;
221
 
                        int j;
222
 
                        /* We don't care about the alpha channel here.  We only
223
 
                         * want the part of the swizzle that writes to rgb,
224
 
                         * since we are creating an rgb instruction. */
225
 
                        for(j = 0; j < 3; ++j) {
226
 
                                unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
227
 
 
228
 
                                if (swz < RC_SWIZZLE_W)
229
 
                                        srcrgb = 1;
230
 
                                else if (swz == RC_SWIZZLE_W)
231
 
                                        srcalpha = 1;
232
 
 
233
 
                                if (swz < RC_SWIZZLE_UNUSED)
234
 
                                        srcmask |= 1 << j;
235
 
                        }
236
 
                        source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
237
 
                                                        inst->SrcReg[i].File, inst->SrcReg[i].Index);
238
 
                        if (source < 0) {
239
 
                                rc_error(&c->Base, "Failed to translate "
240
 
                                                        "rgb instruction.\n");
241
 
                                return;
242
 
                        }
243
 
                        pair->RGB.Arg[i].Source = source;
244
 
                        pair->RGB.Arg[i].Swizzle =
245
 
                                rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
246
 
                        pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
247
 
                        pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
248
 
                }
249
 
                if (needalpha) {
250
 
                        unsigned int srcrgb = 0;
251
 
                        unsigned int srcalpha = 0;
252
 
                        unsigned int swz;
253
 
                        if (istranscendent) {
254
 
                                swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle);
255
 
                        } else {
256
 
                                swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3);
257
 
                        }
258
 
 
259
 
                        if (swz < 3)
260
 
                                srcrgb = 1;
261
 
                        else if (swz < 4)
262
 
                                srcalpha = 1;
263
 
                        source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
264
 
                                                        inst->SrcReg[i].File, inst->SrcReg[i].Index);
265
 
                        if (source < 0) {
266
 
                                rc_error(&c->Base, "Failed to translate "
267
 
                                                        "alpha instruction.\n");
268
 
                                return;
269
 
                        }
270
 
                        pair->Alpha.Arg[i].Source = source;
271
 
                        pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
272
 
                        pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
273
 
 
274
 
                        if (istranscendent) {
275
 
                                pair->Alpha.Arg[i].Negate =
276
 
                                        !!(inst->SrcReg[i].Negate &
277
 
                                                        inst->DstReg.WriteMask);
278
 
                        } else {
279
 
                                pair->Alpha.Arg[i].Negate =
280
 
                                        !!(inst->SrcReg[i].Negate & RC_MASK_W);
281
 
                        }
282
 
                }
283
 
        }
284
 
 
285
 
        /* Destination handling */
286
 
        if (inst->DstReg.File == RC_FILE_OUTPUT) {
287
 
        if (inst->DstReg.Index == c->OutputDepth) {
288
 
            pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
289
 
        } else {
290
 
            for (i = 0; i < 4; i++) {
291
 
                if (inst->DstReg.Index == c->OutputColor[i]) {
292
 
                    pair->RGB.Target = i;
293
 
                    pair->Alpha.Target = i;
294
 
                    pair->RGB.OutputWriteMask |=
295
 
                        inst->DstReg.WriteMask & RC_MASK_XYZ;
296
 
                    pair->Alpha.OutputWriteMask |=
297
 
                        GET_BIT(inst->DstReg.WriteMask, 3);
298
 
                    break;
299
 
                }
300
 
            }
301
 
        }
302
 
        } else {
303
 
                if (needrgb) {
304
 
                        pair->RGB.DestIndex = inst->DstReg.Index;
305
 
                        pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
306
 
                }
307
 
 
308
 
                if (needalpha) {
309
 
                        pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
310
 
                        if (pair->Alpha.WriteMask) {
311
 
                                pair->Alpha.DestIndex = inst->DstReg.Index;
312
 
                        }
313
 
                }
314
 
        }
315
 
 
316
 
        if (needrgb) {
317
 
                pair->RGB.Omod = inst->Omod;
318
 
        }
319
 
        if (needalpha) {
320
 
                pair->Alpha.Omod = inst->Omod;
321
 
        }
322
 
 
323
 
        if (inst->WriteALUResult) {
324
 
                pair->WriteALUResult = inst->WriteALUResult;
325
 
                pair->ALUResultCompare = inst->ALUResultCompare;
326
 
        }
327
 
}
328
 
 
329
 
 
330
 
static void check_opcode_support(struct r300_fragment_program_compiler *c,
331
 
                                 struct rc_sub_instruction *inst)
332
 
{
333
 
        const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
334
 
 
335
 
        if (opcode->HasDstReg) {
336
 
                if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
337
 
                        rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
338
 
                        return;
339
 
                }
340
 
        }
341
 
 
342
 
        for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
343
 
                if (inst->SrcReg[i].RelAddr) {
344
 
                        rc_error(&c->Base, "Fragment program does not support relative addressing "
345
 
                                 " of source operands.\n");
346
 
                        return;
347
 
                }
348
 
        }
349
 
}
350
 
 
351
 
 
352
 
/**
353
 
 * Translate all ALU instructions into corresponding pair instructions,
354
 
 * performing no other changes.
355
 
 */
356
 
void rc_pair_translate(struct radeon_compiler *cc, void *user)
357
 
{
358
 
        struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
359
 
 
360
 
        for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
361
 
            inst != &c->Base.Program.Instructions;
362
 
            inst = inst->Next) {
363
 
                const struct rc_opcode_info * opcode;
364
 
                struct rc_sub_instruction copy;
365
 
 
366
 
                if (inst->Type != RC_INSTRUCTION_NORMAL)
367
 
                        continue;
368
 
 
369
 
                opcode = rc_get_opcode_info(inst->U.I.Opcode);
370
 
 
371
 
                if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
372
 
                        continue;
373
 
 
374
 
                copy = inst->U.I;
375
 
 
376
 
                check_opcode_support(c, &copy);
377
 
 
378
 
                final_rewrite(&copy);
379
 
                inst->Type = RC_INSTRUCTION_PAIR;
380
 
                set_pair_instruction(c, &inst->U.P, &copy);
381
 
        }
382
 
}