~mmach/netext73/mesa-haswell

« back to all changes in this revision

Viewing changes to src/gallium/drivers/r600/r600_asm.c

  • Committer: mmach
  • Date: 2022-09-22 19:56:13 UTC
  • Revision ID: netbit73@gmail.com-20220922195613-wtik9mmy20tmor0i
2022-09-22 21:17:09

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*
2
 
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3
 
 *
4
 
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 
 * copy of this software and associated documentation files (the "Software"),
6
 
 * to deal in the Software without restriction, including without limitation
7
 
 * on the rights to use, copy, modify, merge, publish, distribute, sub
8
 
 * license, and/or sell copies of the Software, and to permit persons to whom
9
 
 * the Software is furnished to do so, subject to the following conditions:
10
 
 *
11
 
 * The above copyright notice and this permission notice (including the next
12
 
 * paragraph) shall be included in all copies or substantial portions of the
13
 
 * Software.
14
 
 *
15
 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18
 
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19
 
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20
 
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21
 
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22
 
 */
23
 
#include "r600_sq.h"
24
 
#include "r600_opcodes.h"
25
 
#include "r600_formats.h"
26
 
#include "r600_shader.h"
27
 
#include "r600d.h"
28
 
 
29
 
#include <errno.h>
30
 
#include "util/u_bitcast.h"
31
 
#include "util/u_dump.h"
32
 
#include "util/u_memory.h"
33
 
#include "util/u_math.h"
34
 
#include "pipe/p_shader_tokens.h"
35
 
 
36
 
#include "sb/sb_public.h"
37
 
 
38
 
#define NUM_OF_CYCLES 3
39
 
#define NUM_OF_COMPONENTS 4
40
 
 
41
 
static inline bool alu_writes(struct r600_bytecode_alu *alu)
42
 
{
43
 
        return alu->dst.write || alu->is_op3;
44
 
}
45
 
 
46
 
static inline unsigned int r600_bytecode_get_num_operands(const struct r600_bytecode_alu *alu)
47
 
{
48
 
        return r600_isa_alu(alu->op)->src_count;
49
 
}
50
 
 
51
 
static struct r600_bytecode_cf *r600_bytecode_cf(void)
52
 
{
53
 
        struct r600_bytecode_cf *cf = CALLOC_STRUCT(r600_bytecode_cf);
54
 
 
55
 
        if (!cf)
56
 
                return NULL;
57
 
        list_inithead(&cf->list);
58
 
        list_inithead(&cf->alu);
59
 
        list_inithead(&cf->vtx);
60
 
        list_inithead(&cf->tex);
61
 
        list_inithead(&cf->gds);
62
 
        return cf;
63
 
}
64
 
 
65
 
static struct r600_bytecode_alu *r600_bytecode_alu(void)
66
 
{
67
 
        struct r600_bytecode_alu *alu = CALLOC_STRUCT(r600_bytecode_alu);
68
 
 
69
 
        if (!alu)
70
 
                return NULL;
71
 
        list_inithead(&alu->list);
72
 
        return alu;
73
 
}
74
 
 
75
 
static struct r600_bytecode_vtx *r600_bytecode_vtx(void)
76
 
{
77
 
        struct r600_bytecode_vtx *vtx = CALLOC_STRUCT(r600_bytecode_vtx);
78
 
 
79
 
        if (!vtx)
80
 
                return NULL;
81
 
        list_inithead(&vtx->list);
82
 
        return vtx;
83
 
}
84
 
 
85
 
static struct r600_bytecode_tex *r600_bytecode_tex(void)
86
 
{
87
 
        struct r600_bytecode_tex *tex = CALLOC_STRUCT(r600_bytecode_tex);
88
 
 
89
 
        if (!tex)
90
 
                return NULL;
91
 
        list_inithead(&tex->list);
92
 
        return tex;
93
 
}
94
 
 
95
 
static struct r600_bytecode_gds *r600_bytecode_gds(void)
96
 
{
97
 
        struct r600_bytecode_gds *gds = CALLOC_STRUCT(r600_bytecode_gds);
98
 
 
99
 
        if (gds == NULL)
100
 
                return NULL;
101
 
        list_inithead(&gds->list);
102
 
        return gds;
103
 
}
104
 
 
105
 
static unsigned stack_entry_size(enum radeon_family chip) {
106
 
        /* Wavefront size:
107
 
         *   64: R600/RV670/RV770/Cypress/R740/Barts/Turks/Caicos/
108
 
         *       Aruba/Sumo/Sumo2/redwood/juniper
109
 
         *   32: R630/R730/R710/Palm/Cedar
110
 
         *   16: R610/Rs780
111
 
         *
112
 
         * Stack row size:
113
 
         *      Wavefront Size                        16  32  48  64
114
 
         *      Columns per Row (R6xx/R7xx/R8xx only)  8   8   4   4
115
 
         *      Columns per Row (R9xx+)                8   4   4   4 */
116
 
 
117
 
        switch (chip) {
118
 
        /* FIXME: are some chips missing here? */
119
 
        /* wavefront size 16 */
120
 
        case CHIP_RV610:
121
 
        case CHIP_RS780:
122
 
        case CHIP_RV620:
123
 
        case CHIP_RS880:
124
 
        /* wavefront size 32 */
125
 
        case CHIP_RV630:
126
 
        case CHIP_RV635:
127
 
        case CHIP_RV730:
128
 
        case CHIP_RV710:
129
 
        case CHIP_PALM:
130
 
        case CHIP_CEDAR:
131
 
                return 8;
132
 
 
133
 
        /* wavefront size 64 */
134
 
        default:
135
 
                return 4;
136
 
        }
137
 
}
138
 
 
139
 
void r600_bytecode_init(struct r600_bytecode *bc,
140
 
                        enum chip_class chip_class,
141
 
                        enum radeon_family family,
142
 
                        bool has_compressed_msaa_texturing)
143
 
{
144
 
        static unsigned next_shader_id = 0;
145
 
 
146
 
        bc->debug_id = ++next_shader_id;
147
 
 
148
 
        if ((chip_class == R600) &&
149
 
            (family != CHIP_RV670 && family != CHIP_RS780 && family != CHIP_RS880)) {
150
 
                bc->ar_handling = AR_HANDLE_RV6XX;
151
 
                bc->r6xx_nop_after_rel_dst = 1;
152
 
        } else {
153
 
                bc->ar_handling = AR_HANDLE_NORMAL;
154
 
                bc->r6xx_nop_after_rel_dst = 0;
155
 
        }
156
 
 
157
 
        list_inithead(&bc->cf);
158
 
        bc->chip_class = chip_class;
159
 
        bc->family = family;
160
 
        bc->has_compressed_msaa_texturing = has_compressed_msaa_texturing;
161
 
        bc->stack.entry_size = stack_entry_size(family);
162
 
}
163
 
 
164
 
int r600_bytecode_add_cf(struct r600_bytecode *bc)
165
 
{
166
 
        struct r600_bytecode_cf *cf = r600_bytecode_cf();
167
 
 
168
 
        if (!cf)
169
 
                return -ENOMEM;
170
 
        list_addtail(&cf->list, &bc->cf);
171
 
        if (bc->cf_last) {
172
 
                cf->id = bc->cf_last->id + 2;
173
 
                if (bc->cf_last->eg_alu_extended) {
174
 
                        /* take into account extended alu size */
175
 
                        cf->id += 2;
176
 
                        bc->ndw += 2;
177
 
                }
178
 
        }
179
 
        bc->cf_last = cf;
180
 
        bc->ncf++;
181
 
        bc->ndw += 2;
182
 
        bc->force_add_cf = 0;
183
 
        bc->ar_loaded = 0;
184
 
        return 0;
185
 
}
186
 
 
187
 
int r600_bytecode_add_output(struct r600_bytecode *bc,
188
 
                const struct r600_bytecode_output *output)
189
 
{
190
 
        int r;
191
 
 
192
 
        if (output->gpr >= bc->ngpr)
193
 
                bc->ngpr = output->gpr + 1;
194
 
 
195
 
        if (bc->cf_last && (bc->cf_last->op == output->op ||
196
 
                (bc->cf_last->op == CF_OP_EXPORT &&
197
 
                output->op == CF_OP_EXPORT_DONE)) &&
198
 
                output->type == bc->cf_last->output.type &&
199
 
                output->elem_size == bc->cf_last->output.elem_size &&
200
 
                output->swizzle_x == bc->cf_last->output.swizzle_x &&
201
 
                output->swizzle_y == bc->cf_last->output.swizzle_y &&
202
 
                output->swizzle_z == bc->cf_last->output.swizzle_z &&
203
 
                output->swizzle_w == bc->cf_last->output.swizzle_w &&
204
 
                output->comp_mask == bc->cf_last->output.comp_mask &&
205
 
                (output->burst_count + bc->cf_last->output.burst_count) <= 16) {
206
 
 
207
 
                if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr &&
208
 
                        (output->array_base + output->burst_count) == bc->cf_last->output.array_base) {
209
 
 
210
 
                        bc->cf_last->op = bc->cf_last->output.op = output->op;
211
 
                        bc->cf_last->output.gpr = output->gpr;
212
 
                        bc->cf_last->output.array_base = output->array_base;
213
 
                        bc->cf_last->output.burst_count += output->burst_count;
214
 
                        return 0;
215
 
 
216
 
                } else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) &&
217
 
                        output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) {
218
 
 
219
 
                        bc->cf_last->op = bc->cf_last->output.op = output->op;
220
 
                        bc->cf_last->output.burst_count += output->burst_count;
221
 
                        return 0;
222
 
                }
223
 
        }
224
 
 
225
 
        r = r600_bytecode_add_cf(bc);
226
 
        if (r)
227
 
                return r;
228
 
        bc->cf_last->op = output->op;
229
 
        memcpy(&bc->cf_last->output, output, sizeof(struct r600_bytecode_output));
230
 
        bc->cf_last->barrier = 1;
231
 
        return 0;
232
 
}
233
 
 
234
 
int r600_bytecode_add_pending_output(struct r600_bytecode *bc,
235
 
                const struct r600_bytecode_output *output)
236
 
{
237
 
        assert(bc->n_pending_outputs + 1 < ARRAY_SIZE(bc->pending_outputs));
238
 
        bc->pending_outputs[bc->n_pending_outputs++] = *output;
239
 
 
240
 
        return 0;
241
 
}
242
 
 
243
 
void
244
 
r600_bytecode_add_ack(struct r600_bytecode *bc)
245
 
{
246
 
        bc->need_wait_ack = true;
247
 
}
248
 
 
249
 
int
250
 
r600_bytecode_wait_acks(struct r600_bytecode *bc)
251
 
{
252
 
        /* Store acks are an R700+ feature. */
253
 
        if (bc->chip_class < R700)
254
 
                return 0;
255
 
 
256
 
        if (!bc->need_wait_ack)
257
 
                return 0;
258
 
 
259
 
        int ret = r600_bytecode_add_cfinst(bc, CF_OP_WAIT_ACK);
260
 
        if (ret != 0)
261
 
                return ret;
262
 
 
263
 
        struct r600_bytecode_cf *cf = bc->cf_last;
264
 
        cf->barrier = 1;
265
 
        /* Request a wait if the number of outstanding acks is > 0 */
266
 
        cf->cf_addr = 0;
267
 
 
268
 
        return 0;
269
 
}
270
 
 
271
 
uint32_t
272
 
r600_bytecode_write_export_ack_type(struct r600_bytecode *bc, bool indirect)
273
 
{
274
 
        if (bc->chip_class >= R700) {
275
 
                if (indirect)
276
 
                        return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND_ACK_EG;
277
 
                else
278
 
                        return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_ACK_EG;
279
 
        } else {
280
 
                if (indirect)
281
 
                        return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
282
 
                else
283
 
                        return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
284
 
        }
285
 
}
286
 
 
287
 
/* alu instructions that can ony exits once per group */
288
 
static int is_alu_once_inst(struct r600_bytecode_alu *alu)
289
 
{
290
 
        return r600_isa_alu(alu->op)->flags & (AF_KILL | AF_PRED) || alu->is_lds_idx_op || alu->op == ALU_OP0_GROUP_BARRIER;
291
 
}
292
 
 
293
 
static int is_alu_reduction_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
294
 
{
295
 
        return (r600_isa_alu(alu->op)->flags & AF_REPL) &&
296
 
                        (r600_isa_alu_slots(bc->isa->hw_class, alu->op) == AF_4V);
297
 
}
298
 
 
299
 
static int is_alu_mova_inst(struct r600_bytecode_alu *alu)
300
 
{
301
 
        return r600_isa_alu(alu->op)->flags & AF_MOVA;
302
 
}
303
 
 
304
 
static int alu_uses_rel(struct r600_bytecode_alu *alu)
305
 
{
306
 
        unsigned num_src = r600_bytecode_get_num_operands(alu);
307
 
        unsigned src;
308
 
 
309
 
        if (alu->dst.rel) {
310
 
                return 1;
311
 
        }
312
 
 
313
 
        for (src = 0; src < num_src; ++src) {
314
 
                if (alu->src[src].rel) {
315
 
                        return 1;
316
 
                }
317
 
        }
318
 
        return 0;
319
 
}
320
 
 
321
 
static int is_lds_read(int sel)
322
 
{
323
 
  return sel == EG_V_SQ_ALU_SRC_LDS_OQ_A_POP || sel == EG_V_SQ_ALU_SRC_LDS_OQ_B_POP;
324
 
}
325
 
 
326
 
static int alu_uses_lds(struct r600_bytecode_alu *alu)
327
 
{
328
 
        unsigned num_src = r600_bytecode_get_num_operands(alu);
329
 
        unsigned src;
330
 
 
331
 
        for (src = 0; src < num_src; ++src) {
332
 
                if (is_lds_read(alu->src[src].sel)) {
333
 
                        return 1;
334
 
                }
335
 
        }
336
 
        return 0;
337
 
}
338
 
 
339
 
static int is_alu_64bit_inst(struct r600_bytecode_alu *alu)
340
 
{
341
 
        const struct alu_op_info *op = r600_isa_alu(alu->op);
342
 
        return (op->flags & AF_64);
343
 
}
344
 
 
345
 
static int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
346
 
{
347
 
        unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
348
 
        return !(slots & AF_S);
349
 
}
350
 
 
351
 
static int is_alu_trans_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
352
 
{
353
 
        unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
354
 
        return !(slots & AF_V);
355
 
}
356
 
 
357
 
/* alu instructions that can execute on any unit */
358
 
static int is_alu_any_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
359
 
{
360
 
        unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
361
 
        return slots == AF_VS;
362
 
}
363
 
 
364
 
static int is_nop_inst(struct r600_bytecode_alu *alu)
365
 
{
366
 
        return alu->op == ALU_OP0_NOP;
367
 
}
368
 
 
369
 
static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *alu_first,
370
 
                            struct r600_bytecode_alu *assignment[5])
371
 
{
372
 
        struct r600_bytecode_alu *alu;
373
 
        unsigned i, chan, trans;
374
 
        int max_slots = bc->chip_class == CAYMAN ? 4 : 5;
375
 
 
376
 
        for (i = 0; i < max_slots; i++)
377
 
                assignment[i] = NULL;
378
 
 
379
 
        for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bytecode_alu, alu->list.next, list)) {
380
 
                chan = alu->dst.chan;
381
 
                if (max_slots == 4)
382
 
                        trans = 0;
383
 
                else if (is_alu_trans_unit_inst(bc, alu))
384
 
                        trans = 1;
385
 
                else if (is_alu_vec_unit_inst(bc, alu))
386
 
                        trans = 0;
387
 
                else if (assignment[chan])
388
 
                        trans = 1; /* Assume ALU_INST_PREFER_VECTOR. */
389
 
                else
390
 
                        trans = 0;
391
 
 
392
 
                if (trans) {
393
 
                        if (assignment[4]) {
394
 
                                assert(0); /* ALU.Trans has already been allocated. */
395
 
                                return -1;
396
 
                        }
397
 
                        assignment[4] = alu;
398
 
                } else {
399
 
                        if (assignment[chan]) {                           
400
 
                                assert(0); /* ALU.chan has already been allocated. */
401
 
                                return -1;
402
 
                        }
403
 
                        assignment[chan] = alu;
404
 
                }
405
 
 
406
 
                if (alu->last)
407
 
                        break;
408
 
        }
409
 
        return 0;
410
 
}
411
 
 
412
 
struct alu_bank_swizzle {
413
 
        int     hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
414
 
        int     hw_cfile_addr[4];
415
 
        int     hw_cfile_elem[4];
416
 
};
417
 
 
418
 
static const unsigned cycle_for_bank_swizzle_vec[][3] = {
419
 
        [SQ_ALU_VEC_012] = { 0, 1, 2 },
420
 
        [SQ_ALU_VEC_021] = { 0, 2, 1 },
421
 
        [SQ_ALU_VEC_120] = { 1, 2, 0 },
422
 
        [SQ_ALU_VEC_102] = { 1, 0, 2 },
423
 
        [SQ_ALU_VEC_201] = { 2, 0, 1 },
424
 
        [SQ_ALU_VEC_210] = { 2, 1, 0 }
425
 
};
426
 
 
427
 
static const unsigned cycle_for_bank_swizzle_scl[][3] = {
428
 
        [SQ_ALU_SCL_210] = { 2, 1, 0 },
429
 
        [SQ_ALU_SCL_122] = { 1, 2, 2 },
430
 
        [SQ_ALU_SCL_212] = { 2, 1, 2 },
431
 
        [SQ_ALU_SCL_221] = { 2, 2, 1 }
432
 
};
433
 
 
434
 
static void init_bank_swizzle(struct alu_bank_swizzle *bs)
435
 
{
436
 
        int i, cycle, component;
437
 
        /* set up gpr use */
438
 
        for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++)
439
 
                for (component = 0; component < NUM_OF_COMPONENTS; component++)
440
 
                         bs->hw_gpr[cycle][component] = -1;
441
 
        for (i = 0; i < 4; i++)
442
 
                bs->hw_cfile_addr[i] = -1;
443
 
        for (i = 0; i < 4; i++)
444
 
                bs->hw_cfile_elem[i] = -1;
445
 
}
446
 
 
447
 
static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle)
448
 
{
449
 
        if (bs->hw_gpr[cycle][chan] == -1)
450
 
                bs->hw_gpr[cycle][chan] = sel;
451
 
        else if (bs->hw_gpr[cycle][chan] != (int)sel) {
452
 
                /* Another scalar operation has already used the GPR read port for the channel. */
453
 
                return -1;
454
 
        }
455
 
        return 0;
456
 
}
457
 
 
458
 
static int reserve_cfile(const struct r600_bytecode *bc,
459
 
                         struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
460
 
{
461
 
        int res, num_res = 4;
462
 
        if (bc->chip_class >= R700) {
463
 
                num_res = 2;
464
 
                chan /= 2;
465
 
        }
466
 
        for (res = 0; res < num_res; ++res) {
467
 
                if (bs->hw_cfile_addr[res] == -1) {
468
 
                        bs->hw_cfile_addr[res] = sel;
469
 
                        bs->hw_cfile_elem[res] = chan;
470
 
                        return 0;
471
 
                } else if (bs->hw_cfile_addr[res] == sel &&
472
 
                        bs->hw_cfile_elem[res] == chan)
473
 
                        return 0; /* Read for this scalar element already reserved, nothing to do here. */
474
 
        }
475
 
        /* All cfile read ports are used, cannot reference vector element. */
476
 
        return -1;
477
 
}
478
 
 
479
 
/* Returns 1 when sel lies in the GPR selector range 0..127, 0 otherwise. */
static int is_gpr(unsigned sel)
{
	return sel < 128;
}
483
 
 
484
 
/* CB constants start at 512, and get translated to a kcache index when ALU
 * clauses are constructed. Note that we handle kcache constants the same way
 * as (the now gone) cfile constants, is that really required?
 *
 * Returns non-zero when sel addresses a kcache constant, either before
 * translation (512..4606) or after translation to a hardware selector
 * (128..191 for kcache 0 & 1, 256..319 for kcache 2 & 3 on Evergreen).
 *
 * The kcache 2 & 3 range starts at 256 inclusive, mirroring the way the
 * kcache 0 & 1 range starts at 128 inclusive; the earlier `sel > 256` test
 * wrongly rejected the first kcache 2 entry.
 *
 * NOTE(review): the untranslated upper bound (sel < 4607) is kept as it
 * was; confirm whether 4607 itself should also be accepted. */
static int is_kcache(unsigned sel)
{
	return (sel > 511 && sel < 4607) || /* Kcache before translation. */
	       (sel > 127 && sel < 192) ||  /* Kcache 0 & 1 after translation. */
	       (sel > 255 && sel < 320);    /* Kcache 2 & 3 after translation (EG). */
}
493
 
 
494
 
static int is_const(int sel)
495
 
{
496
 
   return is_kcache(sel) ||
497
 
                (sel >= V_SQ_ALU_SRC_0 &&
498
 
                sel <= V_SQ_ALU_SRC_LITERAL);
499
 
}
500
 
 
501
 
static int check_vector(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu,
502
 
                        struct alu_bank_swizzle *bs, int bank_swizzle)
503
 
{
504
 
        int r, src, num_src, sel, elem, cycle;
505
 
 
506
 
        num_src = r600_bytecode_get_num_operands(alu);
507
 
        for (src = 0; src < num_src; src++) {
508
 
                sel = alu->src[src].sel;
509
 
                elem = alu->src[src].chan;
510
 
                if (is_gpr(sel)) {
511
 
                        cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src];
512
 
                        if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan)
513
 
                                /* Nothing to do; special-case optimization,
514
 
                                 * second source uses first source’s reservation. */
515
 
                                continue;
516
 
                        else {
517
 
                                r = reserve_gpr(bs, sel, elem, cycle);
518
 
                                if (r)
519
 
                                        return r;
520
 
                        }
521
 
      } else if (is_kcache(sel)) {
522
 
                        r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem);
523
 
                        if (r)
524
 
                                return r;
525
 
                }
526
 
                /* No restrictions on PV, PS, literal or special constants. */
527
 
        }
528
 
        return 0;
529
 
}
530
 
 
531
 
static int check_scalar(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu,
532
 
                        struct alu_bank_swizzle *bs, int bank_swizzle)
533
 
{
534
 
        int r, src, num_src, const_count, sel, elem, cycle;
535
 
 
536
 
        num_src = r600_bytecode_get_num_operands(alu);
537
 
        for (const_count = 0, src = 0; src < num_src; ++src) {
538
 
                sel = alu->src[src].sel;
539
 
                elem = alu->src[src].chan;
540
 
                if (is_const(sel)) { /* Any constant, including literal and inline constants. */
541
 
                        if (const_count >= 2)
542
 
                                /* More than two references to a constant in
543
 
                                 * transcendental operation. */
544
 
                                return -1;
545
 
                        else
546
 
                                const_count++;
547
 
                }
548
 
      if (is_kcache(sel)) {
549
 
                        r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem);
550
 
                        if (r)
551
 
                                return r;
552
 
                }
553
 
        }
554
 
        for (src = 0; src < num_src; ++src) {
555
 
                sel = alu->src[src].sel;
556
 
                elem = alu->src[src].chan;
557
 
                if (is_gpr(sel)) {
558
 
                        cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src];
559
 
                        if (cycle < const_count)
560
 
                                /* Cycle for GPR load conflicts with
561
 
                                 * constant load in transcendental operation. */
562
 
                                return -1;
563
 
                        r = reserve_gpr(bs, sel, elem, cycle);
564
 
                        if (r)
565
 
                                return r;
566
 
                }
567
 
                /* PV PS restrictions */
568
 
                if (const_count && (sel == 254 || sel == 255)) {
569
 
                        cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src];
570
 
                        if (cycle < const_count)
571
 
                                return -1;
572
 
                }
573
 
        }
574
 
        return 0;
575
 
}
576
 
 
577
 
static int check_and_set_bank_swizzle(const struct r600_bytecode *bc,
578
 
                                      struct r600_bytecode_alu *slots[5])
579
 
{
580
 
        struct alu_bank_swizzle bs;
581
 
        int bank_swizzle[5];
582
 
        int i, r = 0, forced = 1;
583
 
        boolean scalar_only = bc->chip_class == CAYMAN ? false : true;
584
 
        int max_slots = bc->chip_class == CAYMAN ? 4 : 5;
585
 
 
586
 
        for (i = 0; i < max_slots; i++) {
587
 
                if (slots[i]) {
588
 
                        if (slots[i]->bank_swizzle_force) {
589
 
                                slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
590
 
                        } else {
591
 
                                forced = 0;
592
 
                        }
593
 
                }
594
 
 
595
 
                if (i < 4 && slots[i])
596
 
                        scalar_only = false;
597
 
        }
598
 
        if (forced)
599
 
                return 0;
600
 
 
601
 
        /* Just check every possible combination of bank swizzle.
602
 
         * Not very efficent, but works on the first try in most of the cases. */
603
 
        for (i = 0; i < 4; i++)
604
 
                if (!slots[i] || !slots[i]->bank_swizzle_force)
605
 
                        bank_swizzle[i] = SQ_ALU_VEC_012;
606
 
                else
607
 
                        bank_swizzle[i] = slots[i]->bank_swizzle;
608
 
 
609
 
        bank_swizzle[4] = SQ_ALU_SCL_210;
610
 
        while(bank_swizzle[4] <= SQ_ALU_SCL_221) {
611
 
 
612
 
                init_bank_swizzle(&bs);
613
 
                if (scalar_only == false) {
614
 
                        for (i = 0; i < 4; i++) {
615
 
                                if (slots[i]) {
616
 
                                        r = check_vector(bc, slots[i], &bs, bank_swizzle[i]);
617
 
                                        if (r)
618
 
                                                break;
619
 
                                }
620
 
                        }
621
 
                } else
622
 
                        r = 0;
623
 
 
624
 
                if (!r && max_slots == 5 && slots[4]) {
625
 
                        r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]);
626
 
                }
627
 
                if (!r) {
628
 
                        for (i = 0; i < max_slots; i++) {
629
 
                                if (slots[i])
630
 
                                        slots[i]->bank_swizzle = bank_swizzle[i];
631
 
                        }
632
 
                        return 0;
633
 
                }
634
 
 
635
 
                if (scalar_only) {
636
 
                        bank_swizzle[4]++;
637
 
                } else {
638
 
                        for (i = 0; i < max_slots; i++) {
639
 
                                if (!slots[i] || !slots[i]->bank_swizzle_force) {
640
 
                                        bank_swizzle[i]++;
641
 
                                        if (bank_swizzle[i] <= SQ_ALU_VEC_210)
642
 
                                                break;
643
 
                                        else if (i < max_slots - 1)
644
 
                                                bank_swizzle[i] = SQ_ALU_VEC_012;
645
 
                                        else
646
 
                                                return -1;
647
 
                                }
648
 
                        }
649
 
                }
650
 
        }
651
 
 
652
 
        /* Couldn't find a working swizzle. */
653
 
        return -1;
654
 
}
655
 
 
656
 
static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
657
 
                                  struct r600_bytecode_alu *slots[5], struct r600_bytecode_alu *alu_prev)
658
 
{
659
 
        struct r600_bytecode_alu *prev[5];
660
 
        int gpr[5], chan[5];
661
 
        int i, j, r, src, num_src;
662
 
        int max_slots = bc->chip_class == CAYMAN ? 4 : 5;
663
 
 
664
 
        r = assign_alu_units(bc, alu_prev, prev);
665
 
        if (r)
666
 
                return r;
667
 
 
668
 
        for (i = 0; i < max_slots; ++i) {
669
 
                if (prev[i] && alu_writes(prev[i]) && !prev[i]->dst.rel) {
670
 
 
671
 
                        if (is_alu_64bit_inst(prev[i])) {
672
 
                                gpr[i] = -1;
673
 
                                continue;
674
 
                        }
675
 
 
676
 
                        gpr[i] = prev[i]->dst.sel;
677
 
                        /* cube writes more than PV.X */
678
 
                        if (is_alu_reduction_inst(bc, prev[i]))
679
 
                                chan[i] = 0;
680
 
                        else
681
 
                                chan[i] = prev[i]->dst.chan;
682
 
                } else
683
 
                        gpr[i] = -1;
684
 
        }
685
 
 
686
 
        for (i = 0; i < max_slots; ++i) {
687
 
                struct r600_bytecode_alu *alu = slots[i];
688
 
                if (!alu)
689
 
                        continue;
690
 
 
691
 
                if (is_alu_64bit_inst(alu))
692
 
                        continue;
693
 
                num_src = r600_bytecode_get_num_operands(alu);
694
 
                for (src = 0; src < num_src; ++src) {
695
 
                        if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
696
 
                                continue;
697
 
 
698
 
                        if (bc->chip_class < CAYMAN) {
699
 
                                if (alu->src[src].sel == gpr[4] &&
700
 
                                    alu->src[src].chan == chan[4] &&
701
 
                                    alu_prev->pred_sel == alu->pred_sel) {
702
 
                                        alu->src[src].sel = V_SQ_ALU_SRC_PS;
703
 
                                        alu->src[src].chan = 0;
704
 
                                        continue;
705
 
                                }
706
 
                        }
707
 
 
708
 
                        for (j = 0; j < 4; ++j) {
709
 
                                if (alu->src[src].sel == gpr[j] &&
710
 
                                        alu->src[src].chan == j &&
711
 
                                      alu_prev->pred_sel == alu->pred_sel) {
712
 
                                        alu->src[src].sel = V_SQ_ALU_SRC_PV;
713
 
                                        alu->src[src].chan = chan[j];
714
 
                                        break;
715
 
                                }
716
 
                        }
717
 
                }
718
 
        }
719
 
 
720
 
        return 0;
721
 
}
722
 
 
723
 
void r600_bytecode_special_constants(uint32_t value, unsigned *sel)
724
 
{
725
 
        switch(value) {
726
 
        case 0:
727
 
                *sel = V_SQ_ALU_SRC_0;
728
 
                break;
729
 
        case 1:
730
 
                *sel = V_SQ_ALU_SRC_1_INT;
731
 
                break;
732
 
        case -1:
733
 
                *sel = V_SQ_ALU_SRC_M_1_INT;
734
 
                break;
735
 
        case 0x3F800000: /* 1.0f */
736
 
                *sel = V_SQ_ALU_SRC_1;
737
 
                break;
738
 
        case 0x3F000000: /* 0.5f */
739
 
                *sel = V_SQ_ALU_SRC_0_5;
740
 
                break;
741
 
        default:
742
 
                *sel = V_SQ_ALU_SRC_LITERAL;
743
 
                break;
744
 
        }
745
 
}
746
 
 
747
 
/* compute how many literal are needed */
748
 
static int r600_bytecode_alu_nliterals(struct r600_bytecode_alu *alu,
749
 
                                 uint32_t literal[4], unsigned *nliteral)
750
 
{
751
 
        unsigned num_src = r600_bytecode_get_num_operands(alu);
752
 
        unsigned i, j;
753
 
 
754
 
        for (i = 0; i < num_src; ++i) {
755
 
                if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
756
 
                        uint32_t value = alu->src[i].value;
757
 
                        unsigned found = 0;
758
 
                        for (j = 0; j < *nliteral; ++j) {
759
 
                                if (literal[j] == value) {
760
 
                                        found = 1;
761
 
                                        break;
762
 
                                }
763
 
                        }
764
 
                        if (!found) {
765
 
                                if (*nliteral >= 4)
766
 
                                        return -EINVAL;
767
 
                                literal[(*nliteral)++] = value;
768
 
                        }
769
 
                }
770
 
        }
771
 
        return 0;
772
 
}
773
 
 
774
 
static void r600_bytecode_alu_adjust_literals(struct r600_bytecode_alu *alu,
775
 
                                              uint32_t literal[4], unsigned nliteral)
776
 
{
777
 
        unsigned num_src = r600_bytecode_get_num_operands(alu);
778
 
        unsigned i, j;
779
 
 
780
 
        for (i = 0; i < num_src; ++i) {
781
 
                if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
782
 
                        uint32_t value = alu->src[i].value;
783
 
                        for (j = 0; j < nliteral; ++j) {
784
 
                                if (literal[j] == value) {
785
 
                                        alu->src[i].chan = j;
786
 
                                        break;
787
 
                                }
788
 
                        }
789
 
                }
790
 
        }
791
 
}
792
 
 
793
 
/*
 * Try to combine the instruction group that was just completed (slots) with
 * the group that precedes it in the current clause (starting at alu_prev).
 *
 * bc       - bytecode being built; bc->cf_last is the clause that is edited.
 * slots    - per-unit instructions of the current group (index 4 is the trans
 *            unit when max_slots is 5). On a successful merge the entries are
 *            overwritten with the combined group.
 * alu_prev - first instruction of the previous group.
 *
 * Returns non-zero only when assign_alu_units() fails for the previous group.
 * Every other path returns 0: either the groups were merged, or one of the
 * checks below decided that merging is not possible, in which case the
 * clause's instruction list is not relinked.
 */
static int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu *slots[5],
794
 
                             struct r600_bytecode_alu *alu_prev)
795
 
{
796
 
        struct r600_bytecode_alu *prev[5];
797
 
        struct r600_bytecode_alu *result[5] = { NULL };
798
 
 
799
 
        /* bit 0: an INTERP_X was seen, bit 1: an INTERP_Z was seen */
        uint8_t interp_xz = 0;
800
 
 
801
 
        /* literal: literals of both groups; prev_literal: previous group only */
        uint32_t literal[4], prev_literal[4];
802
 
        unsigned nliteral = 0, prev_nliteral = 0;
803
 
 
804
 
        int i, j, r, src, num_src;
805
 
        int num_once_inst = 0;
806
 
        int have_mova = 0, have_rel = 0;
807
 
        int max_slots = bc->chip_class == CAYMAN ? 4 : 5;
808
 
 
809
 
        r = assign_alu_units(bc, alu_prev, prev);
810
 
        if (r)
811
 
                return r;
812
 
 
813
 
        /* Quick rejects: predicated instructions, "once" instructions, and
         * a combination that would contain both INTERP_X and INTERP_Z
         * (INTERP_LOAD_P0 counts as both). */
        for (i = 0; i < max_slots; ++i) {
814
 
                if (prev[i]) {
815
 
                      if (prev[i]->pred_sel)
816
 
                              return 0;
817
 
                      if (is_alu_once_inst(prev[i]))
818
 
                              return 0;
819
 
 
820
 
                      if (prev[i]->op == ALU_OP1_INTERP_LOAD_P0)
821
 
                         interp_xz |= 3;
822
 
                      if (prev[i]->op == ALU_OP2_INTERP_X)
823
 
                         interp_xz |= 1;
824
 
                      if (prev[i]->op == ALU_OP2_INTERP_Z)
825
 
                         interp_xz |= 2;
826
 
                }
827
 
                if (slots[i]) {
828
 
                        if (slots[i]->pred_sel)
829
 
                                return 0;
830
 
                        if (is_alu_once_inst(slots[i]))
831
 
                                return 0;
832
 
                        if (slots[i]->op == ALU_OP1_INTERP_LOAD_P0)
833
 
                           interp_xz |= 3;
834
 
                        if (slots[i]->op == ALU_OP2_INTERP_X)
835
 
                           interp_xz |= 1;
836
 
                        if (slots[i]->op == ALU_OP2_INTERP_Z)
837
 
                           interp_xz |= 2;
838
 
                }
839
 
                if (interp_xz == 3)
840
 
                   return 0;
841
 
        }
842
 
 
843
 
        /* Build the merged group in result[] while checking every hazard. */
        for (i = 0; i < max_slots; ++i) {
844
 
                struct r600_bytecode_alu *alu;
845
 
 
846
 
                if (num_once_inst > 0)
847
 
                   return 0;
848
 
 
849
 
                /* check number of literals */
850
 
                if (prev[i]) {
851
 
                        if (r600_bytecode_alu_nliterals(prev[i], literal, &nliteral))
852
 
                                return 0;
853
 
                        if (r600_bytecode_alu_nliterals(prev[i], prev_literal, &prev_nliteral))
854
 
                                return 0;
855
 
                        /* a MOVA and a relative access must not share a group */
                        if (is_alu_mova_inst(prev[i])) {
856
 
                                if (have_rel)
857
 
                                        return 0;
858
 
                                have_mova = 1;
859
 
                        }
860
 
 
861
 
                        if (alu_uses_rel(prev[i])) {
862
 
                                if (have_mova) {
863
 
                                        return 0;
864
 
                                }
865
 
                                have_rel = 1;
866
 
                        }
867
 
                        if (alu_uses_lds(prev[i]))
868
 
                                return 0;
869
 
 
870
 
                        num_once_inst += is_alu_once_inst(prev[i]);
871
 
                }
872
 
                if (slots[i] && r600_bytecode_alu_nliterals(slots[i], literal, &nliteral))
873
 
                        return 0;
874
 
 
875
 
                /* Let's check used slots. */
876
 
                if (prev[i] && !slots[i]) {
877
 
                        result[i] = prev[i];
878
 
                        continue;
879
 
                } else if (prev[i] && slots[i]) {
880
 
                        /* Both groups use unit i: one of the two has to move
                         * to the trans unit (index 4), or the merge fails. */
                        if (max_slots == 5 && result[4] == NULL && prev[4] == NULL && slots[4] == NULL) {
881
 
                                /* Trans unit is still free try to use it. */
882
 
                                if (is_alu_any_unit_inst(bc, slots[i]) && !alu_uses_lds(slots[i])) {
883
 
                                        result[i] = prev[i];
884
 
                                        result[4] = slots[i];
885
 
                                } else if (is_alu_any_unit_inst(bc, prev[i])) {
886
 
                                        if (slots[i]->dst.sel == prev[i]->dst.sel &&
887
 
                                            alu_writes(slots[i]) &&
888
 
                                            alu_writes(prev[i]))
889
 
                                                return 0;
890
 
 
891
 
                                        result[i] = slots[i];
892
 
                                        result[4] = prev[i];
893
 
                                } else
894
 
                                        return 0;
895
 
                        } else
896
 
                                return 0;
897
 
                } else if(!slots[i]) {
898
 
                        continue;
899
 
                } else {
900
 
                        /* Only the current group uses unit i; reject if it
                         * writes the same register channel as the previous
                         * group's trans-unit instruction. */
                        if (max_slots == 5 && slots[i] && prev[4] &&
901
 
                                        slots[i]->dst.sel == prev[4]->dst.sel &&
902
 
                                        slots[i]->dst.chan == prev[4]->dst.chan &&
903
 
                                        alu_writes(slots[i]) &&
904
 
                                        alu_writes(prev[4]))
905
 
                                return 0;
906
 
 
907
 
                        result[i] = slots[i];
908
 
                }
909
 
 
910
 
                /* From here on only the current group's instruction is checked. */
                alu = slots[i];
911
 
                num_once_inst += is_alu_once_inst(alu);
912
 
 
913
 
                /* don't reschedule NOPs */
914
 
                if (is_nop_inst(alu))
915
 
                        return 0;
916
 
 
917
 
                if (is_alu_mova_inst(alu)) {
918
 
                        if (have_rel) {
919
 
                                return 0;
920
 
                        }
921
 
                        have_mova = 1;
922
 
                }
923
 
 
924
 
                if (alu_uses_rel(alu)) {
925
 
                        if (have_mova) {
926
 
                                return 0;
927
 
                        }
928
 
                        have_rel = 1;
929
 
                }
930
 
 
931
 
                if (alu->op == ALU_OP0_SET_CF_IDX0 ||
932
 
                        alu->op == ALU_OP0_SET_CF_IDX1)
933
 
                        return 0; /* data hazard with MOVA */
934
 
 
935
 
                /* Let's check source gprs */
936
 
                num_src = r600_bytecode_get_num_operands(alu);
937
 
                for (src = 0; src < num_src; ++src) {
938
 
 
939
 
                        /* Constants don't matter. */
940
 
                        if (!is_gpr(alu->src[src].sel))
941
 
                                continue;
942
 
 
943
 
                        /* The current instruction must not read anything the
                         * previous group writes. */
                        for (j = 0; j < max_slots; ++j) {
944
 
                                if (!prev[j] || !alu_writes(prev[j]))
945
 
                                        continue;
946
 
 
947
 
                                /* If it's relative then we can't determine which gpr is really used. */
948
 
                                if (prev[j]->dst.chan == alu->src[src].chan &&
949
 
                                        (prev[j]->dst.sel == alu->src[src].sel ||
950
 
                                        prev[j]->dst.rel || alu->src[src].rel))
951
 
                                        return 0;
952
 
                        }
953
 
                }
954
 
        }
955
 
 
956
 
        /* more than one PRED_ or KILL_ ? */
957
 
        if (num_once_inst > 1)
958
 
                return 0;
959
 
 
960
 
        /* check if the result can still be swizzled */
961
 
        r = check_and_set_bank_swizzle(bc, result);
962
 
        if (r)
963
 
                return 0;
964
 
 
965
 
        /* looks like everything worked out right, apply the changes */
966
 
 
967
 
        /* undo adding previous literals */
968
 
        bc->cf_last->ndw -= align(prev_nliteral, 2);
969
 
 
970
 
        /* sort instructions */
971
 
        for (i = 0; i < max_slots; ++i) {
972
 
                slots[i] = result[i];
973
 
                if (result[i]) {
974
 
                        /* move the instruction to the tail of the clause, in unit order */
                        list_del(&result[i]->list);
975
 
                        result[i]->last = 0;
976
 
                        list_addtail(&result[i]->list, &bc->cf_last->alu);
977
 
                }
978
 
        }
979
 
 
980
 
        /* determine new last instruction */
981
 
        LIST_ENTRY(struct r600_bytecode_alu, bc->cf_last->alu.prev, list)->last = 1;
982
 
 
983
 
        /* determine new first instruction */
984
 
        for (i = 0; i < max_slots; ++i) {
985
 
                if (result[i]) {
986
 
                        bc->cf_last->curr_bs_head = result[i];
987
 
                        break;
988
 
                }
989
 
        }
990
 
 
991
 
        /* The merged group is now the current group, so the group before the
         * previous one moves up to prev_bs_head. */
        bc->cf_last->prev_bs_head = bc->cf_last->prev2_bs_head;
992
 
        bc->cf_last->prev2_bs_head = NULL;
993
 
 
994
 
        return 0;
995
 
}
996
 
 
997
 
/* we'll keep kcache sets sorted by bank & addr */
998
 
static int r600_bytecode_alloc_kcache_line(struct r600_bytecode *bc,
999
 
                struct r600_bytecode_kcache *kcache,
1000
 
                unsigned bank, unsigned line, unsigned index_mode)
1001
 
{
1002
 
        int i, kcache_banks = bc->chip_class >= EVERGREEN ? 4 : 2;
1003
 
 
1004
 
        for (i = 0; i < kcache_banks; i++) {
1005
 
                if (kcache[i].mode) {
1006
 
                        int d;
1007
 
 
1008
 
                        if (kcache[i].bank < bank)
1009
 
                                continue;
1010
 
 
1011
 
                        if ((kcache[i].bank == bank && kcache[i].addr > line+1) ||
1012
 
                                        kcache[i].bank > bank) {
1013
 
                                /* try to insert new line */
1014
 
                                if (kcache[kcache_banks-1].mode) {
1015
 
                                        /* all sets are in use */
1016
 
                                        return -ENOMEM;
1017
 
                                }
1018
 
 
1019
 
                                memmove(&kcache[i+1],&kcache[i], (kcache_banks-i-1)*sizeof(struct r600_bytecode_kcache));
1020
 
                                kcache[i].mode = V_SQ_CF_KCACHE_LOCK_1;
1021
 
                                kcache[i].bank = bank;
1022
 
                                kcache[i].addr = line;
1023
 
                                kcache[i].index_mode = index_mode;
1024
 
                                return 0;
1025
 
                        }
1026
 
 
1027
 
                        d = line - kcache[i].addr;
1028
 
 
1029
 
                        if (d == -1) {
1030
 
                                kcache[i].addr--;
1031
 
                                if (kcache[i].mode == V_SQ_CF_KCACHE_LOCK_2) {
1032
 
                                        /* we are prepending the line to the current set,
1033
 
                                         * discarding the existing second line,
1034
 
                                         * so we'll have to insert line+2 after it */
1035
 
                                        line += 2;
1036
 
                                        continue;
1037
 
                                } else if (kcache[i].mode == V_SQ_CF_KCACHE_LOCK_1) {
1038
 
                                        kcache[i].mode = V_SQ_CF_KCACHE_LOCK_2;
1039
 
                                        return 0;
1040
 
                                } else {
1041
 
                                        /* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */
1042
 
                                        return -ENOMEM;
1043
 
                                }
1044
 
                        } else if (d == 1) {
1045
 
                                kcache[i].mode = V_SQ_CF_KCACHE_LOCK_2;
1046
 
                                return 0;
1047
 
                        } else if (d == 0)
1048
 
                                return 0;
1049
 
                } else { /* free kcache set - use it */
1050
 
                        kcache[i].mode = V_SQ_CF_KCACHE_LOCK_1;
1051
 
                        kcache[i].bank = bank;
1052
 
                        kcache[i].addr = line;
1053
 
                        kcache[i].index_mode = index_mode;
1054
 
                        return 0;
1055
 
                }
1056
 
        }
1057
 
        return -ENOMEM;
1058
 
}
1059
 
 
1060
 
static int r600_bytecode_alloc_inst_kcache_lines(struct r600_bytecode *bc,
1061
 
                struct r600_bytecode_kcache *kcache,
1062
 
                struct r600_bytecode_alu *alu)
1063
 
{
1064
 
        int i, r;
1065
 
 
1066
 
        for (i = 0; i < 3; i++) {
1067
 
                unsigned bank, line, sel = alu->src[i].sel, index_mode;
1068
 
 
1069
 
                if (sel < 512)
1070
 
                        continue;
1071
 
 
1072
 
                bank = alu->src[i].kc_bank;
1073
 
                assert(bank < R600_MAX_HW_CONST_BUFFERS);
1074
 
                line = (sel-512)>>4;
1075
 
                index_mode = alu->src[i].kc_rel ? 1 : 0; // V_SQ_CF_INDEX_0 / V_SQ_CF_INDEX_NONE
1076
 
 
1077
 
                if ((r = r600_bytecode_alloc_kcache_line(bc, kcache, bank, line, index_mode)))
1078
 
                        return r;
1079
 
        }
1080
 
        return 0;
1081
 
}
1082
 
 
1083
 
static int r600_bytecode_assign_kcache_banks(
1084
 
                struct r600_bytecode_alu *alu,
1085
 
                struct r600_bytecode_kcache * kcache)
1086
 
{
1087
 
        int i, j;
1088
 
 
1089
 
        /* Alter the src operands to refer to the kcache. */
1090
 
        for (i = 0; i < 3; ++i) {
1091
 
                static const unsigned int base[] = {128, 160, 256, 288};
1092
 
                unsigned int line, sel = alu->src[i].sel, found = 0;
1093
 
 
1094
 
                if (sel < 512)
1095
 
                        continue;
1096
 
 
1097
 
                sel -= 512;
1098
 
                line = sel>>4;
1099
 
 
1100
 
                for (j = 0; j < 4 && !found; ++j) {
1101
 
                        switch (kcache[j].mode) {
1102
 
                        case V_SQ_CF_KCACHE_NOP:
1103
 
                        case V_SQ_CF_KCACHE_LOCK_LOOP_INDEX:
1104
 
                                R600_ERR("unexpected kcache line mode\n");
1105
 
                                return -ENOMEM;
1106
 
                        default:
1107
 
                                if (kcache[j].bank == alu->src[i].kc_bank &&
1108
 
                                                kcache[j].addr <= line &&
1109
 
                                                line < kcache[j].addr + kcache[j].mode) {
1110
 
                                        alu->src[i].sel = sel - (kcache[j].addr<<4);
1111
 
                                        alu->src[i].sel += base[j];
1112
 
                                        found=1;
1113
 
                            }
1114
 
                        }
1115
 
                }
1116
 
        }
1117
 
        return 0;
1118
 
}
1119
 
 
1120
 
static int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc,
1121
 
                struct r600_bytecode_alu *alu,
1122
 
                unsigned type)
1123
 
{
1124
 
        struct r600_bytecode_kcache kcache_sets[4];
1125
 
        struct r600_bytecode_kcache *kcache = kcache_sets;
1126
 
        int r;
1127
 
 
1128
 
        memcpy(kcache, bc->cf_last->kcache, 4 * sizeof(struct r600_bytecode_kcache));
1129
 
 
1130
 
        if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) {
1131
 
                /* can't alloc, need to start new clause */
1132
 
 
1133
 
                /* Make sure the CF ends with an "last" instruction when
1134
 
                 * we split an ALU group because of a new CF */
1135
 
                if (!list_is_empty(&bc->cf_last->alu))  {
1136
 
                        struct r600_bytecode_alu *last_submitted =
1137
 
                                list_last_entry(&bc->cf_last->alu, struct r600_bytecode_alu, list);
1138
 
                                last_submitted->last = 1;
1139
 
                }
1140
 
 
1141
 
                if ((r = r600_bytecode_add_cf(bc))) {
1142
 
                        return r;
1143
 
                }
1144
 
                bc->cf_last->op = type;
1145
 
 
1146
 
                /* retry with the new clause */
1147
 
                kcache = bc->cf_last->kcache;
1148
 
                if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) {
1149
 
                        /* can't alloc again- should never happen */
1150
 
                        return r;
1151
 
                }
1152
 
        } else {
1153
 
                /* update kcache sets */
1154
 
                memcpy(bc->cf_last->kcache, kcache, 4 * sizeof(struct r600_bytecode_kcache));
1155
 
        }
1156
 
 
1157
 
        /* if we actually used more than 2 kcache sets, or have relative indexing - use ALU_EXTENDED on eg+ */
1158
 
        if (kcache[2].mode != V_SQ_CF_KCACHE_NOP ||
1159
 
                kcache[0].index_mode || kcache[1].index_mode || kcache[2].index_mode || kcache[3].index_mode) {
1160
 
                if (bc->chip_class < EVERGREEN)
1161
 
                        return -ENOMEM;
1162
 
                bc->cf_last->eg_alu_extended = 1;
1163
 
        }
1164
 
 
1165
 
        return 0;
1166
 
}
1167
 
 
1168
 
static int insert_nop_r6xx(struct r600_bytecode *bc, int max_slots)
1169
 
{
1170
 
        struct r600_bytecode_alu alu;
1171
 
        int r, i;
1172
 
 
1173
 
        for (i = 0; i < max_slots; i++) {
1174
 
                memset(&alu, 0, sizeof(alu));
1175
 
                alu.op = ALU_OP0_NOP;
1176
 
                alu.src[0].chan = i & 3;
1177
 
                alu.dst.chan = i & 3;
1178
 
                alu.last = (i == max_slots - 1);
1179
 
                r = r600_bytecode_add_alu(bc, &alu);
1180
 
                if (r)
1181
 
                        return r;
1182
 
        }
1183
 
        return 0;
1184
 
}
1185
 
 
1186
 
/* load AR register from gpr (bc->ar_reg) with MOVA_INT */
1187
 
static int load_ar_r6xx(struct r600_bytecode *bc)
1188
 
{
1189
 
        struct r600_bytecode_alu alu;
1190
 
        int r;
1191
 
 
1192
 
        if (bc->ar_loaded)
1193
 
                return 0;
1194
 
 
1195
 
        /* hack to avoid making MOVA the last instruction in the clause */
1196
 
        if ((bc->cf_last->ndw>>1) >= 110)
1197
 
                bc->force_add_cf = 1;
1198
 
 
1199
 
        memset(&alu, 0, sizeof(alu));
1200
 
        alu.op = ALU_OP1_MOVA_GPR_INT;
1201
 
        alu.src[0].sel = bc->ar_reg;
1202
 
        alu.src[0].chan = bc->ar_chan;
1203
 
        alu.last = 1;
1204
 
        alu.index_mode = INDEX_MODE_LOOP;
1205
 
        r = r600_bytecode_add_alu(bc, &alu);
1206
 
        if (r)
1207
 
                return r;
1208
 
 
1209
 
        /* no requirement to set uses waterfall on MOVA_GPR_INT */
1210
 
        bc->ar_loaded = 1;
1211
 
        return 0;
1212
 
}
1213
 
 
1214
 
/* load AR register from gpr (bc->ar_reg) with MOVA_INT */
1215
 
int r600_load_ar(struct r600_bytecode *bc)
1216
 
{
1217
 
        struct r600_bytecode_alu alu;
1218
 
        int r;
1219
 
 
1220
 
        if (bc->ar_handling)
1221
 
                return load_ar_r6xx(bc);
1222
 
 
1223
 
        if (bc->ar_loaded)
1224
 
                return 0;
1225
 
 
1226
 
        /* hack to avoid making MOVA the last instruction in the clause */
1227
 
        if ((bc->cf_last->ndw>>1) >= 110)
1228
 
                bc->force_add_cf = 1;
1229
 
 
1230
 
        memset(&alu, 0, sizeof(alu));
1231
 
        alu.op = ALU_OP1_MOVA_INT;
1232
 
        alu.src[0].sel = bc->ar_reg;
1233
 
        alu.src[0].chan = bc->ar_chan;
1234
 
        alu.last = 1;
1235
 
        r = r600_bytecode_add_alu(bc, &alu);
1236
 
        if (r)
1237
 
                return r;
1238
 
 
1239
 
        bc->cf_last->r6xx_uses_waterfall = 1;
1240
 
        bc->ar_loaded = 1;
1241
 
        return 0;
1242
 
}
1243
 
 
1244
 
int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
1245
 
                const struct r600_bytecode_alu *alu, unsigned type)
1246
 
{
1247
 
        struct r600_bytecode_alu *nalu = r600_bytecode_alu();
1248
 
        struct r600_bytecode_alu *lalu;
1249
 
        int i, r;
1250
 
 
1251
 
        if (!nalu)
1252
 
                return -ENOMEM;
1253
 
        memcpy(nalu, alu, sizeof(struct r600_bytecode_alu));
1254
 
 
1255
 
        if (alu->is_op3) {
1256
 
                /* will fail later since alu does not support it. */
1257
 
                assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
1258
 
        }
1259
 
 
1260
 
        if (bc->cf_last != NULL && bc->cf_last->op != type) {
1261
 
                /* check if we could add it anyway */
1262
 
                if (bc->cf_last->op == CF_OP_ALU &&
1263
 
                        type == CF_OP_ALU_PUSH_BEFORE) {
1264
 
                        LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) {
1265
 
                                if (lalu->execute_mask) {
1266
 
                                        bc->force_add_cf = 1;
1267
 
                                        break;
1268
 
                                }
1269
 
                        }
1270
 
                } else
1271
 
                        bc->force_add_cf = 1;
1272
 
        }
1273
 
 
1274
 
        /* cf can contains only alu or only vtx or only tex */
1275
 
        if (bc->cf_last == NULL || bc->force_add_cf) {
1276
 
               if (bc->cf_last && bc->cf_last->curr_bs_head)
1277
 
                  bc->cf_last->curr_bs_head->last = 1;
1278
 
                r = r600_bytecode_add_cf(bc);
1279
 
                if (r) {
1280
 
                        free(nalu);
1281
 
                        return r;
1282
 
                }
1283
 
        }
1284
 
        bc->cf_last->op = type;
1285
 
 
1286
 
        /* Load index register if required */
1287
 
        if (bc->chip_class >= EVERGREEN) {
1288
 
                for (i = 0; i < 3; i++)
1289
 
                        if (nalu->src[i].kc_bank &&  nalu->src[i].kc_rel)
1290
 
                                egcm_load_index_reg(bc, 0, true);
1291
 
        }
1292
 
 
1293
 
        /* Check AR usage and load it if required */
1294
 
        for (i = 0; i < 3; i++)
1295
 
                if (nalu->src[i].rel && !bc->ar_loaded)
1296
 
                        r600_load_ar(bc);
1297
 
 
1298
 
        if (nalu->dst.rel && !bc->ar_loaded)
1299
 
                r600_load_ar(bc);
1300
 
 
1301
 
        /* Setup the kcache for this ALU instruction. This will start a new
1302
 
         * ALU clause if needed. */
1303
 
        if ((r = r600_bytecode_alloc_kcache_lines(bc, nalu, type))) {
1304
 
                free(nalu);
1305
 
                return r;
1306
 
        }
1307
 
 
1308
 
        if (!bc->cf_last->curr_bs_head) {
1309
 
                bc->cf_last->curr_bs_head = nalu;
1310
 
        }
1311
 
        /* number of gpr == the last gpr used in any alu */
1312
 
        for (i = 0; i < 3; i++) {
1313
 
                if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) {
1314
 
                        bc->ngpr = nalu->src[i].sel + 1;
1315
 
                }
1316
 
                if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
1317
 
                        r600_bytecode_special_constants(nalu->src[i].value,
1318
 
                                &nalu->src[i].sel);
1319
 
        }
1320
 
        if (nalu->dst.write && nalu->dst.sel >= bc->ngpr) {
1321
 
                bc->ngpr = nalu->dst.sel + 1;
1322
 
        }
1323
 
        list_addtail(&nalu->list, &bc->cf_last->alu);
1324
 
        /* each alu use 2 dwords */
1325
 
        bc->cf_last->ndw += 2;
1326
 
        bc->ndw += 2;
1327
 
 
1328
 
        /* process cur ALU instructions for bank swizzle */
1329
 
        if (nalu->last) {
1330
 
                uint32_t literal[4];
1331
 
                unsigned nliteral;
1332
 
                struct r600_bytecode_alu *slots[5];
1333
 
                int max_slots = bc->chip_class == CAYMAN ? 4 : 5;
1334
 
                r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots);
1335
 
                if (r)
1336
 
                        return r;
1337
 
 
1338
 
                if (bc->cf_last->prev_bs_head) {
1339
 
                        r = merge_inst_groups(bc, slots, bc->cf_last->prev_bs_head);
1340
 
                        if (r)
1341
 
                                return r;
1342
 
                }
1343
 
 
1344
 
                if (bc->cf_last->prev_bs_head) {
1345
 
                        r = replace_gpr_with_pv_ps(bc, slots, bc->cf_last->prev_bs_head);
1346
 
                        if (r)
1347
 
                                return r;
1348
 
                }
1349
 
 
1350
 
                r = check_and_set_bank_swizzle(bc, slots);
1351
 
                if (r)
1352
 
                        return r;
1353
 
 
1354
 
                for (i = 0, nliteral = 0; i < max_slots; i++) {
1355
 
                        if (slots[i]) {
1356
 
                                r = r600_bytecode_alu_nliterals(slots[i], literal, &nliteral);
1357
 
                                if (r)
1358
 
                                        return r;
1359
 
                        }
1360
 
                }
1361
 
                bc->cf_last->ndw += align(nliteral, 2);
1362
 
 
1363
 
                /* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots)
1364
 
                 * worst case */
1365
 
                if ((bc->cf_last->ndw >> 1) >= 120) {
1366
 
                        bc->force_add_cf = 1;
1367
 
                }
1368
 
 
1369
 
                bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head;
1370
 
                bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head;
1371
 
                bc->cf_last->curr_bs_head = NULL;
1372
 
 
1373
 
                if (bc->r6xx_nop_after_rel_dst) {
1374
 
                        for (int i = 0; i < max_slots; ++i) {
1375
 
                                if (slots[i] && slots[i]->dst.rel) {
1376
 
                                        insert_nop_r6xx(bc, max_slots);
1377
 
                                        break;
1378
 
                                }
1379
 
                        }
1380
 
                }
1381
 
        }
1382
 
 
1383
 
        /* Might need to insert spill write ops after current clause */
1384
 
        if (nalu->last && bc->n_pending_outputs) {
1385
 
                while (bc->n_pending_outputs) {
1386
 
                        r = r600_bytecode_add_output(bc, &bc->pending_outputs[--bc->n_pending_outputs]);
1387
 
                        if (r)
1388
 
                                return r;
1389
 
                }
1390
 
        }
1391
 
 
1392
 
        return 0;
1393
 
}
1394
 
 
1395
 
/*
 * Add an ALU instruction to a plain ALU clause: forwards to
 * r600_bytecode_add_alu_type() with CF_OP_ALU as the clause type and returns
 * its result unchanged.
 */
int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu)
1396
 
{
1397
 
        return r600_bytecode_add_alu_type(bc, alu, CF_OP_ALU);
1398
 
}
1399
 
 
1400
 
static unsigned r600_bytecode_num_tex_and_vtx_instructions(const struct r600_bytecode *bc)
1401
 
{
1402
 
        switch (bc->chip_class) {
1403
 
        case R600:
1404
 
                return 8;
1405
 
 
1406
 
        case R700:
1407
 
        case EVERGREEN:
1408
 
        case CAYMAN:
1409
 
                return 16;
1410
 
 
1411
 
        default:
1412
 
                R600_ERR("Unknown chip class %d.\n", bc->chip_class);
1413
 
                return 8;
1414
 
        }
1415
 
}
1416
 
 
1417
 
static inline boolean last_inst_was_not_vtx_fetch(struct r600_bytecode *bc)
1418
 
{
1419
 
        return !((r600_isa_cf(bc->cf_last->op)->flags & CF_FETCH) &&
1420
 
                 bc->cf_last->op != CF_OP_GDS &&
1421
 
                 (bc->chip_class == CAYMAN ||
1422
 
                  bc->cf_last->op != CF_OP_TEX));
1423
 
}
1424
 
 
1425
 
static int r600_bytecode_add_vtx_internal(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx,
1426
 
                                          bool use_tc)
1427
 
{
1428
 
        struct r600_bytecode_vtx *nvtx = r600_bytecode_vtx();
1429
 
        int r;
1430
 
 
1431
 
        if (!nvtx)
1432
 
                return -ENOMEM;
1433
 
        memcpy(nvtx, vtx, sizeof(struct r600_bytecode_vtx));
1434
 
 
1435
 
        /* Load index register if required */
1436
 
        if (bc->chip_class >= EVERGREEN) {
1437
 
                if (vtx->buffer_index_mode)
1438
 
                        egcm_load_index_reg(bc, vtx->buffer_index_mode - 1, false);
1439
 
        }
1440
 
 
1441
 
        /* cf can contains only alu or only vtx or only tex */
1442
 
        if (bc->cf_last == NULL ||
1443
 
            last_inst_was_not_vtx_fetch(bc) ||
1444
 
            bc->force_add_cf) {
1445
 
                r = r600_bytecode_add_cf(bc);
1446
 
                if (r) {
1447
 
                        free(nvtx);
1448
 
                        return r;
1449
 
                }
1450
 
                switch (bc->chip_class) {
1451
 
                case R600:
1452
 
                case R700:
1453
 
                        bc->cf_last->op = CF_OP_VTX;
1454
 
                        break;
1455
 
                case EVERGREEN:
1456
 
                        if (use_tc)
1457
 
                                bc->cf_last->op = CF_OP_TEX;
1458
 
                        else
1459
 
                                bc->cf_last->op = CF_OP_VTX;
1460
 
                        break;
1461
 
                case CAYMAN:
1462
 
                        bc->cf_last->op = CF_OP_TEX;
1463
 
                        break;
1464
 
                default:
1465
 
                        R600_ERR("Unknown chip class %d.\n", bc->chip_class);
1466
 
                        free(nvtx);
1467
 
                        return -EINVAL;
1468
 
                }
1469
 
        }
1470
 
        list_addtail(&nvtx->list, &bc->cf_last->vtx);
1471
 
        /* each fetch use 4 dwords */
1472
 
        bc->cf_last->ndw += 4;
1473
 
        bc->ndw += 4;
1474
 
        if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
1475
 
                bc->force_add_cf = 1;
1476
 
 
1477
 
        bc->ngpr = MAX2(bc->ngpr, vtx->src_gpr + 1);
1478
 
        bc->ngpr = MAX2(bc->ngpr, vtx->dst_gpr + 1);
1479
 
 
1480
 
        return 0;
1481
 
}
1482
 
 
1483
 
/*
 * Add a vertex fetch instruction: forwards to
 * r600_bytecode_add_vtx_internal() with use_tc = false and returns its result.
 */
int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx)
1484
 
{
1485
 
        return r600_bytecode_add_vtx_internal(bc, vtx, false);
1486
 
}
1487
 
 
1488
 
/*
 * Add a vertex fetch instruction with use_tc = true: forwards to
 * r600_bytecode_add_vtx_internal() and returns its result. On EVERGREEN this
 * makes a newly started clause CF_OP_TEX rather than CF_OP_VTX.
 */
int r600_bytecode_add_vtx_tc(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx)
1489
 
{
1490
 
        return r600_bytecode_add_vtx_internal(bc, vtx, true);
1491
 
}
1492
 
 
1493
 
int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex)
1494
 
{
1495
 
        struct r600_bytecode_tex *ntex = r600_bytecode_tex();
1496
 
        int r;
1497
 
 
1498
 
        if (!ntex)
1499
 
                return -ENOMEM;
1500
 
        memcpy(ntex, tex, sizeof(struct r600_bytecode_tex));
1501
 
 
1502
 
        /* Load index register if required */
1503
 
        if (bc->chip_class >= EVERGREEN) {
1504
 
                if (tex->sampler_index_mode || tex->resource_index_mode)
1505
 
                        egcm_load_index_reg(bc, 1, false);
1506
 
        }
1507
 
 
1508
 
        /* we can't fetch data und use it as texture lookup address in the same TEX clause */
1509
 
        if (bc->cf_last != NULL &&
1510
 
                bc->cf_last->op == CF_OP_TEX) {
1511
 
                struct r600_bytecode_tex *ttex;
1512
 
                LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) {
1513
 
                        if (ttex->dst_gpr == ntex->src_gpr &&
1514
 
                            (ttex->dst_sel_x < 4 || ttex->dst_sel_y < 4 ||
1515
 
                             ttex->dst_sel_z < 4 || ttex->dst_sel_w < 4)) {
1516
 
                                bc->force_add_cf = 1;
1517
 
                                break;
1518
 
                        }
1519
 
                }
1520
 
                /* vtx instrs get inserted after tex, so make sure we aren't moving the tex
1521
 
                 * before (say) the instr fetching the texcoord.
1522
 
                 */
1523
 
                if (!list_is_empty(&bc->cf_last->vtx))
1524
 
                        bc->force_add_cf = 1;
1525
 
 
1526
 
                /* slight hack to make gradients always go into same cf */
1527
 
                if (ntex->op == FETCH_OP_SET_GRADIENTS_H)
1528
 
                        bc->force_add_cf = 1;
1529
 
        }
1530
 
 
1531
 
        /* cf can contains only alu or only vtx or only tex */
1532
 
        if (bc->cf_last == NULL ||
1533
 
                bc->cf_last->op != CF_OP_TEX ||
1534
 
                bc->force_add_cf) {
1535
 
                r = r600_bytecode_add_cf(bc);
1536
 
                if (r) {
1537
 
                        free(ntex);
1538
 
                        return r;
1539
 
                }
1540
 
                bc->cf_last->op = CF_OP_TEX;
1541
 
        }
1542
 
        if (ntex->src_gpr >= bc->ngpr) {
1543
 
                bc->ngpr = ntex->src_gpr + 1;
1544
 
        }
1545
 
        if (ntex->dst_gpr >= bc->ngpr) {
1546
 
                bc->ngpr = ntex->dst_gpr + 1;
1547
 
        }
1548
 
        list_addtail(&ntex->list, &bc->cf_last->tex);
1549
 
        /* each texture fetch use 4 dwords */
1550
 
        bc->cf_last->ndw += 4;
1551
 
        bc->ndw += 4;
1552
 
        if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
1553
 
                bc->force_add_cf = 1;
1554
 
        return 0;
1555
 
}
1556
 
 
1557
 
int r600_bytecode_add_gds(struct r600_bytecode *bc, const struct r600_bytecode_gds *gds)
1558
 
{
1559
 
        struct r600_bytecode_gds *ngds = r600_bytecode_gds();
1560
 
        int r;
1561
 
 
1562
 
        if (ngds == NULL)
1563
 
                return -ENOMEM;
1564
 
        memcpy(ngds, gds, sizeof(struct r600_bytecode_gds));
1565
 
 
1566
 
        if (bc->chip_class >= EVERGREEN) {
1567
 
                if (gds->uav_index_mode)
1568
 
                        egcm_load_index_reg(bc, gds->uav_index_mode - 1, false);
1569
 
        }
1570
 
 
1571
 
        if (bc->cf_last == NULL ||
1572
 
            bc->cf_last->op != CF_OP_GDS ||
1573
 
            bc->force_add_cf) {
1574
 
                r = r600_bytecode_add_cf(bc);
1575
 
                if (r) {
1576
 
                        free(ngds);
1577
 
                        return r;
1578
 
                }
1579
 
                bc->cf_last->op = CF_OP_GDS;
1580
 
        }
1581
 
 
1582
 
        list_addtail(&ngds->list, &bc->cf_last->gds);
1583
 
        bc->cf_last->ndw += 4; /* each GDS uses 4 dwords */
1584
 
        if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
1585
 
                bc->force_add_cf = 1;
1586
 
        return 0;
1587
 
}
1588
 
 
1589
 
int r600_bytecode_add_cfinst(struct r600_bytecode *bc, unsigned op)
1590
 
{
1591
 
        int r;
1592
 
 
1593
 
        /* Emit WAIT_ACK before control flow to ensure pending writes are always acked. */
1594
 
        if (op != CF_OP_WAIT_ACK && op != CF_OP_MEM_SCRATCH)
1595
 
                r600_bytecode_wait_acks(bc);
1596
 
 
1597
 
        r = r600_bytecode_add_cf(bc);
1598
 
        if (r)
1599
 
                return r;
1600
 
 
1601
 
        bc->cf_last->cond = V_SQ_CF_COND_ACTIVE;
1602
 
        bc->cf_last->op = op;
1603
 
        return 0;
1604
 
}
1605
 
 
1606
 
int cm_bytecode_add_cf_end(struct r600_bytecode *bc)
1607
 
{
1608
 
        return r600_bytecode_add_cfinst(bc, CF_OP_CF_END);
1609
 
}
1610
 
 
1611
 
/* common to all 3 families */
1612
 
static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id)
1613
 
{
1614
 
        if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
1615
 
                return r700_bytecode_fetch_mem_build(bc, vtx, id);
1616
 
        bc->bytecode[id] = S_SQ_VTX_WORD0_VTX_INST(r600_isa_fetch_opcode(bc->isa->hw_class, vtx->op)) |
1617
 
                        S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
1618
 
                        S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) |
1619
 
                        S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
1620
 
                        S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x);
1621
 
        if (bc->chip_class < CAYMAN)
1622
 
                bc->bytecode[id] |= S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count);
1623
 
        id++;
1624
 
        bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) |
1625
 
                                S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) |
1626
 
                                S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) |
1627
 
                                S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) |
1628
 
                                S_SQ_VTX_WORD1_USE_CONST_FIELDS(vtx->use_const_fields) |
1629
 
                                S_SQ_VTX_WORD1_DATA_FORMAT(vtx->data_format) |
1630
 
                                S_SQ_VTX_WORD1_NUM_FORMAT_ALL(vtx->num_format_all) |
1631
 
                                S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) |
1632
 
                                S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) |
1633
 
                                S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr);
1634
 
        bc->bytecode[id] = S_SQ_VTX_WORD2_OFFSET(vtx->offset)|
1635
 
                                S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian);
1636
 
        if (bc->chip_class >= EVERGREEN)
1637
 
                bc->bytecode[id] |= ((vtx->buffer_index_mode & 0x3) << 21); // S_SQ_VTX_WORD2_BIM(vtx->buffer_index_mode);
1638
 
        if (bc->chip_class < CAYMAN)
1639
 
                bc->bytecode[id] |= S_SQ_VTX_WORD2_MEGA_FETCH(1);
1640
 
        id++;
1641
 
        bc->bytecode[id++] = 0;
1642
 
        return 0;
1643
 
}
1644
 
 
1645
 
/* common to all 3 families */
1646
 
static int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id)
1647
 
{
1648
 
        bc->bytecode[id] = S_SQ_TEX_WORD0_TEX_INST(
1649
 
                                        r600_isa_fetch_opcode(bc->isa->hw_class, tex->op)) |
1650
 
                            EG_S_SQ_TEX_WORD0_INST_MOD(tex->inst_mod) |
1651
 
                                S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) |
1652
 
                                S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) |
1653
 
                                S_SQ_TEX_WORD0_SRC_REL(tex->src_rel);
1654
 
        if (bc->chip_class >= EVERGREEN)
1655
 
                bc->bytecode[id] |= ((tex->sampler_index_mode & 0x3) << 27) | // S_SQ_TEX_WORD0_SIM(tex->sampler_index_mode);
1656
 
                                ((tex->resource_index_mode & 0x3) << 25); // S_SQ_TEX_WORD0_RIM(tex->resource_index_mode)
1657
 
        id++;
1658
 
        bc->bytecode[id++] = S_SQ_TEX_WORD1_DST_GPR(tex->dst_gpr) |
1659
 
                                S_SQ_TEX_WORD1_DST_REL(tex->dst_rel) |
1660
 
                                S_SQ_TEX_WORD1_DST_SEL_X(tex->dst_sel_x) |
1661
 
                                S_SQ_TEX_WORD1_DST_SEL_Y(tex->dst_sel_y) |
1662
 
                                S_SQ_TEX_WORD1_DST_SEL_Z(tex->dst_sel_z) |
1663
 
                                S_SQ_TEX_WORD1_DST_SEL_W(tex->dst_sel_w) |
1664
 
                                S_SQ_TEX_WORD1_LOD_BIAS(tex->lod_bias) |
1665
 
                                S_SQ_TEX_WORD1_COORD_TYPE_X(tex->coord_type_x) |
1666
 
                                S_SQ_TEX_WORD1_COORD_TYPE_Y(tex->coord_type_y) |
1667
 
                                S_SQ_TEX_WORD1_COORD_TYPE_Z(tex->coord_type_z) |
1668
 
                                S_SQ_TEX_WORD1_COORD_TYPE_W(tex->coord_type_w);
1669
 
        bc->bytecode[id++] = S_SQ_TEX_WORD2_OFFSET_X(tex->offset_x) |
1670
 
                                S_SQ_TEX_WORD2_OFFSET_Y(tex->offset_y) |
1671
 
                                S_SQ_TEX_WORD2_OFFSET_Z(tex->offset_z) |
1672
 
                                S_SQ_TEX_WORD2_SAMPLER_ID(tex->sampler_id) |
1673
 
                                S_SQ_TEX_WORD2_SRC_SEL_X(tex->src_sel_x) |
1674
 
                                S_SQ_TEX_WORD2_SRC_SEL_Y(tex->src_sel_y) |
1675
 
                                S_SQ_TEX_WORD2_SRC_SEL_Z(tex->src_sel_z) |
1676
 
                                S_SQ_TEX_WORD2_SRC_SEL_W(tex->src_sel_w);
1677
 
        bc->bytecode[id++] = 0;
1678
 
        return 0;
1679
 
}
1680
 
 
1681
 
/* r600 only, r700/eg bits in r700_asm.c */
1682
 
static int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id)
1683
 
{
1684
 
        unsigned opcode = r600_isa_alu_opcode(bc->isa->hw_class, alu->op);
1685
 
 
1686
 
        /* don't replace gpr by pv or ps for destination register */
1687
 
        bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
1688
 
                                S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
1689
 
                                S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
1690
 
                                S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) |
1691
 
                                S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
1692
 
                                S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) |
1693
 
                                S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
1694
 
                                S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
1695
 
                                S_SQ_ALU_WORD0_INDEX_MODE(alu->index_mode) |
1696
 
                                S_SQ_ALU_WORD0_PRED_SEL(alu->pred_sel) |
1697
 
                                S_SQ_ALU_WORD0_LAST(alu->last);
1698
 
 
1699
 
        if (alu->is_op3) {
1700
 
                assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
1701
 
                bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
1702
 
                                        S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
1703
 
                                        S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
1704
 
                                        S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
1705
 
                                        S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) |
1706
 
                                        S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) |
1707
 
                                        S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) |
1708
 
                                        S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) |
1709
 
                                        S_SQ_ALU_WORD1_OP3_ALU_INST(opcode) |
1710
 
                                        S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle);
1711
 
        } else {
1712
 
                bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
1713
 
                                        S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
1714
 
                                        S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
1715
 
                                        S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
1716
 
                                        S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
1717
 
                                        S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
1718
 
                                        S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
1719
 
                                        S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
1720
 
                                        S_SQ_ALU_WORD1_OP2_ALU_INST(opcode) |
1721
 
                                        S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
1722
 
                                        S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->execute_mask) |
1723
 
                                        S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->update_pred);
1724
 
        }
1725
 
        return 0;
1726
 
}
1727
 
 
1728
 
static void r600_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf)
1729
 
{
1730
 
        *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
1731
 
        *bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R600, cf->op)) |
1732
 
                        S_SQ_CF_WORD1_BARRIER(1) |
1733
 
                        S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1)|
1734
 
                        S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
1735
 
}
1736
 
 
1737
 
/* common for r600/r700 - eg in eg_asm.c */
1738
 
static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
1739
 
{
1740
 
        unsigned id = cf->id;
1741
 
        const struct cf_op_info *cfop = r600_isa_cf(cf->op);
1742
 
        unsigned opcode = r600_isa_cf_opcode(bc->isa->hw_class, cf->op);
1743
 
 
1744
 
 
1745
 
        if (cf->op == CF_NATIVE) {
1746
 
                bc->bytecode[id++] = cf->isa[0];
1747
 
                bc->bytecode[id++] = cf->isa[1];
1748
 
        } else if (cfop->flags & CF_ALU) {
1749
 
                bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
1750
 
                        S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
1751
 
                        S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
1752
 
                        S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank);
1753
 
 
1754
 
                bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(opcode) |
1755
 
                        S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
1756
 
                        S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
1757
 
                        S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
1758
 
                                        S_SQ_CF_ALU_WORD1_BARRIER(1) |
1759
 
                                        S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chip_class == R600 ? cf->r6xx_uses_waterfall : 0) |
1760
 
                                        S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
1761
 
        } else if (cfop->flags & CF_FETCH) {
1762
 
                if (bc->chip_class == R700)
1763
 
                        r700_bytecode_cf_vtx_build(&bc->bytecode[id], cf);
1764
 
                else
1765
 
                        r600_bytecode_cf_vtx_build(&bc->bytecode[id], cf);
1766
 
        } else if (cfop->flags & CF_EXP) {
1767
 
                bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
1768
 
                        S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
1769
 
                        S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
1770
 
                        S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type) |
1771
 
                        S_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR(cf->output.index_gpr);
1772
 
                bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
1773
 
                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
1774
 
                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
1775
 
                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
1776
 
                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
1777
 
                        S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
1778
 
                        S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
1779
 
                        S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
1780
 
        } else if (cfop->flags & CF_MEM) {
1781
 
                bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
1782
 
                        S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
1783
 
                        S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
1784
 
                        S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type) |
1785
 
                        S_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR(cf->output.index_gpr);
1786
 
                bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
1787
 
                        S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
1788
 
                        S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
1789
 
                        S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program) |
1790
 
                        S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size) |
1791
 
                        S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask);
1792
 
        } else {
1793
 
                bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
1794
 
                bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode) |
1795
 
                                        S_SQ_CF_WORD1_BARRIER(1) |
1796
 
                                        S_SQ_CF_WORD1_COND(cf->cond) |
1797
 
                                        S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
1798
 
                                        S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
1799
 
        }
1800
 
        return 0;
1801
 
}
1802
 
 
1803
 
int r600_bytecode_build(struct r600_bytecode *bc)
1804
 
{
1805
 
        struct r600_bytecode_cf *cf;
1806
 
        struct r600_bytecode_alu *alu;
1807
 
        struct r600_bytecode_vtx *vtx;
1808
 
        struct r600_bytecode_tex *tex;
1809
 
        struct r600_bytecode_gds *gds;
1810
 
        uint32_t literal[4];
1811
 
        unsigned nliteral;
1812
 
        unsigned addr;
1813
 
        int i, r;
1814
 
 
1815
 
        if (!bc->nstack) { // If not 0, Stack_size already provided by llvm
1816
 
                if (bc->stack.max_entries)
1817
 
                        bc->nstack = bc->stack.max_entries;
1818
 
                else if (bc->type == PIPE_SHADER_VERTEX ||
1819
 
                         bc->type == PIPE_SHADER_TESS_EVAL ||
1820
 
                         bc->type == PIPE_SHADER_TESS_CTRL)
1821
 
                        bc->nstack = 1;
1822
 
        }
1823
 
 
1824
 
        /* first path compute addr of each CF block */
1825
 
        /* addr start after all the CF instructions */
1826
 
        addr = bc->cf_last->id + 2;
1827
 
        LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
1828
 
                if (r600_isa_cf(cf->op)->flags & CF_FETCH) {
1829
 
                        addr += 3;
1830
 
                        addr &= 0xFFFFFFFCUL;
1831
 
                }
1832
 
                cf->addr = addr;
1833
 
                addr += cf->ndw;
1834
 
                bc->ndw = cf->addr + cf->ndw;
1835
 
        }
1836
 
        free(bc->bytecode);
1837
 
        bc->bytecode = calloc(4, bc->ndw);
1838
 
        if (bc->bytecode == NULL)
1839
 
                return -ENOMEM;
1840
 
        LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
1841
 
                const struct cf_op_info *cfop = r600_isa_cf(cf->op);
1842
 
                addr = cf->addr;
1843
 
                if (bc->chip_class >= EVERGREEN)
1844
 
                        r = eg_bytecode_cf_build(bc, cf);
1845
 
                else
1846
 
                        r = r600_bytecode_cf_build(bc, cf);
1847
 
                if (r)
1848
 
                        return r;
1849
 
                if (cfop->flags & CF_ALU) {
1850
 
                        nliteral = 0;
1851
 
                        memset(literal, 0, sizeof(literal));
1852
 
                        LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
1853
 
                                r = r600_bytecode_alu_nliterals(alu, literal, &nliteral);
1854
 
                                if (r)
1855
 
                                        return r;
1856
 
                                r600_bytecode_alu_adjust_literals(alu, literal, nliteral);
1857
 
                                r600_bytecode_assign_kcache_banks(alu, cf->kcache);
1858
 
 
1859
 
                                switch(bc->chip_class) {
1860
 
                                case R600:
1861
 
                                        r = r600_bytecode_alu_build(bc, alu, addr);
1862
 
                                        break;
1863
 
                                case R700:
1864
 
                                        r = r700_bytecode_alu_build(bc, alu, addr);
1865
 
                                        break;
1866
 
                                case EVERGREEN:
1867
 
                                case CAYMAN:
1868
 
                                        r = eg_bytecode_alu_build(bc, alu, addr);
1869
 
                                        break;
1870
 
                                default:
1871
 
                                        R600_ERR("unknown chip class %d.\n", bc->chip_class);
1872
 
                                        return -EINVAL;
1873
 
                                }
1874
 
                                if (r)
1875
 
                                        return r;
1876
 
                                addr += 2;
1877
 
                                if (alu->last) {
1878
 
                                        for (i = 0; i < align(nliteral, 2); ++i) {
1879
 
                                                bc->bytecode[addr++] = literal[i];
1880
 
                                        }
1881
 
                                        nliteral = 0;
1882
 
                                        memset(literal, 0, sizeof(literal));
1883
 
                                }
1884
 
                        }
1885
 
                } else if (cf->op == CF_OP_VTX) {
1886
 
                        LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
1887
 
                                r = r600_bytecode_vtx_build(bc, vtx, addr);
1888
 
                                if (r)
1889
 
                                        return r;
1890
 
                                addr += 4;
1891
 
                        }
1892
 
                } else if (cf->op == CF_OP_GDS) {
1893
 
                        assert(bc->chip_class >= EVERGREEN);
1894
 
                        LIST_FOR_EACH_ENTRY(gds, &cf->gds, list) {
1895
 
                                r = eg_bytecode_gds_build(bc, gds, addr);
1896
 
                                if (r)
1897
 
                                        return r;
1898
 
                                addr += 4;
1899
 
                        }
1900
 
                } else if (cf->op == CF_OP_TEX) {
1901
 
                        LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
1902
 
                                assert(bc->chip_class >= EVERGREEN);
1903
 
                                r = r600_bytecode_vtx_build(bc, vtx, addr);
1904
 
                                if (r)
1905
 
                                        return r;
1906
 
                                addr += 4;
1907
 
                        }
1908
 
                        LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
1909
 
                                r = r600_bytecode_tex_build(bc, tex, addr);
1910
 
                                if (r)
1911
 
                                        return r;
1912
 
                                addr += 4;
1913
 
                        }
1914
 
                }
1915
 
        }
1916
 
        return 0;
1917
 
}
1918
 
 
1919
 
void r600_bytecode_clear(struct r600_bytecode *bc)
1920
 
{
1921
 
        struct r600_bytecode_cf *cf = NULL, *next_cf;
1922
 
 
1923
 
        free(bc->bytecode);
1924
 
        bc->bytecode = NULL;
1925
 
 
1926
 
        LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) {
1927
 
                struct r600_bytecode_alu *alu = NULL, *next_alu;
1928
 
                struct r600_bytecode_tex *tex = NULL, *next_tex;
1929
 
                struct r600_bytecode_tex *vtx = NULL, *next_vtx;
1930
 
                struct r600_bytecode_gds *gds = NULL, *next_gds;
1931
 
 
1932
 
                LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) {
1933
 
                        free(alu);
1934
 
                }
1935
 
 
1936
 
                list_inithead(&cf->alu);
1937
 
 
1938
 
                LIST_FOR_EACH_ENTRY_SAFE(tex, next_tex, &cf->tex, list) {
1939
 
                        free(tex);
1940
 
                }
1941
 
 
1942
 
                list_inithead(&cf->tex);
1943
 
 
1944
 
                LIST_FOR_EACH_ENTRY_SAFE(vtx, next_vtx, &cf->vtx, list) {
1945
 
                        free(vtx);
1946
 
                }
1947
 
 
1948
 
                list_inithead(&cf->vtx);
1949
 
 
1950
 
                LIST_FOR_EACH_ENTRY_SAFE(gds, next_gds, &cf->gds, list) {
1951
 
                        free(gds);
1952
 
                }
1953
 
 
1954
 
                list_inithead(&cf->gds);
1955
 
 
1956
 
                free(cf);
1957
 
        }
1958
 
 
1959
 
        list_inithead(&cf->list);
1960
 
}
1961
 
 
1962
 
/*
 * Print the one-character name of swizzle selector `swz` to stderr and
 * return fprintf()'s result.  Valid selectors are 0..5 and 7 (asserted).
 */
static int print_swizzle(unsigned swz)
{
        static const char swizzle_names[] = "xyzw01?_";

        assert(swz != 6 && swz < 8);

        return fprintf(stderr, "%c", swizzle_names[swz]);
}
1968
 
 
1969
 
/*
 * Print a register/constant selector to stderr.
 *
 * sel           - number to print (in decimal)
 * rel           - non-zero for relative addressing; forces brackets and, for
 *                 index modes 0 and 6, appends "+AR", for index mode 4 "+AL"
 * index_mode    - index mode of the instruction; with `rel` set, values >= 5
 *                 put a "G" prefix in front of selectors below 128
 * need_brackets - non-zero to wrap the number in [] even without `rel`
 *
 * Returns the sum of the fprintf() results, i.e. the number of characters
 * written when no output error occurs.
 */
static int print_sel(unsigned sel, unsigned rel, unsigned index_mode,
                unsigned need_brackets)
{
        const int bracketed = rel || need_brackets;
        int o = 0;

        if (rel && index_mode >= 5 && sel < 128)
                o += fprintf(stderr, "G");

        if (bracketed)
                o += fprintf(stderr, "[");

        /* sel is unsigned, so it has to be printed with %u rather than %d */
        o += fprintf(stderr, "%u", sel);

        if (rel) {
                if (index_mode == 0 || index_mode == 6)
                        o += fprintf(stderr, "+AR");
                else if (index_mode == 4)
                        o += fprintf(stderr, "+AL");
        }

        if (bracketed)
                o += fprintf(stderr, "]");

        return o;
}
1990
 
 
1991
 
static int print_dst(struct r600_bytecode_alu *alu)
1992
 
{
1993
 
        int o = 0;
1994
 
        unsigned sel = alu->dst.sel;
1995
 
        char reg_char = 'R';
1996
 
        if (sel > 128 - 4) { /* clause temporary gpr */
1997
 
                sel -= 128 - 4;
1998
 
                reg_char = 'T';
1999
 
        }
2000
 
 
2001
 
        if (alu_writes(alu)) {
2002
 
                o += fprintf(stderr, "%c", reg_char);
2003
 
                o += print_sel(alu->dst.sel, alu->dst.rel, alu->index_mode, 0);
2004
 
        } else {
2005
 
                o += fprintf(stderr, "__");
2006
 
        }
2007
 
        o += fprintf(stderr, ".");
2008
 
        o += print_swizzle(alu->dst.chan);
2009
 
        return o;
2010
 
}
2011
 
 
2012
 
static int print_src(struct r600_bytecode_alu *alu, unsigned idx)
2013
 
{
2014
 
        int o = 0;
2015
 
        struct r600_bytecode_alu_src *src = &alu->src[idx];
2016
 
        unsigned sel = src->sel, need_sel = 1, need_chan = 1, need_brackets = 0;
2017
 
 
2018
 
        if (src->neg)
2019
 
                o += fprintf(stderr,"-");
2020
 
        if (src->abs)
2021
 
                o += fprintf(stderr,"|");
2022
 
 
2023
 
        if (sel < 128 - 4) {
2024
 
                o += fprintf(stderr, "R");
2025
 
        } else if (sel < 128) {
2026
 
                o += fprintf(stderr, "T");
2027
 
                sel -= 128 - 4;
2028
 
        } else if (sel < 160) {
2029
 
                o += fprintf(stderr, "KC0");
2030
 
                need_brackets = 1;
2031
 
                sel -= 128;
2032
 
        } else if (sel < 192) {
2033
 
                o += fprintf(stderr, "KC1");
2034
 
                need_brackets = 1;
2035
 
                sel -= 160;
2036
 
        } else if (sel >= 512) {
2037
 
                o += fprintf(stderr, "C%d", src->kc_bank);
2038
 
                need_brackets = 1;
2039
 
                sel -= 512;
2040
 
        } else if (sel >= 448) {
2041
 
                o += fprintf(stderr, "Param");
2042
 
                sel -= 448;
2043
 
                need_chan = 0;
2044
 
        } else if (sel >= 288) {
2045
 
                o += fprintf(stderr, "KC3");
2046
 
                need_brackets = 1;
2047
 
                sel -= 288;
2048
 
        } else if (sel >= 256) {
2049
 
                o += fprintf(stderr, "KC2");
2050
 
                need_brackets = 1;
2051
 
                sel -= 256;
2052
 
        } else {
2053
 
                need_sel = 0;
2054
 
                need_chan = 0;
2055
 
                switch (sel) {
2056
 
                case EG_V_SQ_ALU_SRC_LDS_DIRECT_A:
2057
 
                        o += fprintf(stderr, "LDS_A[0x%08X]", src->value);
2058
 
                        break;
2059
 
                case EG_V_SQ_ALU_SRC_LDS_DIRECT_B:
2060
 
                        o += fprintf(stderr, "LDS_B[0x%08X]", src->value);
2061
 
                        break;
2062
 
                case EG_V_SQ_ALU_SRC_LDS_OQ_A:
2063
 
                        o += fprintf(stderr, "LDS_OQ_A");
2064
 
                        need_chan = 1;
2065
 
                        break;
2066
 
                case EG_V_SQ_ALU_SRC_LDS_OQ_B:
2067
 
                        o += fprintf(stderr, "LDS_OQ_B");
2068
 
                        need_chan = 1;
2069
 
                        break;
2070
 
                case EG_V_SQ_ALU_SRC_LDS_OQ_A_POP:
2071
 
                        o += fprintf(stderr, "LDS_OQ_A_POP");
2072
 
                        need_chan = 1;
2073
 
                        break;
2074
 
                case EG_V_SQ_ALU_SRC_LDS_OQ_B_POP:
2075
 
                        o += fprintf(stderr, "LDS_OQ_B_POP");
2076
 
                        need_chan = 1;
2077
 
                        break;
2078
 
                case EG_V_SQ_ALU_SRC_TIME_LO:
2079
 
                        o += fprintf(stderr, "TIME_LO");
2080
 
                        break;
2081
 
                case EG_V_SQ_ALU_SRC_TIME_HI:
2082
 
                        o += fprintf(stderr, "TIME_HI");
2083
 
                        break;
2084
 
                case EG_V_SQ_ALU_SRC_SE_ID:
2085
 
                        o += fprintf(stderr, "SE_ID");
2086
 
                        break;
2087
 
                case EG_V_SQ_ALU_SRC_SIMD_ID:
2088
 
                        o += fprintf(stderr, "SIMD_ID");
2089
 
                        break;
2090
 
                case EG_V_SQ_ALU_SRC_HW_WAVE_ID:
2091
 
                        o += fprintf(stderr, "HW_WAVE_ID");
2092
 
                        break;
2093
 
                case V_SQ_ALU_SRC_PS:
2094
 
                        o += fprintf(stderr, "PS");
2095
 
                        break;
2096
 
                case V_SQ_ALU_SRC_PV:
2097
 
                        o += fprintf(stderr, "PV");
2098
 
                        need_chan = 1;
2099
 
                        break;
2100
 
                case V_SQ_ALU_SRC_LITERAL:
2101
 
                        o += fprintf(stderr, "[0x%08X %f]", src->value, u_bitcast_u2f(src->value));
2102
 
                        break;
2103
 
                case V_SQ_ALU_SRC_0_5:
2104
 
                        o += fprintf(stderr, "0.5");
2105
 
                        break;
2106
 
                case V_SQ_ALU_SRC_M_1_INT:
2107
 
                        o += fprintf(stderr, "-1");
2108
 
                        break;
2109
 
                case V_SQ_ALU_SRC_1_INT:
2110
 
                        o += fprintf(stderr, "1");
2111
 
                        break;
2112
 
                case V_SQ_ALU_SRC_1:
2113
 
                        o += fprintf(stderr, "1.0");
2114
 
                        break;
2115
 
                case V_SQ_ALU_SRC_0:
2116
 
                        o += fprintf(stderr, "0");
2117
 
                        break;
2118
 
                default:
2119
 
                        o += fprintf(stderr, "??IMM_%d", sel);
2120
 
                        break;
2121
 
                }
2122
 
        }
2123
 
 
2124
 
        if (need_sel)
2125
 
                o += print_sel(sel, src->rel, alu->index_mode, need_brackets);
2126
 
 
2127
 
        if (need_chan) {
2128
 
                o += fprintf(stderr, ".");
2129
 
                o += print_swizzle(src->chan);
2130
 
        }
2131
 
 
2132
 
        if (src->abs)
2133
 
                o += fprintf(stderr,"|");
2134
 
 
2135
 
        return o;
2136
 
}
2137
 
 
2138
 
/**
 * Pad the current stderr line with spaces up to a target column.
 *
 * p: number of characters already written on the line.
 * c: column the next output should start at.
 *
 * Returns the number of spaces written; 0 when p is already at or past c,
 * or when the write fails, so callers can safely add the result to their
 * running column counter.
 */
static int print_indent(int p, int c)
{
        int written;

        /* Also guards the "%*s" below: a negative width would be taken as
         * a left-justify flag plus a positive width. */
        if (p >= c)
                return 0;

        /* One padded write instead of one fprintf call per space. */
        written = fprintf(stderr, "%*s", c - p, "");

        return written > 0 ? written : 0;
}
2145
 
 
2146
 
void r600_bytecode_disasm(struct r600_bytecode *bc)
2147
 
{
2148
 
        const char *index_mode[] = {"CF_INDEX_NONE", "CF_INDEX_0", "CF_INDEX_1"};
2149
 
        static int index = 0;
2150
 
        struct r600_bytecode_cf *cf = NULL;
2151
 
        struct r600_bytecode_alu *alu = NULL;
2152
 
        struct r600_bytecode_vtx *vtx = NULL;
2153
 
        struct r600_bytecode_tex *tex = NULL;
2154
 
        struct r600_bytecode_gds *gds = NULL;
2155
 
 
2156
 
        unsigned i, id, ngr = 0, last;
2157
 
        uint32_t literal[4];
2158
 
        unsigned nliteral;
2159
 
        char chip = '6';
2160
 
 
2161
 
        switch (bc->chip_class) {
2162
 
        case R700:
2163
 
                chip = '7';
2164
 
                break;
2165
 
        case EVERGREEN:
2166
 
                chip = 'E';
2167
 
                break;
2168
 
        case CAYMAN:
2169
 
                chip = 'C';
2170
 
                break;
2171
 
        case R600:
2172
 
        default:
2173
 
                chip = '6';
2174
 
                break;
2175
 
        }
2176
 
        fprintf(stderr, "bytecode %d dw -- %d gprs -- %d nstack -------------\n",
2177
 
                bc->ndw, bc->ngpr, bc->nstack);
2178
 
        fprintf(stderr, "shader %d -- %c\n", index++, chip);
2179
 
 
2180
 
        LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
2181
 
                id = cf->id;
2182
 
                if (cf->op == CF_NATIVE) {
2183
 
                        fprintf(stderr, "%04d %08X %08X CF_NATIVE\n", id, bc->bytecode[id],
2184
 
                                        bc->bytecode[id + 1]);
2185
 
                } else {
2186
 
                        const struct cf_op_info *cfop = r600_isa_cf(cf->op);
2187
 
                        if (cfop->flags & CF_ALU) {
2188
 
                                if (cf->eg_alu_extended) {
2189
 
                                        fprintf(stderr, "%04d %08X %08X  %s\n", id, bc->bytecode[id],
2190
 
                                                        bc->bytecode[id + 1], "ALU_EXT");
2191
 
                                        id += 2;
2192
 
                                }
2193
 
                                fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
2194
 
                                                bc->bytecode[id + 1], cfop->name);
2195
 
                                fprintf(stderr, "%d @%d ", cf->ndw / 2, cf->addr);
2196
 
                                for (i = 0; i < 4; ++i) {
2197
 
                                        if (cf->kcache[i].mode) {
2198
 
                                                int c_start = (cf->kcache[i].addr << 4);
2199
 
                                                int c_end = c_start + (cf->kcache[i].mode << 4);
2200
 
                                                fprintf(stderr, "KC%d[CB%d:%d-%d%s%s] ",
2201
 
                                                        i, cf->kcache[i].bank, c_start, c_end,
2202
 
                                                        cf->kcache[i].index_mode ? " " : "",
2203
 
                                                        cf->kcache[i].index_mode ? index_mode[cf->kcache[i].index_mode] : "");
2204
 
                                        }
2205
 
                                }
2206
 
                                fprintf(stderr, "\n");
2207
 
                        } else if (cfop->flags & CF_FETCH) {
2208
 
                                fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
2209
 
                                                bc->bytecode[id + 1], cfop->name);
2210
 
                                fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr);
2211
 
                                if (cf->vpm)
2212
 
                                        fprintf(stderr, "VPM ");
2213
 
                                if (cf->end_of_program)
2214
 
                                        fprintf(stderr, "EOP ");
2215
 
                                fprintf(stderr, "\n");
2216
 
 
2217
 
                        } else if (cfop->flags & CF_EXP) {
2218
 
                                int o = 0;
2219
 
                                const char *exp_type[] = {"PIXEL", "POS  ", "PARAM"};
2220
 
                                o += fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
2221
 
                                                bc->bytecode[id + 1], cfop->name);
2222
 
                                o += print_indent(o, 43);
2223
 
                                o += fprintf(stderr, "%s ", exp_type[cf->output.type]);
2224
 
                                if (cf->output.burst_count > 1) {
2225
 
                                        o += fprintf(stderr, "%d-%d ", cf->output.array_base,
2226
 
                                                        cf->output.array_base + cf->output.burst_count - 1);
2227
 
 
2228
 
                                        o += print_indent(o, 55);
2229
 
                                        o += fprintf(stderr, "R%d-%d.", cf->output.gpr,
2230
 
                                                        cf->output.gpr + cf->output.burst_count - 1);
2231
 
                                } else {
2232
 
                                        o += fprintf(stderr, "%d ", cf->output.array_base);
2233
 
                                        o += print_indent(o, 55);
2234
 
                                        o += fprintf(stderr, "R%d.", cf->output.gpr);
2235
 
                                }
2236
 
 
2237
 
                                o += print_swizzle(cf->output.swizzle_x);
2238
 
                                o += print_swizzle(cf->output.swizzle_y);
2239
 
                                o += print_swizzle(cf->output.swizzle_z);
2240
 
                                o += print_swizzle(cf->output.swizzle_w);
2241
 
 
2242
 
                                print_indent(o, 67);
2243
 
 
2244
 
                                fprintf(stderr, " ES:%X ", cf->output.elem_size);
2245
 
                                if (cf->mark)
2246
 
                                        fprintf(stderr, "MARK ");
2247
 
                                if (!cf->barrier)
2248
 
                                        fprintf(stderr, "NO_BARRIER ");
2249
 
                                if (cf->end_of_program)
2250
 
                                        fprintf(stderr, "EOP ");
2251
 
                                fprintf(stderr, "\n");
2252
 
                        } else if (r600_isa_cf(cf->op)->flags & CF_MEM) {
2253
 
                                int o = 0;
2254
 
                                const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK",
2255
 
                                                "WRITE_IND_ACK"};
2256
 
                                o += fprintf(stderr, "%04d %08X %08X  %s ", id,
2257
 
                                                bc->bytecode[id], bc->bytecode[id + 1], cfop->name);
2258
 
                                o += print_indent(o, 43);
2259
 
                                o += fprintf(stderr, "%s ", exp_type[cf->output.type]);
2260
 
 
2261
 
                                if (r600_isa_cf(cf->op)->flags & CF_RAT) {
2262
 
                                        o += fprintf(stderr, "RAT%d", cf->rat.id);
2263
 
                                        if (cf->rat.index_mode) {
2264
 
                                                o += fprintf(stderr, "[IDX%d]", cf->rat.index_mode - 1);
2265
 
                                        }
2266
 
                                        o += fprintf(stderr, " INST: %d ", cf->rat.inst);
2267
 
                                }
2268
 
 
2269
 
                                if (cf->output.burst_count > 1) {
2270
 
                                        o += fprintf(stderr, "%d-%d ", cf->output.array_base,
2271
 
                                                        cf->output.array_base + cf->output.burst_count - 1);
2272
 
                                        o += print_indent(o, 55);
2273
 
                                        o += fprintf(stderr, "R%d-%d.", cf->output.gpr,
2274
 
                                                        cf->output.gpr + cf->output.burst_count - 1);
2275
 
                                } else {
2276
 
                                        o += fprintf(stderr, "%d ", cf->output.array_base);
2277
 
                                        o += print_indent(o, 55);
2278
 
                                        o += fprintf(stderr, "R%d.", cf->output.gpr);
2279
 
                                }
2280
 
                                for (i = 0; i < 4; ++i) {
2281
 
                                        if (cf->output.comp_mask & (1 << i))
2282
 
                                                o += print_swizzle(i);
2283
 
                                        else
2284
 
                                                o += print_swizzle(7);
2285
 
                                }
2286
 
 
2287
 
                                if (cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND ||
2288
 
                                    cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND)
2289
 
                                        o += fprintf(stderr, " R%d", cf->output.index_gpr);
2290
 
 
2291
 
                                o += print_indent(o, 67);
2292
 
 
2293
 
                                fprintf(stderr, " ES:%i ", cf->output.elem_size);
2294
 
                                if (cf->output.array_size != 0xFFF)
2295
 
                                        fprintf(stderr, "AS:%i ", cf->output.array_size);
2296
 
                                if (cf->mark)
2297
 
                                        fprintf(stderr, "MARK ");
2298
 
                                if (!cf->barrier)
2299
 
                                        fprintf(stderr, "NO_BARRIER ");
2300
 
                                if (cf->end_of_program)
2301
 
                                        fprintf(stderr, "EOP ");
2302
 
 
2303
 
                                if (cf->output.mark)
2304
 
                                        fprintf(stderr, "MARK ");
2305
 
 
2306
 
                                fprintf(stderr, "\n");
2307
 
                        } else {
2308
 
                                fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
2309
 
                                                bc->bytecode[id + 1], cfop->name);
2310
 
                                fprintf(stderr, "@%d ", cf->cf_addr);
2311
 
                                if (cf->cond)
2312
 
                                        fprintf(stderr, "CND:%X ", cf->cond);
2313
 
                                if (cf->pop_count)
2314
 
                                        fprintf(stderr, "POP:%X ", cf->pop_count);
2315
 
                                if (cf->count && (cfop->flags & CF_EMIT))
2316
 
                                        fprintf(stderr, "STREAM%d ", cf->count);
2317
 
                                if (cf->vpm)
2318
 
                                        fprintf(stderr, "VPM ");
2319
 
                                if (cf->end_of_program)
2320
 
                                        fprintf(stderr, "EOP ");
2321
 
                                fprintf(stderr, "\n");
2322
 
                        }
2323
 
                }
2324
 
 
2325
 
                id = cf->addr;
2326
 
                nliteral = 0;
2327
 
                last = 1;
2328
 
                LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
2329
 
                        const char *omod_str[] = {"","*2","*4","/2"};
2330
 
                        const struct alu_op_info *aop = r600_isa_alu(alu->op);
2331
 
                        int o = 0;
2332
 
 
2333
 
                        r600_bytecode_alu_nliterals(alu, literal, &nliteral);
2334
 
                        o += fprintf(stderr, " %04d %08X %08X  ", id, bc->bytecode[id], bc->bytecode[id+1]);
2335
 
                        if (last)
2336
 
                                o += fprintf(stderr, "%4d ", ++ngr);
2337
 
                        else
2338
 
                                o += fprintf(stderr, "     ");
2339
 
                        o += fprintf(stderr, "%c%c %c ", alu->execute_mask ? 'M':' ',
2340
 
                                        alu->update_pred ? 'P':' ',
2341
 
                                        alu->pred_sel ? alu->pred_sel==2 ? '0':'1':' ');
2342
 
 
2343
 
                        o += fprintf(stderr, "%s%s%s ", aop->name,
2344
 
                                        omod_str[alu->omod], alu->dst.clamp ? "_sat":"");
2345
 
 
2346
 
                        o += print_indent(o,60);
2347
 
                        o += print_dst(alu);
2348
 
                        for (i = 0; i < aop->src_count; ++i) {
2349
 
                                o += fprintf(stderr, i == 0 ? ",  ": ", ");
2350
 
                                o += print_src(alu, i);
2351
 
                        }
2352
 
 
2353
 
                        if (alu->bank_swizzle) {
2354
 
                                o += print_indent(o,75);
2355
 
                                o += fprintf(stderr, "  BS:%d", alu->bank_swizzle);
2356
 
                        }
2357
 
 
2358
 
                        fprintf(stderr, "\n");
2359
 
                        id += 2;
2360
 
 
2361
 
                        if (alu->last) {
2362
 
                                for (i = 0; i < nliteral; i++, id++) {
2363
 
                                        float *f = (float*)(bc->bytecode + id);
2364
 
                                        o = fprintf(stderr, " %04d %08X", id, bc->bytecode[id]);
2365
 
                                        print_indent(o, 60);
2366
 
                                        fprintf(stderr, " %f (%d)\n", *f, *(bc->bytecode + id));
2367
 
                                }
2368
 
                                id += nliteral & 1;
2369
 
                                nliteral = 0;
2370
 
                        }
2371
 
                        last = alu->last;
2372
 
                }
2373
 
 
2374
 
                LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
2375
 
                        int o = 0;
2376
 
                        o += fprintf(stderr, " %04d %08X %08X %08X   ", id, bc->bytecode[id],
2377
 
                                        bc->bytecode[id + 1], bc->bytecode[id + 2]);
2378
 
 
2379
 
                        o += fprintf(stderr, "%s ", r600_isa_fetch(tex->op)->name);
2380
 
 
2381
 
                        o += print_indent(o, 50);
2382
 
 
2383
 
                        o += fprintf(stderr, "R%d.", tex->dst_gpr);
2384
 
                        o += print_swizzle(tex->dst_sel_x);
2385
 
                        o += print_swizzle(tex->dst_sel_y);
2386
 
                        o += print_swizzle(tex->dst_sel_z);
2387
 
                        o += print_swizzle(tex->dst_sel_w);
2388
 
 
2389
 
                        o += fprintf(stderr, ", R%d.", tex->src_gpr);
2390
 
                        o += print_swizzle(tex->src_sel_x);
2391
 
                        o += print_swizzle(tex->src_sel_y);
2392
 
                        o += print_swizzle(tex->src_sel_z);
2393
 
                        o += print_swizzle(tex->src_sel_w);
2394
 
 
2395
 
                        o += fprintf(stderr, ",  RID:%d", tex->resource_id);
2396
 
                        o += fprintf(stderr, ", SID:%d  ", tex->sampler_id);
2397
 
 
2398
 
                        if (tex->sampler_index_mode)
2399
 
                                fprintf(stderr, "SQ_%s ", index_mode[tex->sampler_index_mode]);
2400
 
 
2401
 
                        if (tex->lod_bias)
2402
 
                                fprintf(stderr, "LB:%d ", tex->lod_bias);
2403
 
 
2404
 
                        fprintf(stderr, "CT:%c%c%c%c ",
2405
 
                                        tex->coord_type_x ? 'N' : 'U',
2406
 
                                        tex->coord_type_y ? 'N' : 'U',
2407
 
                                        tex->coord_type_z ? 'N' : 'U',
2408
 
                                        tex->coord_type_w ? 'N' : 'U');
2409
 
 
2410
 
                        if (tex->offset_x)
2411
 
                                fprintf(stderr, "OX:%d ", tex->offset_x);
2412
 
                        if (tex->offset_y)
2413
 
                                fprintf(stderr, "OY:%d ", tex->offset_y);
2414
 
                        if (tex->offset_z)
2415
 
                                fprintf(stderr, "OZ:%d ", tex->offset_z);
2416
 
 
2417
 
                        id += 4;
2418
 
                        fprintf(stderr, "\n");
2419
 
                }
2420
 
 
2421
 
                LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
2422
 
                        int o = 0;
2423
 
                        const char * fetch_type[] = {"VERTEX", "INSTANCE", ""};
2424
 
                        o += fprintf(stderr, " %04d %08X %08X %08X   ", id, bc->bytecode[id],
2425
 
                                        bc->bytecode[id + 1], bc->bytecode[id + 2]);
2426
 
 
2427
 
                        o += fprintf(stderr, "%s ", r600_isa_fetch(vtx->op)->name);
2428
 
 
2429
 
                        o += print_indent(o, 50);
2430
 
 
2431
 
                        o += fprintf(stderr, "R%d.", vtx->dst_gpr);
2432
 
                        o += print_swizzle(vtx->dst_sel_x);
2433
 
                        o += print_swizzle(vtx->dst_sel_y);
2434
 
                        o += print_swizzle(vtx->dst_sel_z);
2435
 
                        o += print_swizzle(vtx->dst_sel_w);
2436
 
 
2437
 
                        o += fprintf(stderr, ", R%d.", vtx->src_gpr);
2438
 
                        o += print_swizzle(vtx->src_sel_x);
2439
 
                        if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
2440
 
                                o += print_swizzle(vtx->src_sel_y);
2441
 
 
2442
 
                        if (vtx->offset)
2443
 
                                fprintf(stderr, " +%db", vtx->offset);
2444
 
 
2445
 
                        o += print_indent(o, 55);
2446
 
 
2447
 
                        fprintf(stderr, ",  RID:%d ", vtx->buffer_id);
2448
 
 
2449
 
                        fprintf(stderr, "%s ", fetch_type[vtx->fetch_type]);
2450
 
 
2451
 
                        if (bc->chip_class < CAYMAN && vtx->mega_fetch_count)
2452
 
                                fprintf(stderr, "MFC:%d ", vtx->mega_fetch_count);
2453
 
 
2454
 
                        if (bc->chip_class >= EVERGREEN && vtx->buffer_index_mode)
2455
 
                                fprintf(stderr, "SQ_%s ", index_mode[vtx->buffer_index_mode]);
2456
 
 
2457
 
                        if (r600_isa_fetch(vtx->op)->flags & FF_MEM) {
2458
 
                                if (vtx->uncached)
2459
 
                                        fprintf(stderr, "UNCACHED ");
2460
 
                                if (vtx->indexed)
2461
 
                                        fprintf(stderr, "INDEXED:%d ", vtx->indexed);
2462
 
 
2463
 
                                fprintf(stderr, "ELEM_SIZE:%d ", vtx->elem_size);
2464
 
                                if (vtx->burst_count)
2465
 
                                        fprintf(stderr, "BURST_COUNT:%d ", vtx->burst_count);
2466
 
                                fprintf(stderr, "ARRAY_BASE:%d ", vtx->array_base);
2467
 
                                fprintf(stderr, "ARRAY_SIZE:%d ", vtx->array_size);
2468
 
                        }
2469
 
 
2470
 
                        fprintf(stderr, "UCF:%d ", vtx->use_const_fields);
2471
 
                        fprintf(stderr, "FMT(DTA:%d ", vtx->data_format);
2472
 
                        fprintf(stderr, "NUM:%d ", vtx->num_format_all);
2473
 
                        fprintf(stderr, "COMP:%d ", vtx->format_comp_all);
2474
 
                        fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all);
2475
 
 
2476
 
                        id += 4;
2477
 
                }
2478
 
 
2479
 
                LIST_FOR_EACH_ENTRY(gds, &cf->gds, list) {
2480
 
                        int o = 0;
2481
 
                        o += fprintf(stderr, " %04d %08X %08X %08X   ", id, bc->bytecode[id],
2482
 
                                        bc->bytecode[id + 1], bc->bytecode[id + 2]);
2483
 
 
2484
 
                        o += fprintf(stderr, "%s ", r600_isa_fetch(gds->op)->name);
2485
 
 
2486
 
                        if (gds->op != FETCH_OP_TF_WRITE) {
2487
 
                                o += fprintf(stderr, "R%d.", gds->dst_gpr);
2488
 
                                o += print_swizzle(gds->dst_sel_x);
2489
 
                                o += print_swizzle(gds->dst_sel_y);
2490
 
                                o += print_swizzle(gds->dst_sel_z);
2491
 
                                o += print_swizzle(gds->dst_sel_w);
2492
 
                        }
2493
 
 
2494
 
                        o += fprintf(stderr, ", R%d.", gds->src_gpr);
2495
 
                        o += print_swizzle(gds->src_sel_x);
2496
 
                        o += print_swizzle(gds->src_sel_y);
2497
 
                        o += print_swizzle(gds->src_sel_z);
2498
 
 
2499
 
                        if (gds->op != FETCH_OP_TF_WRITE) {
2500
 
                                o += fprintf(stderr, ", R%d.", gds->src_gpr2);
2501
 
                        }
2502
 
                        if (gds->alloc_consume) {
2503
 
                                o += fprintf(stderr, " UAV: %d", gds->uav_id);
2504
 
                                if (gds->uav_index_mode)
2505
 
                                        o += fprintf(stderr, "[%s]", index_mode[gds->uav_index_mode]);
2506
 
                        }
2507
 
                        fprintf(stderr, "\n");
2508
 
                        id += 4;
2509
 
                }
2510
 
        }
2511
 
 
2512
 
        fprintf(stderr, "--------------------------------------\n");
2513
 
}
2514
 
 
2515
 
void r600_vertex_data_type(enum pipe_format pformat,
2516
 
                                  unsigned *format,
2517
 
                                  unsigned *num_format, unsigned *format_comp, unsigned *endian)
2518
 
{
2519
 
        const struct util_format_description *desc;
2520
 
        unsigned i;
2521
 
 
2522
 
        *format = 0;
2523
 
        *num_format = 0;
2524
 
        *format_comp = 0;
2525
 
        *endian = ENDIAN_NONE;
2526
 
 
2527
 
        if (pformat == PIPE_FORMAT_R11G11B10_FLOAT) {
2528
 
                *format = FMT_10_11_11_FLOAT;
2529
 
                *endian = r600_endian_swap(32);
2530
 
                return;
2531
 
        }
2532
 
 
2533
 
        if (pformat == PIPE_FORMAT_B5G6R5_UNORM) {
2534
 
                *format = FMT_5_6_5;
2535
 
                *endian = r600_endian_swap(16);
2536
 
                return;
2537
 
        }
2538
 
 
2539
 
        if (pformat == PIPE_FORMAT_B5G5R5A1_UNORM) {
2540
 
                *format = FMT_1_5_5_5;
2541
 
                *endian = r600_endian_swap(16);
2542
 
                return;
2543
 
        }
2544
 
 
2545
 
        if (pformat == PIPE_FORMAT_A1B5G5R5_UNORM) {
2546
 
                *format = FMT_5_5_5_1;
2547
 
                return;
2548
 
        }
2549
 
 
2550
 
        desc = util_format_description(pformat);
2551
 
        if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
2552
 
                goto out_unknown;
2553
 
        }
2554
 
 
2555
 
        /* Find the first non-VOID channel. */
2556
 
        for (i = 0; i < 4; i++) {
2557
 
                if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
2558
 
                        break;
2559
 
                }
2560
 
        }
2561
 
 
2562
 
        *endian = r600_endian_swap(desc->channel[i].size);
2563
 
 
2564
 
        switch (desc->channel[i].type) {
2565
 
        /* Half-floats, floats, ints */
2566
 
        case UTIL_FORMAT_TYPE_FLOAT:
2567
 
                switch (desc->channel[i].size) {
2568
 
                case 16:
2569
 
                        switch (desc->nr_channels) {
2570
 
                        case 1:
2571
 
                                *format = FMT_16_FLOAT;
2572
 
                                break;
2573
 
                        case 2:
2574
 
                                *format = FMT_16_16_FLOAT;
2575
 
                                break;
2576
 
                        case 3:
2577
 
                        case 4:
2578
 
                                *format = FMT_16_16_16_16_FLOAT;
2579
 
                                break;
2580
 
                        }
2581
 
                        break;
2582
 
                case 32:
2583
 
                        switch (desc->nr_channels) {
2584
 
                        case 1:
2585
 
                                *format = FMT_32_FLOAT;
2586
 
                                break;
2587
 
                        case 2:
2588
 
                                *format = FMT_32_32_FLOAT;
2589
 
                                break;
2590
 
                        case 3:
2591
 
                                *format = FMT_32_32_32_FLOAT;
2592
 
                                break;
2593
 
                        case 4:
2594
 
                                *format = FMT_32_32_32_32_FLOAT;
2595
 
                                break;
2596
 
                        }
2597
 
                        break;
2598
 
                default:
2599
 
                        goto out_unknown;
2600
 
                }
2601
 
                break;
2602
 
                /* Unsigned ints */
2603
 
        case UTIL_FORMAT_TYPE_UNSIGNED:
2604
 
                /* Signed ints */
2605
 
        case UTIL_FORMAT_TYPE_SIGNED:
2606
 
                switch (desc->channel[i].size) {
2607
 
                case 4:
2608
 
                        switch (desc->nr_channels) {
2609
 
                        case 2:
2610
 
                                *format = FMT_4_4;
2611
 
                                break;
2612
 
                        case 4:
2613
 
                                *format = FMT_4_4_4_4;
2614
 
                                break;
2615
 
                        }
2616
 
                        break;
2617
 
                case 8:
2618
 
                        switch (desc->nr_channels) {
2619
 
                        case 1:
2620
 
                                *format = FMT_8;
2621
 
                                break;
2622
 
                        case 2:
2623
 
                                *format = FMT_8_8;
2624
 
                                break;
2625
 
                        case 3:
2626
 
                        case 4:
2627
 
                                *format = FMT_8_8_8_8;
2628
 
                                break;
2629
 
                        }
2630
 
                        break;
2631
 
                case 10:
2632
 
                        if (desc->nr_channels != 4)
2633
 
                                goto out_unknown;
2634
 
 
2635
 
                        *format = FMT_2_10_10_10;
2636
 
                        break;
2637
 
                case 16:
2638
 
                        switch (desc->nr_channels) {
2639
 
                        case 1:
2640
 
                                *format = FMT_16;
2641
 
                                break;
2642
 
                        case 2:
2643
 
                                *format = FMT_16_16;
2644
 
                                break;
2645
 
                        case 3:
2646
 
                        case 4:
2647
 
                                *format = FMT_16_16_16_16;
2648
 
                                break;
2649
 
                        }
2650
 
                        break;
2651
 
                case 32:
2652
 
                        switch (desc->nr_channels) {
2653
 
                        case 1:
2654
 
                                *format = FMT_32;
2655
 
                                break;
2656
 
                        case 2:
2657
 
                                *format = FMT_32_32;
2658
 
                                break;
2659
 
                        case 3:
2660
 
                                *format = FMT_32_32_32;
2661
 
                                break;
2662
 
                        case 4:
2663
 
                                *format = FMT_32_32_32_32;
2664
 
                                break;
2665
 
                        }
2666
 
                        break;
2667
 
                default:
2668
 
                        goto out_unknown;
2669
 
                }
2670
 
                break;
2671
 
        default:
2672
 
                goto out_unknown;
2673
 
        }
2674
 
 
2675
 
        if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
2676
 
                *format_comp = 1;
2677
 
        }
2678
 
 
2679
 
        *num_format = 0;
2680
 
        if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED ||
2681
 
            desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
2682
 
                if (!desc->channel[i].normalized) {
2683
 
                        if (desc->channel[i].pure_integer)
2684
 
                                *num_format = 1;
2685
 
                        else
2686
 
                                *num_format = 2;
2687
 
                }
2688
 
        }
2689
 
        return;
2690
 
out_unknown:
2691
 
        R600_ERR("unsupported vertex format %s\n", util_format_name(pformat));
2692
 
}
2693
 
 
2694
 
void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
2695
 
                                      unsigned count,
2696
 
                                      const struct pipe_vertex_element *elements)
2697
 
{
2698
 
        struct r600_context *rctx = (struct r600_context *)ctx;
2699
 
        struct r600_bytecode bc;
2700
 
        struct r600_bytecode_vtx vtx;
2701
 
        const struct util_format_description *desc;
2702
 
        unsigned fetch_resource_start = rctx->b.chip_class >= EVERGREEN ? 0 : 160;
2703
 
        unsigned format, num_format, format_comp, endian;
2704
 
        uint32_t *bytecode;
2705
 
        int i, j, r, fs_size;
2706
 
        struct r600_fetch_shader *shader;
2707
 
        unsigned no_sb = rctx->screen->b.debug_flags & DBG_NO_SB ||
2708
 
                         (rctx->screen->b.debug_flags & DBG_NIR);
2709
 
        unsigned sb_disasm = !no_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
2710
 
 
2711
 
        assert(count < 32);
2712
 
 
2713
 
        memset(&bc, 0, sizeof(bc));
2714
 
        r600_bytecode_init(&bc, rctx->b.chip_class, rctx->b.family,
2715
 
                           rctx->screen->has_compressed_msaa_texturing);
2716
 
 
2717
 
        bc.isa = rctx->isa;
2718
 
 
2719
 
        for (i = 0; i < count; i++) {
2720
 
                if (elements[i].instance_divisor > 1) {
2721
 
                        if (rctx->b.chip_class == CAYMAN) {
2722
 
                                for (j = 0; j < 4; j++) {
2723
 
                                        struct r600_bytecode_alu alu;
2724
 
                                        memset(&alu, 0, sizeof(alu));
2725
 
                                        alu.op = ALU_OP2_MULHI_UINT;
2726
 
                                        alu.src[0].sel = 0;
2727
 
                                        alu.src[0].chan = 3;
2728
 
                                        alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2729
 
                                        alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1;
2730
 
                                        alu.dst.sel = i + 1;
2731
 
                                        alu.dst.chan = j;
2732
 
                                        alu.dst.write = j == 3;
2733
 
                                        alu.last = j == 3;
2734
 
                                        if ((r = r600_bytecode_add_alu(&bc, &alu))) {
2735
 
                                                r600_bytecode_clear(&bc);
2736
 
                                                return NULL;
2737
 
                                        }
2738
 
                                }
2739
 
                        } else {
2740
 
                                struct r600_bytecode_alu alu;
2741
 
                                memset(&alu, 0, sizeof(alu));
2742
 
                                alu.op = ALU_OP2_MULHI_UINT;
2743
 
                                alu.src[0].sel = 0;
2744
 
                                alu.src[0].chan = 3;
2745
 
                                alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2746
 
                                alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1;
2747
 
                                alu.dst.sel = i + 1;
2748
 
                                alu.dst.chan = 3;
2749
 
                                alu.dst.write = 1;
2750
 
                                alu.last = 1;
2751
 
                                if ((r = r600_bytecode_add_alu(&bc, &alu))) {
2752
 
                                        r600_bytecode_clear(&bc);
2753
 
                                        return NULL;
2754
 
                                }
2755
 
                        }
2756
 
                }
2757
 
        }
2758
 
 
2759
 
        for (i = 0; i < count; i++) {
2760
 
                r600_vertex_data_type(elements[i].src_format,
2761
 
                                      &format, &num_format, &format_comp, &endian);
2762
 
 
2763
 
                desc = util_format_description(elements[i].src_format);
2764
 
                if (!desc) {
2765
 
                        r600_bytecode_clear(&bc);
2766
 
                        R600_ERR("unknown format %d\n", elements[i].src_format);
2767
 
                        return NULL;
2768
 
                }
2769
 
 
2770
 
                if (elements[i].src_offset > 65535) {
2771
 
                        r600_bytecode_clear(&bc);
2772
 
                        R600_ERR("too big src_offset: %u\n", elements[i].src_offset);
2773
 
                        return NULL;
2774
 
                }
2775
 
 
2776
 
                memset(&vtx, 0, sizeof(vtx));
2777
 
                vtx.buffer_id = elements[i].vertex_buffer_index + fetch_resource_start;
2778
 
                vtx.fetch_type = elements[i].instance_divisor ? SQ_VTX_FETCH_INSTANCE_DATA : SQ_VTX_FETCH_VERTEX_DATA;
2779
 
                vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0;
2780
 
                vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0;
2781
 
                vtx.mega_fetch_count = 0x1F;
2782
 
                vtx.dst_gpr = i + 1;
2783
 
                vtx.dst_sel_x = desc->swizzle[0];
2784
 
                vtx.dst_sel_y = desc->swizzle[1];
2785
 
                vtx.dst_sel_z = desc->swizzle[2];
2786
 
                vtx.dst_sel_w = desc->swizzle[3];
2787
 
                vtx.data_format = format;
2788
 
                vtx.num_format_all = num_format;
2789
 
                vtx.format_comp_all = format_comp;
2790
 
                vtx.offset = elements[i].src_offset;
2791
 
                vtx.endian = endian;
2792
 
 
2793
 
                if ((r = r600_bytecode_add_vtx(&bc, &vtx))) {
2794
 
                        r600_bytecode_clear(&bc);
2795
 
                        return NULL;
2796
 
                }
2797
 
        }
2798
 
 
2799
 
        r600_bytecode_add_cfinst(&bc, CF_OP_RET);
2800
 
 
2801
 
        if ((r = r600_bytecode_build(&bc))) {
2802
 
                r600_bytecode_clear(&bc);
2803
 
                return NULL;
2804
 
        }
2805
 
 
2806
 
        if (rctx->screen->b.debug_flags & DBG_FS) {
2807
 
                fprintf(stderr, "--------------------------------------------------------------\n");
2808
 
                fprintf(stderr, "Vertex elements state:\n");
2809
 
                for (i = 0; i < count; i++) {
2810
 
                        fprintf(stderr, "   ");
2811
 
                        util_dump_vertex_element(stderr, elements+i);
2812
 
                        fprintf(stderr, "\n");
2813
 
                }
2814
 
 
2815
 
                if (!sb_disasm) {
2816
 
                        r600_bytecode_disasm(&bc);
2817
 
 
2818
 
                        fprintf(stderr, "______________________________________________________________\n");
2819
 
                } else {
2820
 
                        r600_sb_bytecode_process(rctx, &bc, NULL, 1 /*dump*/, 0 /*optimize*/);
2821
 
                }
2822
 
        }
2823
 
 
2824
 
        fs_size = bc.ndw*4;
2825
 
 
2826
 
        /* Allocate the CSO. */
2827
 
        shader = CALLOC_STRUCT(r600_fetch_shader);
2828
 
        if (!shader) {
2829
 
                r600_bytecode_clear(&bc);
2830
 
                return NULL;
2831
 
        }
2832
 
 
2833
 
        u_suballocator_alloc(&rctx->allocator_fetch_shader, fs_size, 256,
2834
 
                             &shader->offset,
2835
 
                             (struct pipe_resource**)&shader->buffer);
2836
 
        if (!shader->buffer) {
2837
 
                r600_bytecode_clear(&bc);
2838
 
                FREE(shader);
2839
 
                return NULL;
2840
 
        }
2841
 
 
2842
 
        bytecode = r600_buffer_map_sync_with_rings
2843
 
                (&rctx->b, shader->buffer,
2844
 
                PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED | RADEON_MAP_TEMPORARY);
2845
 
        bytecode += shader->offset / 4;
2846
 
 
2847
 
        if (R600_BIG_ENDIAN) {
2848
 
                for (i = 0; i < fs_size / 4; ++i) {
2849
 
                        bytecode[i] = util_cpu_to_le32(bc.bytecode[i]);
2850
 
                }
2851
 
        } else {
2852
 
                memcpy(bytecode, bc.bytecode, fs_size);
2853
 
        }
2854
 
        rctx->b.ws->buffer_unmap(rctx->b.ws, shader->buffer->buf);
2855
 
 
2856
 
        r600_bytecode_clear(&bc);
2857
 
        return shader;
2858
 
}
2859
 
 
2860
 
void r600_bytecode_alu_read(struct r600_bytecode *bc,
2861
 
                struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1)
2862
 
{
2863
 
        /* WORD0 */
2864
 
        alu->src[0].sel = G_SQ_ALU_WORD0_SRC0_SEL(word0);
2865
 
        alu->src[0].rel = G_SQ_ALU_WORD0_SRC0_REL(word0);
2866
 
        alu->src[0].chan = G_SQ_ALU_WORD0_SRC0_CHAN(word0);
2867
 
        alu->src[0].neg = G_SQ_ALU_WORD0_SRC0_NEG(word0);
2868
 
        alu->src[1].sel = G_SQ_ALU_WORD0_SRC1_SEL(word0);
2869
 
        alu->src[1].rel = G_SQ_ALU_WORD0_SRC1_REL(word0);
2870
 
        alu->src[1].chan = G_SQ_ALU_WORD0_SRC1_CHAN(word0);
2871
 
        alu->src[1].neg = G_SQ_ALU_WORD0_SRC1_NEG(word0);
2872
 
        alu->index_mode = G_SQ_ALU_WORD0_INDEX_MODE(word0);
2873
 
        alu->pred_sel = G_SQ_ALU_WORD0_PRED_SEL(word0);
2874
 
        alu->last = G_SQ_ALU_WORD0_LAST(word0);
2875
 
 
2876
 
        /* WORD1 */
2877
 
        alu->bank_swizzle = G_SQ_ALU_WORD1_BANK_SWIZZLE(word1);
2878
 
        if (alu->bank_swizzle)
2879
 
                alu->bank_swizzle_force = alu->bank_swizzle;
2880
 
        alu->dst.sel = G_SQ_ALU_WORD1_DST_GPR(word1);
2881
 
        alu->dst.rel = G_SQ_ALU_WORD1_DST_REL(word1);
2882
 
        alu->dst.chan = G_SQ_ALU_WORD1_DST_CHAN(word1);
2883
 
        alu->dst.clamp = G_SQ_ALU_WORD1_CLAMP(word1);
2884
 
        if (G_SQ_ALU_WORD1_ENCODING(word1)) /*ALU_DWORD1_OP3*/
2885
 
        {
2886
 
                alu->is_op3 = 1;
2887
 
                alu->src[2].sel = G_SQ_ALU_WORD1_OP3_SRC2_SEL(word1);
2888
 
                alu->src[2].rel = G_SQ_ALU_WORD1_OP3_SRC2_REL(word1);
2889
 
                alu->src[2].chan = G_SQ_ALU_WORD1_OP3_SRC2_CHAN(word1);
2890
 
                alu->src[2].neg = G_SQ_ALU_WORD1_OP3_SRC2_NEG(word1);
2891
 
                alu->op = r600_isa_alu_by_opcode(bc->isa,
2892
 
                                G_SQ_ALU_WORD1_OP3_ALU_INST(word1), /* is_op3 = */ 1);
2893
 
 
2894
 
        }
2895
 
        else /*ALU_DWORD1_OP2*/
2896
 
        {
2897
 
                alu->src[0].abs = G_SQ_ALU_WORD1_OP2_SRC0_ABS(word1);
2898
 
                alu->src[1].abs = G_SQ_ALU_WORD1_OP2_SRC1_ABS(word1);
2899
 
                alu->op = r600_isa_alu_by_opcode(bc->isa,
2900
 
                                G_SQ_ALU_WORD1_OP2_ALU_INST(word1), /* is_op3 = */ 0);
2901
 
                alu->omod = G_SQ_ALU_WORD1_OP2_OMOD(word1);
2902
 
                alu->dst.write = G_SQ_ALU_WORD1_OP2_WRITE_MASK(word1);
2903
 
                alu->update_pred = G_SQ_ALU_WORD1_OP2_UPDATE_PRED(word1);
2904
 
                alu->execute_mask =
2905
 
                        G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1);
2906
 
        }
2907
 
}
2908
 
 
2909
 
#if 0
/*
 * Disabled: decode the two dwords of a CF_ALLOC_EXPORT instruction into
 * *output. word0 carries the array base, type, GPR and element size; word1
 * carries the swizzles, burst count, end-of-program and barrier bits, the
 * CF opcode (translated through bc->isa) and the buffer array size /
 * component mask.
 */
void r600_bytecode_export_read(struct r600_bytecode *bc,
                struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
{
        /* WORD0 */
        output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0);
        output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0);
        output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0);
        output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0);

        /* WORD1: opcode and control bits */
        output->op = r600_isa_cf_by_opcode(bc->isa,
                        G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1), 0);
        output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
        output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
        output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1);

        /* WORD1: per-component swizzle selects */
        output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1);
        output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1);
        output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1);
        output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1);

        /* WORD1: buffer-export view of the same bits */
        output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1);
        output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);
}
#endif