~mmach/netext73/mesa-haswell

« back to all changes in this revision

Viewing changes to src/gallium/drivers/r600/evergreen_state.c

  • Committer: mmach
  • Date: 2022-09-22 19:56:13 UTC
  • Revision ID: netbit73@gmail.com-20220922195613-wtik9mmy20tmor0i
2022-09-22 21:17:09

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*
2
 
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3
 
 *
4
 
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 
 * copy of this software and associated documentation files (the "Software"),
6
 
 * to deal in the Software without restriction, including without limitation
7
 
 * on the rights to use, copy, modify, merge, publish, distribute, sub
8
 
 * license, and/or sell copies of the Software, and to permit persons to whom
9
 
 * the Software is furnished to do so, subject to the following conditions:
10
 
 *
11
 
 * The above copyright notice and this permission notice (including the next
12
 
 * paragraph) shall be included in all copies or substantial portions of the
13
 
 * Software.
14
 
 *
15
 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18
 
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19
 
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20
 
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21
 
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22
 
 */
23
 
#include "r600_formats.h"
24
 
#include "r600_shader.h"
25
 
#include "r600_query.h"
26
 
#include "evergreend.h"
27
 
 
28
 
#include "pipe/p_shader_tokens.h"
29
 
#include "util/u_pack_color.h"
30
 
#include "util/u_memory.h"
31
 
#include "util/u_framebuffer.h"
32
 
#include "util/u_dual_blend.h"
33
 
#include "evergreen_compute.h"
34
 
#include "util/u_math.h"
35
 
 
36
 
static inline unsigned evergreen_array_mode(unsigned mode)
37
 
{
38
 
        switch (mode) {
39
 
        default:
40
 
        case RADEON_SURF_MODE_LINEAR_ALIGNED:   return V_028C70_ARRAY_LINEAR_ALIGNED;
41
 
                break;
42
 
        case RADEON_SURF_MODE_1D:               return V_028C70_ARRAY_1D_TILED_THIN1;
43
 
                break;
44
 
        case RADEON_SURF_MODE_2D:               return V_028C70_ARRAY_2D_TILED_THIN1;
45
 
        }
46
 
}
47
 
 
48
 
/* Encode a physical bank count (2/4/8/16) as the Evergreen NUM_BANKS
 * register field value. Unexpected counts encode as 8 banks. */
static uint32_t eg_num_banks(uint32_t nbanks)
{
	if (nbanks == 2)
		return 0;
	if (nbanks == 4)
		return 1;
	if (nbanks == 16)
		return 3;
	/* 8 banks, and the fallback for anything unrecognized. */
	return 2;
}
62
 
 
63
 
 
64
 
/* Encode a tile-split size in bytes (64..4096, power of two) as the
 * hardware TILE_SPLIT field value (log2(bytes) - 6). Unrecognized sizes
 * encode like 1024 bytes. */
static unsigned eg_tile_split(unsigned tile_split)
{
	switch (tile_split) {
	case 64:   return 0;
	case 128:  return 1;
	case 256:  return 2;
	case 512:  return 3;
	case 2048: return 5;
	case 4096: return 6;
	case 1024:
	default:   return 4;
	}
}
78
 
 
79
 
/* Encode a macro-tile aspect ratio (1/2/4/8) as its hardware field value
 * (log2). Unrecognized ratios encode as 1 (field value 0). */
static unsigned eg_macro_tile_aspect(unsigned macro_tile_aspect)
{
	unsigned encoded;

	switch (macro_tile_aspect) {
	case 2:
		encoded = 1;
		break;
	case 4:
		encoded = 2;
		break;
	case 8:
		encoded = 3;
		break;
	case 1:
	default:
		encoded = 0;
		break;
	}
	return encoded;
}
90
 
 
91
 
/* Encode a bank width/height value (1/2/4/8) as its hardware field value
 * (log2). Unrecognized values encode as 1 (field value 0). */
static unsigned eg_bank_wh(unsigned bankwh)
{
	if (bankwh == 2)
		return 1;
	if (bankwh == 4)
		return 2;
	if (bankwh == 8)
		return 3;
	/* 1, and the fallback for anything unrecognized. */
	return 0;
}
102
 
 
103
 
static uint32_t r600_translate_blend_function(int blend_func)
104
 
{
105
 
        switch (blend_func) {
106
 
        case PIPE_BLEND_ADD:
107
 
                return V_028780_COMB_DST_PLUS_SRC;
108
 
        case PIPE_BLEND_SUBTRACT:
109
 
                return V_028780_COMB_SRC_MINUS_DST;
110
 
        case PIPE_BLEND_REVERSE_SUBTRACT:
111
 
                return V_028780_COMB_DST_MINUS_SRC;
112
 
        case PIPE_BLEND_MIN:
113
 
                return V_028780_COMB_MIN_DST_SRC;
114
 
        case PIPE_BLEND_MAX:
115
 
                return V_028780_COMB_MAX_DST_SRC;
116
 
        default:
117
 
                R600_ERR("Unknown blend function %d\n", blend_func);
118
 
                assert(0);
119
 
                break;
120
 
        }
121
 
        return 0;
122
 
}
123
 
 
124
 
/* Map a PIPE_BLENDFACTOR_* value to the CB_BLENDn_CONTROL blend-factor
 * hardware encoding (used for both the source and destination factors of
 * the RGB and alpha equations). An unsupported factor logs an error,
 * asserts in debug builds, and yields 0. */
static uint32_t r600_translate_blend_factor(int blend_fact)
{
	switch (blend_fact) {
	case PIPE_BLENDFACTOR_ONE:
		return V_028780_BLEND_ONE;
	case PIPE_BLENDFACTOR_SRC_COLOR:
		return V_028780_BLEND_SRC_COLOR;
	case PIPE_BLENDFACTOR_SRC_ALPHA:
		return V_028780_BLEND_SRC_ALPHA;
	case PIPE_BLENDFACTOR_DST_ALPHA:
		return V_028780_BLEND_DST_ALPHA;
	case PIPE_BLENDFACTOR_DST_COLOR:
		return V_028780_BLEND_DST_COLOR;
	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
		return V_028780_BLEND_SRC_ALPHA_SATURATE;
	case PIPE_BLENDFACTOR_CONST_COLOR:
		return V_028780_BLEND_CONST_COLOR;
	case PIPE_BLENDFACTOR_CONST_ALPHA:
		return V_028780_BLEND_CONST_ALPHA;
	case PIPE_BLENDFACTOR_ZERO:
		return V_028780_BLEND_ZERO;
	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
		return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
		return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
	case PIPE_BLENDFACTOR_INV_DST_ALPHA:
		return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
	case PIPE_BLENDFACTOR_INV_DST_COLOR:
		return V_028780_BLEND_ONE_MINUS_DST_COLOR;
	case PIPE_BLENDFACTOR_INV_CONST_COLOR:
		return V_028780_BLEND_ONE_MINUS_CONST_COLOR;
	case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
		return V_028780_BLEND_ONE_MINUS_CONST_ALPHA;
	/* Dual-source factors — only valid on MRT0 (see the dual_src_blend
	 * handling in the blend-state constructor). */
	case PIPE_BLENDFACTOR_SRC1_COLOR:
		return V_028780_BLEND_SRC1_COLOR;
	case PIPE_BLENDFACTOR_SRC1_ALPHA:
		return V_028780_BLEND_SRC1_ALPHA;
	case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
		return V_028780_BLEND_INV_SRC1_COLOR;
	case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
		return V_028780_BLEND_INV_SRC1_ALPHA;
	default:
		R600_ERR("Bad blend factor %d not supported!\n", blend_fact);
		assert(0);
		break;
	}
	return 0;
}
172
 
 
173
 
/* Pick the SQ_TEX_DIM encoding for a sampler view: the view's target wins
 * for cube views; otherwise a cube resource viewed as something else is
 * treated as a 2D array. MSAA variants are chosen when nr_samples > 1. */
static unsigned r600_tex_dim(struct r600_texture *rtex,
                             unsigned view_target, unsigned nr_samples)
{
	unsigned res_target = rtex->resource.b.b.target;

	/* A cube/cube-array view overrides the resource's own target. */
	if (view_target == PIPE_TEXTURE_CUBE ||
	    view_target == PIPE_TEXTURE_CUBE_ARRAY)
		res_target = view_target;
		/* If interpreting cubemaps as something else, set 2D_ARRAY. */
	else if (res_target == PIPE_TEXTURE_CUBE ||
	         res_target == PIPE_TEXTURE_CUBE_ARRAY)
		res_target = PIPE_TEXTURE_2D_ARRAY;

	switch (res_target) {
	default:
	case PIPE_TEXTURE_1D:
		return V_030000_SQ_TEX_DIM_1D;
	case PIPE_TEXTURE_1D_ARRAY:
		return V_030000_SQ_TEX_DIM_1D_ARRAY;
	case PIPE_TEXTURE_2D:
	case PIPE_TEXTURE_RECT:
		return nr_samples > 1 ? V_030000_SQ_TEX_DIM_2D_MSAA :
					V_030000_SQ_TEX_DIM_2D;
	case PIPE_TEXTURE_2D_ARRAY:
		return nr_samples > 1 ? V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA :
					V_030000_SQ_TEX_DIM_2D_ARRAY;
	case PIPE_TEXTURE_3D:
		return V_030000_SQ_TEX_DIM_3D;
	case PIPE_TEXTURE_CUBE:
	case PIPE_TEXTURE_CUBE_ARRAY:
		return V_030000_SQ_TEX_DIM_CUBEMAP;
	}
}
206
 
 
207
 
static uint32_t r600_translate_dbformat(enum pipe_format format)
208
 
{
209
 
        switch (format) {
210
 
        case PIPE_FORMAT_Z16_UNORM:
211
 
                return V_028040_Z_16;
212
 
        case PIPE_FORMAT_Z24X8_UNORM:
213
 
        case PIPE_FORMAT_Z24_UNORM_S8_UINT:
214
 
        case PIPE_FORMAT_X8Z24_UNORM:
215
 
        case PIPE_FORMAT_S8_UINT_Z24_UNORM:
216
 
                return V_028040_Z_24;
217
 
        case PIPE_FORMAT_Z32_FLOAT:
218
 
        case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
219
 
                return V_028040_Z_32_FLOAT;
220
 
        default:
221
 
                return ~0U;
222
 
        }
223
 
}
224
 
 
225
 
static bool r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
226
 
{
227
 
        return r600_translate_texformat(screen, format, NULL, NULL, NULL,
228
 
                                   FALSE) != ~0U;
229
 
}
230
 
 
231
 
static bool r600_is_colorbuffer_format_supported(enum chip_class chip, enum pipe_format format)
232
 
{
233
 
        return r600_translate_colorformat(chip, format, FALSE) != ~0U &&
234
 
                r600_translate_colorswap(format, FALSE) != ~0U;
235
 
}
236
 
 
237
 
/* A format can be a depth/stencil buffer iff it maps to a hardware DB
 * format. */
static bool r600_is_zs_format_supported(enum pipe_format format)
{
	uint32_t db_format = r600_translate_dbformat(format);
	return db_format != ~0U;
}
241
 
 
242
 
/* Gallium screen::is_format_supported for Evergreen/Cayman.
 * Accumulates the subset of the requested bind flags (usage) that the
 * hardware supports for this format/target/sample-count combination and
 * returns true only if EVERY requested usage is supported. */
bool evergreen_is_format_supported(struct pipe_screen *screen,
                                   enum pipe_format format,
                                   enum pipe_texture_target target,
                                   unsigned sample_count,
                                   unsigned storage_sample_count,
                                   unsigned usage)
{
	struct r600_screen *rscreen = (struct r600_screen*)screen;
	unsigned retval = 0;

	if (target >= PIPE_MAX_TEXTURE_TYPES) {
		R600_ERR("r600: unsupported texture type %d\n", target);
		return false;
	}

	/* No support for color sample storage differing from the coverage
	 * sample count (treat 0 as 1 for the comparison). */
	if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
		return false;

	if (sample_count > 1) {
		if (!rscreen->has_msaa)
			return false;

		/* Only 2x/4x/8x MSAA is supported. */
		switch (sample_count) {
		case 2:
		case 4:
		case 8:
			break;
		default:
			return false;
		}
	}

	if (usage & PIPE_BIND_SAMPLER_VIEW) {
		/* Buffers use the vertex/buffer format path; images use the
		 * texture format path. */
		if (target == PIPE_BUFFER) {
			if (r600_is_buffer_format_supported(format, false))
				retval |= PIPE_BIND_SAMPLER_VIEW;
		} else {
			if (r600_is_sampler_format_supported(screen, format))
				retval |= PIPE_BIND_SAMPLER_VIEW;
		}
	}

	if ((usage & (PIPE_BIND_RENDER_TARGET |
	              PIPE_BIND_DISPLAY_TARGET |
	              PIPE_BIND_SCANOUT |
	              PIPE_BIND_SHARED |
	              PIPE_BIND_BLENDABLE)) &&
	    r600_is_colorbuffer_format_supported(rscreen->b.chip_class, format)) {
		retval |= usage &
			  (PIPE_BIND_RENDER_TARGET |
			   PIPE_BIND_DISPLAY_TARGET |
			   PIPE_BIND_SCANOUT |
			   PIPE_BIND_SHARED);
		/* Blending is only reported for non-integer, non-Z/S
		 * color formats. */
		if (!util_format_is_pure_integer(format) &&
		    !util_format_is_depth_or_stencil(format))
			retval |= usage & PIPE_BIND_BLENDABLE;
	}

	if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
	    r600_is_zs_format_supported(format)) {
		retval |= PIPE_BIND_DEPTH_STENCIL;
	}

	if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
	    r600_is_buffer_format_supported(format, true)) {
		retval |= PIPE_BIND_VERTEX_BUFFER;
	}

	if (usage & PIPE_BIND_INDEX_BUFFER &&
	    r600_is_index_format_supported(format)) {
		retval |= PIPE_BIND_INDEX_BUFFER;
	}

	/* Linear layout is allowed for anything uncompressed that is not a
	 * depth/stencil target. */
	if ((usage & PIPE_BIND_LINEAR) &&
	    !util_format_is_compressed(format) &&
	    !(usage & PIPE_BIND_DEPTH_STENCIL))
		retval |= PIPE_BIND_LINEAR;

	return retval == usage;
}
322
 
 
323
 
/* Build an r600_blend_state CSO from a Gallium blend state.
 * Two command buffers are emitted: `buffer` with full blend programming
 * and `buffer_no_blend` with all CB_BLENDi_CONTROL words zeroed (blending
 * force-disabled). `mode` selects the CB_COLOR_CONTROL MODE field
 * (normal draw vs. other CB modes). Returns NULL on allocation failure. */
static void *evergreen_create_blend_state_mode(struct pipe_context *ctx,
                                               const struct pipe_blend_state *state, int mode)
{
	uint32_t color_control = 0, target_mask = 0;
	struct r600_blend_state *blend = CALLOC_STRUCT(r600_blend_state);

	if (!blend) {
		return NULL;
	}

	r600_init_command_buffer(&blend->buffer, 20);
	r600_init_command_buffer(&blend->buffer_no_blend, 20);

	if (state->logicop_enable) {
		/* ROP3 wants the logic op replicated in bits 16-19 and 20-23. */
		color_control |= (state->logicop_func << 16) | (state->logicop_func << 20);
	} else {
		/* 0xcc is the ROP3 code for COPY (src). */
		color_control |= (0xcc << 16);
	}
	/* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */
	if (state->independent_blend_enable) {
		for (int i = 0; i < 8; i++) {
			target_mask |= (state->rt[i].colormask << (4 * i));
		}
	} else {
		/* rt[0] applies to every render target. */
		for (int i = 0; i < 8; i++) {
			target_mask |= (state->rt[0].colormask << (4 * i));
		}
	}

	/* only have dual source on MRT0 */
	blend->dual_src_blend = util_blend_state_is_dual(state, 0);
	blend->cb_target_mask = target_mask;
	blend->alpha_to_one = state->alpha_to_one;

	/* Disable the CB entirely when no channel is written anywhere. */
	if (target_mask)
		color_control |= S_028808_MODE(mode);
	else
		color_control |= S_028808_MODE(V_028808_CB_DISABLE);


	r600_store_context_reg(&blend->buffer, R_028808_CB_COLOR_CONTROL, color_control);
	r600_store_context_reg(&blend->buffer, R_028B70_DB_ALPHA_TO_MASK,
			       S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
			       S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
			       S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
			       S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
			       S_028B70_ALPHA_TO_MASK_OFFSET3(2));
	r600_store_context_reg_seq(&blend->buffer, R_028780_CB_BLEND0_CONTROL, 8);

	/* Copy over the dwords set so far into buffer_no_blend.
	 * Only the CB_BLENDi_CONTROL registers must be set after this. */
	memcpy(blend->buffer_no_blend.buf, blend->buffer.buf, blend->buffer.num_dw * 4);
	blend->buffer_no_blend.num_dw = blend->buffer.num_dw;

	/* Emit the 8 CB_BLENDi_CONTROL values, one per render target. */
	for (int i = 0; i < 8; i++) {
		/* state->rt entries > 0 only written if independent blending */
		const int j = state->independent_blend_enable ? i : 0;

		unsigned eqRGB = state->rt[j].rgb_func;
		unsigned srcRGB = state->rt[j].rgb_src_factor;
		unsigned dstRGB = state->rt[j].rgb_dst_factor;
		unsigned eqA = state->rt[j].alpha_func;
		unsigned srcA = state->rt[j].alpha_src_factor;
		unsigned dstA = state->rt[j].alpha_dst_factor;
		uint32_t bc = 0;

		/* The no-blend buffer always gets 0 (blending off). */
		r600_store_value(&blend->buffer_no_blend, 0);

		if (!state->rt[j].blend_enable) {
			r600_store_value(&blend->buffer, 0);
			continue;
		}

		bc |= S_028780_BLEND_CONTROL_ENABLE(1);
		bc |= S_028780_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB));
		bc |= S_028780_COLOR_SRCBLEND(r600_translate_blend_factor(srcRGB));
		bc |= S_028780_COLOR_DESTBLEND(r600_translate_blend_factor(dstRGB));

		/* Separate alpha programming is only needed when the alpha
		 * equation/factors differ from the RGB ones. */
		if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
			bc |= S_028780_SEPARATE_ALPHA_BLEND(1);
			bc |= S_028780_ALPHA_COMB_FCN(r600_translate_blend_function(eqA));
			bc |= S_028780_ALPHA_SRCBLEND(r600_translate_blend_factor(srcA));
			bc |= S_028780_ALPHA_DESTBLEND(r600_translate_blend_factor(dstA));
		}
		r600_store_value(&blend->buffer, bc);
	}
	return blend;
}
411
 
 
412
 
static void *evergreen_create_blend_state(struct pipe_context *ctx,
413
 
                                        const struct pipe_blend_state *state)
414
 
{
415
 
 
416
 
        return evergreen_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
417
 
}
418
 
 
419
 
/* Build an r600_dsa_state CSO from a Gallium depth/stencil/alpha state:
 * programs DB_DEPTH_CONTROL (depth + front/back stencil) into the command
 * buffer and caches the stencil masks and alpha-test setup for later
 * emission. Returns NULL on allocation failure. */
static void *evergreen_create_dsa_state(struct pipe_context *ctx,
                                   const struct pipe_depth_stencil_alpha_state *state)
{
	unsigned db_depth_control, alpha_test_control, alpha_ref;
	struct r600_dsa_state *dsa = CALLOC_STRUCT(r600_dsa_state);

	if (!dsa) {
		return NULL;
	}

	r600_init_command_buffer(&dsa->buffer, 3);

	/* Cache stencil masks and depth write mask for other atoms. */
	dsa->valuemask[0] = state->stencil[0].valuemask;
	dsa->valuemask[1] = state->stencil[1].valuemask;
	dsa->writemask[0] = state->stencil[0].writemask;
	dsa->writemask[1] = state->stencil[1].writemask;
	dsa->zwritemask = state->depth_writemask;

	db_depth_control = S_028800_Z_ENABLE(state->depth_enabled) |
		S_028800_Z_WRITE_ENABLE(state->depth_writemask) |
		S_028800_ZFUNC(state->depth_func);

	/* stencil */
	if (state->stencil[0].enabled) {
		db_depth_control |= S_028800_STENCIL_ENABLE(1);
		db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); /* translates straight */
		db_depth_control |= S_028800_STENCILFAIL(r600_translate_stencil_op(state->stencil[0].fail_op));
		db_depth_control |= S_028800_STENCILZPASS(r600_translate_stencil_op(state->stencil[0].zpass_op));
		db_depth_control |= S_028800_STENCILZFAIL(r600_translate_stencil_op(state->stencil[0].zfail_op));

		/* Back-face stencil only matters if front-face stencil is on. */
		if (state->stencil[1].enabled) {
			db_depth_control |= S_028800_BACKFACE_ENABLE(1);
			db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); /* translates straight */
			db_depth_control |= S_028800_STENCILFAIL_BF(r600_translate_stencil_op(state->stencil[1].fail_op));
			db_depth_control |= S_028800_STENCILZPASS_BF(r600_translate_stencil_op(state->stencil[1].zpass_op));
			db_depth_control |= S_028800_STENCILZFAIL_BF(r600_translate_stencil_op(state->stencil[1].zfail_op));
		}
	}

	/* alpha */
	alpha_test_control = 0;
	alpha_ref = 0;
	if (state->alpha_enabled) {
		alpha_test_control = S_028410_ALPHA_FUNC(state->alpha_func);
		alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1);
		/* Reference value is stored as raw float bits. */
		alpha_ref = fui(state->alpha_ref_value);
	}
	dsa->sx_alpha_test_control = alpha_test_control & 0xff;
	dsa->alpha_ref = alpha_ref;

	/* misc */
	r600_store_context_reg(&dsa->buffer, R_028800_DB_DEPTH_CONTROL, db_depth_control);
	return dsa;
}
473
 
 
474
 
/* Build an r600_rasterizer_state CSO from a Gallium rasterizer state:
 * caches fields other atoms need (scissor, clipping, polygon offset, ...)
 * and emits the static rasterizer registers (point size, line width,
 * sprite/interp control, MSAA, cull/fill mode) into the command buffer.
 * Returns NULL on allocation failure. */
static void *evergreen_create_rs_state(struct pipe_context *ctx,
                                        const struct pipe_rasterizer_state *state)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	unsigned tmp, spi_interp;
	float psize_min, psize_max;
	struct r600_rasterizer_state *rs = CALLOC_STRUCT(r600_rasterizer_state);

	if (!rs) {
		return NULL;
	}

	r600_init_command_buffer(&rs->buffer, 30);

	/* Cache the pieces of state other atoms consult later. */
	rs->scissor_enable = state->scissor;
	rs->clip_halfz = state->clip_halfz;
	rs->flatshade = state->flatshade;
	rs->sprite_coord_enable = state->sprite_coord_enable;
	rs->rasterizer_discard = state->rasterizer_discard;
	rs->two_side = state->light_twoside;
	rs->clip_plane_enable = state->clip_plane_enable;
	rs->pa_sc_line_stipple = state->line_stipple_enable ?
				S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
				S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
	rs->pa_cl_clip_cntl =
		S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |
		S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip_near) |
		S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip_far) |
		S_028810_DX_LINEAR_ATTR_CLIP_ENA(1) |
		S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard);
	rs->multisample_enable = state->multisample;

	/* offset */
	rs->offset_units = state->offset_units;
	rs->offset_scale = state->offset_scale * 16.0f;
	rs->offset_enable = state->offset_point || state->offset_line || state->offset_tri;
	rs->offset_units_unscaled = state->offset_units_unscaled;

	if (state->point_size_per_vertex) {
		psize_min = util_get_min_point_size(state);
		psize_max = 8192;
	} else {
		/* Force the point size to be as if the vertex output was disabled. */
		psize_min = state->point_size;
		psize_max = state->point_size;
	}

	spi_interp = S_0286D4_FLAT_SHADE_ENA(1);
	/* Point-sprite texcoord overrides: X<-S, Y<-T, Z<-0, W<-1. */
	spi_interp |= S_0286D4_PNT_SPRITE_ENA(1) |
		S_0286D4_PNT_SPRITE_OVRD_X(2) |
		S_0286D4_PNT_SPRITE_OVRD_Y(3) |
		S_0286D4_PNT_SPRITE_OVRD_Z(0) |
		S_0286D4_PNT_SPRITE_OVRD_W(1);
	if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
		spi_interp |= S_0286D4_PNT_SPRITE_TOP_1(1);
	}

	r600_store_context_reg_seq(&rs->buffer, R_028A00_PA_SU_POINT_SIZE, 3);
	/* point size 12.4 fixed point (divide by two, because 0.5 = 1 pixel) */
	tmp = r600_pack_float_12p4(state->point_size/2);
	r600_store_value(&rs->buffer, /* R_028A00_PA_SU_POINT_SIZE */
			 S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
	r600_store_value(&rs->buffer, /* R_028A04_PA_SU_POINT_MINMAX */
			 S_028A04_MIN_SIZE(r600_pack_float_12p4(psize_min/2)) |
			 S_028A04_MAX_SIZE(r600_pack_float_12p4(psize_max/2)));
	r600_store_value(&rs->buffer, /* R_028A08_PA_SU_LINE_CNTL */
			 S_028A08_WIDTH((unsigned)(state->line_width * 8)));

	r600_store_context_reg(&rs->buffer, R_0286D4_SPI_INTERP_CONTROL_0, spi_interp);
	r600_store_context_reg(&rs->buffer, R_028A48_PA_SC_MODE_CNTL_0,
			       S_028A48_MSAA_ENABLE(state->multisample) |
			       S_028A48_VPORT_SCISSOR_ENABLE(1) |
			       S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable));

	/* The vertex quantization register lives at a different offset on
	 * Cayman; the programmed value is the same. */
	if (rctx->b.chip_class == CAYMAN) {
		r600_store_context_reg(&rs->buffer, CM_R_028BE4_PA_SU_VTX_CNTL,
				       S_028C08_PIX_CENTER_HALF(state->half_pixel_center) |
				       S_028C08_QUANT_MODE(V_028C08_X_1_256TH));
	} else {
		r600_store_context_reg(&rs->buffer, R_028C08_PA_SU_VTX_CNTL,
				       S_028C08_PIX_CENTER_HALF(state->half_pixel_center) |
				       S_028C08_QUANT_MODE(V_028C08_X_1_256TH));
	}

	r600_store_context_reg(&rs->buffer, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));
	r600_store_context_reg(&rs->buffer, R_028814_PA_SU_SC_MODE_CNTL,
			       S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) |
			       S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
			       S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
			       S_028814_FACE(!state->front_ccw) |
			       S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
			       S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
			       S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
			       S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL ||
						  state->fill_back != PIPE_POLYGON_MODE_FILL) |
			       S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) |
			       S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)));
	return rs;
}
573
 
 
574
 
/* Build an r600_pipe_sampler_state CSO: packs the three
 * SQ_TEX_SAMPLER_WORD dwords (wrap modes, filters, LOD range/bias,
 * compare func, border color usage) from a Gallium sampler state.
 * Returns NULL on allocation failure. */
static void *evergreen_create_sampler_state(struct pipe_context *ctx,
                                        const struct pipe_sampler_state *state)
{
	struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
	struct r600_pipe_sampler_state *ss = CALLOC_STRUCT(r600_pipe_sampler_state);
	/* A non-negative force_aniso debug option overrides the state's
	 * max_anisotropy. */
	unsigned max_aniso = rscreen->force_aniso >= 0 ? rscreen->force_aniso
						       : state->max_anisotropy;
	unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso);
	/* Truncate coordinates only for pure nearest filtering. */
	bool trunc_coord = state->min_img_filter == PIPE_TEX_FILTER_NEAREST &&
			   state->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
	float max_lod = state->max_lod;

	if (!ss) {
		return NULL;
	}

	/* If the min_mip_filter is NONE, then the texture has no mipmapping and
	 * MIP_FILTER will also be set to NONE. However, if more then one LOD is
	 * configured, then the texture lookup seems to fail for some specific texture
	 * formats. Forcing the number of LODs to one in this case fixes it. */
	if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
		max_lod = state->min_lod;

	ss->border_color_use = sampler_state_needs_border_color(state);

	/* R_03C000_SQ_TEX_SAMPLER_WORD0_0 */
	ss->tex_sampler_words[0] =
		S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) |
		S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) |
		S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) |
		S_03C000_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, max_aniso)) |
		S_03C000_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, max_aniso)) |
		S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) |
		S_03C000_MAX_ANISO_RATIO(max_aniso_ratio) |
		S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) |
		S_03C000_BORDER_COLOR_TYPE(ss->border_color_use ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0);
	/* R_03C004_SQ_TEX_SAMPLER_WORD1_0 */
	/* LODs are 4.8 fixed point, clamped to [0, 15]. */
	ss->tex_sampler_words[1] =
		S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
		S_03C004_MAX_LOD(S_FIXED(CLAMP(max_lod, 0, 15), 8));
	/* R_03C008_SQ_TEX_SAMPLER_WORD2_0 */
	ss->tex_sampler_words[2] =
		S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
		(state->seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) |
		S_03C008_TRUNCATE_COORD(trunc_coord) |
		S_03C008_TYPE(1);

	/* Keep the border color around for the border-color atom. */
	if (ss->border_color_use) {
		memcpy(&ss->border_color, &state->border_color, sizeof(state->border_color));
	}
	return ss;
}
626
 
 
627
 
/* Parameters for building a buffer texture-resource descriptor
 * (consumed by evergreen_fill_buffer_resource_words). */
struct eg_buf_res_params {
	enum pipe_format pipe_format;  /* element format of the buffer view */
	unsigned offset;               /* byte offset from the buffer start */
	unsigned size;                 /* view size in bytes */
	unsigned char swizzle[4];      /* channel swizzle for the view */
	bool uncached;                 /* set the UNCACHED bit in dword 3 */
	bool force_swizzle;            /* use swizzle[] alone, ignoring the
	                                * format's own swizzle */
	bool size_in_bytes;            /* dword 4 holds bytes instead of
	                                * element count */
};
636
 
 
637
 
/* Fill the 8-dword SQ texture-resource descriptor for a buffer resource
 * (texture buffer object / vertex-fetch style view) described by params.
 * Sets *skip_mip_address_reloc since buffers have no mip chain. */
static void evergreen_fill_buffer_resource_words(struct r600_context *rctx,
                                                 struct pipe_resource *buffer,
                                                 struct eg_buf_res_params *params,
                                                 bool *skip_mip_address_reloc,
                                                 unsigned tex_resource_words[8])
{
	struct r600_texture *tmp = (struct r600_texture*)buffer;
	uint64_t va;
	int stride = util_format_get_blocksize(params->pipe_format);
	unsigned format, num_format, format_comp, endian;
	unsigned swizzle_res;
	const struct util_format_description *desc;

	/* Derive the hardware data/number format and endian swap from the
	 * view's pipe format. */
	r600_vertex_data_type(params->pipe_format,
			      &format, &num_format, &format_comp,
			      &endian);

	desc = util_format_description(params->pipe_format);

	if (params->force_swizzle)
		swizzle_res = r600_get_swizzle_combined(params->swizzle, NULL, TRUE);
	else
		swizzle_res = r600_get_swizzle_combined(desc->swizzle, params->swizzle, TRUE);

	va = tmp->resource.gpu_address + params->offset;
	*skip_mip_address_reloc = true;
	/* dword 0/2 split the 40-bit GPU VA into low 32 bits + high bits. */
	tex_resource_words[0] = va;
	tex_resource_words[1] = params->size - 1;
	tex_resource_words[2] = S_030008_BASE_ADDRESS_HI(va >> 32UL) |
		S_030008_STRIDE(stride) |
		S_030008_DATA_FORMAT(format) |
		S_030008_NUM_FORMAT_ALL(num_format) |
		S_030008_FORMAT_COMP_ALL(format_comp) |
		S_030008_ENDIAN_SWAP(endian);
	tex_resource_words[3] = swizzle_res | S_03000C_UNCACHED(params->uncached);
	/*
	 * dword 4 is for number of elements, for use with resinfo,
	 * albeit the amd gpu shader analyser
	 * uses a const buffer to store the element sizes for buffer txq
	 */
	tex_resource_words[4] = params->size_in_bytes ? params->size : (params->size / stride);

	tex_resource_words[5] = tex_resource_words[6] = 0;
	tex_resource_words[7] = S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER);
}
682
 
 
683
 
/* Finish creating a sampler view whose backing resource is a PIPE_BUFFER.
 * Fills the view's texture resource words from the buffer range described
 * by the pipe_sampler_view and links the view into the context's
 * texture-buffer list (used elsewhere to patch addresses when buffers move).
 *
 * NOTE(review): width0/height0 are unused here — they only matter for the
 * texture path in evergreen_create_sampler_view_custom().
 */
static struct pipe_sampler_view *
texture_buffer_sampler_view(struct r600_context *rctx,
                            struct r600_pipe_sampler_view *view,
                            unsigned width0, unsigned height0)
{
        struct r600_texture *tmp = (struct r600_texture*)view->base.texture;
        struct eg_buf_res_params params;

        memset(&params, 0, sizeof(params));

        /* Describe the buffer range and swizzle requested by the view. */
        params.pipe_format = view->base.format;
        params.offset = view->base.u.buf.offset;
        params.size = view->base.u.buf.size;
        params.swizzle[0] = view->base.swizzle_r;
        params.swizzle[1] = view->base.swizzle_g;
        params.swizzle[2] = view->base.swizzle_b;
        params.swizzle[3] = view->base.swizzle_a;

        evergreen_fill_buffer_resource_words(rctx, view->base.texture,
                                             &params, &view->skip_mip_address_reloc,
                                             view->tex_resource_words);
        view->tex_resource = &tmp->resource;

        /* Only track views with a valid GPU address. */
        if (tmp->resource.gpu_address)
                list_addtail(&view->list, &rctx->texture_buffers);
        return &view->base;
}
710
 
 
711
 
/* Parameters describing a texture view, consumed by
 * evergreen_fill_tex_resource_words().
 */
struct eg_tex_res_params {
        enum pipe_format pipe_format;   /* may be rewritten for depth/stencil views */
        int force_level;                /* if non-zero, view a single mip level */
        unsigned width0;                /* base-level width */
        unsigned height0;               /* base-level height */
        unsigned first_level;           /* first mip level of the view */
        unsigned last_level;            /* last mip level of the view */
        unsigned first_layer;           /* first array layer / slice */
        unsigned last_layer;            /* last array layer / slice */
        unsigned target;                /* pipe_texture_target of the view */
        unsigned char swizzle[4];       /* per-channel swizzle (R,G,B,A) */
};
723
 
 
724
 
/* Fill the 8 SQ_TEX_RESOURCE words for a texture view.
 *
 * Handles depth/stencil format rewriting for DB-compatible textures,
 * big-endian swapping, forced single-level views, array/cube dimension
 * fixups, tiling parameters and MSAA/FMASK programming.
 *
 * Returns 0 on success, -1 if the format cannot be translated.
 * May rewrite params->pipe_format for depth/stencil views.
 */
static int evergreen_fill_tex_resource_words(struct r600_context *rctx,
                                             struct pipe_resource *texture,
                                             struct eg_tex_res_params *params,
                                             bool *skip_mip_address_reloc,
                                             unsigned tex_resource_words[8])
{
        struct r600_screen *rscreen = (struct r600_screen*)rctx->b.b.screen;
        struct r600_texture *tmp = (struct r600_texture*)texture;
        unsigned format, endian;
        uint32_t word4 = 0, yuv_format = 0, pitch = 0;
        unsigned char array_mode = 0, non_disp_tiling = 0;
        unsigned height, depth, width;
        unsigned macro_aspect, tile_split, bankh, bankw, nbanks, fmask_bankh;
        struct legacy_surf_level *surflevel;
        unsigned base_level, first_level, last_level;
        unsigned dim, last_layer;
        uint64_t va;
        bool do_endian_swap = FALSE;

        tile_split = tmp->surface.u.legacy.tile_split;
        surflevel = tmp->surface.u.legacy.level;

        /* Texturing with separate depth and stencil. */
        if (tmp->db_compatible) {
                switch (params->pipe_format) {
                case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
                        params->pipe_format = PIPE_FORMAT_Z32_FLOAT;
                        break;
                case PIPE_FORMAT_X8Z24_UNORM:
                case PIPE_FORMAT_S8_UINT_Z24_UNORM:
                        /* Z24 is always stored like this for DB
                         * compatibility.
                         */
                        params->pipe_format = PIPE_FORMAT_Z24X8_UNORM;
                        break;
                case PIPE_FORMAT_X24S8_UINT:
                case PIPE_FORMAT_S8X24_UINT:
                case PIPE_FORMAT_X32_S8X24_UINT:
                        /* Stencil-only views use the separate stencil
                         * miptree and its tile split. */
                        params->pipe_format = PIPE_FORMAT_S8_UINT;
                        tile_split = tmp->surface.u.legacy.stencil_tile_split;
                        surflevel = tmp->surface.u.legacy.zs.stencil_level;
                        break;
                default:;
                }
        }

        /* DB-compatible textures are already stored swapped on big endian. */
        if (R600_BIG_ENDIAN)
                do_endian_swap = !tmp->db_compatible;

        format = r600_translate_texformat(rctx->b.b.screen, params->pipe_format,
                                          params->swizzle,
                                          &word4, &yuv_format, do_endian_swap);
        assert(format != ~0);
        if (format == ~0) {
                return -1;
        }

        endian = r600_colorformat_endian_swap(format, do_endian_swap);

        base_level = 0;
        first_level = params->first_level;
        last_level = params->last_level;
        width = params->width0;
        height = params->height0;
        depth = texture->depth0;

        /* A forced level becomes the (single) base level of the view. */
        if (params->force_level) {
                base_level = params->force_level;
                first_level = 0;
                last_level = 0;
                width = u_minify(width, params->force_level);
                height = u_minify(height, params->force_level);
                depth = u_minify(depth, params->force_level);
        }

        pitch = surflevel[base_level].nblk_x * util_format_get_blockwidth(params->pipe_format);
        non_disp_tiling = tmp->non_disp_tiling;

        switch (surflevel[base_level].mode) {
        default:
        case RADEON_SURF_MODE_LINEAR_ALIGNED:
                array_mode = V_028C70_ARRAY_LINEAR_ALIGNED;
                break;
        case RADEON_SURF_MODE_2D:
                array_mode = V_028C70_ARRAY_2D_TILED_THIN1;
                break;
        case RADEON_SURF_MODE_1D:
                array_mode = V_028C70_ARRAY_1D_TILED_THIN1;
                break;
        }
        /* Encode tiling parameters into the hardware's field encodings. */
        macro_aspect = tmp->surface.u.legacy.mtilea;
        bankw = tmp->surface.u.legacy.bankw;
        bankh = tmp->surface.u.legacy.bankh;
        tile_split = eg_tile_split(tile_split);
        macro_aspect = eg_macro_tile_aspect(macro_aspect);
        bankw = eg_bank_wh(bankw);
        bankh = eg_bank_wh(bankh);
        fmask_bankh = eg_bank_wh(tmp->fmask.bank_height);

        /* 128 bit formats require tile type = 1 */
        if (rscreen->b.chip_class == CAYMAN) {
                if (util_format_get_blocksize(params->pipe_format) >= 16)
                        non_disp_tiling = 1;
        }
        nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);


        va = tmp->resource.gpu_address;

        /* array type views and views into array types need to use layer offset */
        dim = r600_tex_dim(tmp, params->target, texture->nr_samples);

        if (dim == V_030000_SQ_TEX_DIM_1D_ARRAY) {
                height = 1;
                depth = texture->array_size;
        } else if (dim == V_030000_SQ_TEX_DIM_2D_ARRAY ||
                   dim == V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA) {
                depth = texture->array_size;
        } else if (dim == V_030000_SQ_TEX_DIM_CUBEMAP)
                depth = texture->array_size / 6;

        tex_resource_words[0] = (S_030000_DIM(dim) |
                                 S_030000_PITCH((pitch / 8) - 1) |
                                 S_030000_TEX_WIDTH(width - 1));
        /* Cayman moved the NON_DISP_TILING_ORDER bit. */
        if (rscreen->b.chip_class == CAYMAN)
                tex_resource_words[0] |= CM_S_030000_NON_DISP_TILING_ORDER(non_disp_tiling);
        else
                tex_resource_words[0] |= S_030000_NON_DISP_TILING_ORDER(non_disp_tiling);
        tex_resource_words[1] = (S_030004_TEX_HEIGHT(height - 1) |
                                       S_030004_TEX_DEPTH(depth - 1) |
                                       S_030004_ARRAY_MODE(array_mode));
        tex_resource_words[2] = ((uint64_t)surflevel[base_level].offset_256B * 256 + va) >> 8;

        *skip_mip_address_reloc = false;
        /* TEX_RESOURCE_WORD3.MIP_ADDRESS */
        if (texture->nr_samples > 1 && rscreen->has_compressed_msaa_texturing) {
                if (tmp->is_depth) {
                        /* disable FMASK (0 = disabled) */
                        tex_resource_words[3] = 0;
                        *skip_mip_address_reloc = true;
                } else {
                        /* FMASK should be in MIP_ADDRESS for multisample textures */
                        tex_resource_words[3] = (tmp->fmask.offset + va) >> 8;
                }
        } else if (last_level && texture->nr_samples <= 1) {
                /* Mipmapped: WORD3 points at level 1. */
                tex_resource_words[3] = ((uint64_t)surflevel[1].offset_256B * 256 + va) >> 8;
        } else {
                tex_resource_words[3] = ((uint64_t)surflevel[base_level].offset_256B * 256 + va) >> 8;
        }

        last_layer = params->last_layer;
        /* Non-array view of an array texture: clamp to a single layer. */
        if (params->target != texture->target && depth == 1) {
                last_layer = params->first_layer;
        }
        tex_resource_words[4] = (word4 |
                                 S_030010_ENDIAN_SWAP(endian));
        tex_resource_words[5] = S_030014_BASE_ARRAY(params->first_layer) |
                                S_030014_LAST_ARRAY(last_layer);
        tex_resource_words[6] = S_030018_TILE_SPLIT(tile_split);

        if (texture->nr_samples > 1) {
                unsigned log_samples = util_logbase2(texture->nr_samples);
                if (rscreen->b.chip_class == CAYMAN) {
                        tex_resource_words[4] |= S_030010_LOG2_NUM_FRAGMENTS(log_samples);
                }
                /* LAST_LEVEL holds log2(nr_samples) for multisample textures */
                tex_resource_words[5] |= S_030014_LAST_LEVEL(log_samples);
                tex_resource_words[6] |= S_030018_FMASK_BANK_HEIGHT(fmask_bankh);
        } else {
                bool no_mip = first_level == last_level;

                tex_resource_words[4] |= S_030010_BASE_LEVEL(first_level);
                tex_resource_words[5] |= S_030014_LAST_LEVEL(last_level);
                /* aniso max 16 samples */
                tex_resource_words[6] |= S_030018_MAX_ANISO_RATIO(no_mip ? 0 : 4);
        }

        tex_resource_words[7] = S_03001C_DATA_FORMAT(format) |
                                      S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE) |
                                      S_03001C_BANK_WIDTH(bankw) |
                                      S_03001C_BANK_HEIGHT(bankh) |
                                      S_03001C_MACRO_TILE_ASPECT(macro_aspect) |
                                      S_03001C_NUM_BANKS(nbanks) |
                                      S_03001C_DEPTH_SAMPLE_ORDER(tmp->db_compatible);
        return 0;
}
910
 
 
911
 
/* Create a sampler view with explicit base dimensions and an optional
 * forced mip level (used e.g. by blits that sample a specific level).
 *
 * Takes a reference on 'texture'; the returned view owns that reference.
 * Buffer resources are delegated to texture_buffer_sampler_view().
 * Returns NULL on allocation failure or untranslatable format.
 */
struct pipe_sampler_view *
evergreen_create_sampler_view_custom(struct pipe_context *ctx,
                                     struct pipe_resource *texture,
                                     const struct pipe_sampler_view *state,
                                     unsigned width0, unsigned height0,
                                     unsigned force_level)
{
        struct r600_context *rctx = (struct r600_context*)ctx;
        struct r600_pipe_sampler_view *view = CALLOC_STRUCT(r600_pipe_sampler_view);
        struct r600_texture *tmp = (struct r600_texture*)texture;
        struct eg_tex_res_params params;
        int ret;

        if (!view)
                return NULL;

        /* initialize base object */
        view->base = *state;
        view->base.texture = NULL;
        /* Take our own reference on the texture. */
        pipe_reference(NULL, &texture->reference);
        view->base.texture = texture;
        view->base.reference.count = 1;
        view->base.context = ctx;

        if (state->target == PIPE_BUFFER)
                return texture_buffer_sampler_view(rctx, view, width0, height0);

        memset(&params, 0, sizeof(params));
        params.pipe_format = state->format;
        params.force_level = force_level;
        params.width0 = width0;
        params.height0 = height0;
        params.first_level = state->u.tex.first_level;
        params.last_level = state->u.tex.last_level;
        params.first_layer = state->u.tex.first_layer;
        params.last_layer = state->u.tex.last_layer;
        params.target = state->target;
        params.swizzle[0] = state->swizzle_r;
        params.swizzle[1] = state->swizzle_g;
        params.swizzle[2] = state->swizzle_b;
        params.swizzle[3] = state->swizzle_a;

        ret = evergreen_fill_tex_resource_words(rctx, texture, &params,
                                                &view->skip_mip_address_reloc,
                                                view->tex_resource_words);
        if (ret != 0) {
                /* NOTE(review): the texture reference taken above is not
                 * dropped here before freeing the view — verify against the
                 * caller's expectations. */
                FREE(view);
                return NULL;
        }

        /* Stencil-format views need special handling at bind time. */
        if (state->format == PIPE_FORMAT_X24S8_UINT ||
            state->format == PIPE_FORMAT_S8X24_UINT ||
            state->format == PIPE_FORMAT_X32_S8X24_UINT ||
            state->format == PIPE_FORMAT_S8_UINT)
                view->is_stencil_sampler = true;

        view->tex_resource = &tmp->resource;

        return &view->base;
}
971
 
 
972
 
static struct pipe_sampler_view *
973
 
evergreen_create_sampler_view(struct pipe_context *ctx,
974
 
                              struct pipe_resource *tex,
975
 
                              const struct pipe_sampler_view *state)
976
 
{
977
 
        return evergreen_create_sampler_view_custom(ctx, tex, state,
978
 
                                                    tex->width0, tex->height0, 0);
979
 
}
980
 
 
981
 
/* Emit the SQ GPR resource-management config registers.
 * With dynamic GPR allocation enabled only the clause-temp GPR count is
 * programmed (the per-stage splits are left to the hardware); otherwise
 * the three precomputed static split registers are written.
 */
static void evergreen_emit_config_state(struct r600_context *rctx, struct r600_atom *atom)
{
        struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
        struct r600_config_state *a = (struct r600_config_state*)atom;

        radeon_set_config_reg_seq(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 3);
        if (a->dyn_gpr_enabled) {
                radeon_emit(cs, S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs));
                radeon_emit(cs, 0);
                radeon_emit(cs, 0);
        } else {
                radeon_emit(cs, a->sq_gpr_resource_mgmt_1);
                radeon_emit(cs, a->sq_gpr_resource_mgmt_2);
                radeon_emit(cs, a->sq_gpr_resource_mgmt_3);
        }
        radeon_set_config_reg(cs, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (a->dyn_gpr_enabled << 8));
        if (a->dyn_gpr_enabled) {
                radeon_set_context_reg(cs, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
                                       S_028838_PS_GPRS(0x1e) |
                                       S_028838_VS_GPRS(0x1e) |
                                       S_028838_GS_GPRS(0x1e) |
                                       S_028838_ES_GPRS(0x1e) |
                                       S_028838_HS_GPRS(0x1e) |
                                       S_028838_LS_GPRS(0x1e)); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
        }
}
1007
 
 
1008
 
/* Emit all six user clip planes (6 planes * 4 floats each) as one
 * consecutive context-register write starting at PA_CL_UCP0_X.
 */
static void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom)
{
        struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
        struct pipe_clip_state *state = &rctx->clip_state.state;

        radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP0_X, 6*4);
        /* The float plane equations are emitted as raw dwords. */
        radeon_emit_array(cs, (unsigned*)state, 6*4);
}
1016
 
 
1017
 
/* Intentionally empty: polygon stipple is not handled in this state
 * hook for evergreen. */
static void evergreen_set_polygon_stipple(struct pipe_context *ctx,
                                         const struct pipe_poly_stipple *state)
{
}
1021
 
 
1022
 
static void evergreen_get_scissor_rect(struct r600_context *rctx,
1023
 
                                       unsigned tl_x, unsigned tl_y, unsigned br_x, unsigned br_y,
1024
 
                                       uint32_t *tl, uint32_t *br)
1025
 
{
1026
 
        struct pipe_scissor_state scissor = {tl_x, tl_y, br_x, br_y};
1027
 
 
1028
 
        evergreen_apply_scissor_bug_workaround(&rctx->b, &scissor);
1029
 
 
1030
 
        *tl = S_028240_TL_X(scissor.minx) | S_028240_TL_Y(scissor.miny);
1031
 
        *br = S_028244_BR_X(scissor.maxx) | S_028244_BR_Y(scissor.maxy);
1032
 
}
1033
 
 
1034
 
/* Computed CB (color buffer) register values for one color surface,
 * filled by evergreen_set_color_surface_buffer()/_common() and copied
 * into r600_surface fields by the callers.
 */
struct r600_tex_color_info {
        unsigned info;          /* CB_COLOR*_INFO */
        unsigned view;          /* CB_COLOR*_VIEW (slice range) */
        unsigned dim;           /* CB_COLOR*_DIM */
        unsigned pitch;         /* CB_COLOR*_PITCH */
        unsigned slice;         /* CB_COLOR*_SLICE */
        unsigned attrib;        /* CB_COLOR*_ATTRIB */
        unsigned ntype;         /* V_028C70_NUMBER_* type actually chosen */
        unsigned fmask;         /* FMASK address >> 8 (or color base if none) */
        unsigned fmask_slice;   /* CB_COLOR*_FMASK_SLICE */
        uint64_t offset;        /* surface base address >> 8 */
        boolean export_16bpc;   /* EXPORT_NORM fast path is usable */
};
1047
 
 
1048
 
/* Compute CB register values for a buffer used as a color target (RAT).
 * The buffer is treated as a linear-aligned 1D surface covering elements
 * [first_element, last_element].
 */
static void evergreen_set_color_surface_buffer(struct r600_context *rctx,
                                               struct r600_resource *res,
                                               enum pipe_format pformat,
                                               unsigned first_element,
                                               unsigned last_element,
                                               struct r600_tex_color_info *color)
{
        unsigned format, swap, ntype, endian;
        const struct util_format_description *desc;
        unsigned block_size = util_format_get_blocksize(res->b.b.format);
        /* Pitch must satisfy the CB's interleave alignment, min 64. */
        unsigned pitch_alignment =
                MAX2(64, rctx->screen->b.info.pipe_interleave_bytes / block_size);
        unsigned pitch = align(res->b.b.width0, pitch_alignment);
        int i;
        unsigned width_elements;

        width_elements = last_element - first_element + 1;

        format = r600_translate_colorformat(rctx->b.chip_class, pformat, FALSE);
        swap = r600_translate_colorswap(pformat, FALSE);

        endian = r600_colorformat_endian_swap(format, FALSE);

        /* Find the first non-void channel to classify the number type. */
        desc = util_format_description(pformat);
        for (i = 0; i < 4; i++) {
                if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
                        break;
                }
        }
        ntype = V_028C70_NUMBER_UNORM;
        if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
                ntype = V_028C70_NUMBER_SRGB;
        else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
                if (desc->channel[i].normalized)
                        ntype = V_028C70_NUMBER_SNORM;
                else if (desc->channel[i].pure_integer)
                        ntype = V_028C70_NUMBER_SINT;
        } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
                if (desc->channel[i].normalized)
                        ntype = V_028C70_NUMBER_UNORM;
                else if (desc->channel[i].pure_integer)
                        ntype = V_028C70_NUMBER_UINT;
        } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
                ntype = V_028C70_NUMBER_FLOAT;
        }

        /* PITCH_TILE_MAX is in units of 8 pixels, minus one. */
        pitch = (pitch / 8) - 1;
        color->pitch = S_028C64_PITCH_TILE_MAX(pitch);

        color->info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_ALIGNED);
        color->info |= S_028C70_FORMAT(format) |
                       S_028C70_COMP_SWAP(swap) |
                       S_028C70_BLEND_CLAMP(0) |
                       S_028C70_BLEND_BYPASS(1) |
                       S_028C70_NUMBER_TYPE(ntype) |
                       S_028C70_ENDIAN(endian);
        color->attrib = S_028C74_NON_DISP_TILING_ORDER(1);
        color->ntype = ntype;
        color->export_16bpc = false;
        color->dim = width_elements - 1;
        color->slice = 0; /* (width_elements / 64) - 1;*/
        color->view = 0;
        color->offset = (res->gpu_address + first_element) >> 8;

        /* No FMASK for buffers; point it at the color base. */
        color->fmask = color->offset;
        color->fmask_slice = 0;
}
1115
 
 
1116
 
/* Compute CB register values for one mip level / layer range of a texture
 * used as a color render target.  Handles tiling, format/number-type
 * classification, blend clamp/bypass rules, the EXPORT_NORM fast path and
 * FMASK programming for MSAA surfaces.
 */
static void evergreen_set_color_surface_common(struct r600_context *rctx,
                                               struct r600_texture *rtex,
                                               unsigned level,
                                               unsigned first_layer,
                                               unsigned last_layer,
                                               enum pipe_format pformat,
                                               struct r600_tex_color_info *color)
{
        struct r600_screen *rscreen = rctx->screen;
        unsigned pitch, slice;
        unsigned non_disp_tiling, macro_aspect, tile_split, bankh, bankw, fmask_bankh, nbanks;
        unsigned format, swap, ntype, endian;
        const struct util_format_description *desc;
        bool blend_clamp = 0, blend_bypass = 0, do_endian_swap = FALSE;
        int i;

        color->offset = (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256;
        color->view = S_028C6C_SLICE_START(first_layer) |
                        S_028C6C_SLICE_MAX(last_layer);

        /* CB base addresses are programmed in units of 256 bytes. */
        color->offset += rtex->resource.gpu_address;
        color->offset >>= 8;

        color->dim = 0;
        /* PITCH_TILE_MAX / SLICE_TILE_MAX are in 8x8 tile units, minus one. */
        pitch = (rtex->surface.u.legacy.level[level].nblk_x) / 8 - 1;
        slice = (rtex->surface.u.legacy.level[level].nblk_x * rtex->surface.u.legacy.level[level].nblk_y) / 64;
        if (slice) {
                slice = slice - 1;
        }

        color->info = 0;
        switch (rtex->surface.u.legacy.level[level].mode) {
        default:
        case RADEON_SURF_MODE_LINEAR_ALIGNED:
                color->info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_ALIGNED);
                non_disp_tiling = 1;
                break;
        case RADEON_SURF_MODE_1D:
                color->info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_1D_TILED_THIN1);
                non_disp_tiling = rtex->non_disp_tiling;
                break;
        case RADEON_SURF_MODE_2D:
                color->info = S_028C70_ARRAY_MODE(V_028C70_ARRAY_2D_TILED_THIN1);
                non_disp_tiling = rtex->non_disp_tiling;
                break;
        }
        /* Encode tiling parameters into the hardware's field encodings. */
        tile_split = rtex->surface.u.legacy.tile_split;
        macro_aspect = rtex->surface.u.legacy.mtilea;
        bankw = rtex->surface.u.legacy.bankw;
        bankh = rtex->surface.u.legacy.bankh;
        if (rtex->fmask.size)
                fmask_bankh = rtex->fmask.bank_height;
        else
                fmask_bankh = rtex->surface.u.legacy.bankh;
        tile_split = eg_tile_split(tile_split);
        macro_aspect = eg_macro_tile_aspect(macro_aspect);
        bankw = eg_bank_wh(bankw);
        bankh = eg_bank_wh(bankh);
        fmask_bankh = eg_bank_wh(fmask_bankh);

        /* 128-bit formats require non-display tiling on Cayman. */
        if (rscreen->b.chip_class == CAYMAN) {
                if (util_format_get_blocksize(pformat) >= 16)
                        non_disp_tiling = 1;
        }
        nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
        /* Find the first non-void channel to classify the number type. */
        desc = util_format_description(pformat);
        for (i = 0; i < 4; i++) {
                if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
                        break;
                }
        }
        color->attrib = S_028C74_TILE_SPLIT(tile_split)|
                S_028C74_NUM_BANKS(nbanks) |
                S_028C74_BANK_WIDTH(bankw) |
                S_028C74_BANK_HEIGHT(bankh) |
                S_028C74_MACRO_TILE_ASPECT(macro_aspect) |
                S_028C74_NON_DISP_TILING_ORDER(non_disp_tiling) |
                S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);

        if (rctx->b.chip_class == CAYMAN) {
                /* Formats with a constant-1 alpha must force DST_ALPHA to 1. */
                color->attrib |= S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] ==
                                                           PIPE_SWIZZLE_1);

                if (rtex->resource.b.b.nr_samples > 1) {
                        unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
                        color->attrib |= S_028C74_NUM_SAMPLES(log_samples) |
                                        S_028C74_NUM_FRAGMENTS(log_samples);
                }
        }

        ntype = V_028C70_NUMBER_UNORM;
        if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
                ntype = V_028C70_NUMBER_SRGB;
        else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
                if (desc->channel[i].normalized)
                        ntype = V_028C70_NUMBER_SNORM;
                else if (desc->channel[i].pure_integer)
                        ntype = V_028C70_NUMBER_SINT;
        } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
                if (desc->channel[i].normalized)
                        ntype = V_028C70_NUMBER_UNORM;
                else if (desc->channel[i].pure_integer)
                        ntype = V_028C70_NUMBER_UINT;
        } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
                ntype = V_028C70_NUMBER_FLOAT;
        }

        /* DB-compatible textures are already stored swapped on big endian. */
        if (R600_BIG_ENDIAN)
                do_endian_swap = !rtex->db_compatible;

        format = r600_translate_colorformat(rctx->b.chip_class, pformat, do_endian_swap);
        assert(format != ~0);
        swap = r600_translate_colorswap(pformat, do_endian_swap);
        assert(swap != ~0);

        endian = r600_colorformat_endian_swap(format, do_endian_swap);

        /* blend clamp should be set for all NORM/SRGB types */
        if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM ||
            ntype == V_028C70_NUMBER_SRGB)
                blend_clamp = 1;

        /* set blend bypass according to docs if SINT/UINT or
           8/24 COLOR variants */
        if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
            format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
            format == V_028C70_COLOR_X24_8_32_FLOAT) {
                blend_clamp = 0;
                blend_bypass = 1;
        }

        color->ntype = ntype;
        color->info |= S_028C70_FORMAT(format) |
                S_028C70_COMP_SWAP(swap) |
                S_028C70_BLEND_CLAMP(blend_clamp) |
                S_028C70_BLEND_BYPASS(blend_bypass) |
                S_028C70_SIMPLE_FLOAT(1) |
                S_028C70_NUMBER_TYPE(ntype) |
                S_028C70_ENDIAN(endian);

        /* An FMASK implies the surface is color-compressed. */
        if (rtex->fmask.size) {
                color->info |= S_028C70_COMPRESSION(1);
        }

        /* EXPORT_NORM is an optimization that can be enabled for better
         * performance in certain cases.
         * EXPORT_NORM can be enabled if:
         * - 11-bit or smaller UNORM/SNORM/SRGB
         * - 16-bit or smaller FLOAT
         */
        color->export_16bpc = false;
        if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS &&
            ((desc->channel[i].size < 12 &&
              desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT &&
              ntype != V_028C70_NUMBER_UINT && ntype != V_028C70_NUMBER_SINT) ||
             (desc->channel[i].size < 17 &&
              desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT))) {
                color->info |= S_028C70_SOURCE_FORMAT(V_028C70_EXPORT_4C_16BPC);
                color->export_16bpc = true;
        }

        color->pitch = S_028C64_PITCH_TILE_MAX(pitch);
        color->slice = S_028C68_SLICE_TILE_MAX(slice);

        if (rtex->fmask.size) {
                color->fmask = (rtex->resource.gpu_address + rtex->fmask.offset) >> 8;
                color->fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
        } else {
                /* No FMASK: point it at the color base. */
                color->fmask = color->offset;
                color->fmask_slice = S_028C88_TILE_MAX(slice);
        }
}
1288
 
 
1289
 
/**
1290
 
 * This function initializes the CB* register values for RATs.  It is meant
1291
 
 * to be used for 1D aligned buffers that do not have an associated
1292
 
 * radeon_surf.
1293
 
 */
1294
 
void evergreen_init_color_surface_rat(struct r600_context *rctx,
1295
 
                                        struct r600_surface *surf)
1296
 
{
1297
 
        struct pipe_resource *pipe_buffer = surf->base.texture;
1298
 
        struct r600_tex_color_info color;
1299
 
 
1300
 
        evergreen_set_color_surface_buffer(rctx, (struct r600_resource *)surf->base.texture,
1301
 
                                           surf->base.format, 0, pipe_buffer->width0,
1302
 
                                           &color);
1303
 
 
1304
 
        surf->cb_color_base = color.offset;
1305
 
        surf->cb_color_dim = color.dim;
1306
 
        surf->cb_color_info = color.info | S_028C70_RAT(1);
1307
 
        surf->cb_color_pitch = color.pitch;
1308
 
        surf->cb_color_slice = color.slice;
1309
 
        surf->cb_color_view = color.view;
1310
 
        surf->cb_color_attrib = color.attrib;
1311
 
        surf->cb_color_fmask = color.fmask;
1312
 
        surf->cb_color_fmask_slice = color.fmask_slice;
1313
 
 
1314
 
        surf->cb_color_view = 0;
1315
 
 
1316
 
        /* Set the buffer range the GPU will have access to: */
1317
 
        util_range_add(pipe_buffer, &r600_resource(pipe_buffer)->valid_buffer_range,
1318
 
                       0, pipe_buffer->width0);
1319
 
}
1320
 
 
1321
 
 
1322
 
/* Fill in and cache the CB* register values for a color surface that is
 * backed by a texture (the common render-target path).  The per-level /
 * per-layer layout is computed by evergreen_set_color_surface_common()
 * and copied onto the r600_surface so emission is a plain register dump. */
void evergreen_init_color_surface(struct r600_context *rctx,
                                  struct r600_surface *surf)
{
	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
	unsigned level = surf->base.u.tex.level;
	struct r600_tex_color_info color;

	evergreen_set_color_surface_common(rctx, rtex, level,
					   surf->base.u.tex.first_layer,
					   surf->base.u.tex.last_layer,
					   surf->base.format,
					   &color);

	/* Alpha-test is meaningless for pure integer formats, so it can be
	 * bypassed when cbuf0 is UINT/SINT. */
	surf->alphatest_bypass = color.ntype == V_028C70_NUMBER_UINT ||
		color.ntype == V_028C70_NUMBER_SINT;
	surf->export_16bpc = color.export_16bpc;

	/* XXX handle enabling of CB beyond BASE8 which has different offset */
	surf->cb_color_base = color.offset;
	surf->cb_color_dim = color.dim;
	surf->cb_color_info = color.info;
	surf->cb_color_pitch = color.pitch;
	surf->cb_color_slice = color.slice;
	surf->cb_color_view = color.view;
	surf->cb_color_attrib = color.attrib;
	surf->cb_color_fmask = color.fmask;
	surf->cb_color_fmask_slice = color.fmask_slice;

	/* Lets set_framebuffer_state skip re-initialization on rebinds. */
	surf->color_initialized = true;
}
1352
 
 
1353
 
/* Fill in and cache the DB* register values (depth base/size/slice, stencil,
 * HTILE) for a depth/stencil surface, based on the legacy surface layout. */
static void evergreen_init_depth_surface(struct r600_context *rctx,
                                         struct r600_surface *surf)
{
	struct r600_screen *rscreen = rctx->screen;
	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
	unsigned level = surf->base.u.tex.level;
	struct legacy_surf_level *levelinfo = &rtex->surface.u.legacy.level[level];
	uint64_t offset;
	unsigned format, array_mode;
	unsigned macro_aspect, tile_split, bankh, bankw, nbanks;


	format = r600_translate_dbformat(surf->base.format);
	assert(format != ~0);

	/* GPU address of the selected mip level; layout offsets are stored
	 * in 256-byte units. */
	offset = rtex->resource.gpu_address;
	offset += (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256;

	switch (rtex->surface.u.legacy.level[level].mode) {
	case RADEON_SURF_MODE_2D:
		array_mode = V_028C70_ARRAY_2D_TILED_THIN1;
		break;
	case RADEON_SURF_MODE_1D:
	case RADEON_SURF_MODE_LINEAR_ALIGNED:
	default:
		/* DB does not support linear; fall back to 1D tiling. */
		array_mode = V_028C70_ARRAY_1D_TILED_THIN1;
		break;
	}
	/* Convert raw layout parameters to the register encodings. */
	tile_split = rtex->surface.u.legacy.tile_split;
	macro_aspect = rtex->surface.u.legacy.mtilea;
	bankw = rtex->surface.u.legacy.bankw;
	bankh = rtex->surface.u.legacy.bankh;
	tile_split = eg_tile_split(tile_split);
	macro_aspect = eg_macro_tile_aspect(macro_aspect);
	bankw = eg_bank_wh(bankw);
	bankh = eg_bank_wh(bankh);
	nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
	offset >>= 8; /* DB_DEPTH_BASE is a 256-byte-aligned address */

	surf->db_z_info = S_028040_ARRAY_MODE(array_mode) |
			  S_028040_FORMAT(format) |
			  S_028040_TILE_SPLIT(tile_split)|
			  S_028040_NUM_BANKS(nbanks) |
			  S_028040_BANK_WIDTH(bankw) |
			  S_028040_BANK_HEIGHT(bankh) |
			  S_028040_MACRO_TILE_ASPECT(macro_aspect);
	/* Only Cayman programs NUM_SAMPLES in DB_Z_INFO. */
	if (rscreen->b.chip_class == CAYMAN && rtex->resource.b.b.nr_samples > 1) {
		surf->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
	}

	/* Tile-max fields below assume 8x8-block alignment. */
	assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);

	surf->db_depth_base = offset;
	surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
			      S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
	surf->db_depth_size = S_028058_PITCH_TILE_MAX(levelinfo->nblk_x / 8 - 1) |
			      S_028058_HEIGHT_TILE_MAX(levelinfo->nblk_y / 8 - 1);
	surf->db_depth_slice = S_02805C_SLICE_TILE_MAX(levelinfo->nblk_x *
						       levelinfo->nblk_y / 64 - 1);

	if (rtex->surface.has_stencil) {
		uint64_t stencil_offset;
		unsigned stile_split = rtex->surface.u.legacy.stencil_tile_split;

		stile_split = eg_tile_split(stile_split);

		stencil_offset = (uint64_t)rtex->surface.u.legacy.zs.stencil_level[level].offset_256B * 256;
		stencil_offset += rtex->resource.gpu_address;

		surf->db_stencil_base = stencil_offset >> 8;
		surf->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8) |
					S_028044_TILE_SPLIT(stile_split);
	} else {
		surf->db_stencil_base = offset;
		/* DRM 2.6.18 allows the INVALID format to disable stencil.
		 * Older kernels are out of luck. */
		surf->db_stencil_info = rctx->screen->b.info.drm_minor >= 18 ?
					S_028044_FORMAT(V_028044_STENCIL_INVALID) :
					S_028044_FORMAT(V_028044_STENCIL_8);
	}

	/* HTILE (hierarchical Z) acceleration, when enabled for this level. */
	if (r600_htile_enabled(rtex, level)) {
		uint64_t va = rtex->resource.gpu_address + rtex->htile_offset;
		surf->db_htile_data_base = va >> 8;
		surf->db_htile_surface = S_028ABC_HTILE_WIDTH(1) |
					 S_028ABC_HTILE_HEIGHT(1) |
					 S_028ABC_FULL_CACHE(1);
		surf->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
		surf->db_preload_control = 0;
	}

	surf->depth_initialized = true;
}
1446
 
 
1447
 
/* Gallium set_framebuffer_state hook.
 *
 * Copies the new framebuffer state, lazily initializes any bound color/depth
 * surfaces, updates derived state (alpha-test bypass, 16bpc export, the
 * compressed-CB mask, CB/DB misc atoms) and recomputes the dword size of
 * the framebuffer atom before marking it dirty. */
static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
                                            const struct pipe_framebuffer_state *state)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_surface *surf;
	struct r600_texture *rtex;
	uint32_t i, log_samples;
	uint32_t target_mask = 0;
	/* Flush TC when changing the framebuffer state, because the only
	 * client not using TC that can change textures is the framebuffer.
	 * Other places don't typically have to flush TC.
	 */
	rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE |
			 R600_CONTEXT_FLUSH_AND_INV |
			 R600_CONTEXT_FLUSH_AND_INV_CB |
			 R600_CONTEXT_FLUSH_AND_INV_CB_META |
			 R600_CONTEXT_FLUSH_AND_INV_DB |
			 R600_CONTEXT_FLUSH_AND_INV_DB_META |
			 R600_CONTEXT_INV_TEX_CACHE;

	util_copy_framebuffer_state(&rctx->framebuffer.state, state);

	/* Colorbuffers. */
	rctx->framebuffer.export_16bpc = state->nr_cbufs != 0;
	rctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] &&
					   util_format_is_pure_integer(state->cbufs[0]->format);
	rctx->framebuffer.compressed_cb_mask = 0;
	rctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);

	for (i = 0; i < state->nr_cbufs; i++) {
		surf = (struct r600_surface*)state->cbufs[i];
		if (!surf)
			continue;

		/* 4 write-enable bits per colorbuffer in the target mask. */
		target_mask |= (0xf << (i * 4));

		rtex = (struct r600_texture*)surf->base.texture;

		r600_context_add_resource_size(ctx, state->cbufs[i]->texture);

		/* Lazily compute the CB register values on first bind. */
		if (!surf->color_initialized) {
			evergreen_init_color_surface(rctx, surf);
		}

		/* 16bpc export is only usable if EVERY cbuf supports it. */
		if (!surf->export_16bpc) {
			rctx->framebuffer.export_16bpc = false;
		}

		/* FMASK present => this cbuf is MSAA-compressed. */
		if (rtex->fmask.size) {
			rctx->framebuffer.compressed_cb_mask |= 1 << i;
		}
	}

	/* Update alpha-test state dependencies.
	 * Alpha-test is done on the first colorbuffer only. */
	if (state->nr_cbufs) {
		bool alphatest_bypass = false;
		bool export_16bpc = true;

		surf = (struct r600_surface*)state->cbufs[0];
		if (surf) {
			alphatest_bypass = surf->alphatest_bypass;
			export_16bpc = surf->export_16bpc;
		}

		if (rctx->alphatest_state.bypass != alphatest_bypass) {
			rctx->alphatest_state.bypass = alphatest_bypass;
			r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
		}
		if (rctx->alphatest_state.cb0_export_16bpc != export_16bpc) {
			rctx->alphatest_state.cb0_export_16bpc = export_16bpc;
			r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
		}
	}

	/* ZS buffer. */
	if (state->zsbuf) {
		surf = (struct r600_surface*)state->zsbuf;

		r600_context_add_resource_size(ctx, state->zsbuf->texture);

		if (!surf->depth_initialized) {
			evergreen_init_depth_surface(rctx, surf);
		}

		/* Polygon offset scaling depends on the Z format. */
		if (state->zsbuf->format != rctx->poly_offset_state.zs_format) {
			rctx->poly_offset_state.zs_format = state->zsbuf->format;
			r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom);
		}

		if (rctx->db_state.rsurf != surf) {
			rctx->db_state.rsurf = surf;
			r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
			r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
		}
	} else if (rctx->db_state.rsurf) {
		/* Depth buffer was unbound. */
		rctx->db_state.rsurf = NULL;
		r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
		r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
	}

	if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs ||
	    rctx->cb_misc_state.bound_cbufs_target_mask != target_mask) {
		rctx->cb_misc_state.bound_cbufs_target_mask = target_mask;
		rctx->cb_misc_state.nr_cbufs = state->nr_cbufs;
		r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
	}

	/* No colorbuffers => alpha-test bypass is moot; clear it. */
	if (state->nr_cbufs == 0 && rctx->alphatest_state.bypass) {
		rctx->alphatest_state.bypass = false;
		r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
	}

	log_samples = util_logbase2(rctx->framebuffer.nr_samples);
	/* This is for Cayman to program SAMPLE_RATE, and for RV770 to fix a hw bug. */
	if ((rctx->b.chip_class == CAYMAN ||
	     rctx->b.family == CHIP_RV770) &&
	    rctx->db_misc_state.log_samples != log_samples) {
		rctx->db_misc_state.log_samples = log_samples;
		r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
	}


	/* Calculate the CS size. */
	rctx->framebuffer.atom.num_dw = 4; /* SCISSOR */

	/* MSAA. */
	if (rctx->b.chip_class == EVERGREEN)
		rctx->framebuffer.atom.num_dw += 17; /* Evergreen */
	else
		rctx->framebuffer.atom.num_dw += 28; /* Cayman */

	/* Colorbuffers. */
	rctx->framebuffer.atom.num_dw += state->nr_cbufs * 23;
	rctx->framebuffer.atom.num_dw += state->nr_cbufs * 2;
	rctx->framebuffer.atom.num_dw += (12 - state->nr_cbufs) * 3;

	/* ZS buffer. */
	if (state->zsbuf) {
		rctx->framebuffer.atom.num_dw += 24;
		rctx->framebuffer.atom.num_dw += 2;
	} else if (rctx->screen->b.info.drm_minor >= 18) {
		rctx->framebuffer.atom.num_dw += 4;
	}

	r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);

	r600_set_sample_locations_constant_buffer(rctx);
	rctx->framebuffer.do_update_surf_dirtiness = true;
}
1597
 
 
1598
 
static void evergreen_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
1599
 
{
1600
 
        struct r600_context *rctx = (struct r600_context *)ctx;
1601
 
 
1602
 
        if (rctx->ps_iter_samples == min_samples)
1603
 
                return;
1604
 
 
1605
 
        rctx->ps_iter_samples = min_samples;
1606
 
        if (rctx->framebuffer.nr_samples > 1) {
1607
 
                r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);
1608
 
        }
1609
 
}
1610
 
 
1611
 
/* 8xMSAA */
/* Hardware sample locations for 8x MSAA, packed with FILL_SREG into the
 * PA_SC_AA_SAMPLE_LOCS registers; each coordinate is a signed 1/16-pixel
 * offset.  The 4-entry pattern is repeated to cover all register slots. */
static const uint32_t sample_locs_8x[] = {
	FILL_SREG(-1,  1,  1,  5,  3, -5,  5,  3),
	FILL_SREG(-7, -1, -3, -7,  7, -3, -5,  7),
	FILL_SREG(-1,  1,  1,  5,  3, -5,  5,  3),
	FILL_SREG(-7, -1, -3, -7,  7, -3, -5,  7),
	FILL_SREG(-1,  1,  1,  5,  3, -5,  5,  3),
	FILL_SREG(-7, -1, -3, -7,  7, -3, -5,  7),
	FILL_SREG(-1,  1,  1,  5,  3, -5,  5,  3),
	FILL_SREG(-7, -1, -3, -7,  7, -3, -5,  7),
};
/* Largest |offset| (in 1/16 pixel) in the 8x pattern; programmed as
 * MAX_SAMPLE_DIST in PA_SC_AA_CONFIG. */
static unsigned max_dist_8x = 7;
1623
 
 
1624
 
/* Gallium get_sample_position hook: return the (x, y) location of one MSAA
 * sample in [0, 1) pixel space, decoded from the packed sample-location
 * tables used to program the hardware. */
static void evergreen_get_sample_position(struct pipe_context *ctx,
				     unsigned sample_count,
				     unsigned sample_index,
				     float *out_value)
{
	int offset, index;
	/* 4-bit signed bitfield: assigning a nibble sign-extends it to the
	 * range [-8, 7], matching the hardware encoding. */
	struct {
		int idx:4;
	} val;
	switch (sample_count) {
	case 1:
	default:
		/* Single-sampled: the sample sits at the pixel center. */
		out_value[0] = out_value[1] = 0.5;
		break;
	case 2:
		/* Each sample occupies two nibbles (x then y) in the word. */
		offset = 4 * (sample_index * 2);
		val.idx = (eg_sample_locs_2x[0] >> offset) & 0xf;
		out_value[0] = (float)(val.idx + 8) / 16.0f;
		val.idx = (eg_sample_locs_2x[0] >> (offset + 4)) & 0xf;
		out_value[1] = (float)(val.idx + 8) / 16.0f;
		break;
	case 4:
		offset = 4 * (sample_index * 2);
		val.idx = (eg_sample_locs_4x[0] >> offset) & 0xf;
		out_value[0] = (float)(val.idx + 8) / 16.0f;
		val.idx = (eg_sample_locs_4x[0] >> (offset + 4)) & 0xf;
		out_value[1] = (float)(val.idx + 8) / 16.0f;
		break;
	case 8:
		/* 8x packs four samples per word; pick word and nibble pair. */
		offset = 4 * (sample_index % 4 * 2);
		index = (sample_index / 4);
		val.idx = (sample_locs_8x[index] >> offset) & 0xf;
		out_value[0] = (float)(val.idx + 8) / 16.0f;
		val.idx = (sample_locs_8x[index] >> (offset + 4)) & 0xf;
		out_value[1] = (float)(val.idx + 8) / 16.0f;
		break;
	}
}
1662
 
 
1663
 
/* Emit the MSAA sample locations and line/AA config registers for the given
 * sample count.  Unsupported counts fall back to non-MSAA programming. */
static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples, int ps_iter_samples)
{

	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
	unsigned max_dist = 0;

	switch (nr_samples) {
	default:
		/* Unknown count: treat as single-sampled below. */
		nr_samples = 0;
		break;
	case 2:
		radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, ARRAY_SIZE(eg_sample_locs_2x));
		radeon_emit_array(cs, eg_sample_locs_2x, ARRAY_SIZE(eg_sample_locs_2x));
		max_dist = eg_max_dist_2x;
		break;
	case 4:
		radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, ARRAY_SIZE(eg_sample_locs_4x));
		radeon_emit_array(cs, eg_sample_locs_4x, ARRAY_SIZE(eg_sample_locs_4x));
		max_dist = eg_max_dist_4x;
		break;
	case 8:
		radeon_set_context_reg_seq(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_0, ARRAY_SIZE(sample_locs_8x));
		radeon_emit_array(cs, sample_locs_8x, ARRAY_SIZE(sample_locs_8x));
		max_dist = max_dist_8x;
		break;
	}

	if (nr_samples > 1) {
		/* MSAA on: program sample count/spread and per-sample shading. */
		radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
		radeon_emit(cs, S_028C00_LAST_PIXEL(1) |
				     S_028C00_EXPAND_LINE_WIDTH(1)); /* R_028C00_PA_SC_LINE_CNTL */
		radeon_emit(cs, S_028C04_MSAA_NUM_SAMPLES(util_logbase2(nr_samples)) |
				     S_028C04_MAX_SAMPLE_DIST(max_dist)); /* R_028C04_PA_SC_AA_CONFIG */
		radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1,
				       EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
				       EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
				       EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1));
	} else {
		/* MSAA off: zero the AA config. */
		radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
		radeon_emit(cs, S_028C00_LAST_PIXEL(1)); /* R_028C00_PA_SC_LINE_CNTL */
		radeon_emit(cs, 0); /* R_028C04_PA_SC_AA_CONFIG */
		radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1,
				       EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
				       EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1));
	}
}
1709
 
 
1710
 
/* Emit the CB/RAT registers and texture resource descriptors for every bound
 * shader image.  Shared by the fragment and compute paths: pkt_flags is 0 for
 * graphics and RADEON_CP_PACKET3_COMPUTE_MODE for compute, which selects the
 * compute variants of the register-write packets.  immed_id_base/res_id_base
 * are the fetch-constant slots for the "immediate" and real resources, and
 * offset shifts the slot index (used to place buffers after images). */
static void evergreen_emit_image_state(struct r600_context *rctx, struct r600_atom *atom,
                                       int immed_id_base, int res_id_base, int offset, uint32_t pkt_flags)
{
	struct r600_image_state *state = (struct r600_image_state *)atom;
	struct pipe_framebuffer_state *fb_state = &rctx->framebuffer.state;
	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
	struct r600_texture *rtex;
	struct r600_resource *resource;
	int i;

	for (i = 0; i < R600_MAX_IMAGES; i++) {
		struct r600_image_view *image = &state->views[i];
		unsigned reloc, immed_reloc;
		int idx = i + offset;

		/* On the graphics path, RAT slots start after the bound
		 * colorbuffers (plus one for dual-source blending). */
		if (!pkt_flags)
			idx += fb_state->nr_cbufs + (rctx->dual_src_blend ? 1 : 0);
		if (!image->base.resource)
			continue;

		resource = (struct r600_resource *)image->base.resource;
		/* rtex stays NULL for buffer images; texture-only fields
		 * (CMASK, clear words) then fall back to defaults below. */
		if (resource->b.b.target != PIPE_BUFFER)
			rtex = (struct r600_texture *)image->base.resource;
		else
			rtex = NULL;

		reloc = radeon_add_to_buffer_list(&rctx->b,
						  &rctx->b.gfx,
						  resource,
						  RADEON_USAGE_READWRITE |
						  RADEON_PRIO_SHADER_RW_BUFFER);

		immed_reloc = radeon_add_to_buffer_list(&rctx->b,
							&rctx->b.gfx,
							resource->immed_buffer,
							RADEON_USAGE_READWRITE |
							RADEON_PRIO_SHADER_RW_BUFFER);

		/* 13 consecutive CB registers per RAT slot (stride 0x3C). */
		if (pkt_flags)
			radeon_compute_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + idx * 0x3C, 13);
		else
			radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + idx * 0x3C, 13);

		radeon_emit(cs, image->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
		radeon_emit(cs, image->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
		radeon_emit(cs, image->cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
		radeon_emit(cs, image->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
		radeon_emit(cs, image->cb_color_info); /* R_028C70_CB_COLOR0_INFO */
		radeon_emit(cs, image->cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
		radeon_emit(cs, image->cb_color_dim);		/* R_028C78_CB_COLOR0_DIM */
		radeon_emit(cs, rtex ? rtex->cmask.base_address_reg : image->cb_color_base);	/* R_028C7C_CB_COLOR0_CMASK */
		radeon_emit(cs, rtex ? rtex->cmask.slice_tile_max : 0);	/* R_028C80_CB_COLOR0_CMASK_SLICE */
		radeon_emit(cs, image->cb_color_fmask);	/* R_028C84_CB_COLOR0_FMASK */
		radeon_emit(cs, image->cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */
		radeon_emit(cs, rtex ? rtex->color_clear_value[0] : 0); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */
		radeon_emit(cs, rtex ? rtex->color_clear_value[1] : 0); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */

		/* NOP relocation packets patch the GPU addresses emitted above. */
		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */
		radeon_emit(cs, reloc);

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */
		radeon_emit(cs, reloc);

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C7C_CB_COLOR0_CMASK */
		radeon_emit(cs, reloc);

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C84_CB_COLOR0_FMASK */
		radeon_emit(cs, reloc);

		if (pkt_flags)
			radeon_compute_set_context_reg(cs, R_028B9C_CB_IMMED0_BASE + (idx * 4), resource->immed_buffer->gpu_address >> 8);
		else
			radeon_set_context_reg(cs, R_028B9C_CB_IMMED0_BASE + (idx * 4), resource->immed_buffer->gpu_address >> 8);

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /**/
		radeon_emit(cs, immed_reloc);

		/* 8-dword fetch constant for the immediate resource... */
		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
		radeon_emit(cs, (immed_id_base + i + offset) * 8);
		radeon_emit_array(cs, image->immed_resource_words, 8);

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
		radeon_emit(cs, immed_reloc);

		/* ...and for the real resource. */
		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
		radeon_emit(cs, (res_id_base + i + offset) * 8);
		radeon_emit_array(cs, image->resource_words, 8);

		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
		radeon_emit(cs, reloc);

		if (!image->skip_mip_address_reloc) {
			radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
			radeon_emit(cs, reloc);
		}
	}
}
1807
 
 
1808
 
/* Atom callback: emit fragment-shader image state (graphics packet mode). */
static void evergreen_emit_fragment_image_state(struct r600_context *rctx, struct r600_atom *atom)
{
	evergreen_emit_image_state(rctx, atom,
				   R600_IMAGE_IMMED_RESOURCE_OFFSET,
				   R600_IMAGE_REAL_RESOURCE_OFFSET, 0, 0);
}
1814
 
 
1815
 
/* Atom callback: emit compute-shader image state, using the CS fetch-constant
 * slots and compute-mode register packets. */
static void evergreen_emit_compute_image_state(struct r600_context *rctx, struct r600_atom *atom)
{
	evergreen_emit_image_state(rctx, atom,
				   EG_FETCH_CONSTANTS_OFFSET_CS + R600_IMAGE_IMMED_RESOURCE_OFFSET,
				   EG_FETCH_CONSTANTS_OFFSET_CS + R600_IMAGE_REAL_RESOURCE_OFFSET,
				   0, RADEON_CP_PACKET3_COMPUTE_MODE);
}
1822
 
 
1823
 
/* Atom callback: emit fragment-shader buffer (SSBO) state.  Buffers share the
 * image slots, placed after the bound fragment images via the slot offset. */
static void evergreen_emit_fragment_buffer_state(struct r600_context *rctx, struct r600_atom *atom)
{
	int offset = util_bitcount(rctx->fragment_images.enabled_mask);
	evergreen_emit_image_state(rctx, atom,
				   R600_IMAGE_IMMED_RESOURCE_OFFSET,
				   R600_IMAGE_REAL_RESOURCE_OFFSET, offset, 0);
}
1830
 
 
1831
 
/* Atom callback: emit compute-shader buffer (SSBO) state, placed after the
 * bound compute images, in compute packet mode. */
static void evergreen_emit_compute_buffer_state(struct r600_context *rctx, struct r600_atom *atom)
{
	int offset = util_bitcount(rctx->compute_images.enabled_mask);
	evergreen_emit_image_state(rctx, atom,
				   EG_FETCH_CONSTANTS_OFFSET_CS + R600_IMAGE_IMMED_RESOURCE_OFFSET,
				   EG_FETCH_CONSTANTS_OFFSET_CS + R600_IMAGE_REAL_RESOURCE_OFFSET,
				   offset, RADEON_CP_PACKET3_COMPUTE_MODE);
}
1839
 
 
1840
 
static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom)
1841
 
{
1842
 
        struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
1843
 
        struct pipe_framebuffer_state *state = &rctx->framebuffer.state;
1844
 
        unsigned nr_cbufs = state->nr_cbufs;
1845
 
        unsigned i, tl, br;
1846
 
        struct r600_texture *tex = NULL;
1847
 
        struct r600_surface *cb = NULL;
1848
 
 
1849
 
        /* XXX support more colorbuffers once we need them */
1850
 
        assert(nr_cbufs <= 8);
1851
 
        if (nr_cbufs > 8)
1852
 
                nr_cbufs = 8;
1853
 
 
1854
 
        /* Colorbuffers. */
1855
 
        for (i = 0; i < nr_cbufs; i++) {
1856
 
                unsigned reloc, cmask_reloc;
1857
 
 
1858
 
                cb = (struct r600_surface*)state->cbufs[i];
1859
 
                if (!cb) {
1860
 
                        radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
1861
 
                                               S_028C70_FORMAT(V_028C70_COLOR_INVALID));
1862
 
                        continue;
1863
 
                }
1864
 
 
1865
 
                tex = (struct r600_texture *)cb->base.texture;
1866
 
                reloc = radeon_add_to_buffer_list(&rctx->b,
1867
 
                                              &rctx->b.gfx,
1868
 
                                              (struct r600_resource*)cb->base.texture,
1869
 
                                              RADEON_USAGE_READWRITE |
1870
 
                                              (tex->resource.b.b.nr_samples > 1 ?
1871
 
                                                      RADEON_PRIO_COLOR_BUFFER_MSAA :
1872
 
                                                      RADEON_PRIO_COLOR_BUFFER));
1873
 
 
1874
 
                if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
1875
 
                        cmask_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
1876
 
                                tex->cmask_buffer, RADEON_USAGE_READWRITE | RADEON_PRIO_SEPARATE_META);
1877
 
                } else {
1878
 
                        cmask_reloc = reloc;
1879
 
                }
1880
 
 
1881
 
                radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13);
1882
 
                radeon_emit(cs, cb->cb_color_base);     /* R_028C60_CB_COLOR0_BASE */
1883
 
                radeon_emit(cs, cb->cb_color_pitch);    /* R_028C64_CB_COLOR0_PITCH */
1884
 
                radeon_emit(cs, cb->cb_color_slice);    /* R_028C68_CB_COLOR0_SLICE */
1885
 
                radeon_emit(cs, cb->cb_color_view);     /* R_028C6C_CB_COLOR0_VIEW */
1886
 
                radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */
1887
 
                radeon_emit(cs, cb->cb_color_attrib);   /* R_028C74_CB_COLOR0_ATTRIB */
1888
 
                radeon_emit(cs, cb->cb_color_dim);              /* R_028C78_CB_COLOR0_DIM */
1889
 
                radeon_emit(cs, tex->cmask.base_address_reg);   /* R_028C7C_CB_COLOR0_CMASK */
1890
 
                radeon_emit(cs, tex->cmask.slice_tile_max);     /* R_028C80_CB_COLOR0_CMASK_SLICE */
1891
 
                radeon_emit(cs, cb->cb_color_fmask);    /* R_028C84_CB_COLOR0_FMASK */
1892
 
                radeon_emit(cs, cb->cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */
1893
 
                radeon_emit(cs, tex->color_clear_value[0]); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */
1894
 
                radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */
1895
 
 
1896
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */
1897
 
                radeon_emit(cs, reloc);
1898
 
 
1899
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */
1900
 
                radeon_emit(cs, reloc);
1901
 
 
1902
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C7C_CB_COLOR0_CMASK */
1903
 
                radeon_emit(cs, cmask_reloc);
1904
 
 
1905
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C84_CB_COLOR0_FMASK */
1906
 
                radeon_emit(cs, reloc);
1907
 
        }
1908
 
        /* set CB_COLOR1_INFO for possible dual-src blending */
1909
 
        if (rctx->framebuffer.dual_src_blend && i == 1 && state->cbufs[0]) {
1910
 
                radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
1911
 
                                       cb->cb_color_info | tex->cb_color_info);
1912
 
                i++;
1913
 
        }
1914
 
        i += util_bitcount(rctx->fragment_images.enabled_mask);
1915
 
        i += util_bitcount(rctx->fragment_buffers.enabled_mask);
1916
 
        for (; i < 8 ; i++)
1917
 
                radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
1918
 
        for (; i < 12; i++)
1919
 
                radeon_set_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C, 0);
1920
 
 
1921
 
        /* ZS buffer. */
1922
 
        if (state->zsbuf) {
1923
 
                struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
1924
 
                unsigned reloc = radeon_add_to_buffer_list(&rctx->b,
1925
 
                                                       &rctx->b.gfx,
1926
 
                                                       (struct r600_resource*)state->zsbuf->texture,
1927
 
                                                       RADEON_USAGE_READWRITE |
1928
 
                                                       (zb->base.texture->nr_samples > 1 ?
1929
 
                                                               RADEON_PRIO_DEPTH_BUFFER_MSAA :
1930
 
                                                               RADEON_PRIO_DEPTH_BUFFER));
1931
 
 
1932
 
                radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
1933
 
 
1934
 
                radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 8);
1935
 
                radeon_emit(cs, zb->db_z_info);         /* R_028040_DB_Z_INFO */
1936
 
                radeon_emit(cs, zb->db_stencil_info);   /* R_028044_DB_STENCIL_INFO */
1937
 
                radeon_emit(cs, zb->db_depth_base);     /* R_028048_DB_Z_READ_BASE */
1938
 
                radeon_emit(cs, zb->db_stencil_base);   /* R_02804C_DB_STENCIL_READ_BASE */
1939
 
                radeon_emit(cs, zb->db_depth_base);     /* R_028050_DB_Z_WRITE_BASE */
1940
 
                radeon_emit(cs, zb->db_stencil_base);   /* R_028054_DB_STENCIL_WRITE_BASE */
1941
 
                radeon_emit(cs, zb->db_depth_size);     /* R_028058_DB_DEPTH_SIZE */
1942
 
                radeon_emit(cs, zb->db_depth_slice);    /* R_02805C_DB_DEPTH_SLICE */
1943
 
 
1944
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028048_DB_Z_READ_BASE */
1945
 
                radeon_emit(cs, reloc);
1946
 
 
1947
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_02804C_DB_STENCIL_READ_BASE */
1948
 
                radeon_emit(cs, reloc);
1949
 
 
1950
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028050_DB_Z_WRITE_BASE */
1951
 
                radeon_emit(cs, reloc);
1952
 
 
1953
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028054_DB_STENCIL_WRITE_BASE */
1954
 
                radeon_emit(cs, reloc);
1955
 
        } else if (rctx->screen->b.info.drm_minor >= 18) {
1956
 
                /* DRM 2.6.18 allows the INVALID format to disable depth/stencil.
1957
 
                 * Older kernels are out of luck. */
1958
 
                radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
1959
 
                radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
1960
 
                radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
1961
 
        }
1962
 
 
1963
 
        /* Framebuffer dimensions. */
1964
 
        evergreen_get_scissor_rect(rctx, 0, 0, state->width, state->height, &tl, &br);
1965
 
 
1966
 
        radeon_set_context_reg_seq(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, 2);
1967
 
        radeon_emit(cs, tl); /* R_028204_PA_SC_WINDOW_SCISSOR_TL */
1968
 
        radeon_emit(cs, br); /* R_028208_PA_SC_WINDOW_SCISSOR_BR */
1969
 
 
1970
 
        if (rctx->b.chip_class == EVERGREEN) {
1971
 
                evergreen_emit_msaa_state(rctx, rctx->framebuffer.nr_samples, rctx->ps_iter_samples);
1972
 
        } else {
1973
 
                cayman_emit_msaa_state(cs, rctx->framebuffer.nr_samples,
1974
 
                                       rctx->ps_iter_samples, 0);
1975
 
        }
1976
 
}
1977
 
 
1978
 
static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a)
1979
 
{
1980
 
        struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
1981
 
        struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a;
1982
 
        float offset_units = state->offset_units;
1983
 
        float offset_scale = state->offset_scale;
1984
 
        uint32_t pa_su_poly_offset_db_fmt_cntl = 0;
1985
 
 
1986
 
        if (!state->offset_units_unscaled) {
1987
 
                switch (state->zs_format) {
1988
 
                case PIPE_FORMAT_Z24X8_UNORM:
1989
 
                case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1990
 
                case PIPE_FORMAT_X8Z24_UNORM:
1991
 
                case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1992
 
                        offset_units *= 2.0f;
1993
 
                        pa_su_poly_offset_db_fmt_cntl =
1994
 
                                S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS((char)-24);
1995
 
                        break;
1996
 
                case PIPE_FORMAT_Z16_UNORM:
1997
 
                        offset_units *= 4.0f;
1998
 
                        pa_su_poly_offset_db_fmt_cntl =
1999
 
                                S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS((char)-16);
2000
 
                        break;
2001
 
                default:
2002
 
                        pa_su_poly_offset_db_fmt_cntl =
2003
 
                                S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS((char)-23) |
2004
 
                                S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
2005
 
                }
2006
 
        }
2007
 
 
2008
 
        radeon_set_context_reg_seq(cs, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 4);
2009
 
        radeon_emit(cs, fui(offset_scale));
2010
 
        radeon_emit(cs, fui(offset_units));
2011
 
        radeon_emit(cs, fui(offset_scale));
2012
 
        radeon_emit(cs, fui(offset_units));
2013
 
 
2014
 
        radeon_set_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
2015
 
                               pa_su_poly_offset_db_fmt_cntl);
2016
 
}
2017
 
 
2018
 
uint32_t evergreen_construct_rat_mask(struct r600_context *rctx, struct r600_cb_misc_state *a,
2019
 
                                      unsigned nr_cbufs)
2020
 
{
2021
 
        unsigned base_mask = 0;
2022
 
        unsigned dirty_mask = a->image_rat_enabled_mask;
2023
 
        while (dirty_mask) {
2024
 
                unsigned idx = u_bit_scan(&dirty_mask);
2025
 
                base_mask |= (0xf << (idx * 4));
2026
 
        }
2027
 
        unsigned offset = util_last_bit(a->image_rat_enabled_mask);
2028
 
        dirty_mask = a->buffer_rat_enabled_mask;
2029
 
        while (dirty_mask) {
2030
 
                unsigned idx = u_bit_scan(&dirty_mask);
2031
 
                base_mask |= (0xf << (idx + offset) * 4);
2032
 
        }
2033
 
        return base_mask << (nr_cbufs * 4);
2034
 
}
2035
 
 
2036
 
static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom)
2037
 
{
2038
 
        struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
2039
 
        struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
2040
 
        unsigned fb_colormask = a->bound_cbufs_target_mask;
2041
 
        unsigned ps_colormask = a->ps_color_export_mask;
2042
 
        unsigned rat_colormask = evergreen_construct_rat_mask(rctx, a, a->nr_cbufs);
2043
 
        radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
2044
 
        radeon_emit(cs, (a->blend_colormask & fb_colormask) | rat_colormask); /* R_028238_CB_TARGET_MASK */
2045
 
        /* This must match the used export instructions exactly.
2046
 
         * Other values may lead to undefined behavior and hangs.
2047
 
         */
2048
 
        radeon_emit(cs, ps_colormask); /* R_02823C_CB_SHADER_MASK */
2049
 
}
2050
 
 
2051
 
static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
2052
 
{
2053
 
        struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
2054
 
        struct r600_db_state *a = (struct r600_db_state*)atom;
2055
 
 
2056
 
        if (a->rsurf && a->rsurf->db_htile_surface) {
2057
 
                struct r600_texture *rtex = (struct r600_texture *)a->rsurf->base.texture;
2058
 
                unsigned reloc_idx;
2059
 
 
2060
 
                radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
2061
 
                radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
2062
 
                radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
2063
 
                radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
2064
 
                reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, &rtex->resource,
2065
 
                                                  RADEON_USAGE_READWRITE | RADEON_PRIO_SEPARATE_META);
2066
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
2067
 
                radeon_emit(cs, reloc_idx);
2068
 
        } else {
2069
 
                radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, 0);
2070
 
                radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0);
2071
 
        }
2072
 
}
2073
 
 
2074
 
static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
2075
 
{
2076
 
        struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
2077
 
        struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom;
2078
 
        unsigned db_render_control = 0;
2079
 
        unsigned db_count_control = 0;
2080
 
        unsigned db_render_override =
2081
 
                S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
2082
 
                S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
2083
 
 
2084
 
        if (rctx->b.num_occlusion_queries > 0 &&
2085
 
            !a->occlusion_queries_disabled) {
2086
 
                db_count_control |= S_028004_PERFECT_ZPASS_COUNTS(1);
2087
 
                if (rctx->b.chip_class == CAYMAN) {
2088
 
                        db_count_control |= S_028004_SAMPLE_RATE(a->log_samples);
2089
 
                }
2090
 
                db_render_override |= S_02800C_NOOP_CULL_DISABLE(1);
2091
 
        } else {
2092
 
                db_count_control |= S_028004_ZPASS_INCREMENT_DISABLE(1);
2093
 
        }
2094
 
 
2095
 
        /* This is to fix a lockup when hyperz and alpha test are enabled at
2096
 
         * the same time somehow GPU get confuse on which order to pick for
2097
 
         * z test
2098
 
         */
2099
 
        if (rctx->alphatest_state.sx_alpha_test_control)
2100
 
                db_render_override |= S_02800C_FORCE_SHADER_Z_ORDER(1);
2101
 
 
2102
 
        if (a->flush_depthstencil_through_cb) {
2103
 
                assert(a->copy_depth || a->copy_stencil);
2104
 
 
2105
 
                db_render_control |= S_028000_DEPTH_COPY_ENABLE(a->copy_depth) |
2106
 
                                     S_028000_STENCIL_COPY_ENABLE(a->copy_stencil) |
2107
 
                                     S_028000_COPY_CENTROID(1) |
2108
 
                                     S_028000_COPY_SAMPLE(a->copy_sample);
2109
 
        } else if (a->flush_depth_inplace || a->flush_stencil_inplace) {
2110
 
                db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(a->flush_depth_inplace) |
2111
 
                                     S_028000_STENCIL_COMPRESS_DISABLE(a->flush_stencil_inplace);
2112
 
                db_render_override |= S_02800C_DISABLE_PIXEL_RATE_TILES(1);
2113
 
        }
2114
 
        if (a->htile_clear) {
2115
 
                /* FIXME we might want to disable cliprect here */
2116
 
                db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(1);
2117
 
        }
2118
 
 
2119
 
        radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
2120
 
        radeon_emit(cs, db_render_control); /* R_028000_DB_RENDER_CONTROL */
2121
 
        radeon_emit(cs, db_count_control); /* R_028004_DB_COUNT_CONTROL */
2122
 
        radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
2123
 
        radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, a->db_shader_control);
2124
 
}
2125
 
 
2126
 
static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
2127
 
                                          struct r600_vertexbuf_state *state,
2128
 
                                          unsigned resource_offset,
2129
 
                                          unsigned pkt_flags)
2130
 
{
2131
 
        struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
2132
 
        uint32_t dirty_mask = state->dirty_mask;
2133
 
 
2134
 
        while (dirty_mask) {
2135
 
                struct pipe_vertex_buffer *vb;
2136
 
                struct r600_resource *rbuffer;
2137
 
                uint64_t va;
2138
 
                unsigned buffer_index = u_bit_scan(&dirty_mask);
2139
 
 
2140
 
                vb = &state->vb[buffer_index];
2141
 
                rbuffer = (struct r600_resource*)vb->buffer.resource;
2142
 
                assert(rbuffer);
2143
 
 
2144
 
                va = rbuffer->gpu_address + vb->buffer_offset;
2145
 
 
2146
 
                /* fetch resources start at index 992 */
2147
 
                radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
2148
 
                radeon_emit(cs, (resource_offset + buffer_index) * 8);
2149
 
                radeon_emit(cs, va); /* RESOURCEi_WORD0 */
2150
 
                radeon_emit(cs, rbuffer->b.b.width0 - vb->buffer_offset - 1); /* RESOURCEi_WORD1 */
2151
 
                radeon_emit(cs, /* RESOURCEi_WORD2 */
2152
 
                                 S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
2153
 
                                 S_030008_STRIDE(vb->stride) |
2154
 
                                 S_030008_BASE_ADDRESS_HI(va >> 32UL));
2155
 
                radeon_emit(cs, /* RESOURCEi_WORD3 */
2156
 
                                 S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
2157
 
                                 S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
2158
 
                                 S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
2159
 
                                 S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W));
2160
 
                radeon_emit(cs, 0); /* RESOURCEi_WORD4 */
2161
 
                radeon_emit(cs, 0); /* RESOURCEi_WORD5 */
2162
 
                radeon_emit(cs, 0); /* RESOURCEi_WORD6 */
2163
 
                radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD7 */
2164
 
 
2165
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
2166
 
                radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
2167
 
                                                      RADEON_USAGE_READ | RADEON_PRIO_VERTEX_BUFFER));
2168
 
        }
2169
 
        state->dirty_mask = 0;
2170
 
}
2171
 
 
2172
 
static void evergreen_fs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
2173
 
{
2174
 
        evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state, EG_FETCH_CONSTANTS_OFFSET_FS, 0);
2175
 
}
2176
 
 
2177
 
static void evergreen_cs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
2178
 
{
2179
 
        evergreen_emit_vertex_buffers(rctx, &rctx->cs_vertex_buffer_state, EG_FETCH_CONSTANTS_OFFSET_CS,
2180
 
                                      RADEON_CP_PACKET3_COMPUTE_MODE);
2181
 
}
2182
 
 
2183
 
static void evergreen_emit_constant_buffers(struct r600_context *rctx,
2184
 
                                            struct r600_constbuf_state *state,
2185
 
                                            unsigned buffer_id_base,
2186
 
                                            unsigned reg_alu_constbuf_size,
2187
 
                                            unsigned reg_alu_const_cache,
2188
 
                                            unsigned pkt_flags)
2189
 
{
2190
 
        struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
2191
 
        uint32_t dirty_mask = state->dirty_mask;
2192
 
 
2193
 
        while (dirty_mask) {
2194
 
                struct pipe_constant_buffer *cb;
2195
 
                struct r600_resource *rbuffer;
2196
 
                uint64_t va;
2197
 
                unsigned buffer_index = ffs(dirty_mask) - 1;
2198
 
                unsigned gs_ring_buffer = (buffer_index == R600_GS_RING_CONST_BUFFER);
2199
 
 
2200
 
                cb = &state->cb[buffer_index];
2201
 
                rbuffer = (struct r600_resource*)cb->buffer;
2202
 
                assert(rbuffer);
2203
 
 
2204
 
                va = rbuffer->gpu_address + cb->buffer_offset;
2205
 
 
2206
 
                if (buffer_index < R600_MAX_HW_CONST_BUFFERS) {
2207
 
                        radeon_set_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4,
2208
 
                                                    DIV_ROUND_UP(cb->buffer_size, 256), pkt_flags);
2209
 
                        radeon_set_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8,
2210
 
                                                    pkt_flags);
2211
 
                        radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
2212
 
                        radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
2213
 
                                                                  RADEON_USAGE_READ | RADEON_PRIO_CONST_BUFFER));
2214
 
                }
2215
 
 
2216
 
                radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
2217
 
                radeon_emit(cs, (buffer_id_base + buffer_index) * 8);
2218
 
                radeon_emit(cs, va); /* RESOURCEi_WORD0 */
2219
 
                radeon_emit(cs, cb->buffer_size -1); /* RESOURCEi_WORD1 */
2220
 
                radeon_emit(cs, /* RESOURCEi_WORD2 */
2221
 
                            S_030008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) |
2222
 
                            S_030008_STRIDE(gs_ring_buffer ? 4 : 16) |
2223
 
                            S_030008_BASE_ADDRESS_HI(va >> 32UL) |
2224
 
                            S_030008_DATA_FORMAT(FMT_32_32_32_32_FLOAT));
2225
 
                radeon_emit(cs, /* RESOURCEi_WORD3 */
2226
 
                                 S_03000C_UNCACHED(gs_ring_buffer ? 1 : 0) |
2227
 
                                 S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
2228
 
                                 S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
2229
 
                                 S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
2230
 
                                 S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W));
2231
 
                radeon_emit(cs, 0); /* RESOURCEi_WORD4 */
2232
 
                radeon_emit(cs, 0); /* RESOURCEi_WORD5 */
2233
 
                radeon_emit(cs, 0); /* RESOURCEi_WORD6 */
2234
 
                radeon_emit(cs, /* RESOURCEi_WORD7 */
2235
 
                            S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER));
2236
 
 
2237
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
2238
 
                radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
2239
 
                                                      RADEON_USAGE_READ | RADEON_PRIO_CONST_BUFFER));
2240
 
 
2241
 
                dirty_mask &= ~(1 << buffer_index);
2242
 
        }
2243
 
        state->dirty_mask = 0;
2244
 
}
2245
 
 
2246
 
/* VS constants can be in VS/ES (same space) or LS if tess is enabled */
2247
 
static void evergreen_emit_vs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
2248
 
{
2249
 
        if (rctx->vs_shader->current->shader.vs_as_ls) {
2250
 
                evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX],
2251
 
                                                EG_FETCH_CONSTANTS_OFFSET_LS,
2252
 
                                                R_028FC0_ALU_CONST_BUFFER_SIZE_LS_0,
2253
 
                                                R_028F40_ALU_CONST_CACHE_LS_0,
2254
 
                                                0 /* PKT3 flags */);
2255
 
        } else {
2256
 
                evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX],
2257
 
                                                EG_FETCH_CONSTANTS_OFFSET_VS,
2258
 
                                                R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
2259
 
                                                R_028980_ALU_CONST_CACHE_VS_0,
2260
 
                                                0 /* PKT3 flags */);
2261
 
        }
2262
 
}
2263
 
 
2264
 
static void evergreen_emit_gs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
2265
 
{
2266
 
        evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY],
2267
 
                                        EG_FETCH_CONSTANTS_OFFSET_GS,
2268
 
                                        R_0281C0_ALU_CONST_BUFFER_SIZE_GS_0,
2269
 
                                        R_0289C0_ALU_CONST_CACHE_GS_0,
2270
 
                                        0 /* PKT3 flags */);
2271
 
}
2272
 
 
2273
 
static void evergreen_emit_ps_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
2274
 
{
2275
 
        evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT],
2276
 
                                        EG_FETCH_CONSTANTS_OFFSET_PS,
2277
 
                                        R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
2278
 
                                        R_028940_ALU_CONST_CACHE_PS_0,
2279
 
                                        0 /* PKT3 flags */);
2280
 
}
2281
 
 
2282
 
static void evergreen_emit_cs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
2283
 
{
2284
 
        evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_COMPUTE],
2285
 
                                        EG_FETCH_CONSTANTS_OFFSET_CS,
2286
 
                                        R_028FC0_ALU_CONST_BUFFER_SIZE_LS_0,
2287
 
                                        R_028F40_ALU_CONST_CACHE_LS_0,
2288
 
                                        RADEON_CP_PACKET3_COMPUTE_MODE);
2289
 
}
2290
 
 
2291
 
/* tes constants can be emitted to VS or ES - which are common */
2292
 
static void evergreen_emit_tes_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
2293
 
{
2294
 
        if (!rctx->tes_shader)
2295
 
                return;
2296
 
        evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_TESS_EVAL],
2297
 
                                        EG_FETCH_CONSTANTS_OFFSET_VS,
2298
 
                                        R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
2299
 
                                        R_028980_ALU_CONST_CACHE_VS_0,
2300
 
                                        0);
2301
 
}
2302
 
 
2303
 
static void evergreen_emit_tcs_constant_buffers(struct r600_context *rctx, struct r600_atom *atom)
2304
 
{
2305
 
        if (!rctx->tes_shader)
2306
 
                return;
2307
 
        evergreen_emit_constant_buffers(rctx, &rctx->constbuf_state[PIPE_SHADER_TESS_CTRL],
2308
 
                                        EG_FETCH_CONSTANTS_OFFSET_HS,
2309
 
                                        R_028F80_ALU_CONST_BUFFER_SIZE_HS_0,
2310
 
                                        R_028F00_ALU_CONST_CACHE_HS_0,
2311
 
                                        0);
2312
 
}
2313
 
 
2314
 
void evergreen_setup_scratch_buffers(struct r600_context *rctx) {
2315
 
        static const struct {
2316
 
                unsigned ring_base;
2317
 
                unsigned item_size;
2318
 
                unsigned ring_size;
2319
 
        } regs[EG_NUM_HW_STAGES] = {
2320
 
                [R600_HW_STAGE_PS] = { R_008C68_SQ_PSTMP_RING_BASE, R_028914_SQ_PSTMP_RING_ITEMSIZE, R_008C6C_SQ_PSTMP_RING_SIZE },
2321
 
                [R600_HW_STAGE_VS] = { R_008C60_SQ_VSTMP_RING_BASE, R_028910_SQ_VSTMP_RING_ITEMSIZE, R_008C64_SQ_VSTMP_RING_SIZE },
2322
 
                [R600_HW_STAGE_GS] = { R_008C58_SQ_GSTMP_RING_BASE, R_02890C_SQ_GSTMP_RING_ITEMSIZE, R_008C5C_SQ_GSTMP_RING_SIZE },
2323
 
                [R600_HW_STAGE_ES] = { R_008C50_SQ_ESTMP_RING_BASE, R_028908_SQ_ESTMP_RING_ITEMSIZE, R_008C54_SQ_ESTMP_RING_SIZE },
2324
 
                [EG_HW_STAGE_LS] = { R_008E10_SQ_LSTMP_RING_BASE, R_028830_SQ_LSTMP_RING_ITEMSIZE, R_008E14_SQ_LSTMP_RING_SIZE },
2325
 
                [EG_HW_STAGE_HS] = { R_008E18_SQ_HSTMP_RING_BASE, R_028834_SQ_HSTMP_RING_ITEMSIZE, R_008E1C_SQ_HSTMP_RING_SIZE }
2326
 
        };
2327
 
 
2328
 
        for (unsigned i = 0; i < EG_NUM_HW_STAGES; i++) {
2329
 
                struct r600_pipe_shader *stage = rctx->hw_shader_stages[i].shader;
2330
 
 
2331
 
                if (stage && unlikely(stage->scratch_space_needed)) {
2332
 
                        r600_setup_scratch_area_for_shader(rctx, stage,
2333
 
                                &rctx->scratch_buffers[i], regs[i].ring_base, regs[i].item_size, regs[i].ring_size);
2334
 
                }
2335
 
        }
2336
 
}
2337
 
 
2338
 
static void evergreen_emit_sampler_views(struct r600_context *rctx,
2339
 
                                         struct r600_samplerview_state *state,
2340
 
                                         unsigned resource_id_base, unsigned pkt_flags)
2341
 
{
2342
 
        struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
2343
 
        uint32_t dirty_mask = state->dirty_mask;
2344
 
 
2345
 
        while (dirty_mask) {
2346
 
                struct r600_pipe_sampler_view *rview;
2347
 
                unsigned resource_index = u_bit_scan(&dirty_mask);
2348
 
                unsigned reloc;
2349
 
 
2350
 
                rview = state->views[resource_index];
2351
 
                assert(rview);
2352
 
 
2353
 
                radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
2354
 
                radeon_emit(cs, (resource_id_base + resource_index) * 8);
2355
 
                radeon_emit_array(cs, rview->tex_resource_words, 8);
2356
 
 
2357
 
                reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rview->tex_resource,
2358
 
                                              RADEON_USAGE_READ |
2359
 
                                              r600_get_sampler_view_priority(rview->tex_resource));
2360
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
2361
 
                radeon_emit(cs, reloc);
2362
 
 
2363
 
                if (!rview->skip_mip_address_reloc) {
2364
 
                        radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
2365
 
                        radeon_emit(cs, reloc);
2366
 
                }
2367
 
        }
2368
 
        state->dirty_mask = 0;
2369
 
}
2370
 
 
2371
 
static void evergreen_emit_vs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
2372
 
{
2373
 
        if (rctx->vs_shader->current->shader.vs_as_ls) {
2374
 
                evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views,
2375
 
                                             EG_FETCH_CONSTANTS_OFFSET_LS + R600_MAX_CONST_BUFFERS, 0);
2376
 
        } else {
2377
 
                evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views,
2378
 
                                             EG_FETCH_CONSTANTS_OFFSET_VS + R600_MAX_CONST_BUFFERS, 0);
2379
 
        }
2380
 
}
2381
 
 
2382
 
static void evergreen_emit_gs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
2383
 
{
2384
 
        evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views,
2385
 
                                     EG_FETCH_CONSTANTS_OFFSET_GS + R600_MAX_CONST_BUFFERS, 0);
2386
 
}
2387
 
 
2388
 
static void evergreen_emit_tcs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
2389
 
{
2390
 
        evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_TESS_CTRL].views,
2391
 
                                     EG_FETCH_CONSTANTS_OFFSET_HS + R600_MAX_CONST_BUFFERS, 0);
2392
 
}
2393
 
 
2394
 
static void evergreen_emit_tes_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
2395
 
{
2396
 
        if (!rctx->tes_shader)
2397
 
                return;
2398
 
        evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_TESS_EVAL].views,
2399
 
                                     EG_FETCH_CONSTANTS_OFFSET_VS + R600_MAX_CONST_BUFFERS, 0);
2400
 
}
2401
 
 
2402
 
static void evergreen_emit_ps_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
2403
 
{
2404
 
        evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views,
2405
 
                                     EG_FETCH_CONSTANTS_OFFSET_PS + R600_MAX_CONST_BUFFERS, 0);
2406
 
}
2407
 
 
2408
 
static void evergreen_emit_cs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
2409
 
{
2410
 
        evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views,
2411
 
                                     EG_FETCH_CONSTANTS_OFFSET_CS + R600_MAX_CONST_BUFFERS, RADEON_CP_PACKET3_COMPUTE_MODE);
2412
 
}
2413
 
 
2414
 
static void evergreen_convert_border_color(union pipe_color_union *in,
2415
 
                                           union pipe_color_union *out,
2416
 
                                           enum pipe_format format)
2417
 
{
2418
 
        if (util_format_is_pure_integer(format) &&
2419
 
                 !util_format_is_depth_or_stencil(format)) {
2420
 
                const struct util_format_description *d = util_format_description(format);
2421
 
 
2422
 
                for (int i = 0; i < d->nr_channels; ++i) {
2423
 
                        int cs = d->channel[i].size;
2424
 
                        if (d->channel[i].type == UTIL_FORMAT_TYPE_SIGNED)
2425
 
                                out->f[i] = (double)(in->i[i]) / ((1ul << (cs - 1)) - 1 );
2426
 
                        else if (d->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)
2427
 
                                out->f[i] = (double)(in->ui[i]) / ((1ul << cs) - 1 );
2428
 
                        else
2429
 
                                out->f[i] = 0;
2430
 
                }
2431
 
 
2432
 
        } else {
2433
 
                switch (format) {
2434
 
                case PIPE_FORMAT_X24S8_UINT:
2435
 
                case PIPE_FORMAT_X32_S8X24_UINT:
2436
 
                        out->f[0] = (double)(in->ui[0]) / 255.0;
2437
 
                        out->f[1] = out->f[2] = out->f[3] = 0.0f;
2438
 
                        break;
2439
 
                default:
2440
 
                        memcpy(out->f, in->f, 4 * sizeof(float));
2441
 
                }
2442
 
        }
2443
 
}
2444
 
 
2445
 
static void evergreen_emit_sampler_states(struct r600_context *rctx,
2446
 
                                struct r600_textures_info *texinfo,
2447
 
                                unsigned resource_id_base,
2448
 
                                unsigned border_index_reg,
2449
 
                                unsigned pkt_flags)
2450
 
{
2451
 
        struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
2452
 
        uint32_t dirty_mask = texinfo->states.dirty_mask;
2453
 
        union pipe_color_union border_color = {{0,0,0,1}};
2454
 
        union pipe_color_union *border_color_ptr = &border_color;
2455
 
 
2456
 
        while (dirty_mask) {
2457
 
                struct r600_pipe_sampler_state *rstate;
2458
 
                unsigned i = u_bit_scan(&dirty_mask);
2459
 
 
2460
 
                rstate = texinfo->states.states[i];
2461
 
                assert(rstate);
2462
 
 
2463
 
                if (rstate->border_color_use) {
2464
 
                        struct r600_pipe_sampler_view   *rview = texinfo->views.views[i];
2465
 
                        if (rview) {
2466
 
                                evergreen_convert_border_color(&rstate->border_color,
2467
 
                                                               &border_color, rview->base.format);
2468
 
                        } else {
2469
 
                                border_color_ptr = &rstate->border_color;
2470
 
                        }
2471
 
                }
2472
 
 
2473
 
                radeon_emit(cs, PKT3(PKT3_SET_SAMPLER, 3, 0) | pkt_flags);
2474
 
                radeon_emit(cs, (resource_id_base + i) * 3);
2475
 
                radeon_emit_array(cs, rstate->tex_sampler_words, 3);
2476
 
 
2477
 
                if (rstate->border_color_use) {
2478
 
                        radeon_set_config_reg_seq(cs, border_index_reg, 5);
2479
 
                        radeon_emit(cs, i);
2480
 
                        radeon_emit_array(cs, border_color_ptr->ui, 4);
2481
 
                }
2482
 
        }
2483
 
        texinfo->states.dirty_mask = 0;
2484
 
}
2485
 
 
2486
 
static void evergreen_emit_vs_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
2487
 
{
2488
 
        if (rctx->vs_shader->current->shader.vs_as_ls) {
2489
 
                evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_VERTEX], 72,
2490
 
                                              R_00A450_TD_LS_SAMPLER0_BORDER_COLOR_INDEX, 0);
2491
 
        } else {
2492
 
                evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_VERTEX], 18,
2493
 
                                              R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, 0);
2494
 
        }
2495
 
}
2496
 
 
2497
 
static void evergreen_emit_gs_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
2498
 
{
2499
 
        evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY], 36,
2500
 
                                      R_00A428_TD_GS_SAMPLER0_BORDER_INDEX, 0);
2501
 
}
2502
 
 
2503
 
static void evergreen_emit_tcs_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
2504
 
{
2505
 
        evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_TESS_CTRL], 54,
2506
 
                                      R_00A43C_TD_HS_SAMPLER0_BORDER_COLOR_INDEX, 0);
2507
 
}
2508
 
 
2509
 
static void evergreen_emit_tes_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
2510
 
{
2511
 
        if (!rctx->tes_shader)
2512
 
                return;
2513
 
        evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_TESS_EVAL], 18,
2514
 
                                      R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, 0);
2515
 
}
2516
 
 
2517
 
static void evergreen_emit_ps_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
2518
 
{
2519
 
        evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT], 0,
2520
 
                                      R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, 0);
2521
 
}
2522
 
 
2523
 
static void evergreen_emit_cs_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
2524
 
{
2525
 
        evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE], 90,
2526
 
                                      R_00A464_TD_CS_SAMPLER0_BORDER_INDEX,
2527
 
                                      RADEON_CP_PACKET3_COMPUTE_MODE);
2528
 
}
2529
 
 
2530
 
static void evergreen_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a)
2531
 
{
2532
 
        struct r600_sample_mask *s = (struct r600_sample_mask*)a;
2533
 
        uint8_t mask = s->sample_mask;
2534
 
 
2535
 
        radeon_set_context_reg(&rctx->b.gfx.cs, R_028C3C_PA_SC_AA_MASK,
2536
 
                               mask | (mask << 8) | (mask << 16) | (mask << 24));
2537
 
}
2538
 
 
2539
 
static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a)
2540
 
{
2541
 
        struct r600_sample_mask *s = (struct r600_sample_mask*)a;
2542
 
        struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
2543
 
        uint16_t mask = s->sample_mask;
2544
 
 
2545
 
        radeon_set_context_reg_seq(cs, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
2546
 
        radeon_emit(cs, mask | (mask << 16)); /* X0Y0_X1Y0 */
2547
 
        radeon_emit(cs, mask | (mask << 16)); /* X0Y1_X1Y1 */
2548
 
}
2549
 
 
2550
 
static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a)
2551
 
{
2552
 
        struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
2553
 
        struct r600_cso_state *state = (struct r600_cso_state*)a;
2554
 
        struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
2555
 
 
2556
 
        if (!shader)
2557
 
                return;
2558
 
 
2559
 
        radeon_set_context_reg(cs, R_0288A4_SQ_PGM_START_FS,
2560
 
                               (shader->buffer->gpu_address + shader->offset) >> 8);
2561
 
        radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
2562
 
        radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, shader->buffer,
2563
 
                                                  RADEON_USAGE_READ |
2564
 
                                                  RADEON_PRIO_SHADER_BINARY));
2565
 
}
2566
 
 
2567
 
static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a)
2568
 
{
2569
 
        struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
2570
 
        struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a;
2571
 
 
2572
 
        uint32_t v = 0, v2 = 0, primid = 0, tf_param = 0;
2573
 
 
2574
 
        if (rctx->vs_shader->current->shader.vs_as_gs_a) {
2575
 
                v2 = S_028A40_MODE(V_028A40_GS_SCENARIO_A);
2576
 
                primid = 1;
2577
 
        }
2578
 
 
2579
 
        if (state->geom_enable) {
2580
 
                uint32_t cut_val;
2581
 
 
2582
 
                if (rctx->gs_shader->gs_max_out_vertices <= 128)
2583
 
                        cut_val = V_028A40_GS_CUT_128;
2584
 
                else if (rctx->gs_shader->gs_max_out_vertices <= 256)
2585
 
                        cut_val = V_028A40_GS_CUT_256;
2586
 
                else if (rctx->gs_shader->gs_max_out_vertices <= 512)
2587
 
                        cut_val = V_028A40_GS_CUT_512;
2588
 
                else
2589
 
                        cut_val = V_028A40_GS_CUT_1024;
2590
 
 
2591
 
                v = S_028B54_GS_EN(1) |
2592
 
                    S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
2593
 
                if (!rctx->tes_shader)
2594
 
                        v |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL);
2595
 
 
2596
 
                v2 = S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
2597
 
                        S_028A40_CUT_MODE(cut_val);
2598
 
 
2599
 
                if (rctx->gs_shader->current->shader.gs_prim_id_input)
2600
 
                        primid = 1;
2601
 
        }
2602
 
 
2603
 
        if (rctx->tes_shader) {
2604
 
                uint32_t type, partitioning, topology;
2605
 
                struct tgsi_shader_info *info = &rctx->tes_shader->current->selector->info;
2606
 
                unsigned tes_prim_mode = info->properties[TGSI_PROPERTY_TES_PRIM_MODE];
2607
 
                unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING];
2608
 
                bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW];
2609
 
                bool tes_point_mode = info->properties[TGSI_PROPERTY_TES_POINT_MODE];
2610
 
                switch (tes_prim_mode) {
2611
 
                case PIPE_PRIM_LINES:
2612
 
                        type = V_028B6C_TESS_ISOLINE;
2613
 
                        break;
2614
 
                case PIPE_PRIM_TRIANGLES:
2615
 
                        type = V_028B6C_TESS_TRIANGLE;
2616
 
                        break;
2617
 
                case PIPE_PRIM_QUADS:
2618
 
                        type = V_028B6C_TESS_QUAD;
2619
 
                        break;
2620
 
                default:
2621
 
                        assert(0);
2622
 
                        return;
2623
 
                }
2624
 
 
2625
 
                switch (tes_spacing) {
2626
 
                case PIPE_TESS_SPACING_FRACTIONAL_ODD:
2627
 
                        partitioning = V_028B6C_PART_FRAC_ODD;
2628
 
                        break;
2629
 
                case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
2630
 
                        partitioning = V_028B6C_PART_FRAC_EVEN;
2631
 
                        break;
2632
 
                case PIPE_TESS_SPACING_EQUAL:
2633
 
                        partitioning = V_028B6C_PART_INTEGER;
2634
 
                        break;
2635
 
                default:
2636
 
                        assert(0);
2637
 
                        return;
2638
 
                }
2639
 
 
2640
 
                if (tes_point_mode)
2641
 
                        topology = V_028B6C_OUTPUT_POINT;
2642
 
                else if (tes_prim_mode == PIPE_PRIM_LINES)
2643
 
                        topology = V_028B6C_OUTPUT_LINE;
2644
 
                else if (tes_vertex_order_cw)
2645
 
                        /* XXX follow radeonsi and invert */
2646
 
                        topology = V_028B6C_OUTPUT_TRIANGLE_CCW;
2647
 
                else
2648
 
                        topology = V_028B6C_OUTPUT_TRIANGLE_CW;
2649
 
 
2650
 
                tf_param = S_028B6C_TYPE(type) |
2651
 
                        S_028B6C_PARTITIONING(partitioning) |
2652
 
                        S_028B6C_TOPOLOGY(topology);
2653
 
        }
2654
 
 
2655
 
        if (rctx->tes_shader) {
2656
 
                v |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
2657
 
                     S_028B54_HS_EN(1);
2658
 
                if (!state->geom_enable)
2659
 
                        v |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
2660
 
                else
2661
 
                        v |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS);
2662
 
        }
2663
 
 
2664
 
        radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, v ? 1 : 0 );
2665
 
        radeon_set_context_reg(cs, R_028B54_VGT_SHADER_STAGES_EN, v);
2666
 
        radeon_set_context_reg(cs, R_028A40_VGT_GS_MODE, v2);
2667
 
        radeon_set_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, primid);
2668
 
        radeon_set_context_reg(cs, R_028B6C_VGT_TF_PARAM, tf_param);
2669
 
}
2670
 
 
2671
 
static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
2672
 
{
2673
 
        struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
2674
 
        struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a;
2675
 
        struct r600_resource *rbuffer;
2676
 
 
2677
 
        radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
2678
 
        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
2679
 
        radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
2680
 
 
2681
 
        if (state->enable) {
2682
 
                rbuffer =(struct r600_resource*)state->esgs_ring.buffer;
2683
 
                radeon_set_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE,
2684
 
                                rbuffer->gpu_address >> 8);
2685
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
2686
 
                radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
2687
 
                                                      RADEON_USAGE_READWRITE |
2688
 
                                                      RADEON_PRIO_SHADER_RINGS));
2689
 
                radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
2690
 
                                state->esgs_ring.buffer_size >> 8);
2691
 
 
2692
 
                rbuffer =(struct r600_resource*)state->gsvs_ring.buffer;
2693
 
                radeon_set_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE,
2694
 
                                rbuffer->gpu_address >> 8);
2695
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
2696
 
                radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
2697
 
                                                      RADEON_USAGE_READWRITE |
2698
 
                                                      RADEON_PRIO_SHADER_RINGS));
2699
 
                radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
2700
 
                                state->gsvs_ring.buffer_size >> 8);
2701
 
        } else {
2702
 
                radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 0);
2703
 
                radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 0);
2704
 
        }
2705
 
 
2706
 
        radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
2707
 
        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
2708
 
        radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
2709
 
}
2710
 
 
2711
 
/* Store the config/context register defaults common to all Cayman parts into
 * the start-of-CS command buffer.  The chip class / family / drm-minor
 * parameters are unused here; the signature mirrors the evergreen variant.
 * NOTE: the exact sequence of stores defines the emitted packet stream —
 * do not reorder or split the *_seq groups. */
void cayman_init_common_regs(struct r600_command_buffer *cb,
			     enum chip_class ctx_chip_class,
			     enum radeon_family ctx_family,
			     int ctx_drm_minor)
{
	r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 2);
	r600_store_value(cb, S_008C00_EXPORT_SRC_C(1)); /* R_008C00_SQ_CONFIG */
	/* always set the temp clauses */
	r600_store_value(cb, S_008C04_NUM_CLAUSE_TEMP_GPRS(4)); /* R_008C04_SQ_GPR_RESOURCE_MGMT_1 */

	/* Zero the global GPR reservation registers. */
	r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
	r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
	r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */

	r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));

	r600_store_context_reg_seq(cb, R_028350_SX_MISC, 2);
	r600_store_value(cb, 0);
	r600_store_value(cb, S_028354_SURFACE_SYNC_MASK(0xf));

	/* The evergreen variant notes the cs checker requires this register
	 * to be set; program it here as well. */
	r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
}
2733
 
 
2734
 
/* Build the Cayman start-of-CS command buffer: the fixed preamble emitted at
 * the beginning of every command stream (context control, query enables, and
 * a table of register defaults).  The store order defines the packet stream —
 * do not reorder or split the *_seq groups. */
static void cayman_init_atom_start_cs(struct r600_context *rctx)
{
	struct r600_command_buffer *cb = &rctx->start_cs_cmd;
	int i;

	r600_init_command_buffer(cb, 338);

	/* This must be first. */
	r600_store_value(cb, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
	r600_store_value(cb, 0x80000000);
	r600_store_value(cb, 0x80000000);

	/* We're setting config registers here. */
	r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
	r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));

	/* This enables pipeline stat & streamout queries.
	 * They are only disabled by blits.
	 */
	r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
	r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) | EVENT_INDEX(0));

	cayman_init_common_regs(cb, rctx->b.chip_class,
				rctx->b.family, rctx->screen->b.info.drm_minor);

	r600_store_config_reg(cb, R_009100_SPI_CONFIG_CNTL, 0);
	r600_store_config_reg(cb, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4));

	/* remove LS/HS from one SIMD for hw workaround */
	r600_store_config_reg_seq(cb, R_008E20_SQ_STATIC_THREAD_MGMT1, 3);
	r600_store_value(cb, 0xffffffff);
	r600_store_value(cb, 0xffffffff);
	r600_store_value(cb, 0xfffffffe);

	/* Zero all ring item sizes; the state atoms program real values. */
	r600_store_context_reg_seq(cb, R_028900_SQ_ESGS_RING_ITEMSIZE, 6);
	r600_store_value(cb, 0); /* R_028900_SQ_ESGS_RING_ITEMSIZE */
	r600_store_value(cb, 0); /* R_028904_SQ_GSVS_RING_ITEMSIZE */
	r600_store_value(cb, 0); /* R_028908_SQ_ESTMP_RING_ITEMSIZE */
	r600_store_value(cb, 0); /* R_02890C_SQ_GSTMP_RING_ITEMSIZE */
	r600_store_value(cb, 0); /* R_028910_SQ_VSTMP_RING_ITEMSIZE */
	r600_store_value(cb, 0); /* R_028914_SQ_PSTMP_RING_ITEMSIZE */

	r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4);
	r600_store_value(cb, 0); /* R_02891C_SQ_GS_VERT_ITEMSIZE */
	r600_store_value(cb, 0); /* R_028920_SQ_GS_VERT_ITEMSIZE_1 */
	r600_store_value(cb, 0); /* R_028924_SQ_GS_VERT_ITEMSIZE_2 */
	r600_store_value(cb, 0); /* R_028928_SQ_GS_VERT_ITEMSIZE_3 */

	r600_store_context_reg_seq(cb, R_028A10_VGT_OUTPUT_PATH_CNTL, 13);
	r600_store_value(cb, 0); /* R_028A10_VGT_OUTPUT_PATH_CNTL */
	r600_store_value(cb, 0); /* R_028A14_VGT_HOS_CNTL */
	r600_store_value(cb, fui(64)); /* R_028A18_VGT_HOS_MAX_TESS_LEVEL */
	r600_store_value(cb, fui(0)); /* R_028A1C_VGT_HOS_MIN_TESS_LEVEL */
	r600_store_value(cb, 16); /* R_028A20_VGT_HOS_REUSE_DEPTH */
	r600_store_value(cb, 0); /* R_028A24_VGT_GROUP_PRIM_TYPE */
	r600_store_value(cb, 0); /* R_028A28_VGT_GROUP_FIRST_DECR */
	r600_store_value(cb, 0); /* R_028A2C_VGT_GROUP_DECR */
	r600_store_value(cb, 0); /* R_028A30_VGT_GROUP_VECT_0_CNTL */
	r600_store_value(cb, 0); /* R_028A34_VGT_GROUP_VECT_1_CNTL */
	r600_store_value(cb, 0); /* R_028A38_VGT_GROUP_VECT_0_FMT_CNTL */
	r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */
	r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE */

	r600_store_context_reg(cb, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0);

	r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1);

	r600_store_context_reg_seq(cb, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
	r600_store_value(cb, 0x76543210); /* CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0 */
	r600_store_value(cb, 0xfedcba98); /* CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1 */

	r600_store_context_reg(cb, R_028724_GDS_ADDR_SIZE, 0x3fff);
	r600_store_context_reg_seq(cb, R_0288E8_SQ_LDS_ALLOC, 2);
	r600_store_value(cb, 0); /* R_0288E8_SQ_LDS_ALLOC */
	r600_store_value(cb, 0); /* R_0288EC_SQ_LDS_ALLOC_PS */

	r600_store_context_reg(cb, R_0288F0_SQ_VTX_SEMANTIC_CLEAR, ~0);

	r600_store_context_reg_seq(cb, R_028400_VGT_MAX_VTX_INDX, 2);
	r600_store_value(cb, ~0); /* R_028400_VGT_MAX_VTX_INDX */
	r600_store_value(cb, 0); /* R_028404_VGT_MIN_VTX_INDX */

	r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);

	r600_store_context_reg(cb, R_028028_DB_STENCIL_CLEAR, 0);

	r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0);

	r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3);
	r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */
	r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */
	r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */

	r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0);
	r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);

	r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
	r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0);

	/* Scissors default to the full 16k x 16k render range. */
	r600_store_context_reg_seq(cb, R_028240_PA_SC_GENERIC_SCISSOR_TL, 2);
	r600_store_value(cb, 0); /* R_028240_PA_SC_GENERIC_SCISSOR_TL */
	r600_store_value(cb, S_028244_BR_X(16384) | S_028244_BR_Y(16384)); /* R_028244_PA_SC_GENERIC_SCISSOR_BR */

	r600_store_context_reg_seq(cb, R_028030_PA_SC_SCREEN_SCISSOR_TL, 2);
	r600_store_value(cb, 0); /* R_028030_PA_SC_SCREEN_SCISSOR_TL */
	r600_store_value(cb, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); /* R_028034_PA_SC_SCREEN_SCISSOR_BR */

	/* Round-to-nearest-even for every shader stage. */
	r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
	r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
	r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_2_GS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
	r600_store_context_reg(cb, R_028894_SQ_PGM_RESOURCES_2_ES, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
	r600_store_context_reg(cb, R_0288C0_SQ_PGM_RESOURCES_2_HS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
	r600_store_context_reg(cb, R_0288D8_SQ_PGM_RESOURCES_2_LS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));

	r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0);

	/* to avoid GPU doing any preloading of constant from random address */
	r600_store_context_reg_seq(cb, R_028140_ALU_CONST_BUFFER_SIZE_PS_0, 16);
	for (i = 0; i < 16; i++)
		r600_store_value(cb, 0);

	r600_store_context_reg_seq(cb, R_028180_ALU_CONST_BUFFER_SIZE_VS_0, 16);
	for (i = 0; i < 16; i++)
		r600_store_value(cb, 0);

	r600_store_context_reg_seq(cb, R_0281C0_ALU_CONST_BUFFER_SIZE_GS_0, 16);
	for (i = 0; i < 16; i++)
		r600_store_value(cb, 0);

	r600_store_context_reg_seq(cb, R_028FC0_ALU_CONST_BUFFER_SIZE_LS_0, 16);
	for (i = 0; i < 16; i++)
		r600_store_value(cb, 0);

	r600_store_context_reg_seq(cb, R_028F80_ALU_CONST_BUFFER_SIZE_HS_0, 16);
	for (i = 0; i < 16; i++)
		r600_store_value(cb, 0);

	if (rctx->screen->b.has_streamout) {
		r600_store_context_reg(cb, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
	}

	r600_store_context_reg(cb, R_028010_DB_RENDER_OVERRIDE2, 0);
	r600_store_context_reg(cb, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
	r600_store_context_reg(cb, R_0286C8_SPI_THREAD_GROUPING, 0);
	r600_store_context_reg_seq(cb, R_0286E4_SPI_PS_IN_CONTROL_2, 2);
	r600_store_value(cb, 0); /* R_0286E4_SPI_PS_IN_CONTROL_2 */
	r600_store_value(cb, 0); /* R_0286E8_SPI_COMPUTE_INPUT_CNTL */

	r600_store_context_reg_seq(cb, R_028B54_VGT_SHADER_STAGES_EN, 2);
	r600_store_value(cb, 0); /* R028B54_VGT_SHADER_STAGES_EN */
	r600_store_value(cb, 0); /* R028B58_VGT_LS_HS_CONFIG */
	r600_store_context_reg(cb, R_028B6C_VGT_TF_PARAM, 0);
	/* Default loop-const for each of the five clause-level banks. */
	eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF);
	eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF);
	eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (64 * 4), 0x01000FFF);
	eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (96 * 4), 0x01000FFF);
	eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (128 * 4), 0x01000FFF);
}
2892
 
 
2893
 
void evergreen_init_common_regs(struct r600_context *rctx, struct r600_command_buffer *cb,
2894
 
                                enum chip_class ctx_chip_class,
2895
 
                                enum radeon_family ctx_family,
2896
 
                                int ctx_drm_minor)
2897
 
{
2898
 
        int ps_prio;
2899
 
        int vs_prio;
2900
 
        int gs_prio;
2901
 
        int es_prio;
2902
 
 
2903
 
        int hs_prio;
2904
 
        int cs_prio;
2905
 
        int ls_prio;
2906
 
 
2907
 
        unsigned tmp;
2908
 
 
2909
 
        ps_prio = 0;
2910
 
        vs_prio = 1;
2911
 
        gs_prio = 2;
2912
 
        es_prio = 3;
2913
 
        hs_prio = 3;
2914
 
        ls_prio = 3;
2915
 
        cs_prio = 0;
2916
 
 
2917
 
        rctx->default_gprs[R600_HW_STAGE_PS] = 93;
2918
 
        rctx->default_gprs[R600_HW_STAGE_VS] = 46;
2919
 
        rctx->r6xx_num_clause_temp_gprs = 4;
2920
 
        rctx->default_gprs[R600_HW_STAGE_GS] = 31;
2921
 
        rctx->default_gprs[R600_HW_STAGE_ES] = 31;
2922
 
        rctx->default_gprs[EG_HW_STAGE_HS] = 23;
2923
 
        rctx->default_gprs[EG_HW_STAGE_LS] = 23;
2924
 
 
2925
 
        tmp = 0;
2926
 
        switch (ctx_family) {
2927
 
        case CHIP_CEDAR:
2928
 
        case CHIP_PALM:
2929
 
        case CHIP_SUMO:
2930
 
        case CHIP_SUMO2:
2931
 
        case CHIP_CAICOS:
2932
 
                break;
2933
 
        default:
2934
 
                tmp |= S_008C00_VC_ENABLE(1);
2935
 
                break;
2936
 
        }
2937
 
        tmp |= S_008C00_EXPORT_SRC_C(1);
2938
 
        tmp |= S_008C00_CS_PRIO(cs_prio);
2939
 
        tmp |= S_008C00_LS_PRIO(ls_prio);
2940
 
        tmp |= S_008C00_HS_PRIO(hs_prio);
2941
 
        tmp |= S_008C00_PS_PRIO(ps_prio);
2942
 
        tmp |= S_008C00_VS_PRIO(vs_prio);
2943
 
        tmp |= S_008C00_GS_PRIO(gs_prio);
2944
 
        tmp |= S_008C00_ES_PRIO(es_prio);
2945
 
 
2946
 
        r600_store_config_reg_seq(cb, R_008C00_SQ_CONFIG, 1);
2947
 
        r600_store_value(cb, tmp); /* R_008C00_SQ_CONFIG */
2948
 
 
2949
 
        r600_store_config_reg_seq(cb, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 2);
2950
 
        r600_store_value(cb, 0); /* R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 */
2951
 
        r600_store_value(cb, 0); /* R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 */
2952
 
 
2953
 
        /* The cs checker requires this register to be set. */
2954
 
        r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
2955
 
 
2956
 
        r600_store_context_reg_seq(cb, R_028350_SX_MISC, 2);
2957
 
        r600_store_value(cb, 0);
2958
 
        r600_store_value(cb, S_028354_SURFACE_SYNC_MASK(0xf));
2959
 
 
2960
 
        return;
2961
 
}
2962
 
 
2963
 
void evergreen_init_atom_start_cs(struct r600_context *rctx)
2964
 
{
2965
 
        struct r600_command_buffer *cb = &rctx->start_cs_cmd;
2966
 
        int num_ps_threads;
2967
 
        int num_vs_threads;
2968
 
        int num_gs_threads;
2969
 
        int num_es_threads;
2970
 
        int num_hs_threads;
2971
 
        int num_ls_threads;
2972
 
 
2973
 
        int num_ps_stack_entries;
2974
 
        int num_vs_stack_entries;
2975
 
        int num_gs_stack_entries;
2976
 
        int num_es_stack_entries;
2977
 
        int num_hs_stack_entries;
2978
 
        int num_ls_stack_entries;
2979
 
        enum radeon_family family;
2980
 
        unsigned tmp, i;
2981
 
 
2982
 
        if (rctx->b.chip_class == CAYMAN) {
2983
 
                cayman_init_atom_start_cs(rctx);
2984
 
                return;
2985
 
        }
2986
 
 
2987
 
        r600_init_command_buffer(cb, 338);
2988
 
 
2989
 
        /* This must be first. */
2990
 
        r600_store_value(cb, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
2991
 
        r600_store_value(cb, 0x80000000);
2992
 
        r600_store_value(cb, 0x80000000);
2993
 
 
2994
 
        /* We're setting config registers here. */
2995
 
        r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
2996
 
        r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
2997
 
 
2998
 
        /* This enables pipeline stat & streamout queries.
2999
 
         * They are only disabled by blits.
3000
 
         */
3001
 
        r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
3002
 
        r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) | EVENT_INDEX(0));
3003
 
 
3004
 
        evergreen_init_common_regs(rctx, cb, rctx->b.chip_class,
3005
 
                                   rctx->b.family, rctx->screen->b.info.drm_minor);
3006
 
 
3007
 
        family = rctx->b.family;
3008
 
        switch (family) {
3009
 
        case CHIP_CEDAR:
3010
 
        default:
3011
 
                num_ps_threads = 96;
3012
 
                num_vs_threads = 16;
3013
 
                num_gs_threads = 16;
3014
 
                num_es_threads = 16;
3015
 
                num_hs_threads = 16;
3016
 
                num_ls_threads = 16;
3017
 
                num_ps_stack_entries = 42;
3018
 
                num_vs_stack_entries = 42;
3019
 
                num_gs_stack_entries = 42;
3020
 
                num_es_stack_entries = 42;
3021
 
                num_hs_stack_entries = 42;
3022
 
                num_ls_stack_entries = 42;
3023
 
                break;
3024
 
        case CHIP_REDWOOD:
3025
 
                num_ps_threads = 128;
3026
 
                num_vs_threads = 20;
3027
 
                num_gs_threads = 20;
3028
 
                num_es_threads = 20;
3029
 
                num_hs_threads = 20;
3030
 
                num_ls_threads = 20;
3031
 
                num_ps_stack_entries = 42;
3032
 
                num_vs_stack_entries = 42;
3033
 
                num_gs_stack_entries = 42;
3034
 
                num_es_stack_entries = 42;
3035
 
                num_hs_stack_entries = 42;
3036
 
                num_ls_stack_entries = 42;
3037
 
                break;
3038
 
        case CHIP_JUNIPER:
3039
 
                num_ps_threads = 128;
3040
 
                num_vs_threads = 20;
3041
 
                num_gs_threads = 20;
3042
 
                num_es_threads = 20;
3043
 
                num_hs_threads = 20;
3044
 
                num_ls_threads = 20;
3045
 
                num_ps_stack_entries = 85;
3046
 
                num_vs_stack_entries = 85;
3047
 
                num_gs_stack_entries = 85;
3048
 
                num_es_stack_entries = 85;
3049
 
                num_hs_stack_entries = 85;
3050
 
                num_ls_stack_entries = 85;
3051
 
                break;
3052
 
        case CHIP_CYPRESS:
3053
 
        case CHIP_HEMLOCK:
3054
 
                num_ps_threads = 128;
3055
 
                num_vs_threads = 20;
3056
 
                num_gs_threads = 20;
3057
 
                num_es_threads = 20;
3058
 
                num_hs_threads = 20;
3059
 
                num_ls_threads = 20;
3060
 
                num_ps_stack_entries = 85;
3061
 
                num_vs_stack_entries = 85;
3062
 
                num_gs_stack_entries = 85;
3063
 
                num_es_stack_entries = 85;
3064
 
                num_hs_stack_entries = 85;
3065
 
                num_ls_stack_entries = 85;
3066
 
                break;
3067
 
        case CHIP_PALM:
3068
 
                num_ps_threads = 96;
3069
 
                num_vs_threads = 16;
3070
 
                num_gs_threads = 16;
3071
 
                num_es_threads = 16;
3072
 
                num_hs_threads = 16;
3073
 
                num_ls_threads = 16;
3074
 
                num_ps_stack_entries = 42;
3075
 
                num_vs_stack_entries = 42;
3076
 
                num_gs_stack_entries = 42;
3077
 
                num_es_stack_entries = 42;
3078
 
                num_hs_stack_entries = 42;
3079
 
                num_ls_stack_entries = 42;
3080
 
                break;
3081
 
        case CHIP_SUMO:
3082
 
                num_ps_threads = 96;
3083
 
                num_vs_threads = 25;
3084
 
                num_gs_threads = 25;
3085
 
                num_es_threads = 25;
3086
 
                num_hs_threads = 16;
3087
 
                num_ls_threads = 16;
3088
 
                num_ps_stack_entries = 42;
3089
 
                num_vs_stack_entries = 42;
3090
 
                num_gs_stack_entries = 42;
3091
 
                num_es_stack_entries = 42;
3092
 
                num_hs_stack_entries = 42;
3093
 
                num_ls_stack_entries = 42;
3094
 
                break;
3095
 
        case CHIP_SUMO2:
3096
 
                num_ps_threads = 96;
3097
 
                num_vs_threads = 25;
3098
 
                num_gs_threads = 25;
3099
 
                num_es_threads = 25;
3100
 
                num_hs_threads = 16;
3101
 
                num_ls_threads = 16;
3102
 
                num_ps_stack_entries = 85;
3103
 
                num_vs_stack_entries = 85;
3104
 
                num_gs_stack_entries = 85;
3105
 
                num_es_stack_entries = 85;
3106
 
                num_hs_stack_entries = 85;
3107
 
                num_ls_stack_entries = 85;
3108
 
                break;
3109
 
        case CHIP_BARTS:
3110
 
                num_ps_threads = 128;
3111
 
                num_vs_threads = 20;
3112
 
                num_gs_threads = 20;
3113
 
                num_es_threads = 20;
3114
 
                num_hs_threads = 20;
3115
 
                num_ls_threads = 20;
3116
 
                num_ps_stack_entries = 85;
3117
 
                num_vs_stack_entries = 85;
3118
 
                num_gs_stack_entries = 85;
3119
 
                num_es_stack_entries = 85;
3120
 
                num_hs_stack_entries = 85;
3121
 
                num_ls_stack_entries = 85;
3122
 
                break;
3123
 
        case CHIP_TURKS:
3124
 
                num_ps_threads = 128;
3125
 
                num_vs_threads = 20;
3126
 
                num_gs_threads = 20;
3127
 
                num_es_threads = 20;
3128
 
                num_hs_threads = 20;
3129
 
                num_ls_threads = 20;
3130
 
                num_ps_stack_entries = 42;
3131
 
                num_vs_stack_entries = 42;
3132
 
                num_gs_stack_entries = 42;
3133
 
                num_es_stack_entries = 42;
3134
 
                num_hs_stack_entries = 42;
3135
 
                num_ls_stack_entries = 42;
3136
 
                break;
3137
 
        case CHIP_CAICOS:
3138
 
                num_ps_threads = 96;
3139
 
                num_vs_threads = 10;
3140
 
                num_gs_threads = 10;
3141
 
                num_es_threads = 10;
3142
 
                num_hs_threads = 10;
3143
 
                num_ls_threads = 10;
3144
 
                num_ps_stack_entries = 42;
3145
 
                num_vs_stack_entries = 42;
3146
 
                num_gs_stack_entries = 42;
3147
 
                num_es_stack_entries = 42;
3148
 
                num_hs_stack_entries = 42;
3149
 
                num_ls_stack_entries = 42;
3150
 
                break;
3151
 
        }
3152
 
 
3153
 
        tmp = S_008C18_NUM_PS_THREADS(num_ps_threads);
3154
 
        tmp |= S_008C18_NUM_VS_THREADS(num_vs_threads);
3155
 
        tmp |= S_008C18_NUM_GS_THREADS(num_gs_threads);
3156
 
        tmp |= S_008C18_NUM_ES_THREADS(num_es_threads);
3157
 
 
3158
 
        r600_store_config_reg_seq(cb, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 5);
3159
 
        r600_store_value(cb, tmp); /* R_008C18_SQ_THREAD_RESOURCE_MGMT_1 */
3160
 
 
3161
 
        tmp = S_008C1C_NUM_HS_THREADS(num_hs_threads);
3162
 
        tmp |= S_008C1C_NUM_LS_THREADS(num_ls_threads);
3163
 
        r600_store_value(cb, tmp); /* R_008C1C_SQ_THREAD_RESOURCE_MGMT_2 */
3164
 
 
3165
 
        tmp = S_008C20_NUM_PS_STACK_ENTRIES(num_ps_stack_entries);
3166
 
        tmp |= S_008C20_NUM_VS_STACK_ENTRIES(num_vs_stack_entries);
3167
 
        r600_store_value(cb, tmp); /* R_008C20_SQ_STACK_RESOURCE_MGMT_1 */
3168
 
 
3169
 
        tmp = S_008C24_NUM_GS_STACK_ENTRIES(num_gs_stack_entries);
3170
 
        tmp |= S_008C24_NUM_ES_STACK_ENTRIES(num_es_stack_entries);
3171
 
        r600_store_value(cb, tmp); /* R_008C24_SQ_STACK_RESOURCE_MGMT_2 */
3172
 
 
3173
 
        tmp = S_008C28_NUM_HS_STACK_ENTRIES(num_hs_stack_entries);
3174
 
        tmp |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries);
3175
 
        r600_store_value(cb, tmp); /* R_008C28_SQ_STACK_RESOURCE_MGMT_3 */
3176
 
 
3177
 
        r600_store_config_reg(cb, R_008E2C_SQ_LDS_RESOURCE_MGMT,
3178
 
                              S_008E2C_NUM_PS_LDS(0x1000) | S_008E2C_NUM_LS_LDS(0x1000));
3179
 
 
3180
 
        /* remove LS/HS from one SIMD for hw workaround */
3181
 
        r600_store_config_reg_seq(cb, R_008E20_SQ_STATIC_THREAD_MGMT1, 3);
3182
 
        r600_store_value(cb, 0xffffffff);
3183
 
        r600_store_value(cb, 0xffffffff);
3184
 
        r600_store_value(cb, 0xfffffffe);
3185
 
 
3186
 
        r600_store_config_reg(cb, R_009100_SPI_CONFIG_CNTL, 0);
3187
 
        r600_store_config_reg(cb, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4));
3188
 
 
3189
 
        r600_store_context_reg_seq(cb, R_028900_SQ_ESGS_RING_ITEMSIZE, 6);
3190
 
        r600_store_value(cb, 0); /* R_028900_SQ_ESGS_RING_ITEMSIZE */
3191
 
        r600_store_value(cb, 0); /* R_028904_SQ_GSVS_RING_ITEMSIZE */
3192
 
        r600_store_value(cb, 0); /* R_028908_SQ_ESTMP_RING_ITEMSIZE */
3193
 
        r600_store_value(cb, 0); /* R_02890C_SQ_GSTMP_RING_ITEMSIZE */
3194
 
        r600_store_value(cb, 0); /* R_028910_SQ_VSTMP_RING_ITEMSIZE */
3195
 
        r600_store_value(cb, 0); /* R_028914_SQ_PSTMP_RING_ITEMSIZE */
3196
 
 
3197
 
        r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4);
3198
 
        r600_store_value(cb, 0); /* R_02891C_SQ_GS_VERT_ITEMSIZE */
3199
 
        r600_store_value(cb, 0); /* R_028920_SQ_GS_VERT_ITEMSIZE_1 */
3200
 
        r600_store_value(cb, 0); /* R_028924_SQ_GS_VERT_ITEMSIZE_2 */
3201
 
        r600_store_value(cb, 0); /* R_028928_SQ_GS_VERT_ITEMSIZE_3 */
3202
 
 
3203
 
        r600_store_context_reg_seq(cb, R_028A10_VGT_OUTPUT_PATH_CNTL, 13);
3204
 
        r600_store_value(cb, 0); /* R_028A10_VGT_OUTPUT_PATH_CNTL */
3205
 
        r600_store_value(cb, 0); /* R_028A14_VGT_HOS_CNTL */
3206
 
        r600_store_value(cb, fui(64)); /* R_028A18_VGT_HOS_MAX_TESS_LEVEL */
3207
 
        r600_store_value(cb, fui(1.0)); /* R_028A1C_VGT_HOS_MIN_TESS_LEVEL */
3208
 
        r600_store_value(cb, 16); /* R_028A20_VGT_HOS_REUSE_DEPTH */
3209
 
        r600_store_value(cb, 0); /* R_028A24_VGT_GROUP_PRIM_TYPE */
3210
 
        r600_store_value(cb, 0); /* R_028A28_VGT_GROUP_FIRST_DECR */
3211
 
        r600_store_value(cb, 0); /* R_028A2C_VGT_GROUP_DECR */
3212
 
        r600_store_value(cb, 0); /* R_028A30_VGT_GROUP_VECT_0_CNTL */
3213
 
        r600_store_value(cb, 0); /* R_028A34_VGT_GROUP_VECT_1_CNTL */
3214
 
        r600_store_value(cb, 0); /* R_028A38_VGT_GROUP_VECT_0_FMT_CNTL */
3215
 
        r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */
3216
 
        r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE */
3217
 
 
3218
 
        r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1);
3219
 
 
3220
 
        r600_store_context_reg(cb, R_0288F0_SQ_VTX_SEMANTIC_CLEAR, ~0);
3221
 
 
3222
 
        r600_store_context_reg_seq(cb, R_028400_VGT_MAX_VTX_INDX, 2);
3223
 
        r600_store_value(cb, ~0); /* R_028400_VGT_MAX_VTX_INDX */
3224
 
        r600_store_value(cb, 0); /* R_028404_VGT_MIN_VTX_INDX */
3225
 
 
3226
 
        r600_store_ctl_const(cb, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0);
3227
 
 
3228
 
        r600_store_context_reg(cb, R_028028_DB_STENCIL_CLEAR, 0);
3229
 
 
3230
 
        r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0);
3231
 
        r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
3232
 
        r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
3233
 
 
3234
 
        r600_store_context_reg(cb, R_0286DC_SPI_FOG_CNTL, 0);
3235
 
        r600_store_context_reg(cb, R_028820_PA_CL_NANINF_CNTL, 0);
3236
 
 
3237
 
        r600_store_context_reg_seq(cb, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 3);
3238
 
        r600_store_value(cb, 0); /* R_028AC0_DB_SRESULTS_COMPARE_STATE0 */
3239
 
        r600_store_value(cb, 0); /* R_028AC4_DB_SRESULTS_COMPARE_STATE1 */
3240
 
        r600_store_value(cb, 0); /* R_028AC8_DB_PRELOAD_CONTROL */
3241
 
 
3242
 
        r600_store_context_reg_seq(cb, R_028240_PA_SC_GENERIC_SCISSOR_TL, 2);
3243
 
        r600_store_value(cb, 0); /* R_028240_PA_SC_GENERIC_SCISSOR_TL */
3244
 
        r600_store_value(cb, S_028244_BR_X(16384) | S_028244_BR_Y(16384)); /* R_028244_PA_SC_GENERIC_SCISSOR_BR */
3245
 
 
3246
 
        r600_store_context_reg_seq(cb, R_028030_PA_SC_SCREEN_SCISSOR_TL, 2);
3247
 
        r600_store_value(cb, 0); /* R_028030_PA_SC_SCREEN_SCISSOR_TL */
3248
 
        r600_store_value(cb, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); /* R_028034_PA_SC_SCREEN_SCISSOR_BR */
3249
 
 
3250
 
        r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
3251
 
        r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
3252
 
        r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_2_GS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
3253
 
        r600_store_context_reg(cb, R_028894_SQ_PGM_RESOURCES_2_ES, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
3254
 
        r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0);
3255
 
        r600_store_context_reg(cb, R_0288C0_SQ_PGM_RESOURCES_2_HS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
3256
 
        r600_store_context_reg(cb, R_0288D8_SQ_PGM_RESOURCES_2_LS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
3257
 
 
3258
 
        /* to avoid GPU doing any preloading of constant from random address */
3259
 
        r600_store_context_reg_seq(cb, R_028140_ALU_CONST_BUFFER_SIZE_PS_0, 16);
3260
 
        for (i = 0; i < 16; i++)
3261
 
                r600_store_value(cb, 0);
3262
 
 
3263
 
        r600_store_context_reg_seq(cb, R_028180_ALU_CONST_BUFFER_SIZE_VS_0, 16);
3264
 
        for (i = 0; i < 16; i++)
3265
 
                r600_store_value(cb, 0);
3266
 
 
3267
 
        r600_store_context_reg_seq(cb, R_0281C0_ALU_CONST_BUFFER_SIZE_GS_0, 16);
3268
 
        for (i = 0; i < 16; i++)
3269
 
                r600_store_value(cb, 0);
3270
 
 
3271
 
        r600_store_context_reg_seq(cb, R_028FC0_ALU_CONST_BUFFER_SIZE_LS_0, 16);
3272
 
        for (i = 0; i < 16; i++)
3273
 
                r600_store_value(cb, 0);
3274
 
 
3275
 
        r600_store_context_reg_seq(cb, R_028F80_ALU_CONST_BUFFER_SIZE_HS_0, 16);
3276
 
        for (i = 0; i < 16; i++)
3277
 
                r600_store_value(cb, 0);
3278
 
 
3279
 
        r600_store_context_reg(cb, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0);
3280
 
 
3281
 
        if (rctx->screen->b.has_streamout) {
3282
 
                r600_store_context_reg(cb, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
3283
 
        }
3284
 
 
3285
 
        r600_store_context_reg(cb, R_028010_DB_RENDER_OVERRIDE2, 0);
3286
 
        r600_store_context_reg(cb, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
3287
 
        r600_store_context_reg(cb, R_0286C8_SPI_THREAD_GROUPING, 0);
3288
 
        r600_store_context_reg_seq(cb, R_0286E4_SPI_PS_IN_CONTROL_2, 2);
3289
 
        r600_store_value(cb, 0); /* R_0286E4_SPI_PS_IN_CONTROL_2 */
3290
 
        r600_store_value(cb, 0); /* R_0286E8_SPI_COMPUTE_INPUT_CNTL */
3291
 
 
3292
 
        r600_store_context_reg_seq(cb, R_0288E8_SQ_LDS_ALLOC, 2);
3293
 
        r600_store_value(cb, 0); /* R_0288E8_SQ_LDS_ALLOC */
3294
 
        r600_store_value(cb, 0); /* R_0288EC_SQ_LDS_ALLOC_PS */
3295
 
 
3296
 
        if (rctx->b.family == CHIP_CAICOS) {
3297
 
                r600_store_context_reg_seq(cb, R_028B54_VGT_SHADER_STAGES_EN, 2);
3298
 
                r600_store_value(cb, 0); /* R028B54_VGT_SHADER_STAGES_EN */
3299
 
                r600_store_value(cb, 0); /* R028B58_VGT_LS_HS_CONFIG */
3300
 
                r600_store_context_reg(cb, R_028B6C_VGT_TF_PARAM, 0);
3301
 
        } else {
3302
 
                r600_store_context_reg_seq(cb, R_028B54_VGT_SHADER_STAGES_EN, 7);
3303
 
                r600_store_value(cb, 0); /* R028B54_VGT_SHADER_STAGES_EN */
3304
 
                r600_store_value(cb, 0); /* R028B58_VGT_LS_HS_CONFIG */
3305
 
                r600_store_value(cb, 0); /* R028B5C_VGT_LS_SIZE */
3306
 
                r600_store_value(cb, 0); /* R028B60_VGT_HS_SIZE */
3307
 
                r600_store_value(cb, 0); /* R028B64_VGT_LS_HS_ALLOC */
3308
 
                r600_store_value(cb, 0); /* R028B68_VGT_HS_PATCH_CONST */
3309
 
                r600_store_value(cb, 0); /* R028B68_VGT_TF_PARAM */
3310
 
        }
3311
 
 
3312
 
        eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0, 0x01000FFF);
3313
 
        eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF);
3314
 
        eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (64 * 4), 0x01000FFF);
3315
 
        eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (96 * 4), 0x01000FFF);
3316
 
        eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (128 * 4), 0x01000FFF);
3317
 
}
3318
 
 
3319
 
/**
 * Build the pixel-shader hardware state command buffer.
 *
 * Walks the shader's inputs to program SPI interpolation controls
 * (SPI_BARYC_CNTL, SPI_PS_IN_CONTROL_0/1) and one SPI_PS_INPUT_CNTL
 * word per semantic input, walks the outputs to derive
 * DB_SHADER_CONTROL (depth/stencil/samplemask export) and the export
 * count, then emits the program start address and SQ_PGM_RESOURCES_PS.
 * Also snapshots derived state (db_shader_control, ps_depth_export,
 * sprite_coord_enable, flatshade, msaa) on @shader so the caller can
 * detect when this state must be rebuilt.
 *
 * \param ctx     pipe context (cast to r600_context for rasterizer/fb state)
 * \param shader  compiled PS whose command_buffer is (re)filled
 */
void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_command_buffer *cb = &shader->command_buffer;
	struct r600_shader *rshader = &shader->shader;
	unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control = 0;
	int pos_index = -1, face_index = -1, fixed_pt_position_index = -1;
	int ninterp = 0;
	boolean have_perspective = FALSE, have_linear = FALSE;
	/* Indexed by eg_get_interpolator_index() result: 0-2 perspective
	 * (sample/center/centroid), 3-5 linear (same order). */
	static const unsigned spi_baryc_enable_bit[6] = {
		S_0286E0_PERSP_SAMPLE_ENA(1),
		S_0286E0_PERSP_CENTER_ENA(1),
		S_0286E0_PERSP_CENTROID_ENA(1),
		S_0286E0_LINEAR_SAMPLE_ENA(1),
		S_0286E0_LINEAR_CENTER_ENA(1),
		S_0286E0_LINEAR_CENTROID_ENA(1)
	};
	unsigned spi_baryc_cntl = 0, sid, tmp, num = 0;
	unsigned z_export = 0, stencil_export = 0, mask_export = 0;
	/* One entry per input with a SPI semantic id; assumes at most 32
	 * such inputs — NOTE(review): bound not checked here, confirm
	 * upstream limits ninput accordingly. */
	uint32_t spi_ps_input_cntl[32];

	/* Pull any state we use out of rctx.  Make sure that any additional
	 * state added to this list is also checked in the caller in
	 * r600_update_derived_state().
	 */
	bool sprite_coord_enable = rctx->rasterizer ? rctx->rasterizer->sprite_coord_enable : 0;
	bool flatshade = rctx->rasterizer ? rctx->rasterizer->flatshade : 0;
	bool msaa = rctx->framebuffer.nr_samples > 1 && rctx->ps_iter_samples > 0;

	/* Reuse the command buffer if it already exists; this function may
	 * be called again when the snapshot state above changes. */
	if (!cb->buf) {
		r600_init_command_buffer(cb, 64);
	} else {
		cb->num_dw = 0;
	}

	for (i = 0; i < rshader->ninput; i++) {
		/* evergreen NUM_INTERP only contains values interpolated into the LDS,
		   POSITION goes via GPRs from the SC so isn't counted */
		if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
			pos_index = i;
		else if (rshader->input[i].name == TGSI_SEMANTIC_FACE) {
			if (face_index == -1)
				face_index = i;
		}
		else if (rshader->input[i].name == TGSI_SEMANTIC_SAMPLEMASK) {
			if (face_index == -1)
				face_index = i; /* lives in same register, same enable bit */
		}
		else if (rshader->input[i].name == TGSI_SEMANTIC_SAMPLEID) {
			fixed_pt_position_index = i;
		}
		else {
			/* A regular interpolated input: count it and enable the
			 * matching barycentric mode bit(s). */
			ninterp++;
			int k = eg_get_interpolator_index(
				rshader->input[i].interpolate,
				rshader->input[i].interpolate_location);
			if (k >= 0) {
				spi_baryc_cntl |= spi_baryc_enable_bit[k];
				have_perspective |= k < 3;
				have_linear |= !(k < 3);
				/* interpolateAtCentroid() needs the centroid
				 * variant enabled as well as the declared one. */
				if (rshader->input[i].uses_interpolate_at_centroid) {
					k = eg_get_interpolator_index(
						rshader->input[i].interpolate,
						TGSI_INTERPOLATE_LOC_CENTROID);
					spi_baryc_cntl |= spi_baryc_enable_bit[k];
				}
			}
		}

		sid = rshader->input[i].spi_sid;

		/* Only inputs with a SPI semantic id get an INPUT_CNTL slot. */
		if (sid) {
			tmp = S_028644_SEMANTIC(sid);

			/* D3D 9 behaviour. GL is undefined */
			if (rshader->input[i].name == TGSI_SEMANTIC_COLOR && rshader->input[i].sid == 0)
				tmp |= S_028644_DEFAULT_VAL(3);

			if (rshader->input[i].name == TGSI_SEMANTIC_POSITION ||
				rshader->input[i].interpolate == TGSI_INTERPOLATE_CONSTANT ||
				(rshader->input[i].interpolate == TGSI_INTERPOLATE_COLOR && flatshade)) {
				tmp |= S_028644_FLAT_SHADE(1);
			}

			/* Point-sprite coordinate replacement for PCOORD and for
			 * TEXCOORDs selected by the rasterizer state. */
			if (rshader->input[i].name == TGSI_SEMANTIC_PCOORD ||
			    (rshader->input[i].name == TGSI_SEMANTIC_TEXCOORD &&
			     (sprite_coord_enable & (1 << rshader->input[i].sid)))) {
				tmp |= S_028644_PT_SPRITE_TEX(1);
			}

			spi_ps_input_cntl[num++] = tmp;
		}
	}

	r600_store_context_reg_seq(cb, R_028644_SPI_PS_INPUT_CNTL_0, num);
	r600_store_array(cb, num, spi_ps_input_cntl);

	/* Scan outputs for depth/stencil/samplemask exports. */
	for (i = 0; i < rshader->noutput; i++) {
		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
			z_export = 1;
		if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
			stencil_export = 1;
		if (rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK && msaa)
			mask_export = 1;
	}
	if (rshader->uses_kill)
		db_shader_control |= S_02880C_KILL_ENABLE(1);

	db_shader_control |= S_02880C_Z_EXPORT_ENABLE(z_export);
	db_shader_control |= S_02880C_STENCIL_EXPORT_ENABLE(stencil_export);
	db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(mask_export);

	if (shader->selector->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]) {
		db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1) |
			S_02880C_EXEC_ON_NOOP(shader->selector->info.writes_memory);
	} else if (shader->selector->info.writes_memory) {
		db_shader_control |= S_02880C_EXEC_ON_HIER_FAIL(1);
	}

	/* Conservative depth export layout declared by the shader. */
	switch (rshader->ps_conservative_z) {
	default: /* fall through */
	case TGSI_FS_DEPTH_LAYOUT_ANY:
		db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_ANY_Z);
		break;
	case TGSI_FS_DEPTH_LAYOUT_GREATER:
		db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
		break;
	case TGSI_FS_DEPTH_LAYOUT_LESS:
		db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
		break;
	}

	/* Bit 0 of exports_ps = "exports Z/stencil/samplemask". */
	exports_ps = 0;
	for (i = 0; i < rshader->noutput; i++) {
		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
		    rshader->output[i].name == TGSI_SEMANTIC_STENCIL ||
		    rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK)
			exports_ps |= 1;
	}

	num_cout = rshader->ps_export_highest + 1;

	exports_ps |= S_02884C_EXPORT_COLORS(num_cout);
	if (!exports_ps) {
		/* always at least export 1 component per pixel */
		exports_ps = 2;
	}
	shader->nr_ps_color_outputs = num_cout;
	shader->ps_color_export_mask = rshader->ps_color_export_mask;
	/* Hardware requires at least one interpolant. */
	if (ninterp == 0) {
		ninterp = 1;
		have_perspective = TRUE;
	}
	if (!spi_baryc_cntl)
		spi_baryc_cntl |= spi_baryc_enable_bit[0];

	if (!have_perspective && !have_linear)
		have_perspective = TRUE;

	spi_ps_in_control_0 = S_0286CC_NUM_INTERP(ninterp) |
			      S_0286CC_PERSP_GRADIENT_ENA(have_perspective) |
			      S_0286CC_LINEAR_GRADIENT_ENA(have_linear);
	spi_input_z = 0;
	if (pos_index != -1) {
		spi_ps_in_control_0 |=  S_0286CC_POSITION_ENA(1) |
			S_0286CC_POSITION_CENTROID(rshader->input[pos_index].interpolate_location == TGSI_INTERPOLATE_LOC_CENTROID) |
			S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr);
		spi_input_z |= S_0286D8_PROVIDE_Z_TO_SPI(1);
	}

	spi_ps_in_control_1 = 0;
	if (face_index != -1) {
		spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
			S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
	}
	if (fixed_pt_position_index != -1) {
		spi_ps_in_control_1 |= S_0286D0_FIXED_PT_POSITION_ENA(1) |
			S_0286D0_FIXED_PT_POSITION_ADDR(rshader->input[fixed_pt_position_index].gpr);
	}

	r600_store_context_reg_seq(cb, R_0286CC_SPI_PS_IN_CONTROL_0, 2);
	r600_store_value(cb, spi_ps_in_control_0); /* R_0286CC_SPI_PS_IN_CONTROL_0 */
	r600_store_value(cb, spi_ps_in_control_1); /* R_0286D0_SPI_PS_IN_CONTROL_1 */

	r600_store_context_reg(cb, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
	r600_store_context_reg(cb, R_0286D8_SPI_INPUT_Z, spi_input_z);
	r600_store_context_reg(cb, R_02884C_SQ_PGM_EXPORTS_PS, exports_ps);

	r600_store_context_reg_seq(cb, R_028840_SQ_PGM_START_PS, 2);
	r600_store_value(cb, shader->bo->gpu_address >> 8);
	r600_store_value(cb, /* R_028844_SQ_PGM_RESOURCES_PS */
			 S_028844_NUM_GPRS(rshader->bc.ngpr) |
			 S_028844_PRIME_CACHE_ON_DRAW(1) |
			 S_028844_DX10_CLAMP(1) |
			 S_028844_STACK_SIZE(rshader->bc.nstack));
	/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */

	shader->db_shader_control = db_shader_control;
	shader->ps_depth_export = z_export | stencil_export | mask_export;

	shader->sprite_coord_enable = sprite_coord_enable;
	shader->flatshade = flatshade;
	shader->msaa = msaa;
}
3523
 
 
3524
 
void evergreen_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
3525
 
{
3526
 
        struct r600_command_buffer *cb = &shader->command_buffer;
3527
 
        struct r600_shader *rshader = &shader->shader;
3528
 
 
3529
 
        r600_init_command_buffer(cb, 32);
3530
 
 
3531
 
        r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES,
3532
 
                               S_028890_NUM_GPRS(rshader->bc.ngpr) |
3533
 
                               S_028890_DX10_CLAMP(1) |
3534
 
                               S_028890_STACK_SIZE(rshader->bc.nstack));
3535
 
        r600_store_context_reg(cb, R_02888C_SQ_PGM_START_ES,
3536
 
                               shader->bo->gpu_address >> 8);
3537
 
        /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
3538
 
}
3539
 
 
3540
 
/**
 * Build the GS (geometry shader) hardware state command buffer.
 *
 * Programs the GS output limits, the per-stream ring item sizes for the
 * ESGS and GSVS rings (the GSVS sizes come from the GS copy shader),
 * the GSVS ring offsets, and the GS program resources/start address.
 *
 * \param ctx     pipe context (used for the DRM version check)
 * \param shader  compiled GS; must have a gs_copy_shader attached
 */
void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_command_buffer *cb = &shader->command_buffer;
	struct r600_shader *rshader = &shader->shader;
	struct r600_shader *cp_shader = &shader->gs_copy_shader->shader;
	/* Per-stream GSVS ring item sizes in dwords: per-vertex size times
	 * max output vertices, converted from bytes (>> 2). */
	unsigned gsvs_itemsizes[4] = {
			(cp_shader->ring_item_sizes[0] * shader->selector->gs_max_out_vertices) >> 2,
			(cp_shader->ring_item_sizes[1] * shader->selector->gs_max_out_vertices) >> 2,
			(cp_shader->ring_item_sizes[2] * shader->selector->gs_max_out_vertices) >> 2,
			(cp_shader->ring_item_sizes[3] * shader->selector->gs_max_out_vertices) >> 2
	};

	r600_init_command_buffer(cb, 64);

	/* VGT_GS_MODE is written by evergreen_emit_shader_stages */

	r600_store_context_reg(cb, R_028B38_VGT_GS_MAX_VERT_OUT,
			       S_028B38_MAX_VERT_OUT(shader->selector->gs_max_out_vertices));
	r600_store_context_reg(cb, R_028A6C_VGT_GS_OUT_PRIM_TYPE,
			       r600_conv_prim_to_gs_out(shader->selector->gs_output_prim));

	/* GS instancing is only supported on new-enough kernels. */
	if (rctx->screen->b.info.drm_minor >= 35) {
		r600_store_context_reg(cb, R_028B90_VGT_GS_INSTANCE_CNT,
				S_028B90_CNT(MIN2(shader->selector->gs_num_invocations, 127)) |
				S_028B90_ENABLE(shader->selector->gs_num_invocations > 0));
	}
	/* Per-stream vertex item sizes in dwords. */
	r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4);
	r600_store_value(cb, cp_shader->ring_item_sizes[0] >> 2);
	r600_store_value(cb, cp_shader->ring_item_sizes[1] >> 2);
	r600_store_value(cb, cp_shader->ring_item_sizes[2] >> 2);
	r600_store_value(cb, cp_shader->ring_item_sizes[3] >> 2);

	r600_store_context_reg(cb, R_028900_SQ_ESGS_RING_ITEMSIZE,
			       (rshader->ring_item_sizes[0]) >> 2);

	/* Total GSVS item size is the sum over all four streams. */
	r600_store_context_reg(cb, R_028904_SQ_GSVS_RING_ITEMSIZE,
			       gsvs_itemsizes[0] +
			       gsvs_itemsizes[1] +
			       gsvs_itemsizes[2] +
			       gsvs_itemsizes[3]);

	/* Stream 1..3 offsets are running sums of the preceding streams. */
	r600_store_context_reg_seq(cb, R_02892C_SQ_GSVS_RING_OFFSET_1, 3);
	r600_store_value(cb, gsvs_itemsizes[0]);
	r600_store_value(cb, gsvs_itemsizes[0] + gsvs_itemsizes[1]);
	r600_store_value(cb, gsvs_itemsizes[0] + gsvs_itemsizes[1] + gsvs_itemsizes[2]);

	/* FIXME calculate these values somehow ??? */
	r600_store_context_reg_seq(cb, R_028A54_GS_PER_ES, 3);
	r600_store_value(cb, 0x80); /* GS_PER_ES */
	r600_store_value(cb, 0x100); /* ES_PER_GS */
	r600_store_value(cb, 0x2); /* GS_PER_VS */

	r600_store_context_reg(cb, R_028878_SQ_PGM_RESOURCES_GS,
			       S_028878_NUM_GPRS(rshader->bc.ngpr) |
			       S_028878_DX10_CLAMP(1) |
			       S_028878_STACK_SIZE(rshader->bc.nstack));
	r600_store_context_reg(cb, R_028874_SQ_PGM_START_GS,
			       shader->bo->gpu_address >> 8);
	/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
}
3602
 
 
3603
 
 
3604
 
void evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
3605
 
{
3606
 
        struct r600_command_buffer *cb = &shader->command_buffer;
3607
 
        struct r600_shader *rshader = &shader->shader;
3608
 
        unsigned spi_vs_out_id[10] = {};
3609
 
        unsigned i, tmp, nparams = 0;
3610
 
 
3611
 
        for (i = 0; i < rshader->noutput; i++) {
3612
 
                if (rshader->output[i].spi_sid) {
3613
 
                        tmp = rshader->output[i].spi_sid << ((nparams & 3) * 8);
3614
 
                        spi_vs_out_id[nparams / 4] |= tmp;
3615
 
                        nparams++;
3616
 
                }
3617
 
        }
3618
 
 
3619
 
        r600_init_command_buffer(cb, 32);
3620
 
 
3621
 
        r600_store_context_reg_seq(cb, R_02861C_SPI_VS_OUT_ID_0, 10);
3622
 
        for (i = 0; i < 10; i++) {
3623
 
                r600_store_value(cb, spi_vs_out_id[i]);
3624
 
        }
3625
 
 
3626
 
        /* Certain attributes (position, psize, etc.) don't count as params.
3627
 
         * VS is required to export at least one param and r600_shader_from_tgsi()
3628
 
         * takes care of adding a dummy export.
3629
 
         */
3630
 
        if (nparams < 1)
3631
 
                nparams = 1;
3632
 
 
3633
 
        r600_store_context_reg(cb, R_0286C4_SPI_VS_OUT_CONFIG,
3634
 
                               S_0286C4_VS_EXPORT_COUNT(nparams - 1));
3635
 
        r600_store_context_reg(cb, R_028860_SQ_PGM_RESOURCES_VS,
3636
 
                               S_028860_NUM_GPRS(rshader->bc.ngpr) |
3637
 
                               S_028860_DX10_CLAMP(1) |
3638
 
                               S_028860_STACK_SIZE(rshader->bc.nstack));
3639
 
        if (rshader->vs_position_window_space) {
3640
 
                r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL,
3641
 
                        S_028818_VTX_XY_FMT(1) | S_028818_VTX_Z_FMT(1));
3642
 
        } else {
3643
 
                r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL,
3644
 
                        S_028818_VTX_W0_FMT(1) |
3645
 
                        S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) |
3646
 
                        S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
3647
 
                        S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
3648
 
 
3649
 
        }
3650
 
        r600_store_context_reg(cb, R_02885C_SQ_PGM_START_VS,
3651
 
                               shader->bo->gpu_address >> 8);
3652
 
        /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
3653
 
 
3654
 
        shader->pa_cl_vs_out_cntl =
3655
 
                S_02881C_VS_OUT_CCDIST0_VEC_ENA((rshader->cc_dist_mask & 0x0F) != 0) |
3656
 
                S_02881C_VS_OUT_CCDIST1_VEC_ENA((rshader->cc_dist_mask & 0xF0) != 0) |
3657
 
                S_02881C_VS_OUT_MISC_VEC_ENA(rshader->vs_out_misc_write) |
3658
 
                S_02881C_USE_VTX_POINT_SIZE(rshader->vs_out_point_size) |
3659
 
                S_02881C_USE_VTX_EDGE_FLAG(rshader->vs_out_edgeflag) |
3660
 
                S_02881C_USE_VTX_VIEWPORT_INDX(rshader->vs_out_viewport) |
3661
 
                S_02881C_USE_VTX_RENDER_TARGET_INDX(rshader->vs_out_layer);
3662
 
}
3663
 
 
3664
 
void evergreen_update_hs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
3665
 
{
3666
 
        struct r600_command_buffer *cb = &shader->command_buffer;
3667
 
        struct r600_shader *rshader = &shader->shader;
3668
 
 
3669
 
        r600_init_command_buffer(cb, 32);
3670
 
        r600_store_context_reg(cb, R_0288BC_SQ_PGM_RESOURCES_HS,
3671
 
                               S_0288BC_NUM_GPRS(rshader->bc.ngpr) |
3672
 
                               S_0288BC_DX10_CLAMP(1) |
3673
 
                               S_0288BC_STACK_SIZE(rshader->bc.nstack));
3674
 
        r600_store_context_reg(cb, R_0288B8_SQ_PGM_START_HS,
3675
 
                               shader->bo->gpu_address >> 8);
3676
 
}
3677
 
 
3678
 
void evergreen_update_ls_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
3679
 
{
3680
 
        struct r600_command_buffer *cb = &shader->command_buffer;
3681
 
        struct r600_shader *rshader = &shader->shader;
3682
 
 
3683
 
        r600_init_command_buffer(cb, 32);
3684
 
        r600_store_context_reg(cb, R_0288D4_SQ_PGM_RESOURCES_LS,
3685
 
                               S_0288D4_NUM_GPRS(rshader->bc.ngpr) |
3686
 
                               S_0288D4_DX10_CLAMP(1) |
3687
 
                               S_0288D4_STACK_SIZE(rshader->bc.nstack));
3688
 
        r600_store_context_reg(cb, R_0288D0_SQ_PGM_START_LS,
3689
 
                               shader->bo->gpu_address >> 8);
3690
 
}
3691
 
void *evergreen_create_resolve_blend(struct r600_context *rctx)
3692
 
{
3693
 
        struct pipe_blend_state blend;
3694
 
 
3695
 
        memset(&blend, 0, sizeof(blend));
3696
 
        blend.independent_blend_enable = true;
3697
 
        blend.rt[0].colormask = 0xf;
3698
 
        return evergreen_create_blend_state_mode(&rctx->b.b, &blend, V_028808_CB_RESOLVE);
3699
 
}
3700
 
 
3701
 
void *evergreen_create_decompress_blend(struct r600_context *rctx)
3702
 
{
3703
 
        struct pipe_blend_state blend;
3704
 
        unsigned mode = rctx->screen->has_compressed_msaa_texturing ?
3705
 
                        V_028808_CB_FMASK_DECOMPRESS : V_028808_CB_DECOMPRESS;
3706
 
 
3707
 
        memset(&blend, 0, sizeof(blend));
3708
 
        blend.independent_blend_enable = true;
3709
 
        blend.rt[0].colormask = 0xf;
3710
 
        return evergreen_create_blend_state_mode(&rctx->b.b, &blend, mode);
3711
 
}
3712
 
 
3713
 
void *evergreen_create_fastclear_blend(struct r600_context *rctx)
3714
 
{
3715
 
        struct pipe_blend_state blend;
3716
 
        unsigned mode = V_028808_CB_ELIMINATE_FAST_CLEAR;
3717
 
 
3718
 
        memset(&blend, 0, sizeof(blend));
3719
 
        blend.independent_blend_enable = true;
3720
 
        blend.rt[0].colormask = 0xf;
3721
 
        return evergreen_create_blend_state_mode(&rctx->b.b, &blend, mode);
3722
 
}
3723
 
 
3724
 
void *evergreen_create_db_flush_dsa(struct r600_context *rctx)
3725
 
{
3726
 
        struct pipe_depth_stencil_alpha_state dsa = {{{0}}};
3727
 
 
3728
 
        return rctx->b.b.create_depth_stencil_alpha_state(&rctx->b.b, &dsa);
3729
 
}
3730
 
 
3731
 
void evergreen_update_db_shader_control(struct r600_context * rctx)
3732
 
{
3733
 
        bool dual_export;
3734
 
        unsigned db_shader_control;
3735
 
 
3736
 
        if (!rctx->ps_shader) {
3737
 
                return;
3738
 
        }
3739
 
 
3740
 
        dual_export = rctx->framebuffer.export_16bpc &&
3741
 
                      !rctx->ps_shader->current->ps_depth_export;
3742
 
 
3743
 
        db_shader_control = rctx->ps_shader->current->db_shader_control |
3744
 
                            S_02880C_DUAL_EXPORT_ENABLE(dual_export) |
3745
 
                            S_02880C_DB_SOURCE_FORMAT(dual_export ? V_02880C_EXPORT_DB_TWO :
3746
 
                                                                    V_02880C_EXPORT_DB_FULL) |
3747
 
                            S_02880C_ALPHA_TO_MASK_DISABLE(rctx->framebuffer.cb0_is_integer);
3748
 
 
3749
 
        /* When alpha test is enabled we can't trust the hw to make the proper
3750
 
         * decision on the order in which ztest should be run related to fragment
3751
 
         * shader execution.
3752
 
         *
3753
 
         * If alpha test is enabled perform early z rejection (RE_Z) but don't early
3754
 
         * write to the zbuffer. Write to zbuffer is delayed after fragment shader
3755
 
         * execution and thus after alpha test so if discarded by the alpha test
3756
 
         * the z value is not written.
3757
 
         * If ReZ is enabled, and the zfunc/zenable/zwrite values change you can
3758
 
         * get a hang unless you flush the DB in between.  For now just use
3759
 
         * LATE_Z.
3760
 
         */
3761
 
        if (rctx->alphatest_state.sx_alpha_test_control || rctx->ps_shader->info.writes_memory) {
3762
 
                db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
3763
 
        } else {
3764
 
                db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
3765
 
        }
3766
 
 
3767
 
        if (db_shader_control != rctx->db_misc_state.db_shader_control) {
3768
 
                rctx->db_misc_state.db_shader_control = db_shader_control;
3769
 
                r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
3770
 
        }
3771
 
}
3772
 
 
3773
 
/* Emit EG_DMA_COPY_TILED packets copying between one tiled and one linear
 * surface (exactly one side is linear — asserted below).  The transfer is
 * split into multiple 9-dword packets whenever it exceeds
 * EG_DMA_COPY_MAX_SIZE dwords.
 * NOTE(review): the size math below implies `pitch` is in bytes and
 * `copy_height` in block rows — confirm against evergreen_dma_copy(). */
static void evergreen_dma_copy_tile(struct r600_context *rctx,
				struct pipe_resource *dst,
				unsigned dst_level,
				unsigned dst_x,
				unsigned dst_y,
				unsigned dst_z,
				struct pipe_resource *src,
				unsigned src_level,
				unsigned src_x,
				unsigned src_y,
				unsigned src_z,
				unsigned copy_height,
				unsigned pitch,
				unsigned bpp)
{
	struct radeon_cmdbuf *cs = &rctx->b.dma.cs;
	struct r600_texture *rsrc = (struct r600_texture*)src;
	struct r600_texture *rdst = (struct r600_texture*)dst;
	unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size;
	unsigned ncopy, height, cheight, detile, i, x, y, z, src_mode, dst_mode;
	unsigned sub_cmd, bank_h, bank_w, mt_aspect, nbanks, tile_split, non_disp_tiling = 0;
	uint64_t base, addr;	/* base = tiled side, addr = linear side */

	dst_mode = rdst->surface.u.legacy.level[dst_level].mode;
	src_mode = rsrc->surface.u.legacy.level[src_level].mode;
	/* This path only handles tiled<->linear; same-mode copies go through
	 * evergreen_dma_copy_buffer() instead. */
	assert(dst_mode != src_mode);

	/* non_disp_tiling bit needs to be set for depth, stencil, and fmask surfaces */
	if (util_format_has_depth(util_format_description(src->format)))
		non_disp_tiling = 1;

	y = 0;
	sub_cmd = EG_DMA_COPY_TILED;
	lbpp = util_logbase2(bpp);
	/* Pitch/slice maxima are expressed in 8x8-pixel tiles, minus one. */
	pitch_tile_max = ((pitch / bpp) / 8) - 1;
	nbanks = eg_num_banks(rctx->screen->b.info.r600_num_banks);

	if (dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED) {
		/* T2L: tiled source -> linear destination. */
		array_mode = evergreen_array_mode(src_mode);
		slice_tile_max = (rsrc->surface.u.legacy.level[src_level].nblk_x * rsrc->surface.u.legacy.level[src_level].nblk_y) / (8*8);
		slice_tile_max = slice_tile_max ? slice_tile_max - 1 : 0;
		/* linear height must be the same as the slice tile max height, it's ok even
		 * if the linear destination/source have smaller heigh as the size of the
		 * dma packet will be using the copy_height which is always smaller or equal
		 * to the linear height
		 */
		height = u_minify(rsrc->resource.b.b.height0, src_level);
		detile = 1;
		x = src_x;
		y = src_y;
		z = src_z;
		base = (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256;
		addr = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256;
		addr += (uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4 * dst_z;
		addr += dst_y * pitch + dst_x * bpp;
		bank_h = eg_bank_wh(rsrc->surface.u.legacy.bankh);
		bank_w = eg_bank_wh(rsrc->surface.u.legacy.bankw);
		mt_aspect = eg_macro_tile_aspect(rsrc->surface.u.legacy.mtilea);
		tile_split = eg_tile_split(rsrc->surface.u.legacy.tile_split);
		base += rsrc->resource.gpu_address;
		addr += rdst->resource.gpu_address;
	} else {
		/* L2T: linear source -> tiled destination. */
		array_mode = evergreen_array_mode(dst_mode);
		slice_tile_max = (rdst->surface.u.legacy.level[dst_level].nblk_x * rdst->surface.u.legacy.level[dst_level].nblk_y) / (8*8);
		slice_tile_max = slice_tile_max ? slice_tile_max - 1 : 0;
		/* linear height must be the same as the slice tile max height, it's ok even
		 * if the linear destination/source have smaller heigh as the size of the
		 * dma packet will be using the copy_height which is always smaller or equal
		 * to the linear height
		 */
		height = u_minify(rdst->resource.b.b.height0, dst_level);
		detile = 0;
		x = dst_x;
		y = dst_y;
		z = dst_z;
		base = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256;
		addr = (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256;
		addr += (uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4 * src_z;
		addr += src_y * pitch + src_x * bpp;
		bank_h = eg_bank_wh(rdst->surface.u.legacy.bankh);
		bank_w = eg_bank_wh(rdst->surface.u.legacy.bankw);
		mt_aspect = eg_macro_tile_aspect(rdst->surface.u.legacy.mtilea);
		tile_split = eg_tile_split(rdst->surface.u.legacy.tile_split);
		base += rdst->resource.gpu_address;
		addr += rsrc->resource.gpu_address;
	}

	/* Total size in dwords; round the packet count up. */
	size = (copy_height * pitch) / 4;
	ncopy = (size / EG_DMA_COPY_MAX_SIZE) + !!(size % EG_DMA_COPY_MAX_SIZE);
	r600_need_dma_space(&rctx->b, ncopy * 9, &rdst->resource, &rsrc->resource);

	for (i = 0; i < ncopy; i++) {
		/* Clamp this chunk's height so the packet stays within
		 * EG_DMA_COPY_MAX_SIZE dwords. */
		cheight = copy_height;
		if (((cheight * pitch) / 4) > EG_DMA_COPY_MAX_SIZE) {
			cheight = (EG_DMA_COPY_MAX_SIZE * 4) / pitch;
		}
		size = (cheight * pitch) / 4;
		/* emit reloc before writing cs so that cs is always in consistent state */
		radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rsrc->resource,
				      RADEON_USAGE_READ);
		radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rdst->resource,
				      RADEON_USAGE_WRITE);
		/* 9-dword EG_DMA_COPY_TILED packet; dword order is fixed by hw. */
		radeon_emit(cs, DMA_PACKET(DMA_PACKET_COPY, sub_cmd, size));
		radeon_emit(cs, base >> 8);
		radeon_emit(cs, (detile << 31) | (array_mode << 27) |
				(lbpp << 24) | (bank_h << 21) |
				(bank_w << 18) | (mt_aspect << 16));
		radeon_emit(cs, (pitch_tile_max << 0) | ((height - 1) << 16));
		radeon_emit(cs, (slice_tile_max << 0));
		radeon_emit(cs, (x << 0) | (z << 18));
		radeon_emit(cs, (y << 0) | (tile_split << 21) | (nbanks << 25) | (non_disp_tiling << 28));
		radeon_emit(cs, addr & 0xfffffffc);
		radeon_emit(cs, (addr >> 32UL) & 0xff);
		/* Advance both the linear address and the tiled y for the next chunk. */
		copy_height -= cheight;
		addr += cheight * pitch;
		y += cheight;
	}
}
3893
 
 
3894
 
/* pipe_context::resource_copy_region implementation routed through the async
 * DMA ring.  Buffer->buffer copies and same-tiling texture copies go through
 * evergreen_dma_copy_buffer(); tiled<->linear copies use
 * evergreen_dma_copy_tile().  Any case the DMA engine cannot handle falls
 * back to the generic r600_resource_copy_region() (gfx) path. */
static void evergreen_dma_copy(struct pipe_context *ctx,
                               struct pipe_resource *dst,
                               unsigned dst_level,
                               unsigned dstx, unsigned dsty, unsigned dstz,
                               struct pipe_resource *src,
                               unsigned src_level,
                               const struct pipe_box *src_box)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_texture *rsrc = (struct r600_texture*)src;
	struct r600_texture *rdst = (struct r600_texture*)dst;
	unsigned dst_pitch, src_pitch, bpp, dst_mode, src_mode, copy_height;
	unsigned src_w, dst_w;
	unsigned src_x, src_y;
	unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;

	/* No DMA ring available on this context. */
	if (rctx->b.dma.cs.priv == NULL) {
		goto fallback;
	}

	if (rctx->cmd_buf_is_compute) {
		rctx->b.gfx.flush(rctx, PIPE_FLUSH_ASYNC, NULL);
		rctx->cmd_buf_is_compute = false;
	}

	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
		evergreen_dma_copy_buffer(rctx, dst, src, dst_x, src_box->x, src_box->width);
		return;
	}

	/* Only single-layer copies are handled; the blit-prepare step also
	 * decompresses/flushes the source as needed and can reject the copy. */
	if (src_box->depth > 1 ||
	    !r600_prepare_for_dma_blit(&rctx->b, rdst, dst_level, dstx, dsty,
					dstz, rsrc, src_level, src_box))
		goto fallback;

	/* Convert pixel coordinates to block coordinates (compressed formats). */
	src_x = util_format_get_nblocksx(src->format, src_box->x);
	dst_x = util_format_get_nblocksx(src->format, dst_x);
	src_y = util_format_get_nblocksy(src->format, src_box->y);
	dst_y = util_format_get_nblocksy(src->format, dst_y);

	bpp = rdst->surface.bpe;
	dst_pitch = rdst->surface.u.legacy.level[dst_level].nblk_x * rdst->surface.bpe;
	src_pitch = rsrc->surface.u.legacy.level[src_level].nblk_x * rsrc->surface.bpe;
	src_w = u_minify(rsrc->resource.b.b.width0, src_level);
	dst_w = u_minify(rdst->resource.b.b.width0, dst_level);
	copy_height = src_box->height / rsrc->surface.blk_h;

	dst_mode = rdst->surface.u.legacy.level[dst_level].mode;
	src_mode = rsrc->surface.u.legacy.level[src_level].mode;

	if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) {
		/* FIXME evergreen can do partial blit */
		goto fallback;
	}
	/* the x test here are currently useless (because we don't support partial blit)
	 * but keep them around so we don't forget about those
	 */
	if (src_pitch % 8 || src_box->x % 8 || dst_x % 8 || src_box->y % 8 || dst_y % 8) {
		goto fallback;
	}

	/* 128 bpp surfaces require non_disp_tiling for both
	 * tiled and linear buffers on cayman.  However, async
	 * DMA only supports it on the tiled side.  As such
	 * the tile order is backwards after a L2T/T2L packet.
	 */
	if ((rctx->b.chip_class == CAYMAN) &&
	    (src_mode != dst_mode) &&
	    (util_format_get_blocksize(src->format) >= 16)) {
		goto fallback;
	}

	if (src_mode == dst_mode) {
		uint64_t dst_offset, src_offset;
		/* simple dma blit would do NOTE code here assume :
		 *   src_box.x/y == 0
		 *   dst_x/y == 0
		 *   dst_pitch == src_pitch
		 */
		src_offset= (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256;
		src_offset += (uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4 * src_box->z;
		src_offset += src_y * src_pitch + src_x * bpp;
		dst_offset = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256;
		dst_offset += (uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4 * dst_z;
		dst_offset += dst_y * dst_pitch + dst_x * bpp;
		evergreen_dma_copy_buffer(rctx, dst, src, dst_offset, src_offset,
					src_box->height * src_pitch);
	} else {
		/* Tiled <-> linear copy. */
		evergreen_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, dst_z,
					src, src_level, src_x, src_y, src_box->z,
					copy_height, dst_pitch, bpp);
	}
	return;

fallback:
	r600_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
				  src, src_level, src_box);
}
3992
 
 
3993
 
static void evergreen_set_tess_state(struct pipe_context *ctx,
3994
 
                                     const float default_outer_level[4],
3995
 
                                     const float default_inner_level[2])
3996
 
{
3997
 
        struct r600_context *rctx = (struct r600_context *)ctx;
3998
 
 
3999
 
        memcpy(rctx->tess_state, default_outer_level, sizeof(float) * 4);
4000
 
        memcpy(rctx->tess_state+4, default_inner_level, sizeof(float) * 2);
4001
 
        rctx->driver_consts[PIPE_SHADER_TESS_CTRL].tcs_default_levels_dirty = true;
4002
 
}
4003
 
 
4004
 
static void evergreen_set_patch_vertices(struct pipe_context *ctx, uint8_t patch_vertices)
4005
 
{
4006
 
        struct r600_context *rctx = (struct r600_context *)ctx;
4007
 
 
4008
 
        rctx->patch_vertices = patch_vertices;
4009
 
}
4010
 
 
4011
 
static void evergreen_setup_immed_buffer(struct r600_context *rctx,
4012
 
                                         struct r600_image_view *rview,
4013
 
                                         enum pipe_format pformat)
4014
 
{
4015
 
        struct r600_screen *rscreen = (struct r600_screen *)rctx->b.b.screen;
4016
 
        uint32_t immed_size = rscreen->b.info.max_se * 256 * 64 * util_format_get_blocksize(pformat);
4017
 
        struct eg_buf_res_params buf_params;
4018
 
        bool skip_reloc = false;
4019
 
        struct r600_resource *resource = (struct r600_resource *)rview->base.resource;
4020
 
        if (!resource->immed_buffer) {
4021
 
                eg_resource_alloc_immed(&rscreen->b, resource, immed_size);
4022
 
        }
4023
 
 
4024
 
        memset(&buf_params, 0, sizeof(buf_params));
4025
 
        buf_params.pipe_format = pformat;
4026
 
        buf_params.size = resource->immed_buffer->b.b.width0;
4027
 
        buf_params.swizzle[0] = PIPE_SWIZZLE_X;
4028
 
        buf_params.swizzle[1] = PIPE_SWIZZLE_Y;
4029
 
        buf_params.swizzle[2] = PIPE_SWIZZLE_Z;
4030
 
        buf_params.swizzle[3] = PIPE_SWIZZLE_W;
4031
 
        buf_params.uncached = 1;
4032
 
        evergreen_fill_buffer_resource_words(rctx, &resource->immed_buffer->b.b,
4033
 
                                             &buf_params, &skip_reloc,
4034
 
                                             rview->immed_resource_words);
4035
 
}
4036
 
 
4037
 
static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
4038
 
                                            unsigned start_slot,
4039
 
                                            unsigned count,
4040
 
                                            const struct pipe_shader_buffer *buffers)
4041
 
{
4042
 
        struct r600_context *rctx = (struct r600_context *)ctx;
4043
 
        struct r600_atomic_buffer_state *astate;
4044
 
        unsigned i, idx;
4045
 
 
4046
 
        astate = &rctx->atomic_buffer_state;
4047
 
 
4048
 
        /* we'd probably like to expand this to 8 later so put the logic in */
4049
 
        for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) {
4050
 
                const struct pipe_shader_buffer *buf;
4051
 
                struct pipe_shader_buffer *abuf;
4052
 
 
4053
 
                abuf = &astate->buffer[i];
4054
 
 
4055
 
                if (!buffers || !buffers[idx].buffer) {
4056
 
                        pipe_resource_reference(&abuf->buffer, NULL);
4057
 
                        continue;
4058
 
                }
4059
 
                buf = &buffers[idx];
4060
 
 
4061
 
                pipe_resource_reference(&abuf->buffer, buf->buffer);
4062
 
                abuf->buffer_offset = buf->buffer_offset;
4063
 
                abuf->buffer_size = buf->buffer_size;
4064
 
        }
4065
 
}
4066
 
 
4067
 
static void evergreen_set_shader_buffers(struct pipe_context *ctx,
4068
 
                                         enum pipe_shader_type shader, unsigned start_slot,
4069
 
                                         unsigned count,
4070
 
                                         const struct pipe_shader_buffer *buffers,
4071
 
                                         unsigned writable_bitmask)
4072
 
{
4073
 
        struct r600_context *rctx = (struct r600_context *)ctx;
4074
 
        struct r600_image_state *istate = NULL;
4075
 
        struct r600_image_view *rview;
4076
 
        struct r600_tex_color_info color;
4077
 
        struct eg_buf_res_params buf_params;
4078
 
        struct r600_resource *resource;
4079
 
        unsigned i, idx;
4080
 
        unsigned old_mask;
4081
 
 
4082
 
        if (shader != PIPE_SHADER_FRAGMENT &&
4083
 
            shader != PIPE_SHADER_COMPUTE && count == 0)
4084
 
                return;
4085
 
 
4086
 
        if (shader == PIPE_SHADER_FRAGMENT)
4087
 
                istate = &rctx->fragment_buffers;
4088
 
        else if (shader == PIPE_SHADER_COMPUTE)
4089
 
                istate = &rctx->compute_buffers;
4090
 
 
4091
 
        old_mask = istate->enabled_mask;
4092
 
        for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) {
4093
 
                const struct pipe_shader_buffer *buf;
4094
 
                unsigned res_type;
4095
 
 
4096
 
                rview = &istate->views[i];
4097
 
 
4098
 
                if (!buffers || !buffers[idx].buffer) {
4099
 
                        pipe_resource_reference((struct pipe_resource **)&rview->base.resource, NULL);
4100
 
                        istate->enabled_mask &= ~(1 << i);
4101
 
                        continue;
4102
 
                }
4103
 
 
4104
 
                buf = &buffers[idx];
4105
 
                pipe_resource_reference((struct pipe_resource **)&rview->base.resource, buf->buffer);
4106
 
 
4107
 
                resource = (struct r600_resource *)rview->base.resource;
4108
 
 
4109
 
                evergreen_setup_immed_buffer(rctx, rview, PIPE_FORMAT_R32_UINT);
4110
 
 
4111
 
                color.offset = 0;
4112
 
                color.view = 0;
4113
 
                evergreen_set_color_surface_buffer(rctx, resource,
4114
 
                                                   PIPE_FORMAT_R32_UINT,
4115
 
                                                   buf->buffer_offset,
4116
 
                                                   buf->buffer_offset + buf->buffer_size,
4117
 
                                                   &color);
4118
 
 
4119
 
                res_type = V_028C70_BUFFER;
4120
 
 
4121
 
                rview->cb_color_base = color.offset;
4122
 
                rview->cb_color_dim = color.dim;
4123
 
                rview->cb_color_info = color.info |
4124
 
                        S_028C70_RAT(1) |
4125
 
                        S_028C70_RESOURCE_TYPE(res_type);
4126
 
                rview->cb_color_pitch = color.pitch;
4127
 
                rview->cb_color_slice = color.slice;
4128
 
                rview->cb_color_view = color.view;
4129
 
                rview->cb_color_attrib = color.attrib;
4130
 
                rview->cb_color_fmask = color.fmask;
4131
 
                rview->cb_color_fmask_slice = color.fmask_slice;
4132
 
 
4133
 
                memset(&buf_params, 0, sizeof(buf_params));
4134
 
                buf_params.pipe_format = PIPE_FORMAT_R32_UINT;
4135
 
                buf_params.offset = buf->buffer_offset;
4136
 
                buf_params.size = buf->buffer_size;
4137
 
                buf_params.swizzle[0] = PIPE_SWIZZLE_X;
4138
 
                buf_params.swizzle[1] = PIPE_SWIZZLE_Y;
4139
 
                buf_params.swizzle[2] = PIPE_SWIZZLE_Z;
4140
 
                buf_params.swizzle[3] = PIPE_SWIZZLE_W;
4141
 
                buf_params.force_swizzle = true;
4142
 
                buf_params.uncached = 1;
4143
 
                buf_params.size_in_bytes = true;
4144
 
                evergreen_fill_buffer_resource_words(rctx, &resource->b.b,
4145
 
                                                     &buf_params,
4146
 
                                                     &rview->skip_mip_address_reloc,
4147
 
                                                     rview->resource_words);
4148
 
 
4149
 
                istate->enabled_mask |= (1 << i);
4150
 
        }
4151
 
 
4152
 
        istate->atom.num_dw = util_bitcount(istate->enabled_mask) * 46;
4153
 
 
4154
 
        if (old_mask != istate->enabled_mask)
4155
 
                r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);
4156
 
 
4157
 
        /* construct the target mask */
4158
 
        if (rctx->cb_misc_state.buffer_rat_enabled_mask != istate->enabled_mask) {
4159
 
                rctx->cb_misc_state.buffer_rat_enabled_mask = istate->enabled_mask;
4160
 
                r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
4161
 
        }
4162
 
 
4163
 
        if (shader == PIPE_SHADER_FRAGMENT)
4164
 
                r600_mark_atom_dirty(rctx, &istate->atom);
4165
 
}
4166
 
 
4167
 
static void evergreen_set_shader_images(struct pipe_context *ctx,
4168
 
                                        enum pipe_shader_type shader, unsigned start_slot,
4169
 
                                        unsigned count, unsigned unbind_num_trailing_slots,
4170
 
                                        const struct pipe_image_view *images)
4171
 
{
4172
 
        struct r600_context *rctx = (struct r600_context *)ctx;
4173
 
        unsigned i;
4174
 
        struct r600_image_view *rview;
4175
 
        struct pipe_resource *image;
4176
 
        struct r600_resource *resource;
4177
 
        struct r600_tex_color_info color;
4178
 
        struct eg_buf_res_params buf_params;
4179
 
        struct eg_tex_res_params tex_params;
4180
 
        unsigned old_mask;
4181
 
        struct r600_image_state *istate = NULL;
4182
 
        int idx;
4183
 
        if (shader != PIPE_SHADER_FRAGMENT && shader != PIPE_SHADER_COMPUTE)
4184
 
                return;
4185
 
        if (!count && !unbind_num_trailing_slots)
4186
 
                return;
4187
 
 
4188
 
        if (shader == PIPE_SHADER_FRAGMENT)
4189
 
                istate = &rctx->fragment_images;
4190
 
        else if (shader == PIPE_SHADER_COMPUTE)
4191
 
                istate = &rctx->compute_images;
4192
 
 
4193
 
        assert (shader == PIPE_SHADER_FRAGMENT || shader == PIPE_SHADER_COMPUTE);
4194
 
 
4195
 
        old_mask = istate->enabled_mask;
4196
 
        for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) {
4197
 
                unsigned res_type;
4198
 
                const struct pipe_image_view *iview;
4199
 
                rview = &istate->views[i];
4200
 
 
4201
 
                if (!images || !images[idx].resource) {
4202
 
                        pipe_resource_reference((struct pipe_resource **)&rview->base.resource, NULL);
4203
 
                        istate->enabled_mask &= ~(1 << i);
4204
 
                        istate->compressed_colortex_mask &= ~(1 << i);
4205
 
                        istate->compressed_depthtex_mask &= ~(1 << i);
4206
 
                        continue;
4207
 
                }
4208
 
 
4209
 
                iview = &images[idx];
4210
 
                image = iview->resource;
4211
 
                resource = (struct r600_resource *)image;
4212
 
 
4213
 
                r600_context_add_resource_size(ctx, image);
4214
 
 
4215
 
                rview->base = *iview;
4216
 
                rview->base.resource = NULL;
4217
 
                pipe_resource_reference((struct pipe_resource **)&rview->base.resource, image);
4218
 
 
4219
 
                evergreen_setup_immed_buffer(rctx, rview, iview->format);
4220
 
 
4221
 
                bool is_buffer = image->target == PIPE_BUFFER;
4222
 
                struct r600_texture *rtex = (struct r600_texture *)image;
4223
 
                if (!is_buffer & rtex->db_compatible)
4224
 
                        istate->compressed_depthtex_mask |= 1 << i;
4225
 
                else
4226
 
                        istate->compressed_depthtex_mask &= ~(1 << i);
4227
 
 
4228
 
                if (!is_buffer && rtex->cmask.size)
4229
 
                        istate->compressed_colortex_mask |= 1 << i;
4230
 
                else
4231
 
                        istate->compressed_colortex_mask &= ~(1 << i);
4232
 
                if (!is_buffer) {
4233
 
 
4234
 
                        evergreen_set_color_surface_common(rctx, rtex,
4235
 
                                                           iview->u.tex.level,
4236
 
                                                           iview->u.tex.first_layer,
4237
 
                                                           iview->u.tex.last_layer,
4238
 
                                                           iview->format,
4239
 
                                                           &color);
4240
 
                        color.dim = S_028C78_WIDTH_MAX(u_minify(image->width0, iview->u.tex.level) - 1) |
4241
 
                          S_028C78_HEIGHT_MAX(u_minify(image->height0, iview->u.tex.level) - 1);
4242
 
                } else {
4243
 
                        color.offset = 0;
4244
 
                        color.view = 0;
4245
 
                        evergreen_set_color_surface_buffer(rctx, resource,
4246
 
                                                           iview->format,
4247
 
                                                           iview->u.buf.offset,
4248
 
                                                           iview->u.buf.size,
4249
 
                                                           &color);
4250
 
                }
4251
 
 
4252
 
                switch (image->target) {
4253
 
                case PIPE_BUFFER:
4254
 
                        res_type = V_028C70_BUFFER;
4255
 
                        break;
4256
 
                case PIPE_TEXTURE_1D:
4257
 
                        res_type = V_028C70_TEXTURE1D;
4258
 
                        break;
4259
 
                case PIPE_TEXTURE_1D_ARRAY:
4260
 
                        res_type = V_028C70_TEXTURE1DARRAY;
4261
 
                        break;
4262
 
                case PIPE_TEXTURE_2D:
4263
 
                case PIPE_TEXTURE_RECT:
4264
 
                        res_type = V_028C70_TEXTURE2D;
4265
 
                        break;
4266
 
                case PIPE_TEXTURE_3D:
4267
 
                        res_type = V_028C70_TEXTURE3D;
4268
 
                        break;
4269
 
                case PIPE_TEXTURE_2D_ARRAY:
4270
 
                case PIPE_TEXTURE_CUBE:
4271
 
                case PIPE_TEXTURE_CUBE_ARRAY:
4272
 
                        res_type = V_028C70_TEXTURE2DARRAY;
4273
 
                        break;
4274
 
                default:
4275
 
                        assert(0);
4276
 
                        res_type = 0;
4277
 
                        break;
4278
 
                }
4279
 
 
4280
 
                rview->cb_color_base = color.offset;
4281
 
                rview->cb_color_dim = color.dim;
4282
 
                rview->cb_color_info = color.info |
4283
 
                        S_028C70_RAT(1) |
4284
 
                        S_028C70_RESOURCE_TYPE(res_type);
4285
 
                rview->cb_color_pitch = color.pitch;
4286
 
                rview->cb_color_slice = color.slice;
4287
 
                rview->cb_color_view = color.view;
4288
 
                rview->cb_color_attrib = color.attrib;
4289
 
                rview->cb_color_fmask = color.fmask;
4290
 
                rview->cb_color_fmask_slice = color.fmask_slice;
4291
 
 
4292
 
                if (image->target != PIPE_BUFFER) {
4293
 
                        memset(&tex_params, 0, sizeof(tex_params));
4294
 
                        tex_params.pipe_format = iview->format;
4295
 
                        tex_params.force_level = 0;
4296
 
                        tex_params.width0 = image->width0;
4297
 
                        tex_params.height0 = image->height0;
4298
 
                        tex_params.first_level = iview->u.tex.level;
4299
 
                        tex_params.last_level = iview->u.tex.level;
4300
 
                        tex_params.first_layer = iview->u.tex.first_layer;
4301
 
                        tex_params.last_layer = iview->u.tex.last_layer;
4302
 
                        tex_params.target = image->target;
4303
 
                        tex_params.swizzle[0] = PIPE_SWIZZLE_X;
4304
 
                        tex_params.swizzle[1] = PIPE_SWIZZLE_Y;
4305
 
                        tex_params.swizzle[2] = PIPE_SWIZZLE_Z;
4306
 
                        tex_params.swizzle[3] = PIPE_SWIZZLE_W;
4307
 
                        evergreen_fill_tex_resource_words(rctx, &resource->b.b, &tex_params,
4308
 
                                                          &rview->skip_mip_address_reloc,
4309
 
                                                          rview->resource_words);
4310
 
 
4311
 
                } else {
4312
 
                        memset(&buf_params, 0, sizeof(buf_params));
4313
 
                        buf_params.pipe_format = iview->format;
4314
 
                        buf_params.size = iview->u.buf.size;
4315
 
                        buf_params.offset = iview->u.buf.offset;
4316
 
                        buf_params.swizzle[0] = PIPE_SWIZZLE_X;
4317
 
                        buf_params.swizzle[1] = PIPE_SWIZZLE_Y;
4318
 
                        buf_params.swizzle[2] = PIPE_SWIZZLE_Z;
4319
 
                        buf_params.swizzle[3] = PIPE_SWIZZLE_W;
4320
 
                        evergreen_fill_buffer_resource_words(rctx, &resource->b.b,
4321
 
                                                             &buf_params,
4322
 
                                                             &rview->skip_mip_address_reloc,
4323
 
                                                             rview->resource_words);
4324
 
                }
4325
 
                istate->enabled_mask |= (1 << i);
4326
 
        }
4327
 
 
4328
 
        for (i = start_slot + count, idx = 0;
4329
 
             i < start_slot + count + unbind_num_trailing_slots; i++, idx++) {
4330
 
                rview = &istate->views[i];
4331
 
 
4332
 
                pipe_resource_reference((struct pipe_resource **)&rview->base.resource, NULL);
4333
 
                istate->enabled_mask &= ~(1 << i);
4334
 
                istate->compressed_colortex_mask &= ~(1 << i);
4335
 
                istate->compressed_depthtex_mask &= ~(1 << i);
4336
 
        }
4337
 
 
4338
 
        istate->atom.num_dw = util_bitcount(istate->enabled_mask) * 46;
4339
 
        istate->dirty_buffer_constants = TRUE;
4340
 
        rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
4341
 
        rctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB |
4342
 
                R600_CONTEXT_FLUSH_AND_INV_CB_META;
4343
 
 
4344
 
        if (old_mask != istate->enabled_mask)
4345
 
                r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);
4346
 
 
4347
 
        if (rctx->cb_misc_state.image_rat_enabled_mask != istate->enabled_mask) {
4348
 
                rctx->cb_misc_state.image_rat_enabled_mask = istate->enabled_mask;
4349
 
                r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
4350
 
        }
4351
 
 
4352
 
        if (shader == PIPE_SHADER_FRAGMENT)
4353
 
                r600_mark_atom_dirty(rctx, &istate->atom);
4354
 
}
4355
 
 
4356
 
static void evergreen_get_pipe_constant_buffer(struct r600_context *rctx,
4357
 
                                               enum pipe_shader_type shader, uint slot,
4358
 
                                               struct pipe_constant_buffer *cbuf)
4359
 
{
4360
 
        struct r600_constbuf_state *state = &rctx->constbuf_state[shader];
4361
 
        struct pipe_constant_buffer *cb;
4362
 
        cbuf->user_buffer = NULL;
4363
 
 
4364
 
        cb = &state->cb[slot];
4365
 
 
4366
 
        cbuf->buffer_size = cb->buffer_size;
4367
 
        pipe_resource_reference(&cbuf->buffer, cb->buffer);
4368
 
}
4369
 
 
4370
 
static void evergreen_get_shader_buffers(struct r600_context *rctx,
4371
 
                                         enum pipe_shader_type shader,
4372
 
                                         uint start_slot, uint count,
4373
 
                                         struct pipe_shader_buffer *sbuf)
4374
 
{
4375
 
        assert(shader == PIPE_SHADER_COMPUTE);
4376
 
        int idx, i;
4377
 
        struct r600_image_state *istate = &rctx->compute_buffers;
4378
 
        struct r600_image_view *rview;
4379
 
 
4380
 
        for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) {
4381
 
 
4382
 
                rview = &istate->views[i];
4383
 
 
4384
 
                pipe_resource_reference(&sbuf[idx].buffer, rview->base.resource);
4385
 
                if (rview->base.resource) {
4386
 
                        uint64_t rview_va = ((struct r600_resource *)rview->base.resource)->gpu_address;
4387
 
 
4388
 
                        uint64_t prog_va = rview->resource_words[0];
4389
 
 
4390
 
                        prog_va += ((uint64_t)G_030008_BASE_ADDRESS_HI(rview->resource_words[2])) << 32;
4391
 
                        prog_va -= rview_va;
4392
 
 
4393
 
                        sbuf[idx].buffer_offset = prog_va & 0xffffffff;
4394
 
                        sbuf[idx].buffer_size = rview->resource_words[1] + 1;;
4395
 
                } else {
4396
 
                        sbuf[idx].buffer_offset = 0;
4397
 
                        sbuf[idx].buffer_size = 0;
4398
 
                }
4399
 
        }
4400
 
}
4401
 
 
4402
 
/* Snapshot the compute state that query-buffer-object resolves clobber:
 * the bound compute shader, constant buffer 0, and SSBO slots 0-2.
 * The saved state is restored by the caller after the internal resolve
 * shader has run (rctx->b.save_qbo_state hook). */
static void evergreen_save_qbo_state(struct pipe_context *ctx, struct r600_qbo_state *st)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	st->saved_compute = rctx->cs_shader_state.shader;

	/* save constant buffer 0 */
	evergreen_get_pipe_constant_buffer(rctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
	/* save ssbo slots 0-2 (count of 3 matches the resolve shader's bindings) */
	evergreen_get_shader_buffers(rctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
}
4412
 
 
4413
 
 
4414
 
/* One-time context setup: registers every state atom (in emission order)
 * and installs the Evergreen/Cayman pipe_context entry points.
 * Atom ids start at 1 and are assigned sequentially below. */
void evergreen_init_state_functions(struct r600_context *rctx)
{
	unsigned id = 1;
	unsigned i;
	/* !!!
	 *  To avoid GPU lockup registers must be emitted in a specific order
	 * (no kidding ...). The order below is important and has been
	 * partially inferred from analyzing fglrx command stream.
	 *
	 * Don't reorder atoms without carefully checking the effect (GPU lockup
	 * or piglit regression).
	 * !!!
	 */
	/* The config_state atom (dynamic GPR management) only exists on
	 * EVERGREEN; the Cayman path below never registers it. */
	if (rctx->b.chip_class == EVERGREEN) {
		r600_init_atom(rctx, &rctx->config_state.atom, id++, evergreen_emit_config_state, 11);
		rctx->config_state.dyn_gpr_enabled = true;
	}
	r600_init_atom(rctx, &rctx->framebuffer.atom, id++, evergreen_emit_framebuffer_state, 0);
	r600_init_atom(rctx, &rctx->fragment_images.atom, id++, evergreen_emit_fragment_image_state, 0);
	r600_init_atom(rctx, &rctx->compute_images.atom, id++, evergreen_emit_compute_image_state, 0);
	r600_init_atom(rctx, &rctx->fragment_buffers.atom, id++, evergreen_emit_fragment_buffer_state, 0);
	r600_init_atom(rctx, &rctx->compute_buffers.atom, id++, evergreen_emit_compute_buffer_state, 0);
	/* shader const */
	r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX].atom, id++, evergreen_emit_vs_constant_buffers, 0);
	r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY].atom, id++, evergreen_emit_gs_constant_buffers, 0);
	r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT].atom, id++, evergreen_emit_ps_constant_buffers, 0);
	r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_TESS_CTRL].atom, id++, evergreen_emit_tcs_constant_buffers, 0);
	r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_TESS_EVAL].atom, id++, evergreen_emit_tes_constant_buffers, 0);
	r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_COMPUTE].atom, id++, evergreen_emit_cs_constant_buffers, 0);
	/* shader program */
	r600_init_atom(rctx, &rctx->cs_shader_state.atom, id++, evergreen_emit_cs_shader, 0);
	/* sampler */
	r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].states.atom, id++, evergreen_emit_vs_sampler_states, 0);
	r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].states.atom, id++, evergreen_emit_gs_sampler_states, 0);
	r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_TESS_CTRL].states.atom, id++, evergreen_emit_tcs_sampler_states, 0);
	r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_TESS_EVAL].states.atom, id++, evergreen_emit_tes_sampler_states, 0);
	r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].states.atom, id++, evergreen_emit_ps_sampler_states, 0);
	r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].states.atom, id++, evergreen_emit_cs_sampler_states, 0);
	/* resources */
	r600_init_atom(rctx, &rctx->vertex_buffer_state.atom, id++, evergreen_fs_emit_vertex_buffers, 0);
	r600_init_atom(rctx, &rctx->cs_vertex_buffer_state.atom, id++, evergreen_cs_emit_vertex_buffers, 0);
	r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views.atom, id++, evergreen_emit_vs_sampler_views, 0);
	r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views.atom, id++, evergreen_emit_gs_sampler_views, 0);
	r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_TESS_CTRL].views.atom, id++, evergreen_emit_tcs_sampler_views, 0);
	r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_TESS_EVAL].views.atom, id++, evergreen_emit_tes_sampler_views, 0);
	r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views.atom, id++, evergreen_emit_ps_sampler_views, 0);
	r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views.atom, id++, evergreen_emit_cs_sampler_views, 0);

	r600_init_atom(rctx, &rctx->vgt_state.atom, id++, r600_emit_vgt_state, 10);

	/* Cayman's sample-mask register layout needs one extra dword. */
	if (rctx->b.chip_class == EVERGREEN) {
		r600_init_atom(rctx, &rctx->sample_mask.atom, id++, evergreen_emit_sample_mask, 3);
	} else {
		r600_init_atom(rctx, &rctx->sample_mask.atom, id++, cayman_emit_sample_mask, 4);
	}
	rctx->sample_mask.sample_mask = ~0;

	r600_init_atom(rctx, &rctx->alphatest_state.atom, id++, r600_emit_alphatest_state, 6);
	r600_init_atom(rctx, &rctx->blend_color.atom, id++, r600_emit_blend_color, 6);
	r600_init_atom(rctx, &rctx->blend_state.atom, id++, r600_emit_cso_state, 0);
	r600_init_atom(rctx, &rctx->cb_misc_state.atom, id++, evergreen_emit_cb_misc_state, 4);
	r600_init_atom(rctx, &rctx->clip_misc_state.atom, id++, r600_emit_clip_misc_state, 9);
	r600_init_atom(rctx, &rctx->clip_state.atom, id++, evergreen_emit_clip_state, 26);
	r600_init_atom(rctx, &rctx->db_misc_state.atom, id++, evergreen_emit_db_misc_state, 10);
	r600_init_atom(rctx, &rctx->db_state.atom, id++, evergreen_emit_db_state, 14);
	r600_init_atom(rctx, &rctx->dsa_state.atom, id++, r600_emit_cso_state, 0);
	r600_init_atom(rctx, &rctx->poly_offset_state.atom, id++, evergreen_emit_polygon_offset, 9);
	r600_init_atom(rctx, &rctx->rasterizer_state.atom, id++, r600_emit_cso_state, 0);
	r600_add_atom(rctx, &rctx->b.scissors.atom, id++);
	r600_add_atom(rctx, &rctx->b.viewports.atom, id++);
	r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
	r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5);
	r600_add_atom(rctx, &rctx->b.render_cond_atom, id++);
	r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++);
	r600_add_atom(rctx, &rctx->b.streamout.enable_atom, id++);
	for (i = 0; i < EG_NUM_HW_STAGES; i++)
		r600_init_atom(rctx, &rctx->hw_shader_stages[i].atom, id++, r600_emit_shader, 0);
	r600_init_atom(rctx, &rctx->shader_stages.atom, id++, evergreen_emit_shader_stages, 15);
	r600_init_atom(rctx, &rctx->gs_rings.atom, id++, evergreen_emit_gs_rings, 26);

	/* pipe_context entry points */
	rctx->b.b.create_blend_state = evergreen_create_blend_state;
	rctx->b.b.create_depth_stencil_alpha_state = evergreen_create_dsa_state;
	rctx->b.b.create_rasterizer_state = evergreen_create_rs_state;
	rctx->b.b.create_sampler_state = evergreen_create_sampler_state;
	rctx->b.b.create_sampler_view = evergreen_create_sampler_view;
	rctx->b.b.set_framebuffer_state = evergreen_set_framebuffer_state;
	rctx->b.b.set_polygon_stipple = evergreen_set_polygon_stipple;
	rctx->b.b.set_min_samples = evergreen_set_min_samples;
	rctx->b.b.set_tess_state = evergreen_set_tess_state;
	rctx->b.b.set_patch_vertices = evergreen_set_patch_vertices;
	rctx->b.b.set_hw_atomic_buffers = evergreen_set_hw_atomic_buffers;
	rctx->b.b.set_shader_images = evergreen_set_shader_images;
	rctx->b.b.set_shader_buffers = evergreen_set_shader_buffers;
	if (rctx->b.chip_class == EVERGREEN)
		rctx->b.b.get_sample_position = evergreen_get_sample_position;
	else
		rctx->b.b.get_sample_position = cayman_get_sample_position;
	rctx->b.dma_copy = evergreen_dma_copy;
	rctx->b.save_qbo_state = evergreen_save_qbo_state;

	evergreen_init_compute_state_functions(rctx);
}
4516
 
 
4517
 
/**
 * This calculates the LDS size for tessellation shaders (VS, TCS, TES).
 *
 * The information about LDS and other non-compile-time parameters is then
 * written to the const buffer.
 *
 * The const buffer contains -
 * uint32_t input_patch_size
 * uint32_t input_vertex_size
 * uint32_t num_tcs_input_cp
 * uint32_t num_tcs_output_cp;
 * uint32_t output_patch_size
 * uint32_t output_vertex_size
 * uint32_t output_patch0_offset
 * uint32_t perpatch_output_offset
 * and the same constbuf is bound to LS/HS/VS(ES).
 */
void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe_draw_info *info, unsigned *num_patches)
{
	struct pipe_constant_buffer constbuf = {0};
	/* With no TCS bound, the TES selector supplies the fixed-function
	 * pass-through TCS parameters. */
	struct r600_pipe_shader_selector *tcs = rctx->tcs_shader ? rctx->tcs_shader : rctx->tes_shader;
	struct r600_pipe_shader_selector *ls = rctx->vs_shader;
	unsigned num_tcs_input_cp = rctx->patch_vertices;
	unsigned num_tcs_outputs;
	unsigned num_tcs_output_cp;
	unsigned num_tcs_patch_outputs;
	unsigned num_tcs_inputs;
	unsigned input_vertex_size, output_vertex_size;
	unsigned input_patch_size, pervertex_output_patch_size, output_patch_size;
	unsigned output_patch0_offset, perpatch_output_offset, lds_size;
	uint32_t values[8];
	unsigned num_waves;
	unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes;
	unsigned wave_divisor = (16 * num_pipes);

	*num_patches = 1;

	/* Tessellation inactive: release the LDS allocation and unbind the
	 * LDS-info constbuf from all three stages. */
	if (!rctx->tes_shader) {
		rctx->lds_alloc = 0;
		rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
					      R600_LDS_INFO_CONST_BUFFER, false, NULL);
		rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_CTRL,
					      R600_LDS_INFO_CONST_BUFFER, false, NULL);
		rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL,
					      R600_LDS_INFO_CONST_BUFFER, false, NULL);
		return;
	}

	/* Nothing relevant changed since the last draw: keep existing state. */
	if (rctx->lds_alloc != 0 &&
	    rctx->last_ls == ls &&
	    rctx->last_num_tcs_input_cp == num_tcs_input_cp &&
	    rctx->last_tcs == tcs)
		return;

	num_tcs_inputs = util_last_bit64(ls->lds_outputs_written_mask);

	if (rctx->tcs_shader) {
		num_tcs_outputs = util_last_bit64(tcs->lds_outputs_written_mask);
		num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
		num_tcs_patch_outputs = util_last_bit64(tcs->lds_patch_outputs_written_mask);
	} else {
		/* Pass-through TCS: outputs mirror the LS outputs. */
		num_tcs_outputs = num_tcs_inputs;
		num_tcs_output_cp = num_tcs_input_cp;
		num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
	}

	/* size in bytes (each output slot is one vec4 = 16 bytes) */
	input_vertex_size = num_tcs_inputs * 16;
	output_vertex_size = num_tcs_outputs * 16;

	input_patch_size = num_tcs_input_cp * input_vertex_size;

	pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
	output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;

	/* Without a real TCS the input patch is not stored in LDS, so
	 * outputs start at offset 0. */
	output_patch0_offset = rctx->tcs_shader ? input_patch_size * *num_patches : 0;
	perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;

	lds_size = output_patch0_offset + output_patch_size * *num_patches;

	values[0] = input_patch_size;
	values[1] = input_vertex_size;
	values[2] = num_tcs_input_cp;
	values[3] = num_tcs_output_cp;

	values[4] = output_patch_size;
	values[5] = output_vertex_size;
	values[6] = output_patch0_offset;
	values[7] = perpatch_output_offset;

	/* docs say HS_NUM_WAVES - CEIL((LS_HS_CONFIG.NUM_PATCHES *
	   LS_HS_CONFIG.HS_NUM_OUTPUT_CP) / (NUM_GOOD_PIPES * 16)) */
	num_waves = ceilf((float)(*num_patches * num_tcs_output_cp) / (float)wave_divisor);

	/* Packed SQ_LDS_ALLOC value: LDS size in the low bits, HS wave
	 * count at bit 14 — consumed by evergreen_set_lds_alloc(). */
	rctx->lds_alloc = (lds_size | (num_waves << 14));

	rctx->last_ls = ls;
	rctx->last_tcs = tcs;
	rctx->last_num_tcs_input_cp = num_tcs_input_cp;

	constbuf.user_buffer = values;
	constbuf.buffer_size = 8 * 4;

	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
				      R600_LDS_INFO_CONST_BUFFER, false, &constbuf);
	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_CTRL,
				      R600_LDS_INFO_CONST_BUFFER, false, &constbuf);
	/* Last bind passes take_ownership = true for the shared user buffer. */
	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL,
				      R600_LDS_INFO_CONST_BUFFER, true, &constbuf);
}
4627
 
 
4628
 
uint32_t evergreen_get_ls_hs_config(struct r600_context *rctx,
4629
 
                                    const struct pipe_draw_info *info,
4630
 
                                    unsigned num_patches)
4631
 
{
4632
 
        unsigned num_output_cp;
4633
 
 
4634
 
        if (!rctx->tes_shader)
4635
 
                return 0;
4636
 
 
4637
 
        num_output_cp = rctx->tcs_shader ?
4638
 
                rctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
4639
 
                rctx->patch_vertices;
4640
 
 
4641
 
        return S_028B58_NUM_PATCHES(num_patches) |
4642
 
                S_028B58_HS_NUM_INPUT_CP(rctx->patch_vertices) |
4643
 
                S_028B58_HS_NUM_OUTPUT_CP(num_output_cp);
4644
 
}
4645
 
 
4646
 
/* Emit the VGT_LS_HS_CONFIG register (value built by
 * evergreen_get_ls_hs_config).  'rctx' is unused but kept for a
 * uniform emit-helper signature. */
void evergreen_set_ls_hs_config(struct r600_context *rctx,
                                struct radeon_cmdbuf *cs,
                                uint32_t ls_hs_config)
{
	radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
}
4652
 
 
4653
 
/* Emit the SQ_LDS_ALLOC register (packed value computed in
 * evergreen_setup_tess_constants: LDS size | num_waves << 14).
 * 'rctx' is unused but kept for a uniform emit-helper signature. */
void evergreen_set_lds_alloc(struct r600_context *rctx,
                             struct radeon_cmdbuf *cs,
                             uint32_t lds_alloc)
{
	radeon_set_context_reg(cs, R_0288E8_SQ_LDS_ALLOC, lds_alloc);
}
4659
 
 
4660
 
/* on evergreen if you are running tessellation you need to disable dynamic
   GPRs to workaround a hardware bug.*/
/* Returns false when the shaders' combined GPR demand cannot fit in the
 * static partitioning (caller must treat the draw as unsupported);
 * true otherwise.  May dirty config_state and request a 3D-idle wait. */
bool evergreen_adjust_gprs(struct r600_context *rctx)
{
	unsigned num_gprs[EG_NUM_HW_STAGES];	/* per-stage GPRs required by bound shaders */
	unsigned def_gprs[EG_NUM_HW_STAGES];	/* driver default static split */
	unsigned cur_gprs[EG_NUM_HW_STAGES];	/* split currently programmed in the regs */
	unsigned new_gprs[EG_NUM_HW_STAGES];	/* split we are about to program */
	unsigned def_num_clause_temp_gprs = rctx->r6xx_num_clause_temp_gprs;
	unsigned max_gprs;
	unsigned i;
	unsigned total_gprs;
	unsigned tmp[3];
	bool rework = false, set_default = false, set_dirty = false;
	max_gprs = 0;
	for (i = 0; i < EG_NUM_HW_STAGES; i++) {
		def_gprs[i] = rctx->default_gprs[i];
		max_gprs += def_gprs[i];
	}
	/* clause temp GPRs are reserved twice (PS and VS pools). */
	max_gprs += def_num_clause_temp_gprs * 2;

	/* if we have no TESS and dyn gpr is enabled then do nothing. */
	if (!rctx->hw_shader_stages[EG_HW_STAGE_HS].shader) {
		if (rctx->config_state.dyn_gpr_enabled)
			return true;

		/* transition back to dyn gpr enabled state */
		rctx->config_state.dyn_gpr_enabled = true;
		r600_mark_atom_dirty(rctx, &rctx->config_state.atom);
		rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
		return true;
	}


	/* gather required shader gprs */
	for (i = 0; i < EG_NUM_HW_STAGES; i++) {
		if (rctx->hw_shader_stages[i].shader)
			num_gprs[i] = rctx->hw_shader_stages[i].shader->shader.bc.ngpr;
		else
			num_gprs[i] = 0;
	}

	/* Decode the per-stage counts currently in SQ_GPR_RESOURCE_MGMT_1..3. */
	cur_gprs[R600_HW_STAGE_PS] = G_008C04_NUM_PS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
	cur_gprs[R600_HW_STAGE_VS] = G_008C04_NUM_VS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_1);
	cur_gprs[R600_HW_STAGE_GS] = G_008C08_NUM_GS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
	cur_gprs[R600_HW_STAGE_ES] = G_008C08_NUM_ES_GPRS(rctx->config_state.sq_gpr_resource_mgmt_2);
	cur_gprs[EG_HW_STAGE_LS] = G_008C0C_NUM_LS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_3);
	cur_gprs[EG_HW_STAGE_HS] = G_008C0C_NUM_HS_GPRS(rctx->config_state.sq_gpr_resource_mgmt_3);

	total_gprs = 0;
	for (i = 0; i < EG_NUM_HW_STAGES; i++)	{
		new_gprs[i] = num_gprs[i];
		total_gprs += num_gprs[i];
	}

	/* Demand exceeds what the static split can provide: fail the draw. */
	if (total_gprs > (max_gprs - (2 * def_num_clause_temp_gprs)))
		return false;

	/* Only reprogram when some stage needs more than it currently has. */
	for (i = 0; i < EG_NUM_HW_STAGES; i++) {
		if (new_gprs[i] > cur_gprs[i]) {
			rework = true;
			break;
		}
	}

	/* Tessellation requires static GPRs; leaving dyn mode dirties config. */
	if (rctx->config_state.dyn_gpr_enabled) {
		set_dirty = true;
		rctx->config_state.dyn_gpr_enabled = false;
	}

	if (rework) {
		/* Prefer the default split if every stage fits in it ... */
		set_default = true;
		for (i = 0; i < EG_NUM_HW_STAGES; i++) {
			if (new_gprs[i] > def_gprs[i])
				set_default = false;
		}

		if (set_default) {
			for (i = 0; i < EG_NUM_HW_STAGES; i++) {
				new_gprs[i] = def_gprs[i];
			}
		} else {
			/* ... otherwise give every non-PS stage exactly what it
			 * needs and hand the entire remainder to PS. */
			unsigned ps_value = max_gprs;

			ps_value -= (def_num_clause_temp_gprs * 2);
			for (i = R600_HW_STAGE_VS; i < EG_NUM_HW_STAGES; i++)
				ps_value -= new_gprs[i];

			new_gprs[R600_HW_STAGE_PS] = ps_value;
		}

		tmp[0] = S_008C04_NUM_PS_GPRS(new_gprs[R600_HW_STAGE_PS]) |
			S_008C04_NUM_VS_GPRS(new_gprs[R600_HW_STAGE_VS]) |
			S_008C04_NUM_CLAUSE_TEMP_GPRS(def_num_clause_temp_gprs);

		tmp[1] = S_008C08_NUM_ES_GPRS(new_gprs[R600_HW_STAGE_ES]) |
			S_008C08_NUM_GS_GPRS(new_gprs[R600_HW_STAGE_GS]);

		tmp[2] = S_008C0C_NUM_HS_GPRS(new_gprs[EG_HW_STAGE_HS]) |
			S_008C0C_NUM_LS_GPRS(new_gprs[EG_HW_STAGE_LS]);

		if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp[0] ||
		    rctx->config_state.sq_gpr_resource_mgmt_2 != tmp[1] ||
		    rctx->config_state.sq_gpr_resource_mgmt_3 != tmp[2]) {
			rctx->config_state.sq_gpr_resource_mgmt_1 = tmp[0];
			rctx->config_state.sq_gpr_resource_mgmt_2 = tmp[1];
			rctx->config_state.sq_gpr_resource_mgmt_3 = tmp[2];
			set_dirty = true;
		}
	}


	if (set_dirty) {
		r600_mark_atom_dirty(rctx, &rctx->config_state.atom);
		rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
	}
	return true;
}
4778
 
 
4779
 
#define AC_ENCODE_TRACE_POINT(id)       (0xcafe0000 | ((id) & 0xffff))
4780
 
 
4781
 
/* Emit a trace point into the command stream: bump rctx->trace_id, have the
 * CP MEM_WRITE the new id into the trace buffer (so a hang dump shows how far
 * the GPU got), and tag the CS with an encoded NOP trace marker.
 * No-op on pre-Evergreen chips.
 */
void eg_trace_emit(struct r600_context *rctx)
{
	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
	unsigned reloc;

	if (rctx->b.chip_class < EVERGREEN)
		return;

	/* This must be done after r600_need_cs_space. */
	reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
					  (struct r600_resource*)rctx->trace_buf, RADEON_USAGE_WRITE |
					  RADEON_PRIO_CP_DMA);

	rctx->trace_id++;
	/* NOTE(review): trace_buf is added to the buffer list twice with
	 * different usage/priority flags; presumably the second call upgrades
	 * the usage to READWRITE — confirm this is intentional. */
	radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rctx->trace_buf,
			      RADEON_USAGE_READWRITE | RADEON_PRIO_FENCE_TRACE);
	/* MEM_WRITE: store the new 32-bit trace_id at trace_buf->gpu_address. */
	radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0));
	radeon_emit(cs, rctx->trace_buf->gpu_address);
	radeon_emit(cs, rctx->trace_buf->gpu_address >> 32 | MEM_WRITE_32_BITS | MEM_WRITE_CONFIRM);
	radeon_emit(cs, rctx->trace_id);
	radeon_emit(cs, 0);
	/* Relocation NOP so the kernel CS parser patches the address above. */
	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
	radeon_emit(cs, reloc);
	/* Encoded trace-point marker, recognizable when dumping the CS. */
	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
	radeon_emit(cs, AC_ENCODE_TRACE_POINT(rctx->trace_id));
}
4807
 
 
4808
 
static void evergreen_emit_set_append_cnt(struct r600_context *rctx,
4809
 
                                          struct r600_shader_atomic *atomic,
4810
 
                                          struct r600_resource *resource,
4811
 
                                          uint32_t pkt_flags)
4812
 
{
4813
 
        struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
4814
 
        unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
4815
 
                                                   resource,
4816
 
                                                   RADEON_USAGE_READ |
4817
 
                                                   RADEON_PRIO_SHADER_RW_BUFFER);
4818
 
        uint64_t dst_offset = resource->gpu_address + (atomic->start * 4);
4819
 
        uint32_t base_reg_0 = R_02872C_GDS_APPEND_COUNT_0;
4820
 
 
4821
 
        uint32_t reg_val = (base_reg_0 + atomic->hw_idx * 4 - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
4822
 
 
4823
 
        radeon_emit(cs, PKT3(PKT3_SET_APPEND_CNT, 2, 0) | pkt_flags);
4824
 
        radeon_emit(cs, (reg_val << 16) | 0x3);
4825
 
        radeon_emit(cs, dst_offset & 0xfffffffc);
4826
 
        radeon_emit(cs, (dst_offset >> 32) & 0xff);
4827
 
        radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
4828
 
        radeon_emit(cs, reloc);
4829
 
}
4830
 
 
4831
 
/* Evergreen path: flush the GDS append counter for one hardware atomic back
 * to its backing buffer with an EVENT_WRITE_EOS packet, triggered once the
 * current shader work finishes (PS_DONE, or CS_DONE in compute mode).
 */
static void evergreen_emit_event_write_eos(struct r600_context *rctx,
					   struct r600_shader_atomic *atomic,
					   struct r600_resource *resource,
					   uint32_t pkt_flags)
{
	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
	uint32_t event = EVENT_TYPE_PS_DONE;
	uint32_t base_reg_0 = R_02872C_GDS_APPEND_COUNT_0;
	uint32_t reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
						   resource,
						   RADEON_USAGE_WRITE |
						   RADEON_PRIO_SHADER_RW_BUFFER);
	/* Each counter occupies one dword inside the buffer. */
	uint64_t dst_offset = resource->gpu_address + (atomic->start * 4);
	/* Dword register offset of GDS_APPEND_COUNT_<hw_idx>; unlike
	 * SET_APPEND_CNT this is NOT made relative to the context-register
	 * base. */
	uint32_t reg_val = (base_reg_0 + atomic->hw_idx * 4) >> 2;

	if (pkt_flags == RADEON_CP_PACKET3_COMPUTE_MODE)
		event = EVENT_TYPE_CS_DONE;

	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOS, 3, 0) | pkt_flags);
	radeon_emit(cs, EVENT_TYPE(event) | EVENT_INDEX(6));
	radeon_emit(cs, (dst_offset) & 0xffffffff);
	/* Bits 31:29 select the EOS command (0 here; presumably "store append
	 * count" — TODO confirm against the PM4 docs); low byte carries the
	 * upper address bits. */
	radeon_emit(cs, (0 << 29) | ((dst_offset >> 32) & 0xff));
	radeon_emit(cs, reg_val);
	/* Relocation NOP so the kernel patches the destination address. */
	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
	radeon_emit(cs, reloc);
}
4857
 
 
4858
 
static void cayman_emit_event_write_eos(struct r600_context *rctx,
4859
 
                                        struct r600_shader_atomic *atomic,
4860
 
                                        struct r600_resource *resource,
4861
 
                                        uint32_t pkt_flags)
4862
 
{
4863
 
        struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
4864
 
        uint32_t event = EVENT_TYPE_PS_DONE;
4865
 
        uint32_t reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
4866
 
                                                   resource,
4867
 
                                                   RADEON_USAGE_WRITE |
4868
 
                                                   RADEON_PRIO_SHADER_RW_BUFFER);
4869
 
        uint64_t dst_offset = resource->gpu_address + (atomic->start * 4);
4870
 
 
4871
 
        if (pkt_flags == RADEON_CP_PACKET3_COMPUTE_MODE)
4872
 
                event = EVENT_TYPE_CS_DONE;
4873
 
 
4874
 
        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOS, 3, 0) | pkt_flags);
4875
 
        radeon_emit(cs, EVENT_TYPE(event) | EVENT_INDEX(6));
4876
 
        radeon_emit(cs, (dst_offset) & 0xffffffff);
4877
 
        radeon_emit(cs, (1 << 29) | ((dst_offset >> 32) & 0xff));
4878
 
        radeon_emit(cs, (atomic->hw_idx) | (1 << 16));
4879
 
        radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
4880
 
        radeon_emit(cs, reloc);
4881
 
}
4882
 
 
4883
 
/* Writes count from a buffer into GDS (Cayman): CP_DMA copies the 4-byte
 * counter value from the atomic buffer into the GDS word reserved for this
 * hardware atomic slot. */
static void cayman_write_count_to_gds(struct r600_context *rctx,
				      struct r600_shader_atomic *atomic,
				      struct r600_resource *resource,
				      uint32_t pkt_flags)
{
	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
	unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
						   resource,
						   RADEON_USAGE_READ |
						   RADEON_PRIO_SHADER_RW_BUFFER);
	/* NOTE(review): despite the name, this is the CP_DMA *source*
	 * address — one dword per counter inside the atomic buffer. */
	uint64_t dst_offset = resource->gpu_address + (atomic->start * 4);

	radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0) | pkt_flags);
	radeon_emit(cs, dst_offset & 0xffffffff);
	radeon_emit(cs, PKT3_CP_DMA_CP_SYNC | PKT3_CP_DMA_DST_SEL(1) | ((dst_offset >> 32) & 0xff));// GDS
	radeon_emit(cs, atomic->hw_idx * 4);	/* byte offset of the slot within GDS */
	radeon_emit(cs, 0);
	radeon_emit(cs, PKT3_CP_DMA_CMD_DAS | 4);	/* destination-is-GDS, copy 4 bytes */
	/* Relocation NOP so the kernel patches the source address. */
	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
	radeon_emit(cs, reloc);
}
4905
 
 
4906
 
void evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx,
4907
 
                                              struct r600_pipe_shader *cs_shader,
4908
 
                                              struct r600_shader_atomic *combined_atomics,
4909
 
                                              uint8_t *atomic_used_mask_p)
4910
 
{
4911
 
        uint8_t atomic_used_mask = 0;
4912
 
        int i, j, k;
4913
 
        bool is_compute = cs_shader ? true : false;
4914
 
 
4915
 
        for (i = 0; i < (is_compute ? 1 : EG_NUM_HW_STAGES); i++) {
4916
 
                uint8_t num_atomic_stage;
4917
 
                struct r600_pipe_shader *pshader;
4918
 
 
4919
 
                if (is_compute)
4920
 
                        pshader = cs_shader;
4921
 
                else
4922
 
                        pshader = rctx->hw_shader_stages[i].shader;
4923
 
                if (!pshader)
4924
 
                        continue;
4925
 
 
4926
 
                num_atomic_stage = pshader->shader.nhwatomic_ranges;
4927
 
                if (!num_atomic_stage)
4928
 
                        continue;
4929
 
 
4930
 
                for (j = 0; j < num_atomic_stage; j++) {
4931
 
                        struct r600_shader_atomic *atomic = &pshader->shader.atomics[j];
4932
 
                        int natomics = atomic->end - atomic->start + 1;
4933
 
 
4934
 
                        for (k = 0; k < natomics; k++) {
4935
 
                                /* seen this in a previous stage */
4936
 
                                if (atomic_used_mask & (1u << (atomic->hw_idx + k)))
4937
 
                                        continue;
4938
 
 
4939
 
                                combined_atomics[atomic->hw_idx + k].hw_idx = atomic->hw_idx + k;
4940
 
                                combined_atomics[atomic->hw_idx + k].buffer_id = atomic->buffer_id;
4941
 
                                combined_atomics[atomic->hw_idx + k].start = atomic->start + k;
4942
 
                                combined_atomics[atomic->hw_idx + k].end = combined_atomics[atomic->hw_idx + k].start + 1;
4943
 
                                atomic_used_mask |= (1u << (atomic->hw_idx + k));
4944
 
                        }
4945
 
                }
4946
 
        }
4947
 
        *atomic_used_mask_p = atomic_used_mask;
4948
 
}
4949
 
 
4950
 
void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
4951
 
                                        bool is_compute,
4952
 
                                        struct r600_shader_atomic *combined_atomics,
4953
 
                                        uint8_t atomic_used_mask)
4954
 
{
4955
 
        struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
4956
 
        unsigned pkt_flags = 0;
4957
 
        uint32_t mask;
4958
 
 
4959
 
        if (is_compute)
4960
 
                pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
4961
 
 
4962
 
        mask = atomic_used_mask;
4963
 
        if (!mask)
4964
 
                return;
4965
 
 
4966
 
        while (mask) {
4967
 
                unsigned atomic_index = u_bit_scan(&mask);
4968
 
                struct r600_shader_atomic *atomic = &combined_atomics[atomic_index];
4969
 
                struct r600_resource *resource = r600_resource(astate->buffer[atomic->buffer_id].buffer);
4970
 
                assert(resource);
4971
 
 
4972
 
                if (rctx->b.chip_class == CAYMAN)
4973
 
                        cayman_write_count_to_gds(rctx, atomic, resource, pkt_flags);
4974
 
                else
4975
 
                        evergreen_emit_set_append_cnt(rctx, atomic, resource, pkt_flags);
4976
 
        }
4977
 
}
4978
 
 
4979
 
/* Save all live hardware atomic counters back to their buffers after a draw
 * or dispatch: emit one EVENT_WRITE_EOS per used slot, then an additional
 * EOS that writes a fresh fence id into append_fence, and finally a
 * WAIT_REG_MEM so the CP stalls until every counter write-back has landed
 * in memory before subsequent packets execute.
 */
void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
				       bool is_compute,
				       struct r600_shader_atomic *combined_atomics,
				       uint8_t *atomic_used_mask_p)
{
	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
	struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
	uint32_t pkt_flags = 0;
	uint32_t event = EVENT_TYPE_PS_DONE;
	uint32_t mask;
	uint64_t dst_offset;
	unsigned reloc;

	if (is_compute)
		pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;

	mask = *atomic_used_mask_p;
	if (!mask)
		return;

	/* Write each live counter back via the chip-specific EOS packet. */
	while (mask) {
		unsigned atomic_index = u_bit_scan(&mask);
		struct r600_shader_atomic *atomic = &combined_atomics[atomic_index];
		struct r600_resource *resource = r600_resource(astate->buffer[atomic->buffer_id].buffer);
		assert(resource);

		if (rctx->b.chip_class == CAYMAN)
			cayman_emit_event_write_eos(rctx, atomic, resource, pkt_flags);
		else
			evergreen_emit_event_write_eos(rctx, atomic, resource, pkt_flags);
	}

	if (pkt_flags == RADEON_CP_PACKET3_COMPUTE_MODE)
		event = EVENT_TYPE_CS_DONE;

	/* Fence: EOS writes the new append_fence_id to the fence buffer once
	 * the counter write-backs above have completed. */
	++rctx->append_fence_id;
	reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
					  r600_resource(rctx->append_fence),
					  RADEON_USAGE_READWRITE |
					  RADEON_PRIO_SHADER_RW_BUFFER);
	dst_offset = r600_resource(rctx->append_fence)->gpu_address;
	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOS, 3, 0) | pkt_flags);
	radeon_emit(cs, EVENT_TYPE(event) | EVENT_INDEX(6));
	radeon_emit(cs, dst_offset & 0xffffffff);
	/* Bits 31:29 = EOS command 2 (presumably "store 32-bit data word" —
	 * TODO confirm against PM4 docs). */
	radeon_emit(cs, (2 << 29) | ((dst_offset >> 32) & 0xff));
	radeon_emit(cs, rctx->append_fence_id);
	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
	radeon_emit(cs, reloc);

	/* Stall the CP until the fence value appears in memory (>= compare,
	 * poll interval 0xa). */
	radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0) | pkt_flags);
	radeon_emit(cs, WAIT_REG_MEM_GEQUAL | WAIT_REG_MEM_MEMORY | (1 << 8));
	radeon_emit(cs, dst_offset & 0xffffffff);
	radeon_emit(cs, ((dst_offset >> 32) & 0xff));
	radeon_emit(cs, rctx->append_fence_id);
	radeon_emit(cs, 0xffffffff);	/* compare mask */
	radeon_emit(cs, 0xa);
	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
	radeon_emit(cs, reloc);
}