~mmach/netext73/mesa-haswell

Viewing changes to src/gallium/drivers/panfrost/pan_cmdstream.c

  • Committer: mmach
  • Date: 2022-09-22 19:56:13 UTC
  • Revision ID: netbit73@gmail.com-20220922195613-wtik9mmy20tmor0i
/*
 * Copyright (C) 2018 Alyssa Rosenzweig
 * Copyright (C) 2020 Collabora Ltd.
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "util/macros.h"
#include "util/u_prim.h"
#include "util/u_vbuf.h"
#include "util/u_helpers.h"
#include "util/u_draw.h"
#include "util/u_memory.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "gallium/auxiliary/util/u_blend.h"

#include "genxml/gen_macros.h"

#include "pan_pool.h"
#include "pan_bo.h"
#include "pan_blend.h"
#include "pan_context.h"
#include "pan_job.h"
#include "pan_shader.h"
#include "pan_texture.h"
#include "pan_util.h"
#include "pan_indirect_draw.h"
#include "pan_indirect_dispatch.h"
#include "pan_blitter.h"

#define PAN_GPU_INDIRECTS (PAN_ARCH == 7)

struct panfrost_rasterizer {
        struct pipe_rasterizer_state base;

#if PAN_ARCH <= 7
        /* Partially packed RSD words */
        struct mali_multisample_misc_packed multisample;
        struct mali_stencil_mask_misc_packed stencil_misc;
#endif
};

struct panfrost_zsa_state {
        struct pipe_depth_stencil_alpha_state base;

        /* Is any depth, stencil, or alpha testing enabled? */
        bool enabled;

        /* Mask of PIPE_CLEAR_{DEPTH,STENCIL} written */
        unsigned draws;

        /* Prepacked words from the RSD */
        struct mali_multisample_misc_packed rsd_depth;
        struct mali_stencil_mask_misc_packed rsd_stencil;
        struct mali_stencil_packed stencil_front, stencil_back;
};

struct panfrost_sampler_state {
        struct pipe_sampler_state base;
        struct mali_sampler_packed hw;
};

/* Misnomer: Sampler view corresponds to textures, not samplers */

struct panfrost_sampler_view {
        struct pipe_sampler_view base;
        struct panfrost_pool_ref state;
        struct mali_texture_packed bifrost_descriptor;
        mali_ptr texture_bo;
        uint64_t modifier;

        /* Pool used to allocate the descriptor. If NULL, defaults to the global
         * descriptor pool. Can be set for short lived descriptors, useful for
         * shader images on Valhall.
         */
        struct panfrost_pool *pool;
};

struct panfrost_vertex_state {
        unsigned num_elements;
        struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];

#if PAN_ARCH >= 9
        /* Packed attribute descriptor. All fields are set at CSO create time
         * except for stride, which must be ORed in at draw time
         */
        struct mali_attribute_packed attributes[PIPE_MAX_ATTRIBS];
#else
        /* buffers corresponds to attribute buffer, element_buffers corresponds
         * to an index in buffers for each vertex element */
        struct pan_vertex_buffer buffers[PIPE_MAX_ATTRIBS];
        unsigned element_buffer[PIPE_MAX_ATTRIBS];
        unsigned nr_bufs;

        unsigned formats[PIPE_MAX_ATTRIBS];
#endif
};

/* Statically assert that PIPE_* enums match the hardware enums.
 * (As long as they match, we don't need to translate them.)
 */
UNUSED static void
pan_pipe_asserts()
{
#define PIPE_ASSERT(x) STATIC_ASSERT((int)x)

        /* Compare functions are natural in both Gallium and Mali */
        PIPE_ASSERT(PIPE_FUNC_NEVER    == MALI_FUNC_NEVER);
        PIPE_ASSERT(PIPE_FUNC_LESS     == MALI_FUNC_LESS);
        PIPE_ASSERT(PIPE_FUNC_EQUAL    == MALI_FUNC_EQUAL);
        PIPE_ASSERT(PIPE_FUNC_LEQUAL   == MALI_FUNC_LEQUAL);
        PIPE_ASSERT(PIPE_FUNC_GREATER  == MALI_FUNC_GREATER);
        PIPE_ASSERT(PIPE_FUNC_NOTEQUAL == MALI_FUNC_NOT_EQUAL);
        PIPE_ASSERT(PIPE_FUNC_GEQUAL   == MALI_FUNC_GEQUAL);
        PIPE_ASSERT(PIPE_FUNC_ALWAYS   == MALI_FUNC_ALWAYS);
}

static inline enum mali_sample_pattern
panfrost_sample_pattern(unsigned samples)
{
        switch (samples) {
        case 1:  return MALI_SAMPLE_PATTERN_SINGLE_SAMPLED;
        case 4:  return MALI_SAMPLE_PATTERN_ROTATED_4X_GRID;
        case 8:  return MALI_SAMPLE_PATTERN_D3D_8X_GRID;
        case 16: return MALI_SAMPLE_PATTERN_D3D_16X_GRID;
        default: unreachable("Unsupported sample count");
        }
}

static unsigned
translate_tex_wrap(enum pipe_tex_wrap w, bool using_nearest)
{
        /* CLAMP is only supported on Midgard, where it is broken for nearest
         * filtering. Use CLAMP_TO_EDGE in that case.
         */

        switch (w) {
        case PIPE_TEX_WRAP_REPEAT: return MALI_WRAP_MODE_REPEAT;
        case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE;
        case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER;
        case PIPE_TEX_WRAP_MIRROR_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT;
        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE;
        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_BORDER;

#if PAN_ARCH <= 5
        case PIPE_TEX_WRAP_CLAMP:
                return using_nearest ? MALI_WRAP_MODE_CLAMP_TO_EDGE :
                                       MALI_WRAP_MODE_CLAMP;
        case PIPE_TEX_WRAP_MIRROR_CLAMP:
                return using_nearest ? MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE :
                                       MALI_WRAP_MODE_MIRRORED_CLAMP;
#endif

        default: unreachable("Invalid wrap");
        }
}

/* The hardware compares in the wrong order, so we have to flip before
 * encoding. Yes, really. */

static enum mali_func
panfrost_sampler_compare_func(const struct pipe_sampler_state *cso)
{
        return !cso->compare_mode ? MALI_FUNC_NEVER :
                panfrost_flip_compare_func((enum mali_func) cso->compare_func);
}

static enum mali_mipmap_mode
pan_pipe_to_mipmode(enum pipe_tex_mipfilter f)
{
        switch (f) {
        case PIPE_TEX_MIPFILTER_NEAREST: return MALI_MIPMAP_MODE_NEAREST;
        case PIPE_TEX_MIPFILTER_LINEAR: return MALI_MIPMAP_MODE_TRILINEAR;
#if PAN_ARCH >= 6
        case PIPE_TEX_MIPFILTER_NONE: return MALI_MIPMAP_MODE_NONE;
#else
        case PIPE_TEX_MIPFILTER_NONE: return MALI_MIPMAP_MODE_NEAREST;
#endif
        default: unreachable("Invalid");
        }
}


static void *
panfrost_create_sampler_state(
        struct pipe_context *pctx,
        const struct pipe_sampler_state *cso)
{
        struct panfrost_sampler_state *so = CALLOC_STRUCT(panfrost_sampler_state);
        so->base = *cso;

        bool using_nearest = cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST;

        pan_pack(&so->hw, SAMPLER, cfg) {
                cfg.magnify_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
                cfg.minify_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;

                cfg.normalized_coordinates = cso->normalized_coords;
                cfg.lod_bias = FIXED_16(cso->lod_bias, true);
                cfg.minimum_lod = FIXED_16(cso->min_lod, false);
                cfg.maximum_lod = FIXED_16(cso->max_lod, false);

                cfg.wrap_mode_s = translate_tex_wrap(cso->wrap_s, using_nearest);
                cfg.wrap_mode_t = translate_tex_wrap(cso->wrap_t, using_nearest);
                cfg.wrap_mode_r = translate_tex_wrap(cso->wrap_r, using_nearest);

                cfg.mipmap_mode = pan_pipe_to_mipmode(cso->min_mip_filter);
                cfg.compare_function = panfrost_sampler_compare_func(cso);
                cfg.seamless_cube_map = cso->seamless_cube_map;

                cfg.border_color_r = cso->border_color.ui[0];
                cfg.border_color_g = cso->border_color.ui[1];
                cfg.border_color_b = cso->border_color.ui[2];
                cfg.border_color_a = cso->border_color.ui[3];

#if PAN_ARCH >= 6
                if (cso->max_anisotropy > 1) {
                        cfg.maximum_anisotropy = cso->max_anisotropy;
                        cfg.lod_algorithm = MALI_LOD_ALGORITHM_ANISOTROPIC;
                }
#else
                /* Emulate disabled mipmapping by clamping the LOD as tight as
                 * possible (from 0 to epsilon = 1/256) */
                if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
                        cfg.maximum_lod = cfg.minimum_lod + 1;
#endif
        }

        return so;
}

static bool
panfrost_fs_required(
                struct panfrost_shader_state *fs,
                struct panfrost_blend_state *blend,
                struct pipe_framebuffer_state *state,
                const struct panfrost_zsa_state *zsa)
{
        /* If the shader generally has side effects, it must run. This includes
         * use of discard, which can affect the results of an occlusion query. */
        if (fs->info.fs.sidefx)
                return true;

        /* Using an empty FS requires early-z to be enabled, but alpha test
         * needs it disabled. Alpha test is only native on Midgard, so only
         * check there.
         */
        if (PAN_ARCH <= 5 && zsa->base.alpha_func != PIPE_FUNC_ALWAYS)
                return true;

        /* If colour is written we need to execute */
        for (unsigned i = 0; i < state->nr_cbufs; ++i) {
                if (state->cbufs[i] && !blend->info[i].no_colour)
                        return true;
        }

        /* If depth is written and not implied we need to execute.
         * TODO: Predicate on Z/S writes being enabled */
        return (fs->info.fs.writes_depth || fs->info.fs.writes_stencil);
}

/* Get pointers to the blend shaders bound to each active render target. Used
 * to emit the blend descriptors, as well as the fragment renderer state
 * descriptor.
 */
static void
panfrost_get_blend_shaders(struct panfrost_batch *batch,
                           mali_ptr *blend_shaders)
{
        unsigned shader_offset = 0;
        struct panfrost_bo *shader_bo = NULL;

        for (unsigned c = 0; c < batch->key.nr_cbufs; ++c) {
                if (batch->key.cbufs[c]) {
                        blend_shaders[c] = panfrost_get_blend(batch,
                                        c, &shader_bo, &shader_offset);
                }
        }
}

#if PAN_ARCH >= 5
UNUSED static uint16_t
pack_blend_constant(enum pipe_format format, float cons)
{
        const struct util_format_description *format_desc =
                util_format_description(format);

        unsigned chan_size = 0;

        for (unsigned i = 0; i < format_desc->nr_channels; i++)
                chan_size = MAX2(format_desc->channel[i].size, chan_size);

        uint16_t unorm = (cons * ((1 << chan_size) - 1));
        return unorm << (16 - chan_size);
}

static void
panfrost_emit_blend(struct panfrost_batch *batch, void *rts, mali_ptr *blend_shaders)
{
        unsigned rt_count = batch->key.nr_cbufs;
        struct panfrost_context *ctx = batch->ctx;
        const struct panfrost_blend_state *so = ctx->blend;
        bool dithered = so->base.dither;

        /* Always have at least one render target for depth-only passes */
        for (unsigned i = 0; i < MAX2(rt_count, 1); ++i) {
                struct mali_blend_packed *packed = rts + (i * pan_size(BLEND));

                /* Disable blending for unbacked render targets */
                if (rt_count == 0 || !batch->key.cbufs[i] || so->info[i].no_colour) {
                        pan_pack(rts + i * pan_size(BLEND), BLEND, cfg) {
                                cfg.enable = false;
#if PAN_ARCH >= 6
                                cfg.internal.mode = MALI_BLEND_MODE_OFF;
#endif
                        }

                        continue;
                }

                struct pan_blend_info info = so->info[i];
                enum pipe_format format = batch->key.cbufs[i]->format;
                float cons = pan_blend_get_constant(info.constant_mask,
                                                    ctx->blend_color.color);

                /* Word 0: Flags and constant */
                pan_pack(packed, BLEND, cfg) {
                        cfg.srgb = util_format_is_srgb(format);
                        cfg.load_destination = info.load_dest;
                        cfg.round_to_fb_precision = !dithered;
                        cfg.alpha_to_one = ctx->blend->base.alpha_to_one;
#if PAN_ARCH >= 6
                        if (!blend_shaders[i])
                                cfg.constant = pack_blend_constant(format, cons);
#else
                        cfg.blend_shader = (blend_shaders[i] != 0);

                        if (blend_shaders[i])
                                cfg.shader_pc = blend_shaders[i];
                        else
                                cfg.constant = cons;
#endif
                }

                if (!blend_shaders[i]) {
                        /* Word 1: Blend Equation */
                        STATIC_ASSERT(pan_size(BLEND_EQUATION) == 4);
                        packed->opaque[PAN_ARCH >= 6 ? 1 : 2] = so->equation[i];
                }

#if PAN_ARCH >= 6
                const struct panfrost_device *dev = pan_device(ctx->base.screen);
                struct panfrost_shader_state *fs =
                        panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);

                /* Words 2 and 3: Internal blend */
                if (blend_shaders[i]) {
                        /* The blend shader's address needs to be at
                         * the same top 32 bit as the fragment shader.
                         * TODO: Ensure that's always the case.
                         */
                        assert(!fs->bin.bo ||
                                        (blend_shaders[i] & (0xffffffffull << 32)) ==
                                        (fs->bin.gpu & (0xffffffffull << 32)));

                        pan_pack(&packed->opaque[2], INTERNAL_BLEND, cfg) {
                                cfg.mode = MALI_BLEND_MODE_SHADER;
                                cfg.shader.pc = (u32) blend_shaders[i];

#if PAN_ARCH <= 7
                                unsigned ret_offset = fs->info.bifrost.blend[i].return_offset;
                                assert(!(ret_offset & 0x7));

                                cfg.shader.return_value = ret_offset ?
                                        fs->bin.gpu + ret_offset : 0;
#endif
                        }
                } else {
                        pan_pack(&packed->opaque[2], INTERNAL_BLEND, cfg) {
                                cfg.mode = info.opaque ?
                                        MALI_BLEND_MODE_OPAQUE :
                                        MALI_BLEND_MODE_FIXED_FUNCTION;

                                /* If we want the conversion to work properly,
                                 * num_comps must be set to 4
                                 */
                                cfg.fixed_function.num_comps = 4;
                                cfg.fixed_function.conversion.memory_format =
                                        panfrost_format_to_bifrost_blend(dev, format, dithered);
                                cfg.fixed_function.conversion.register_format =
                                        fs->info.bifrost.blend[i].format;
                                cfg.fixed_function.rt = i;

#if PAN_ARCH <= 7
                                if (!info.opaque) {
                                        cfg.fixed_function.alpha_zero_nop = info.alpha_zero_nop;
                                        cfg.fixed_function.alpha_one_store = info.alpha_one_store;
                                }
#endif
                        }
                }
#endif
        }

        for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
                if (!so->info[i].no_colour && batch->key.cbufs[i]) {
                        batch->draws |= (PIPE_CLEAR_COLOR0 << i);
                        batch->resolve |= (PIPE_CLEAR_COLOR0 << i);
                }
        }
}
#endif

static inline bool
pan_allow_forward_pixel_to_kill(struct panfrost_context *ctx, struct panfrost_shader_state *fs)
{
        /* Track if any colour buffer is reused across draws, either
         * from reading it directly, or from failing to write it
         */
        unsigned rt_mask = ctx->fb_rt_mask;
        uint64_t rt_written = (fs->info.outputs_written >> FRAG_RESULT_DATA0);
        bool blend_reads_dest = (ctx->blend->load_dest_mask & rt_mask);
        bool alpha_to_coverage = ctx->blend->base.alpha_to_coverage;

        return fs->info.fs.can_fpk &&
                !(rt_mask & ~rt_written) &&
                !alpha_to_coverage &&
                !blend_reads_dest;
}

static mali_ptr
panfrost_emit_compute_shader_meta(struct panfrost_batch *batch, enum pipe_shader_type stage)
{
        struct panfrost_shader_state *ss = panfrost_get_shader_state(batch->ctx, stage);

        panfrost_batch_add_bo(batch, ss->bin.bo, PIPE_SHADER_VERTEX);
        panfrost_batch_add_bo(batch, ss->state.bo, PIPE_SHADER_VERTEX);

        return ss->state.gpu;
}

#if PAN_ARCH <= 7
/* Construct a partial RSD corresponding to no executed fragment shader, and
 * merge with the existing partial RSD. */

static void
pan_merge_empty_fs(struct mali_renderer_state_packed *rsd)
{
        struct mali_renderer_state_packed empty_rsd;

        pan_pack(&empty_rsd, RENDERER_STATE, cfg) {
#if PAN_ARCH >= 6
                cfg.properties.shader_modifies_coverage = true;
                cfg.properties.allow_forward_pixel_to_kill = true;
                cfg.properties.allow_forward_pixel_to_be_killed = true;
                cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
#else
                cfg.shader.shader = 0x1;
                cfg.properties.work_register_count = 1;
                cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
                cfg.properties.force_early_z = true;
#endif
        }

        pan_merge((*rsd), empty_rsd, RENDERER_STATE);
}

static void
panfrost_prepare_fs_state(struct panfrost_context *ctx,
                          mali_ptr *blend_shaders,
                          struct mali_renderer_state_packed *rsd)
{
        struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
        const struct panfrost_zsa_state *zsa = ctx->depth_stencil;
        struct panfrost_shader_state *fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
        struct panfrost_blend_state *so = ctx->blend;
        bool alpha_to_coverage = ctx->blend->base.alpha_to_coverage;
        bool msaa = rast->multisample;

        unsigned rt_count = ctx->pipe_framebuffer.nr_cbufs;

        bool has_blend_shader = false;

        for (unsigned c = 0; c < rt_count; ++c)
                has_blend_shader |= (blend_shaders[c] != 0);

        pan_pack(rsd, RENDERER_STATE, cfg) {
                if (panfrost_fs_required(fs, so, &ctx->pipe_framebuffer, zsa)) {
#if PAN_ARCH >= 6
                        cfg.properties.allow_forward_pixel_to_kill =
                                pan_allow_forward_pixel_to_kill(ctx, fs);
#else
                        cfg.properties.force_early_z =
                                fs->info.fs.can_early_z && !alpha_to_coverage &&
                                ((enum mali_func) zsa->base.alpha_func == MALI_FUNC_ALWAYS);

                        /* TODO: Reduce this limit? */
                        if (has_blend_shader)
                                cfg.properties.work_register_count = MAX2(fs->info.work_reg_count, 8);
                        else
                                cfg.properties.work_register_count = fs->info.work_reg_count;

                        /* Hardware quirks around early-zs forcing without a
                         * depth buffer. Note this breaks occlusion queries. */
                        bool has_oq = ctx->occlusion_query && ctx->active_queries;
                        bool force_ez_with_discard = !zsa->enabled && !has_oq;

                        cfg.properties.shader_reads_tilebuffer =
                                force_ez_with_discard && fs->info.fs.can_discard;
                        cfg.properties.shader_contains_discard =
                                !force_ez_with_discard && fs->info.fs.can_discard;
#endif
                }

#if PAN_ARCH == 4
                if (rt_count > 0) {
                        cfg.multisample_misc.load_destination = so->info[0].load_dest;
                        cfg.multisample_misc.blend_shader = (blend_shaders[0] != 0);
                        cfg.stencil_mask_misc.write_enable = !so->info[0].no_colour;
                        cfg.stencil_mask_misc.srgb = util_format_is_srgb(ctx->pipe_framebuffer.cbufs[0]->format);
                        cfg.stencil_mask_misc.dither_disable = !so->base.dither;
                        cfg.stencil_mask_misc.alpha_to_one = so->base.alpha_to_one;

                        if (blend_shaders[0]) {
                                cfg.blend_shader = blend_shaders[0];
                        } else {
                                cfg.blend_constant = pan_blend_get_constant(
                                                so->info[0].constant_mask,
                                                ctx->blend_color.color);
                        }
                } else {
                        /* If there is no colour buffer, leaving fields default is
                         * fine, except for blending which is nonnullable */
                        cfg.blend_equation.color_mask = 0xf;
                        cfg.blend_equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
                        cfg.blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
                        cfg.blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
                        cfg.blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
                        cfg.blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
                        cfg.blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
                }
#elif PAN_ARCH == 5
                /* Workaround */
                cfg.legacy_blend_shader = panfrost_last_nonnull(blend_shaders, rt_count);
#endif

                cfg.multisample_misc.sample_mask = msaa ? ctx->sample_mask : 0xFFFF;

                cfg.multisample_misc.evaluate_per_sample =
                        msaa && (ctx->min_samples > 1);

#if PAN_ARCH >= 6
                /* MSAA blend shaders need to pass their sample ID to
                 * LD_TILE/ST_TILE, so we must preload it. Additionally, we
                 * need per-sample shading for the blend shader, accomplished
                 * by forcing per-sample shading for the whole program. */

                if (msaa && has_blend_shader) {
                        cfg.multisample_misc.evaluate_per_sample = true;
                        cfg.preload.fragment.sample_mask_id = true;
                }
#endif

                cfg.stencil_mask_misc.alpha_to_coverage = alpha_to_coverage;
                cfg.depth_units = rast->offset_units * 2.0f;
                cfg.depth_factor = rast->offset_scale;

                bool back_enab = zsa->base.stencil[1].enabled;
                cfg.stencil_front.reference_value = ctx->stencil_ref.ref_value[0];
                cfg.stencil_back.reference_value = ctx->stencil_ref.ref_value[back_enab ? 1 : 0];

#if PAN_ARCH <= 5
                /* v6+ fits register preload here, no alpha testing */
                cfg.alpha_reference = zsa->base.alpha_ref_value;
#endif
        }
}

static void
panfrost_emit_frag_shader(struct panfrost_context *ctx,
                          struct mali_renderer_state_packed *fragmeta,
                          mali_ptr *blend_shaders)
{
        const struct panfrost_zsa_state *zsa = ctx->depth_stencil;
        const struct panfrost_rasterizer *rast = ctx->rasterizer;
        struct panfrost_shader_state *fs =
                panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);

        /* We need to merge several partial renderer state descriptors,
         * so stage to temporary storage rather than reading back write-combine
         * memory, which will trash performance. */
        struct mali_renderer_state_packed rsd;
        panfrost_prepare_fs_state(ctx, blend_shaders, &rsd);

#if PAN_ARCH == 4
        if (ctx->pipe_framebuffer.nr_cbufs > 0 && !blend_shaders[0]) {
                /* Word 14: SFBD Blend Equation */
                STATIC_ASSERT(pan_size(BLEND_EQUATION) == 4);
                rsd.opaque[14] = ctx->blend->equation[0];
        }
#endif

        /* Merge with CSO state and upload */
        if (panfrost_fs_required(fs, ctx->blend, &ctx->pipe_framebuffer, zsa)) {
                struct mali_renderer_state_packed *partial_rsd =
                        (struct mali_renderer_state_packed *)&fs->partial_rsd;
                STATIC_ASSERT(sizeof(fs->partial_rsd) == sizeof(*partial_rsd));
                pan_merge(rsd, *partial_rsd, RENDERER_STATE);
        } else {
                pan_merge_empty_fs(&rsd);
        }

        /* Word 8, 9 Misc state */
        rsd.opaque[8] |= zsa->rsd_depth.opaque[0]
                       | rast->multisample.opaque[0];

        rsd.opaque[9] |= zsa->rsd_stencil.opaque[0]
                       | rast->stencil_misc.opaque[0];

        /* Word 10, 11 Stencil Front and Back */
        rsd.opaque[10] |= zsa->stencil_front.opaque[0];
        rsd.opaque[11] |= zsa->stencil_back.opaque[0];

        memcpy(fragmeta, &rsd, sizeof(rsd));
}

static mali_ptr
panfrost_emit_frag_shader_meta(struct panfrost_batch *batch)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);

        panfrost_batch_add_bo(batch, ss->bin.bo, PIPE_SHADER_FRAGMENT);

        struct panfrost_ptr xfer;

#if PAN_ARCH == 4
        xfer = pan_pool_alloc_desc(&batch->pool.base, RENDERER_STATE);
#else
        unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);

        xfer = pan_pool_alloc_desc_aggregate(&batch->pool.base,
                                             PAN_DESC(RENDERER_STATE),
                                             PAN_DESC_ARRAY(rt_count, BLEND));
#endif

        mali_ptr blend_shaders[PIPE_MAX_COLOR_BUFS] = { 0 };
        panfrost_get_blend_shaders(batch, blend_shaders);

        panfrost_emit_frag_shader(ctx, (struct mali_renderer_state_packed *) xfer.cpu, blend_shaders);

#if PAN_ARCH >= 5
        panfrost_emit_blend(batch, xfer.cpu + pan_size(RENDERER_STATE), blend_shaders);
#else
        batch->draws |= PIPE_CLEAR_COLOR0;
        batch->resolve |= PIPE_CLEAR_COLOR0;
#endif

        if (ctx->depth_stencil->base.depth_enabled)
                batch->read |= PIPE_CLEAR_DEPTH;

        if (ctx->depth_stencil->base.stencil[0].enabled)
                batch->read |= PIPE_CLEAR_STENCIL;

        return xfer.gpu;
}
#endif

static mali_ptr
panfrost_emit_viewport(struct panfrost_batch *batch)
{
        struct panfrost_context *ctx = batch->ctx;
        const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
        const struct pipe_scissor_state *ss = &ctx->scissor;
        const struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;

        /* Derive min/max from translate/scale. Note since |x| >= 0 by
         * definition, we have that -|x| <= |x| hence translate - |scale| <=
         * translate + |scale|, so the ordering is correct here. */
        float vp_minx = vp->translate[0] - fabsf(vp->scale[0]);
        float vp_maxx = vp->translate[0] + fabsf(vp->scale[0]);
        float vp_miny = vp->translate[1] - fabsf(vp->scale[1]);
        float vp_maxy = vp->translate[1] + fabsf(vp->scale[1]);
        float minz = (vp->translate[2] - fabsf(vp->scale[2]));
        float maxz = (vp->translate[2] + fabsf(vp->scale[2]));

        /* Scissor to the intersection of viewport and to the scissor, clamped
         * to the framebuffer */

        unsigned minx = MIN2(batch->key.width, MAX2((int) vp_minx, 0));
        unsigned maxx = MIN2(batch->key.width, MAX2((int) vp_maxx, 0));
        unsigned miny = MIN2(batch->key.height, MAX2((int) vp_miny, 0));
        unsigned maxy = MIN2(batch->key.height, MAX2((int) vp_maxy, 0));

        if (ss && rast->scissor) {
                minx = MAX2(ss->minx, minx);
                miny = MAX2(ss->miny, miny);
                maxx = MIN2(ss->maxx, maxx);
                maxy = MIN2(ss->maxy, maxy);
        }

        /* Set the range to [1, 1) so max values don't wrap round */
        if (maxx == 0 || maxy == 0)
                maxx = maxy = minx = miny = 1;

        panfrost_batch_union_scissor(batch, minx, miny, maxx, maxy);
        batch->scissor_culls_everything = (minx >= maxx || miny >= maxy);

        /* [minx, maxx) and [miny, maxy) are exclusive ranges in the hardware */
        maxx--;
        maxy--;

        batch->minimum_z = rast->depth_clip_near ? minz : -INFINITY;
        batch->maximum_z = rast->depth_clip_far  ? maxz : +INFINITY;

#if PAN_ARCH <= 7
        struct panfrost_ptr T = pan_pool_alloc_desc(&batch->pool.base, VIEWPORT);

        pan_pack(T.cpu, VIEWPORT, cfg) {
                cfg.scissor_minimum_x = minx;
                cfg.scissor_minimum_y = miny;
                cfg.scissor_maximum_x = maxx;
                cfg.scissor_maximum_y = maxy;

                cfg.minimum_z = batch->minimum_z;
                cfg.maximum_z = batch->maximum_z;
        }

        return T.gpu;
#else
        pan_pack(&batch->scissor, SCISSOR, cfg) {
                cfg.scissor_minimum_x = minx;
                cfg.scissor_minimum_y = miny;
                cfg.scissor_maximum_x = maxx;
                cfg.scissor_maximum_y = maxy;
        }

        return 0;
#endif
}

#if PAN_ARCH >= 9
/**
 * Emit a Valhall depth/stencil descriptor at draw-time. The bulk of the
 * descriptor corresponds to a pipe_depth_stencil_alpha CSO and is packed at
 * CSO create time. However, the stencil reference values and shader
 * interactions are dynamic state. Pack only the dynamic state here and OR
 * together.
 */
static mali_ptr
panfrost_emit_depth_stencil(struct panfrost_batch *batch)
{
        struct panfrost_context *ctx = batch->ctx;
        const struct panfrost_zsa_state *zsa = ctx->depth_stencil;
        struct panfrost_rasterizer *rast = ctx->rasterizer;
        struct panfrost_shader_state *fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
        bool back_enab = zsa->base.stencil[1].enabled;

        struct panfrost_ptr T = pan_pool_alloc_desc(&batch->pool.base, DEPTH_STENCIL);
        struct mali_depth_stencil_packed dynamic;

        pan_pack(&dynamic, DEPTH_STENCIL, cfg) {
                cfg.front_reference_value = ctx->stencil_ref.ref_value[0];
                cfg.back_reference_value = ctx->stencil_ref.ref_value[back_enab ? 1 : 0];

                cfg.stencil_from_shader = fs->info.fs.writes_stencil;
                cfg.depth_source = pan_depth_source(&fs->info);

                cfg.depth_bias_enable = rast->base.offset_tri;
                cfg.depth_units = rast->base.offset_units * 2.0f;
                cfg.depth_factor = rast->base.offset_scale;
                cfg.depth_bias_clamp = rast->base.offset_clamp;
        }

        pan_merge(dynamic, zsa->desc, DEPTH_STENCIL);
        memcpy(T.cpu, &dynamic, pan_size(DEPTH_STENCIL));

        return T.gpu;
}

/**
 * Emit Valhall blend descriptor at draw-time. The descriptor itself is shared
 * with Bifrost, but the container data structure is simplified.
 */
static mali_ptr
panfrost_emit_blend_valhall(struct panfrost_batch *batch)
{
        unsigned rt_count = MAX2(batch->key.nr_cbufs, 1);

        struct panfrost_ptr T = pan_pool_alloc_desc_array(&batch->pool.base, rt_count, BLEND);

        mali_ptr blend_shaders[PIPE_MAX_COLOR_BUFS] = { 0 };
        panfrost_get_blend_shaders(batch, blend_shaders);

        panfrost_emit_blend(batch, T.cpu, blend_shaders);

        /* Precalculate for the per-draw path */
        bool has_blend_shader = false;

        for (unsigned i = 0; i < rt_count; ++i)
                has_blend_shader |= !!blend_shaders[i];

        batch->ctx->valhall_has_blend_shader = has_blend_shader;

        return T.gpu;
}

/**
 * Emit Valhall buffer descriptors for bound vertex buffers at draw-time.
 */
static mali_ptr
panfrost_emit_vertex_buffers(struct panfrost_batch *batch)
{
        struct panfrost_context *ctx = batch->ctx;
        unsigned buffer_count = util_last_bit(ctx->vb_mask);
        struct panfrost_ptr T = pan_pool_alloc_desc_array(&batch->pool.base,
                                                          buffer_count, BUFFER);
        struct mali_buffer_packed *buffers = T.cpu;

        u_foreach_bit(i, ctx->vb_mask) {
                struct pipe_vertex_buffer vb = ctx->vertex_buffers[i];
                struct pipe_resource *prsrc = vb.buffer.resource;
                struct panfrost_resource *rsrc = pan_resource(prsrc);
                assert(!vb.is_user_buffer);

                panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);

                pan_pack(buffers + i, BUFFER, cfg) {
                        cfg.address = rsrc->image.data.bo->ptr.gpu +
                                      vb.buffer_offset;

                        cfg.size = prsrc->width0 - vb.buffer_offset;
                }
        }

        return T.gpu;
}

/**
 * Emit Valhall attribute descriptors and associated (vertex) buffer
 * descriptors at draw-time. The attribute descriptors are packed at draw time
 * except for the stride field. The buffer descriptors are packed here, though
 * that could be moved into panfrost_set_vertex_buffers if needed.
 */
static mali_ptr
panfrost_emit_vertex_data(struct panfrost_batch *batch)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_vertex_state *vtx = ctx->vertex;
        struct panfrost_ptr T = pan_pool_alloc_desc_array(&batch->pool.base,
                                                          vtx->num_elements,
                                                          ATTRIBUTE);
        struct mali_attribute_packed *attributes = T.cpu;

        for (unsigned i = 0; i < vtx->num_elements; ++i) {
                struct mali_attribute_packed packed;
                unsigned vbi = vtx->pipe[i].vertex_buffer_index;

                pan_pack(&packed, ATTRIBUTE, cfg) {
                        cfg.stride = ctx->vertex_buffers[vbi].stride;
                }

                pan_merge(packed, vtx->attributes[i], ATTRIBUTE);
                attributes[i] = packed;
        }

        return T.gpu;
}

/*
 * Emit Valhall descriptors for shader images. Unlike previous generations,
 * Valhall does not have a special descriptor for images. Standard texture
 * descriptors are used. The binding is different in Gallium, however, so we
 * translate.
 */
static struct pipe_sampler_view
panfrost_pipe_image_to_sampler_view(struct pipe_image_view *v)
{
        struct pipe_sampler_view out = {
                .format = v->format,
                .texture = v->resource,
                .target = v->resource->target,
                .swizzle_r = PIPE_SWIZZLE_X,
                .swizzle_g = PIPE_SWIZZLE_Y,
                .swizzle_b = PIPE_SWIZZLE_Z,
                .swizzle_a = PIPE_SWIZZLE_W
        };

        if (out.target == PIPE_BUFFER) {
                out.u.buf.offset = v->u.buf.offset;
                out.u.buf.size = v->u.buf.size;
        } else {
                out.u.tex.first_layer = v->u.tex.first_layer;
                out.u.tex.last_layer = v->u.tex.last_layer;

                /* Single level only */
                out.u.tex.first_level = v->u.tex.level;
                out.u.tex.last_level = v->u.tex.level;
        }

        return out;
}

static void
panfrost_update_sampler_view(struct panfrost_sampler_view *view,
                             struct pipe_context *pctx);

static mali_ptr
panfrost_emit_images(struct panfrost_batch *batch, enum pipe_shader_type stage)
{
        struct panfrost_context *ctx = batch->ctx;
        unsigned last_bit = util_last_bit(ctx->image_mask[stage]);

        struct panfrost_ptr T =
                pan_pool_alloc_desc_array(&batch->pool.base, last_bit, TEXTURE);

        struct mali_texture_packed *out = (struct mali_texture_packed *) T.cpu;

        for (int i = 0; i < last_bit; ++i) {
                struct pipe_image_view *image = &ctx->images[stage][i];

                if (!(ctx->image_mask[stage] & BITFIELD_BIT(i))) {
                        memset(&out[i], 0, sizeof(out[i]));
                        continue;
                }

                /* Construct a synthetic sampler view so we can use our usual
                 * sampler view code for the actual descriptor packing.
                 *
                 * Use the batch pool for a transient allocation, rather than
                 * allocating a long-lived descriptor.
                 */
                struct panfrost_sampler_view view = {
                        .base = panfrost_pipe_image_to_sampler_view(image),
                        .pool = &batch->pool
                };

                /* If we specify a cube map, the hardware internally treats it as
                 * a 2D array. Since cube maps as images can confuse our common
                 * texturing code, explicitly use a 2D array.
                 *
                 * Similar concerns apply to 3D textures.
                 */
                if (view.base.target == PIPE_BUFFER) {
                        view.base.target = PIPE_BUFFER;
                } else {
                        view.base.target = PIPE_TEXTURE_2D_ARRAY;

                        /* Hardware limitation */
                        if (view.base.u.tex.first_level != 0)
                                unreachable("TODO: mipmaps special handling");
                }

                panfrost_update_sampler_view(&view, &ctx->base);
                out[i] = view.bifrost_descriptor;

                panfrost_track_image_access(batch, stage, image);
        }

        return T.gpu;
}
#endif

static mali_ptr
panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
                                 enum pipe_shader_type st,
                                 struct panfrost_constant_buffer *buf,
                                 unsigned index)
{
        struct pipe_constant_buffer *cb = &buf->cb[index];
        struct panfrost_resource *rsrc = pan_resource(cb->buffer);

        if (rsrc) {
                panfrost_batch_read_rsrc(batch, rsrc, st);

                /* Alignment guaranteed by
                 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
                return rsrc->image.data.bo->ptr.gpu + cb->buffer_offset;
        } else if (cb->user_buffer) {
                return pan_pool_upload_aligned(&batch->pool.base,
                                               cb->user_buffer +
                                               cb->buffer_offset,
                                               cb->buffer_size, 16);
        } else {
                unreachable("No constant buffer");
        }
}

struct sysval_uniform {
        union {
                float f[4];
                int32_t i[4];
                uint32_t u[4];
                uint64_t du[2];
        };
};

static void
panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
                                      struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        const struct pipe_viewport_state *vp = &ctx->pipe_viewport;

        uniform->f[0] = vp->scale[0];
        uniform->f[1] = vp->scale[1];
        uniform->f[2] = vp->scale[2];
}

static void
panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
                                       struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        const struct pipe_viewport_state *vp = &ctx->pipe_viewport;

        uniform->f[0] = vp->translate[0];
        uniform->f[1] = vp->translate[1];
        uniform->f[2] = vp->translate[2];
}

static void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
                                       enum pipe_shader_type st,
                                       unsigned int sysvalid,
                                       struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
        unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
        bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
        struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;

        assert(dim);

        if (tex->target == PIPE_BUFFER) {
                assert(dim == 1);
                uniform->i[0] =
                        tex->u.buf.size / util_format_get_blocksize(tex->format);
                return;
        }

        uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);

        if (dim > 1)
                uniform->i[1] = u_minify(tex->texture->height0,
                                         tex->u.tex.first_level);

        if (dim > 2)
                uniform->i[2] = u_minify(tex->texture->depth0,
                                         tex->u.tex.first_level);

        if (is_array) {
                unsigned size = tex->texture->array_size;

                /* Internally, we store the number of 2D images (faces * array
                 * size). Externally, we report the array size in terms of
                 * complete cubes. So divide by the # of faces per cube.
                 */
                if (tex->target == PIPE_TEXTURE_CUBE_ARRAY)
                        size /= 6;

                uniform->i[dim] = size;
        }
}

static void panfrost_upload_image_size_sysval(struct panfrost_batch *batch,
                                              enum pipe_shader_type st,
                                              unsigned int sysvalid,
                                              struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        unsigned idx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
        unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
        unsigned is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);

        assert(dim && dim < 4);

        struct pipe_image_view *image = &ctx->images[st][idx];

        if (image->resource->target == PIPE_BUFFER) {
                unsigned blocksize = util_format_get_blocksize(image->format);
                uniform->i[0] = image->resource->width0 / blocksize;
                return;
        }

        uniform->i[0] = u_minify(image->resource->width0,
                                 image->u.tex.level);

        if (dim > 1)
                uniform->i[1] = u_minify(image->resource->height0,
                                         image->u.tex.level);

        if (dim > 2)
                uniform->i[2] = u_minify(image->resource->depth0,
                                         image->u.tex.level);

        if (is_array)
                uniform->i[dim] = image->resource->array_size;
}

static void
panfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
                            enum pipe_shader_type st,
                            unsigned ssbo_id,
                            struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;

        assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
        struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];

        /* Compute address */
        struct panfrost_resource *rsrc = pan_resource(sb.buffer);
        struct panfrost_bo *bo = rsrc->image.data.bo;

        panfrost_batch_write_rsrc(batch, rsrc, st);

        util_range_add(&rsrc->base, &rsrc->valid_buffer_range,
                        sb.buffer_offset, sb.buffer_size);

        /* Upload address and size as sysval */
        uniform->du[0] = bo->ptr.gpu + sb.buffer_offset;
        uniform->u[2] = sb.buffer_size;
}

static void
panfrost_upload_sampler_sysval(struct panfrost_batch *batch,
                               enum pipe_shader_type st,
                               unsigned samp_idx,
                               struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;

        uniform->f[0] = sampl->min_lod;
        uniform->f[1] = sampl->max_lod;
        uniform->f[2] = sampl->lod_bias;

        /* Even without any errata, Midgard represents "no mipmapping" as
         * fixing the LOD with the clamps; keep behaviour consistent. c.f.
         * panfrost_create_sampler_state which also explains our choice of
         * epsilon value (again to keep behaviour consistent) */

        if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
                uniform->f[1] = uniform->f[0] + (1.0/256.0);
}

static void
panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
                                       struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;

        uniform->u[0] = ctx->compute_grid->grid[0];
        uniform->u[1] = ctx->compute_grid->grid[1];
        uniform->u[2] = ctx->compute_grid->grid[2];
}

static void
panfrost_upload_local_group_size_sysval(struct panfrost_batch *batch,
                                        struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;

        uniform->u[0] = ctx->compute_grid->block[0];
        uniform->u[1] = ctx->compute_grid->block[1];
        uniform->u[2] = ctx->compute_grid->block[2];
}

static void
panfrost_upload_work_dim_sysval(struct panfrost_batch *batch,
                                struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;

        uniform->u[0] = ctx->compute_grid->work_dim;
}

/* Sample positions are pushed in a Bifrost specific format on Bifrost. On
 * Midgard, we emulate the Bifrost path with some extra arithmetic in the
 * shader, to keep the code as unified as possible. */

static void
panfrost_upload_sample_positions_sysval(struct panfrost_batch *batch,
                                struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_device *dev = pan_device(ctx->base.screen);

        unsigned samples = util_framebuffer_get_num_samples(&batch->key);
        uniform->du[0] = panfrost_sample_positions(dev, panfrost_sample_pattern(samples));
}

static void
panfrost_upload_multisampled_sysval(struct panfrost_batch *batch,
                                struct sysval_uniform *uniform)
{
        unsigned samples = util_framebuffer_get_num_samples(&batch->key);
        uniform->u[0] = samples > 1;
}

#if PAN_ARCH >= 6
static void
panfrost_upload_rt_conversion_sysval(struct panfrost_batch *batch,
                unsigned size_and_rt, struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_device *dev = pan_device(ctx->base.screen);
        unsigned rt = size_and_rt & 0xF;
        unsigned size = size_and_rt >> 4;

        if (rt < batch->key.nr_cbufs && batch->key.cbufs[rt]) {
                enum pipe_format format = batch->key.cbufs[rt]->format;
                uniform->u[0] =
                        GENX(pan_blend_get_internal_desc)(dev, format, rt, size, false) >> 32;
        } else {
                pan_pack(&uniform->u[0], INTERNAL_CONVERSION, cfg)
                        cfg.memory_format = dev->formats[PIPE_FORMAT_NONE].hw;
        }
}
#endif

1242
 
static void
1243
 
panfrost_upload_sysvals(struct panfrost_batch *batch,
1244
 
                        const struct panfrost_ptr *ptr,
1245
 
                        struct panfrost_shader_state *ss,
1246
 
                        enum pipe_shader_type st)
1247
 
{
1248
 
        struct sysval_uniform *uniforms = ptr->cpu;
1249
 
 
1250
 
        for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) {
1251
 
                int sysval = ss->info.sysvals.sysvals[i];
1252
 
 
1253
 
                switch (PAN_SYSVAL_TYPE(sysval)) {
1254
 
                case PAN_SYSVAL_VIEWPORT_SCALE:
1255
 
                        panfrost_upload_viewport_scale_sysval(batch,
1256
 
                                                              &uniforms[i]);
1257
 
                        break;
1258
 
                case PAN_SYSVAL_VIEWPORT_OFFSET:
1259
 
                        panfrost_upload_viewport_offset_sysval(batch,
1260
 
                                                               &uniforms[i]);
1261
 
                        break;
1262
 
                case PAN_SYSVAL_TEXTURE_SIZE:
1263
 
                        panfrost_upload_txs_sysval(batch, st,
1264
 
                                                   PAN_SYSVAL_ID(sysval),
1265
 
                                                   &uniforms[i]);
1266
 
                        break;
1267
 
                case PAN_SYSVAL_SSBO:
1268
 
                        panfrost_upload_ssbo_sysval(batch, st,
1269
 
                                                    PAN_SYSVAL_ID(sysval),
1270
 
                                                    &uniforms[i]);
1271
 
                        break;
1272
 
                case PAN_SYSVAL_NUM_WORK_GROUPS:
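                        /* Record the GPU address of each component so the
                         * values can be patched later (presumably by the
                         * indirect dispatch path). */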
1273
 
                        for (unsigned j = 0; j < 3; j++) {
1274
 
                                batch->num_wg_sysval[j] =
1275
 
                                        ptr->gpu + (i * sizeof(*uniforms)) + (j * 4);
1276
 
                        }
1277
 
                        panfrost_upload_num_work_groups_sysval(batch,
1278
 
                                                               &uniforms[i]);
1279
 
                        break;
1280
 
                case PAN_SYSVAL_LOCAL_GROUP_SIZE:
1281
 
                        panfrost_upload_local_group_size_sysval(batch,
1282
 
                                                                &uniforms[i]);
1283
 
                        break;
1284
 
                case PAN_SYSVAL_WORK_DIM:
1285
 
                        panfrost_upload_work_dim_sysval(batch,
1286
 
                                                        &uniforms[i]);
1287
 
                        break;
1288
 
                case PAN_SYSVAL_SAMPLER:
1289
 
                        panfrost_upload_sampler_sysval(batch, st,
1290
 
                                                       PAN_SYSVAL_ID(sysval),
1291
 
                                                       &uniforms[i]);
1292
 
                        break;
1293
 
                case PAN_SYSVAL_IMAGE_SIZE:
1294
 
                        panfrost_upload_image_size_sysval(batch, st,
1295
 
                                                          PAN_SYSVAL_ID(sysval),
1296
 
                                                          &uniforms[i]);
1297
 
                        break;
1298
 
                case PAN_SYSVAL_SAMPLE_POSITIONS:
1299
 
                        panfrost_upload_sample_positions_sysval(batch,
1300
 
                                                        &uniforms[i]);
1301
 
                        break;
1302
 
                case PAN_SYSVAL_MULTISAMPLED:
1303
 
                        panfrost_upload_multisampled_sysval(batch,
1304
 
                                                               &uniforms[i]);
1305
 
                        break;
1306
 
#if PAN_ARCH >= 6
1307
 
                case PAN_SYSVAL_RT_CONVERSION:
1308
 
                        panfrost_upload_rt_conversion_sysval(batch,
1309
 
                                        PAN_SYSVAL_ID(sysval), &uniforms[i]);
1310
 
                        break;
1311
 
#endif
1312
 
                case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
1313
 
                        batch->ctx->first_vertex_sysval_ptr =
1314
 
                                ptr->gpu + (i * sizeof(*uniforms));
1315
 
                        batch->ctx->base_vertex_sysval_ptr =
1316
 
                                batch->ctx->first_vertex_sysval_ptr + 4;
1317
 
                        batch->ctx->base_instance_sysval_ptr =
1318
 
                                batch->ctx->first_vertex_sysval_ptr + 8;
1319
 
 
1320
 
                        uniforms[i].u[0] = batch->ctx->offset_start;
1321
 
                        uniforms[i].u[1] = batch->ctx->base_vertex;
1322
 
                        uniforms[i].u[2] = batch->ctx->base_instance;
1323
 
                        break;
1324
 
                case PAN_SYSVAL_DRAWID:
1325
 
                        uniforms[i].u[0] = batch->ctx->drawid;
1326
 
                        break;
1327
 
                default:
1328
 
                        assert(0);
1329
 
                }
1330
 
        }
1331
 
}
1332
 
 
1333
 
static const void *
1334
 
panfrost_map_constant_buffer_cpu(struct panfrost_context *ctx,
1335
 
                                 struct panfrost_constant_buffer *buf,
1336
 
                                 unsigned index)
1337
 
{
1338
 
        struct pipe_constant_buffer *cb = &buf->cb[index];
1339
 
        struct panfrost_resource *rsrc = pan_resource(cb->buffer);
1340
 
 
1341
 
        if (rsrc) {
1342
 
                panfrost_bo_mmap(rsrc->image.data.bo);
1343
 
                panfrost_flush_writer(ctx, rsrc, "CPU constant buffer mapping");
1344
 
                panfrost_bo_wait(rsrc->image.data.bo, INT64_MAX, false);
1345
 
 
1346
 
                return rsrc->image.data.bo->ptr.cpu + cb->buffer_offset;
1347
 
        } else if (cb->user_buffer) {
1348
 
                return cb->user_buffer + cb->buffer_offset;
1349
 
        } else
1350
 
                unreachable("No constant buffer");
1351
 
}
1352
 
 
1353
 
static mali_ptr
1354
 
panfrost_emit_const_buf(struct panfrost_batch *batch,
1355
 
                        enum pipe_shader_type stage,
1356
 
                        mali_ptr *push_constants)
1357
 
{
1358
 
        struct panfrost_context *ctx = batch->ctx;
1359
 
        struct panfrost_shader_variants *all = ctx->shader[stage];
1360
 
 
1361
 
        if (!all)
1362
 
                return 0;
1363
 
 
1364
 
        struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];
1365
 
        struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1366
 
 
1367
 
        /* Allocate room for the sysval and the uniforms */
1368
 
        size_t sys_size = sizeof(float) * 4 * ss->info.sysvals.sysval_count;
1369
 
        struct panfrost_ptr transfer =
1370
 
                pan_pool_alloc_aligned(&batch->pool.base, sys_size, 16);
1371
 
 
1372
 
        /* Upload sysvals requested by the shader */
1373
 
        panfrost_upload_sysvals(batch, &transfer, ss, stage);
1374
 
 
1375
 
        /* Next up, attach UBOs. UBO count includes gaps but no sysval UBO */
1376
 
        struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, stage);
1377
 
        unsigned ubo_count = shader->info.ubo_count - (sys_size ? 1 : 0);
1378
 
        unsigned sysval_ubo = sys_size ? ubo_count : ~0;
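        /* The sysval UBO, when present, occupies the final slot (index
         * ubo_count); ~0 marks it absent so no push-constant source matches. */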
1379
 
 
1380
 
        struct panfrost_ptr ubos =
1381
 
                pan_pool_alloc_desc_array(&batch->pool.base,
1382
 
                                          ubo_count + 1,
1383
 
                                          UNIFORM_BUFFER);
1384
 
 
1385
 
        uint64_t *ubo_ptr = (uint64_t *) ubos.cpu;
1386
 
 
1387
 
        /* Upload sysval as a final UBO */
1388
 
 
1389
 
        if (sys_size) {
1390
 
                pan_pack(ubo_ptr + ubo_count, UNIFORM_BUFFER, cfg) {
1391
 
                        cfg.entries = DIV_ROUND_UP(sys_size, 16);
1392
 
                        cfg.pointer = transfer.gpu;
1393
 
                }
1394
 
        }
1395
 
 
1396
 
        /* The rest are honest-to-goodness UBOs */
1397
 
 
1398
 
        u_foreach_bit(ubo, ss->info.ubo_mask & buf->enabled_mask) {
1399
 
                size_t usz = buf->cb[ubo].buffer_size;
1400
 
 
1401
 
                if (usz == 0) {
1402
 
                        ubo_ptr[ubo] = 0;
1403
 
                        continue;
1404
 
                }
1405
 
 
1406
 
                /* Issue (57) for the ARB_uniform_buffer_object spec says that
1407
 
                 * the buffer can be larger than the uniform data inside it,
1408
 
                 * so clamp ubo size to what hardware supports. */
1409
 
 
1410
 
                pan_pack(ubo_ptr + ubo, UNIFORM_BUFFER, cfg) {
1411
 
                        cfg.entries = MIN2(DIV_ROUND_UP(usz, 16), 1 << 12);
1412
 
                        cfg.pointer = panfrost_map_constant_buffer_gpu(batch,
1413
 
                                        stage, buf, ubo);
1414
 
                }
1415
 
        }
1416
 
 
1417
 
        if (ss->info.push.count == 0)
1418
 
                return ubos.gpu;
1419
 
 
1420
 
        /* Copy push constants required by the shader */
1421
 
        struct panfrost_ptr push_transfer =
1422
 
                pan_pool_alloc_aligned(&batch->pool.base,
1423
 
                                       ss->info.push.count * 4, 16);
1424
 
 
1425
 
        uint32_t *push_cpu = (uint32_t *) push_transfer.cpu;
1426
 
        *push_constants = push_transfer.gpu;
1427
 
 
1428
 
        for (unsigned i = 0; i < ss->info.push.count; ++i) {
1429
 
                struct panfrost_ubo_word src = ss->info.push.words[i];
1430
 
 
1431
 
                if (src.ubo == sysval_ubo) {
1432
 
                        unsigned sysval_idx = src.offset / 16;
1433
 
                        unsigned sysval_comp = (src.offset % 16) / 4;
1434
 
                        unsigned sysval_type = PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[sysval_idx]);
1435
 
                        mali_ptr ptr = push_transfer.gpu + (4 * i);
1436
 
 
1437
 
                        switch (sysval_type) {
1438
 
                        case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
1439
 
                                switch (sysval_comp) {
1440
 
                                case 0:
1441
 
                                        batch->ctx->first_vertex_sysval_ptr = ptr;
1442
 
                                        break;
1443
 
                                case 1:
1444
 
                                        batch->ctx->base_vertex_sysval_ptr = ptr;
1445
 
                                        break;
1446
 
                                case 2:
1447
 
                                        batch->ctx->base_instance_sysval_ptr = ptr;
1448
 
                                        break;
1449
 
                                case 3:
1450
 
                                        /* Spurious (Midgard doesn't pack) */
1451
 
                                        break;
1452
 
                                default:
1453
 
                                        unreachable("Invalid vertex/instance offset component\n");
1454
 
                                }
1455
 
                                break;
1456
 
 
1457
 
                        case PAN_SYSVAL_NUM_WORK_GROUPS:
1458
 
                                batch->num_wg_sysval[sysval_comp] = ptr;
1459
 
                                break;
1460
 
 
1461
 
                        default:
1462
 
                                break;
1463
 
                        }
1464
 
                }
1465
 
                /* Map the UBO; this should be cheap. However, this reads from
                 * write-combine memory, which is _very_ slow. It might pay off
                 * to upload sysvals to a staging buffer on the CPU, on the
                 * assumption that sysvals will get pushed (TODO). */
1469
 
 
1470
 
                const void *mapped_ubo = (src.ubo == sysval_ubo) ? transfer.cpu :
1471
 
                        panfrost_map_constant_buffer_cpu(ctx, buf, src.ubo);
1472
 
 
1473
 
                /* TODO: Is there any benefit to combining ranges? */
1474
 
                memcpy(push_cpu + i, (uint8_t *) mapped_ubo + src.offset, 4);
1475
 
        }
1476
 
 
1477
 
        return ubos.gpu;
1478
 
}
1479
 
 
1480
 
static mali_ptr
1481
 
panfrost_emit_shared_memory(struct panfrost_batch *batch,
1482
 
                            const struct pipe_grid_info *info)
1483
 
{
1484
 
        struct panfrost_context *ctx = batch->ctx;
1485
 
        struct panfrost_device *dev = pan_device(ctx->base.screen);
1486
 
        struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
1487
 
        struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1488
 
        struct panfrost_ptr t =
1489
 
                pan_pool_alloc_desc(&batch->pool.base, LOCAL_STORAGE);
1490
 
 
1491
 
        pan_pack(t.cpu, LOCAL_STORAGE, ls) {
1492
 
                unsigned wls_single_size =
1493
 
                        util_next_power_of_two(MAX2(ss->info.wls_size, 128));
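                /* Workgroup local storage is allocated per instance in
                 * power-of-two chunks of at least 128 bytes. */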
1494
 
 
1495
 
                if (ss->info.wls_size) {
1496
 
                        ls.wls_instances =
1497
 
                                util_next_power_of_two(info->grid[0]) *
1498
 
                                util_next_power_of_two(info->grid[1]) *
1499
 
                                util_next_power_of_two(info->grid[2]);
1500
 
 
1501
 
                        ls.wls_size_scale = util_logbase2(wls_single_size) + 1;
1502
 
 
1503
 
                        unsigned wls_size = wls_single_size * ls.wls_instances * dev->core_count;
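                        /* The total allocation covers every in-flight
                         * workgroup instance on every shader core. */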
1504
 
 
1505
 
                        ls.wls_base_pointer =
1506
 
                                (panfrost_batch_get_shared_memory(batch,
1507
 
                                                                  wls_size,
1508
 
                                                                  1))->ptr.gpu;
1509
 
                } else {
1510
 
                        ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
1511
 
                }
1512
 
 
1513
 
                if (ss->info.tls_size) {
1514
 
                        unsigned shift =
1515
 
                                panfrost_get_stack_shift(ss->info.tls_size);
1516
 
                        struct panfrost_bo *bo =
1517
 
                                panfrost_batch_get_scratchpad(batch,
1518
 
                                                              ss->info.tls_size,
1519
 
                                                              dev->thread_tls_alloc,
1520
 
                                                              dev->core_count);
1521
 
 
1522
 
                        ls.tls_size = shift;
1523
 
                        ls.tls_base_pointer = bo->ptr.gpu;
1524
 
                }
1525
 
        };
1526
 
 
1527
 
        return t.gpu;
1528
 
}
1529
 
 
1530
 
#if PAN_ARCH <= 5
1531
 
static mali_ptr
1532
 
panfrost_get_tex_desc(struct panfrost_batch *batch,
1533
 
                      enum pipe_shader_type st,
1534
 
                      struct panfrost_sampler_view *view)
1535
 
{
1536
 
        if (!view)
1537
 
                return (mali_ptr) 0;
1538
 
 
1539
 
        struct pipe_sampler_view *pview = &view->base;
1540
 
        struct panfrost_resource *rsrc = pan_resource(pview->texture);
1541
 
 
1542
 
        panfrost_batch_read_rsrc(batch, rsrc, st);
1543
 
        panfrost_batch_add_bo(batch, view->state.bo, st);
1544
 
 
1545
 
        return view->state.gpu;
1546
 
}
1547
 
#endif
1548
 
 
1549
 
static void
1550
 
panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so,
1551
 
                                struct pipe_context *pctx,
1552
 
                                struct pipe_resource *texture)
1553
 
{
1554
 
        struct panfrost_device *device = pan_device(pctx->screen);
1555
 
        struct panfrost_context *ctx = pan_context(pctx);
1556
 
        struct panfrost_resource *prsrc = (struct panfrost_resource *)texture;
1557
 
        enum pipe_format format = so->base.format;
1558
 
        assert(prsrc->image.data.bo);
1559
 
 
1560
 
        /* Format to access the stencil/depth portion of a Z32_S8 texture */
1561
 
        if (format == PIPE_FORMAT_X32_S8X24_UINT) {
1562
 
                assert(prsrc->separate_stencil);
1563
 
                texture = &prsrc->separate_stencil->base;
1564
 
                prsrc = (struct panfrost_resource *)texture;
1565
 
                format = texture->format;
1566
 
        } else if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
1567
 
                format = PIPE_FORMAT_Z32_FLOAT;
1568
 
        }
1569
 
 
1570
 
        const struct util_format_description *desc = util_format_description(format);
1571
 
 
1572
 
        bool fake_rgtc = !panfrost_supports_compressed_format(device, MALI_BC4_UNORM);
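        /* Without native RGTC (BC4/BC5) support, sample through a plain RGBA8
         * view instead; the data is assumed to have been decompressed on
         * upload. */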
1573
 
 
1574
 
        if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC && fake_rgtc) {
1575
 
                if (desc->is_snorm)
1576
 
                        format = PIPE_FORMAT_R8G8B8A8_SNORM;
1577
 
                else
1578
 
                        format = PIPE_FORMAT_R8G8B8A8_UNORM;
1579
 
                desc = util_format_description(format);
1580
 
        }
1581
 
 
1582
 
        so->texture_bo = prsrc->image.data.bo->ptr.gpu;
1583
 
        so->modifier = prsrc->image.layout.modifier;
1584
 
 
1585
 
        /* MSAA only supported for 2D textures */
1586
 
 
1587
 
        assert(texture->nr_samples <= 1 ||
1588
 
               so->base.target == PIPE_TEXTURE_2D ||
1589
 
               so->base.target == PIPE_TEXTURE_2D_ARRAY);
1590
 
 
1591
 
        enum mali_texture_dimension type =
1592
 
                panfrost_translate_texture_dimension(so->base.target);
1593
 
 
1594
 
        bool is_buffer = (so->base.target == PIPE_BUFFER);
1595
 
 
1596
 
        unsigned first_level = is_buffer ? 0 : so->base.u.tex.first_level;
1597
 
        unsigned last_level = is_buffer ? 0 : so->base.u.tex.last_level;
1598
 
        unsigned first_layer = is_buffer ? 0 : so->base.u.tex.first_layer;
1599
 
        unsigned last_layer = is_buffer ? 0 : so->base.u.tex.last_layer;
1600
 
        unsigned buf_offset = is_buffer ? so->base.u.buf.offset : 0;
1601
 
        unsigned buf_size = (is_buffer ? so->base.u.buf.size : 0) /
1602
 
                            util_format_get_blocksize(format);
1603
 
 
1604
 
        if (so->base.target == PIPE_TEXTURE_3D) {
1605
 
                first_layer /= prsrc->image.layout.depth;
1606
 
                last_layer /= prsrc->image.layout.depth;
1607
 
                assert(!first_layer && !last_layer);
1608
 
        }
1609
 
 
1610
 
        struct pan_image_view iview = {
1611
 
                .format = format,
1612
 
                .dim = type,
1613
 
                .first_level = first_level,
1614
 
                .last_level = last_level,
1615
 
                .first_layer = first_layer,
1616
 
                .last_layer = last_layer,
1617
 
                .swizzle = {
1618
 
                        so->base.swizzle_r,
1619
 
                        so->base.swizzle_g,
1620
 
                        so->base.swizzle_b,
1621
 
                        so->base.swizzle_a,
1622
 
                },
1623
 
                .image = &prsrc->image,
1624
 
 
1625
 
                .buf.offset = buf_offset,
1626
 
                .buf.size = buf_size,
1627
 
        };
1628
 
 
1629
 
        unsigned size =
1630
 
                (PAN_ARCH <= 5 ? pan_size(TEXTURE) : 0) +
1631
 
                GENX(panfrost_estimate_texture_payload_size)(&iview);
1632
 
 
1633
 
        struct panfrost_pool *pool = so->pool ?: &ctx->descs;
1634
 
        struct panfrost_ptr payload = pan_pool_alloc_aligned(&pool->base, size, 64);
1635
 
        so->state = panfrost_pool_take_ref(&ctx->descs, payload.gpu);
1636
 
 
1637
 
        void *tex = (PAN_ARCH >= 6) ? &so->bifrost_descriptor : payload.cpu;
1638
 
 
1639
 
        if (PAN_ARCH <= 5) {
1640
 
                payload.cpu += pan_size(TEXTURE);
1641
 
                payload.gpu += pan_size(TEXTURE);
1642
 
        }
1643
 
 
1644
 
        GENX(panfrost_new_texture)(device, &iview, tex, &payload);
1645
 
}
1646
 
 
1647
 
static void
1648
 
panfrost_update_sampler_view(struct panfrost_sampler_view *view,
1649
 
                             struct pipe_context *pctx)
1650
 
{
1651
 
        struct panfrost_resource *rsrc = pan_resource(view->base.texture);
1652
 
        if (view->texture_bo != rsrc->image.data.bo->ptr.gpu ||
1653
 
            view->modifier != rsrc->image.layout.modifier) {
1654
 
                panfrost_bo_unreference(view->state.bo);
1655
 
                panfrost_create_sampler_view_bo(view, pctx, &rsrc->base);
1656
 
        }
1657
 
}
1658
 
 
1659
 
static mali_ptr
1660
 
panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
1661
 
                                  enum pipe_shader_type stage)
1662
 
{
1663
 
        struct panfrost_context *ctx = batch->ctx;
1664
 
 
1665
 
        if (!ctx->sampler_view_count[stage])
1666
 
                return 0;
1667
 
 
1668
 
#if PAN_ARCH >= 6
1669
 
        struct panfrost_ptr T =
1670
 
                pan_pool_alloc_desc_array(&batch->pool.base,
1671
 
                                          ctx->sampler_view_count[stage],
1672
 
                                          TEXTURE);
1673
 
        struct mali_texture_packed *out =
1674
 
                (struct mali_texture_packed *) T.cpu;
1675
 
 
1676
 
        for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
1677
 
                struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
1678
 
 
1679
 
                if (!view) {
1680
 
                        memset(&out[i], 0, sizeof(out[i]));
1681
 
                        continue;
1682
 
                }
1683
 
 
1684
 
                struct pipe_sampler_view *pview = &view->base;
1685
 
                struct panfrost_resource *rsrc = pan_resource(pview->texture);
1686
 
 
1687
 
                panfrost_update_sampler_view(view, &ctx->base);
1688
 
                out[i] = view->bifrost_descriptor;
1689
 
 
1690
 
                panfrost_batch_read_rsrc(batch, rsrc, stage);
1691
 
                panfrost_batch_add_bo(batch, view->state.bo, stage);
1692
 
        }
1693
 
 
1694
 
        return T.gpu;
1695
 
#else
1696
 
        uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
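        /* Midgard uses an indirect table of pointers ("trampolines") to the
         * texture descriptors, rather than packing descriptors inline as on
         * Bifrost. */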
1697
 
 
1698
 
        for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
1699
 
                struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
1700
 
 
1701
 
                if (!view) {
1702
 
                        trampolines[i] = 0;
1703
 
                        continue;
1704
 
                }
1705
 
 
1706
 
                panfrost_update_sampler_view(view, &ctx->base);
1707
 
 
1708
 
                trampolines[i] = panfrost_get_tex_desc(batch, stage, view);
1709
 
        }
1710
 
 
1711
 
        return pan_pool_upload_aligned(&batch->pool.base, trampolines,
1712
 
                                       sizeof(uint64_t) *
1713
 
                                       ctx->sampler_view_count[stage],
1714
 
                                       sizeof(uint64_t));
1715
 
#endif
1716
 
}
1717
 
 
1718
 
static mali_ptr
1719
 
panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
1720
 
                                  enum pipe_shader_type stage)
1721
 
{
1722
 
        struct panfrost_context *ctx = batch->ctx;
1723
 
 
1724
 
        if (!ctx->sampler_count[stage])
1725
 
                return 0;
1726
 
 
1727
 
        struct panfrost_ptr T =
1728
 
                pan_pool_alloc_desc_array(&batch->pool.base,
1729
 
                                          ctx->sampler_count[stage],
1730
 
                                          SAMPLER);
1731
 
        struct mali_sampler_packed *out = (struct mali_sampler_packed *) T.cpu;
1732
 
 
1733
 
        for (unsigned i = 0; i < ctx->sampler_count[stage]; ++i) {
1734
 
                struct panfrost_sampler_state *st = ctx->samplers[stage][i];
1735
 
 
1736
 
                out[i] = st ? st->hw : (struct mali_sampler_packed){0};
1737
 
        }
1738
 
 
1739
 
        return T.gpu;
1740
 
}
1741
 
 
1742
 
#if PAN_ARCH <= 7
1743
 
/* Packs all image attribute descs and attribute buffer descs.
1744
 
 * `first_image_buf_index` must be the index of the first image attribute buffer descriptor.
1745
 
 */
1746
 
static void
1747
 
emit_image_attribs(struct panfrost_context *ctx, enum pipe_shader_type shader,
1748
 
                   struct mali_attribute_packed *attribs, unsigned first_buf)
1749
 
{
1750
 
        struct panfrost_device *dev = pan_device(ctx->base.screen);
1751
 
        unsigned last_bit = util_last_bit(ctx->image_mask[shader]);
1752
 
 
1753
 
        for (unsigned i = 0; i < last_bit; ++i) {
1754
 
                enum pipe_format format = ctx->images[shader][i].format;
1755
 
 
1756
 
                pan_pack(attribs + i, ATTRIBUTE, cfg) {
1757
 
                        /* Continuation record means 2 buffers per image */
1758
 
                        cfg.buffer_index = first_buf + (i * 2);
1759
 
                        cfg.offset_enable = (PAN_ARCH <= 5);
1760
 
                        cfg.format = dev->formats[format].hw;
1761
 
                }
1762
 
        }
1763
 
}
1764
 
 
1765
 
static enum mali_attribute_type
1766
 
pan_modifier_to_attr_type(uint64_t modifier)
1767
 
{
1768
 
        switch (modifier) {
1769
 
        case DRM_FORMAT_MOD_LINEAR:
1770
 
                return MALI_ATTRIBUTE_TYPE_3D_LINEAR;
1771
 
        case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED:
1772
 
                return MALI_ATTRIBUTE_TYPE_3D_INTERLEAVED;
1773
 
        default:
1774
 
                unreachable("Invalid modifier for attribute record");
1775
 
        }
1776
 
}
1777
 
 
1778
 
static void
1779
 
emit_image_bufs(struct panfrost_batch *batch, enum pipe_shader_type shader,
1780
 
                struct mali_attribute_buffer_packed *bufs,
1781
 
                unsigned first_image_buf_index)
1782
 
{
1783
 
        struct panfrost_context *ctx = batch->ctx;
1784
 
        unsigned last_bit = util_last_bit(ctx->image_mask[shader]);
1785
 
 
1786
 
        for (unsigned i = 0; i < last_bit; ++i) {
1787
 
                struct pipe_image_view *image = &ctx->images[shader][i];
1788
 
 
1789
 
                if (!(ctx->image_mask[shader] & (1 << i)) ||
1790
 
                    !(image->shader_access & PIPE_IMAGE_ACCESS_READ_WRITE)) {
1791
 
                        /* Unused image bindings */
1792
 
                        pan_pack(bufs + (i * 2), ATTRIBUTE_BUFFER, cfg);
1793
 
                        pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER, cfg);
1794
 
                        continue;
1795
 
                }
1796
 
 
1797
 
                struct panfrost_resource *rsrc = pan_resource(image->resource);
1798
 
 
1799
 
                /* TODO: MSAA */
1800
 
                assert(image->resource->nr_samples <= 1 && "MSAA'd images not supported");
1801
 
 
1802
 
                bool is_3d = rsrc->base.target == PIPE_TEXTURE_3D;
1803
 
                bool is_buffer = rsrc->base.target == PIPE_BUFFER;
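                /* For 3D images, first_layer selects a depth slice rather than
                 * an array layer, hence the swapped arguments below. */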
1804
 
 
1805
 
                unsigned offset = is_buffer ? image->u.buf.offset :
1806
 
                        panfrost_texture_offset(&rsrc->image.layout,
1807
 
                                                image->u.tex.level,
1808
 
                                                is_3d ? 0 : image->u.tex.first_layer,
1809
 
                                                is_3d ? image->u.tex.first_layer : 0);
1810
 
 
1811
 
                panfrost_track_image_access(batch, shader, image);
1812
 
 
1813
 
                pan_pack(bufs + (i * 2), ATTRIBUTE_BUFFER, cfg) {
1814
 
                        cfg.type = pan_modifier_to_attr_type(rsrc->image.layout.modifier);
1815
 
                        cfg.pointer = rsrc->image.data.bo->ptr.gpu + offset;
1816
 
                        cfg.stride = util_format_get_blocksize(image->format);
1817
 
                        cfg.size = rsrc->image.data.bo->size - offset;
1818
 
                }
1819
 
 
1820
 
                if (is_buffer) {
1821
 
                        pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) {
1822
 
                                cfg.s_dimension = rsrc->base.width0 /
1823
 
                                        util_format_get_blocksize(image->format);
1824
 
                                cfg.t_dimension = cfg.r_dimension = 1;
1825
 
                        }
1826
 
 
1827
 
                        continue;
1828
 
                }
1829
 
 
1830
 
                pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) {
1831
 
                        unsigned level = image->u.tex.level;
1832
 
 
1833
 
                        cfg.s_dimension = u_minify(rsrc->base.width0, level);
1834
 
                        cfg.t_dimension = u_minify(rsrc->base.height0, level);
1835
 
                        cfg.r_dimension = is_3d ?
1836
 
                                u_minify(rsrc->base.depth0, level) :
1837
 
                                image->u.tex.last_layer - image->u.tex.first_layer + 1;
1838
 
 
1839
 
                        cfg.row_stride =
1840
 
                                rsrc->image.layout.slices[level].row_stride;
1841
 
 
1842
 
                        if (rsrc->base.target != PIPE_TEXTURE_2D) {
1843
 
                                cfg.slice_stride =
1844
 
                                        panfrost_get_layer_stride(&rsrc->image.layout,
1845
 
                                                                  level);
1846
 
                        }
1847
 
                }
1848
 
        }
1849
 
}
1850
 
 
1851
 
static mali_ptr
1852
 
panfrost_emit_image_attribs(struct panfrost_batch *batch,
1853
 
                            mali_ptr *buffers,
1854
 
                            enum pipe_shader_type type)
1855
 
{
1856
 
        struct panfrost_context *ctx = batch->ctx;
1857
 
        struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, type);
1858
 
 
1859
 
        if (!shader->info.attribute_count) {
1860
 
                *buffers = 0;
1861
 
                return 0;
1862
 
        }
1863
 
 
1864
 
        /* Images always need a MALI_ATTRIBUTE_BUFFER_CONTINUATION_3D */
1865
 
        unsigned attr_count = shader->info.attribute_count;
1866
 
        unsigned buf_count = (attr_count * 2) + (PAN_ARCH >= 6 ? 1 : 0);
1867
 
 
1868
 
        struct panfrost_ptr bufs =
1869
 
                pan_pool_alloc_desc_array(&batch->pool.base, buf_count, ATTRIBUTE_BUFFER);
1870
 
 
1871
 
        struct panfrost_ptr attribs =
1872
 
                pan_pool_alloc_desc_array(&batch->pool.base, attr_count, ATTRIBUTE);
1873
 
 
1874
 
        emit_image_attribs(ctx, type, attribs.cpu, 0);
1875
 
        emit_image_bufs(batch, type, bufs.cpu, 0);
1876
 
 
1877
 
        /* We need an empty attrib buf to stop the prefetching on Bifrost */
1878
 
#if PAN_ARCH >= 6
1879
 
        pan_pack(bufs.cpu + ((buf_count - 1) * pan_size(ATTRIBUTE_BUFFER)),
1880
 
                 ATTRIBUTE_BUFFER, cfg);
1881
 
#endif
1882
 
 
1883
 
        *buffers = bufs.gpu;
1884
 
        return attribs.gpu;
1885
 
}
1886
 
 
1887
 
static mali_ptr
1888
 
panfrost_emit_vertex_data(struct panfrost_batch *batch,
1889
 
                          mali_ptr *buffers)
1890
 
{
1891
 
        struct panfrost_context *ctx = batch->ctx;
1892
 
        struct panfrost_vertex_state *so = ctx->vertex;
1893
 
        struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
1894
 
        bool instanced = ctx->indirect_draw || ctx->instance_count > 1;
1895
 
        uint32_t image_mask = ctx->image_mask[PIPE_SHADER_VERTEX];
1896
 
        unsigned nr_images = util_last_bit(image_mask);
1897
 
 
1898
 
        /* Worst case: everything is NPOT, which is only possible if instancing
         * is enabled. Otherwise a single record is guaranteed.
         * Also, we allocate more memory than needed here if either instancing
         * is enabled or images are present; this could be improved. */
1902
 
        unsigned bufs_per_attrib = (instanced || nr_images > 0) ? 2 : 1;
1903
 
        unsigned nr_bufs = ((so->nr_bufs + nr_images) * bufs_per_attrib) +
1904
 
                           (PAN_ARCH >= 6 ? 1 : 0);
1905
 
 
1906
 
#if PAN_ARCH <= 5
1907
 
        /* Midgard needs vertexid/instanceid handled specially */
1908
 
        bool special_vbufs = vs->info.attribute_count >= PAN_VERTEX_ID;
1909
 
 
1910
 
        if (special_vbufs)
1911
 
                nr_bufs += 2;
1912
 
#endif
1913
 
 
1914
 
        if (!nr_bufs) {
1915
 
                *buffers = 0;
1916
 
                return 0;
1917
 
        }
1918
 
 
1919
 
        struct panfrost_ptr S =
1920
 
                pan_pool_alloc_desc_array(&batch->pool.base, nr_bufs,
1921
 
                                          ATTRIBUTE_BUFFER);
1922
 
        struct panfrost_ptr T =
1923
 
                pan_pool_alloc_desc_array(&batch->pool.base,
1924
 
                                          vs->info.attribute_count,
1925
 
                                          ATTRIBUTE);
1926
 
 
1927
 
        struct mali_attribute_buffer_packed *bufs =
1928
 
                (struct mali_attribute_buffer_packed *) S.cpu;
1929
 
 
1930
 
        struct mali_attribute_packed *out =
1931
 
                (struct mali_attribute_packed *) T.cpu;
1932
 
 
1933
 
        unsigned attrib_to_buffer[PIPE_MAX_ATTRIBS] = { 0 };
1934
 
        unsigned k = 0;
1935
 
 
1936
 
        for (unsigned i = 0; i < so->nr_bufs; ++i) {
1937
 
                unsigned vbi = so->buffers[i].vbi;
1938
 
                unsigned divisor = so->buffers[i].divisor;
1939
 
                attrib_to_buffer[i] = k;
1940
 
 
1941
 
                if (!(ctx->vb_mask & (1 << vbi)))
1942
 
                        continue;
1943
 
 
1944
 
                struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
1945
 
                struct panfrost_resource *rsrc;
1946
 
 
1947
 
                rsrc = pan_resource(buf->buffer.resource);
1948
 
                if (!rsrc)
1949
 
                        continue;
1950
 
 
1951
 
                panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
1952
 
 
1953
 
                /* Mask off lower bits, see offset fixup below */
1954
 
                mali_ptr raw_addr = rsrc->image.data.bo->ptr.gpu + buf->buffer_offset;
1955
 
                mali_ptr addr = raw_addr & ~63;
1956
 
 
1957
 
                /* Since we advanced the base pointer, we shrink the buffer
1958
 
                 * size, but add the offset we subtracted */
1959
 
                unsigned size = rsrc->base.width0 + (raw_addr - addr)
1960
 
                        - buf->buffer_offset;
1961
 
 
1962
 
                /* When there is a divisor, the hardware-level divisor is
1963
 
                 * the product of the instance divisor and the padded count */
1964
 
                unsigned stride = buf->stride;
1965
 
 
1966
 
                if (ctx->indirect_draw) {
1967
 
                        /* We allocated 2 records for each attribute buffer */
1968
 
                        assert((k & 1) == 0);
1969
 
 
1970
 
                        /* With indirect draws we can't guess the vertex_count.
                         * Pre-set the address, stride and size fields; the
                         * compute shader does the rest.
                         */
1974
 
                        pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
1975
 
                                cfg.type = MALI_ATTRIBUTE_TYPE_1D;
1976
 
                                cfg.pointer = addr;
1977
 
                                cfg.stride = stride;
1978
 
                                cfg.size = size;
1979
 
                        }
1980
 
 
1981
 
                        /* We store the unmodified divisor in the continuation
1982
 
                         * slot so the compute shader can retrieve it.
1983
 
                         */
1984
 
                        pan_pack(bufs + k + 1, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
1985
 
                                cfg.divisor = divisor;
1986
 
                        }
1987
 
 
1988
 
                        k += 2;
1989
 
                        continue;
1990
 
                }
1991
 
 
1992
 
                unsigned hw_divisor = ctx->padded_count * divisor;
1993
 
 
1994
 
                if (ctx->instance_count <= 1) {
1995
 
                        /* With a single instance, per-instance attributes are
                         * constant across the draw, so use a zero stride */
1996
 
                        if (divisor)
1997
 
                                stride = 0;
1998
 
 
1999
 
                        pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
2000
 
                                cfg.pointer = addr;
2001
 
                                cfg.stride = stride;
2002
 
                                cfg.size = size;
2003
 
                        }
2004
 
                } else if (!divisor) {
2005
 
                        pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
2006
 
                                cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
2007
 
                                cfg.pointer = addr;
2008
 
                                cfg.stride = stride;
2009
 
                                cfg.size = size;
2010
 
                                cfg.divisor = ctx->padded_count;
2011
 
                        }
2012
 
                } else if (util_is_power_of_two_or_zero(hw_divisor)) {
2013
 
                        pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
2014
 
                                cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR;
2015
 
                                cfg.pointer = addr;
2016
 
                                cfg.stride = stride;
2017
 
                                cfg.size = size;
2018
 
                                cfg.divisor_r = __builtin_ctz(hw_divisor);
2019
 
                        }
2020
 
 
2021
 
                } else {
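                        /* Non-power-of-two divisors are implemented as a
                         * multiply by a precomputed "magic" reciprocal plus a
                         * shift; panfrost_compute_magic_divisor derives those
                         * constants. */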
2022
 
                        unsigned shift = 0, extra_flags = 0;
2023
 
 
2024
 
                        unsigned magic_divisor =
2025
 
                                panfrost_compute_magic_divisor(hw_divisor, &shift, &extra_flags);
2026
 
 
2027
 
                        /* Records with continuations must be aligned */
2028
 
                        k = ALIGN_POT(k, 2);
2029
 
                        attrib_to_buffer[i] = k;
2030
 
 
2031
 
                        pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
2032
 
                                cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR;
2033
 
                                cfg.pointer = addr;
2034
 
                                cfg.stride = stride;
2035
 
                                cfg.size = size;
2036
 
 
2037
 
                                cfg.divisor_r = shift;
2038
 
                                cfg.divisor_e = extra_flags;
2039
 
                        }
2040
 
 
2041
 
                        pan_pack(bufs + k + 1, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
2042
 
                                cfg.divisor_numerator = magic_divisor;
2043
 
                                cfg.divisor = divisor;
2044
 
                        }
2045
 
 
2046
 
                        ++k;
2047
 
                }
2048
 
 
2049
 
                ++k;
2050
 
        }
2051
 
 
2052
 
#if PAN_ARCH <= 5
2053
 
        /* Add special gl_VertexID/gl_InstanceID buffers */
2054
 
        if (special_vbufs) {
2055
 
                panfrost_vertex_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1);
2056
 
 
2057
 
                pan_pack(out + PAN_VERTEX_ID, ATTRIBUTE, cfg) {
2058
 
                        cfg.buffer_index = k++;
2059
 
                        cfg.format = so->formats[PAN_VERTEX_ID];
2060
 
                }
2061
 
 
2062
 
                panfrost_instance_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1);
2063
 
 
2064
 
                pan_pack(out + PAN_INSTANCE_ID, ATTRIBUTE, cfg) {
2065
 
                        cfg.buffer_index = k++;
2066
 
                        cfg.format = so->formats[PAN_INSTANCE_ID];
2067
 
                }
2068
 
        }
2069
 
#endif
2070
 
 
2071
 
        k = ALIGN_POT(k, 2);
2072
 
        emit_image_attribs(ctx, PIPE_SHADER_VERTEX, out + so->num_elements, k);
2073
 
        emit_image_bufs(batch, PIPE_SHADER_VERTEX, bufs + k, k);
2074
 
        k += (util_last_bit(ctx->image_mask[PIPE_SHADER_VERTEX]) * 2);
2075
 
 
2076
 
#if PAN_ARCH >= 6
2077
 
        /* We need an empty attrib buf to stop the prefetching on Bifrost */
2078
 
        pan_pack(&bufs[k], ATTRIBUTE_BUFFER, cfg);
2079
 
#endif
2080
 
 
2081
 
        /* Attribute addresses require 64-byte alignment, so let:
2082
 
         *
2083
 
         *      base' = base & ~63 = base - (base & 63)
2084
 
         *      offset' = offset + (base & 63)
2085
 
         *
2086
 
         * Since base' + offset' = base + offset, these are equivalent
2087
 
         * addressing modes and now base is 64 aligned.
2088
 
         */
2089
 
 
2090
 
        for (unsigned i = 0; i < so->num_elements; ++i) {
2091
 
                unsigned vbi = so->pipe[i].vertex_buffer_index;
2092
 
                struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
2093
 
 
2094
 
                /* BOs are aligned; just fixup for buffer_offset */
2095
 
                signed src_offset = so->pipe[i].src_offset;
2096
 
                src_offset += (buf->buffer_offset & 63);
2097
 
 
2098
 
                /* Base instance offset */
2099
 
                if (ctx->base_instance && so->pipe[i].instance_divisor) {
2100
 
                        src_offset += (ctx->base_instance * buf->stride) /
2101
 
                                      so->pipe[i].instance_divisor;
2102
 
                }
2103
 
 
2104
 
                /* Also, somewhat obscurely, per-instance data needs to be
                 * offset to compensate for a delayed start in an indexed draw */
2106
 
 
2107
 
                if (so->pipe[i].instance_divisor && ctx->instance_count > 1)
2108
 
                        src_offset -= buf->stride * ctx->offset_start;
2109
 
 
2110
 
                pan_pack(out + i, ATTRIBUTE, cfg) {
2111
 
                        cfg.buffer_index = attrib_to_buffer[so->element_buffer[i]];
2112
 
                        cfg.format = so->formats[i];
2113
 
                        cfg.offset = src_offset;
2114
 
                }
2115
 
        }
2116
 
 
2117
 
        *buffers = S.gpu;
2118
 
        return T.gpu;
2119
 
}
2120
 
 
2121
 
static mali_ptr
2122
 
panfrost_emit_varyings(struct panfrost_batch *batch,
2123
 
                struct mali_attribute_buffer_packed *slot,
2124
 
                unsigned stride, unsigned count)
2125
 
{
2126
 
        unsigned size = stride * count;
2127
 
        mali_ptr ptr =
2128
 
                batch->ctx->indirect_draw ? 0 :
2129
 
                pan_pool_alloc_aligned(&batch->invisible_pool.base, size, 64).gpu;
2130
 
 
2131
 
        pan_pack(slot, ATTRIBUTE_BUFFER, cfg) {
2132
 
                cfg.stride = stride;
2133
 
                cfg.size = size;
2134
 
                cfg.pointer = ptr;
2135
 
        }
2136
 
 
2137
 
        return ptr;
2138
 
}
2139
 
 
2140
 
static unsigned
2141
 
panfrost_xfb_offset(unsigned stride, struct pipe_stream_output_target *target)
2142
 
{
2143
 
        return target->buffer_offset + (pan_so_target(target)->offset * stride);
2144
 
}
2145
 
 
2146
 
static void
2147
 
panfrost_emit_streamout(struct panfrost_batch *batch,
2148
 
                        struct mali_attribute_buffer_packed *slot,
2149
 
                        unsigned stride, unsigned count,
2150
 
                        struct pipe_stream_output_target *target)
2151
 
{
2152
 
        unsigned max_size = target->buffer_size;
2153
 
        unsigned expected_size = stride * count;
2154
 
 
2155
 
        /* Grab the BO and bind it to the batch */
2156
 
        struct panfrost_resource *rsrc = pan_resource(target->buffer);
2157
 
        struct panfrost_bo *bo = rsrc->image.data.bo;
2158
 
 
2159
 
        panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
2160
 
        panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_FRAGMENT);
2161
 
 
2162
 
        unsigned offset = panfrost_xfb_offset(stride, target);
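        /* Attribute buffer pointers must be 64-byte aligned, so align the base
         * down and fold the low bits of the offset into the size instead. */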
2163
 
 
2164
 
        pan_pack(slot, ATTRIBUTE_BUFFER, cfg) {
2165
 
                cfg.pointer = bo->ptr.gpu + (offset & ~63);
2166
 
                cfg.stride = stride;
2167
 
                cfg.size = MIN2(max_size, expected_size) + (offset & 63);
2168
 
 
2169
 
                util_range_add(&rsrc->base, &rsrc->valid_buffer_range,
2170
 
                                offset, cfg.size);
2171
 
        }
2172
 
}
2173
 
 
2174
 
/* Helpers for manipulating stream out information so we can pack varyings
2175
 
 * accordingly. Compute the src_offset for a given captured varying */
2176
 
 
2177
 
static struct pipe_stream_output *
2178
 
pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
2179
 
{
2180
 
        for (unsigned i = 0; i < info->num_outputs; ++i) {
2181
 
                if (info->output[i].register_index == loc)
2182
 
                        return &info->output[i];
2183
 
        }
2184
 
 
2185
 
        unreachable("Varying not captured");
2186
 
}
2187
 
 
2188
 
/* Given a varying, figure out which index it corresponds to */
2189
 
 
2190
 
static inline unsigned
2191
 
pan_varying_index(unsigned present, enum pan_special_varying v)
2192
 
{
2193
 
        return util_bitcount(present & BITFIELD_MASK(v));
2194
 
}
2195
 
 
2196
 
/* Get the base offset for XFB buffers, which by convention come after
2197
 
 * everything else. Wrapper function for semantic reasons; by construction this
2198
 
 * is just popcount. */
2199
 
 
2200
 
static inline unsigned
2201
 
pan_xfb_base(unsigned present)
2202
 
{
2203
 
        return util_bitcount(present);
2204
 
}
2205
 
 
2206
 
/* Determines which varying buffers are required */
2207
 
 
2208
 
static inline unsigned
2209
 
pan_varying_present(const struct panfrost_device *dev,
2210
 
                    struct pan_shader_info *producer,
2211
 
                    struct pan_shader_info *consumer,
2212
 
                    uint16_t point_coord_mask)
2213
 
{
2214
 
        /* At the moment we always emit general and position buffers. Not
2215
 
         * strictly necessary but usually harmless */
2216
 
 
2217
 
        unsigned present = BITFIELD_BIT(PAN_VARY_GENERAL) | BITFIELD_BIT(PAN_VARY_POSITION);
2218
 
 
2219
 
        /* Enable special buffers by the shader info */
2220
 
 
2221
 
        if (producer->vs.writes_point_size)
2222
 
                present |= BITFIELD_BIT(PAN_VARY_PSIZ);
2223
 
 
2224
 
#if PAN_ARCH <= 5
2225
 
        /* On Midgard, these exist as real varyings. Later architectures use
2226
 
         * LD_VAR_SPECIAL reads instead. */
2227
 
 
2228
 
        if (consumer->fs.reads_point_coord)
2229
 
                present |= BITFIELD_BIT(PAN_VARY_PNTCOORD);
2230
 
 
2231
 
        if (consumer->fs.reads_face)
2232
 
                present |= BITFIELD_BIT(PAN_VARY_FACE);
2233
 
 
2234
 
        if (consumer->fs.reads_frag_coord)
2235
 
                present |= BITFIELD_BIT(PAN_VARY_FRAGCOORD);
2236
 
 
2237
 
        /* Also, if we have a point sprite, we need a point coord buffer */
2238
 
 
2239
 
        for (unsigned i = 0; i < consumer->varyings.input_count; i++)  {
2240
 
                gl_varying_slot loc = consumer->varyings.input[i].location;
2241
 
 
2242
 
                if (util_varying_is_point_coord(loc, point_coord_mask))
2243
 
                        present |= BITFIELD_BIT(PAN_VARY_PNTCOORD);
2244
 
        }
2245
 
#endif
2246
 
 
2247
 
        return present;
2248
 
}
2249
 
 
2250
 
/* Emitters for varying records */
2251
 
 
2252
 
static void
2253
 
pan_emit_vary(const struct panfrost_device *dev,
2254
 
              struct mali_attribute_packed *out,
2255
 
              unsigned buffer_index,
2256
 
              mali_pixel_format format, unsigned offset)
2257
 
{
2258
 
        pan_pack(out, ATTRIBUTE, cfg) {
2259
 
                cfg.buffer_index = buffer_index;
2260
 
                cfg.offset_enable = (PAN_ARCH <= 5);
2261
 
                cfg.format = format;
2262
 
                cfg.offset = offset;
2263
 
        }
2264
 
}
2265
 
 
2266
 
/* Special records */
2267
 
 
2268
 
static const struct {
2269
 
       unsigned components;
2270
 
       enum mali_format format;
2271
 
} pan_varying_formats[PAN_VARY_MAX] = {
2272
 
        [PAN_VARY_POSITION]     = { 4, MALI_SNAP_4 },
2273
 
        [PAN_VARY_PSIZ]         = { 1, MALI_R16F },
2274
 
        [PAN_VARY_PNTCOORD]     = { 1, MALI_R16F },
2275
 
        [PAN_VARY_FACE]         = { 1, MALI_R32I },
2276
 
        [PAN_VARY_FRAGCOORD]    = { 4, MALI_RGBA32F },
2277
 
};
2278
 
 
2279
 
static mali_pixel_format
2280
 
pan_special_format(const struct panfrost_device *dev,
2281
 
                enum pan_special_varying buf)
2282
 
{
2283
 
        assert(buf < PAN_VARY_MAX);
2284
 
        mali_pixel_format format = (pan_varying_formats[buf].format << 12);
2285
 
 
2286
 
#if PAN_ARCH <= 6
2287
 
        unsigned nr = pan_varying_formats[buf].components;
2288
 
        format |= panfrost_get_default_swizzle(nr);
2289
 
#endif
2290
 
 
2291
 
        return format;
2292
 
}
2293
 
 
2294
 
static void
2295
 
pan_emit_vary_special(const struct panfrost_device *dev,
2296
 
                      struct mali_attribute_packed *out,
2297
 
                      unsigned present, enum pan_special_varying buf)
2298
 
{
2299
 
        pan_emit_vary(dev, out, pan_varying_index(present, buf),
2300
 
                        pan_special_format(dev, buf), 0);
2301
 
}
2302
 
 
2303
 
/* Negative indicates a varying is not found */
2304
 
 
2305
 
static signed
2306
 
pan_find_vary(const struct pan_shader_varying *vary,
2307
 
                unsigned vary_count, unsigned loc)
2308
 
{
2309
 
        for (unsigned i = 0; i < vary_count; ++i) {
2310
 
                if (vary[i].location == loc)
2311
 
                        return i;
2312
 
        }
2313
 
 
2314
 
        return -1;
2315
 
}
2316
 
 
2317
 
/* Assign varying locations for the general buffer. Returns the calculated
2318
 
 * per-vertex stride, and outputs offsets into the passed array. Negative
2319
 
 * offset indicates a varying is not used. */
2320
 
 
2321
 
static unsigned
2322
 
pan_assign_varyings(const struct panfrost_device *dev,
2323
 
                    struct pan_shader_info *producer,
2324
 
                    struct pan_shader_info *consumer,
2325
 
                    signed *offsets)
2326
 
{
2327
 
        unsigned producer_count = producer->varyings.output_count;
2328
 
        unsigned consumer_count = consumer->varyings.input_count;
2329
 
 
2330
 
        const struct pan_shader_varying *producer_vars = producer->varyings.output;
2331
 
        const struct pan_shader_varying *consumer_vars = consumer->varyings.input;
2332
 
 
2333
 
        unsigned stride = 0;
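        /* Pack only the varyings the consumer actually reads, back-to-back in
         * the general buffer; unread outputs get offset -1 and take no space. */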
2334
 
 
2335
 
        for (unsigned i = 0; i < producer_count; ++i) {
2336
 
                signed loc = pan_find_vary(consumer_vars, consumer_count,
2337
 
                                producer_vars[i].location);
2338
 
 
2339
 
                if (loc >= 0) {
2340
 
                        offsets[i] = stride;
2341
 
 
2342
 
                        enum pipe_format format = consumer_vars[loc].format;
2343
 
                        stride += util_format_get_blocksize(format);
2344
 
                } else {
2345
 
                        offsets[i] = -1;
2346
 
                }
2347
 
        }
2348
 
 
2349
 
        return stride;
2350
 
}
2351
 
 
2352
 
/* Emitter for a single varying (attribute) descriptor */
2353
 
 
2354
 
static void
2355
 
panfrost_emit_varying(const struct panfrost_device *dev,
2356
 
                      struct mali_attribute_packed *out,
2357
 
                      const struct pan_shader_varying varying,
2358
 
                      enum pipe_format pipe_format,
2359
 
                      unsigned present,
2360
 
                      uint16_t point_sprite_mask,
2361
 
                      struct pipe_stream_output_info *xfb,
2362
 
                      uint64_t xfb_loc_mask,
2363
 
                      unsigned max_xfb,
2364
 
                      unsigned *xfb_offsets,
2365
 
                      signed offset,
2366
 
                      enum pan_special_varying pos_varying)
2367
 
{
2368
 
        /* Note: varying.format != pipe_format in some obscure cases due to a
2369
 
         * limitation of the NIR linker. This should be fixed in the future to
2370
 
         * eliminate the additional lookups. See:
2371
 
         * dEQP-GLES3.functional.shaders.conditionals.if.sequence_statements_vertex
2372
 
         */
2373
 
        gl_varying_slot loc = varying.location;
2374
 
        mali_pixel_format format = dev->formats[pipe_format].hw;
2375
 
 
2376
 
        struct pipe_stream_output *o = (xfb_loc_mask & BITFIELD64_BIT(loc)) ?
2377
 
                pan_get_so(xfb, loc) : NULL;
2378
 
 
2379
 
        if (util_varying_is_point_coord(loc, point_sprite_mask)) {
2380
 
                pan_emit_vary_special(dev, out, present, PAN_VARY_PNTCOORD);
2381
 
        } else if (o && o->output_buffer < max_xfb) {
2382
 
                unsigned fixup_offset = xfb_offsets[o->output_buffer] & 63;
2383
 
 
2384
 
                pan_emit_vary(dev, out,
2385
 
                                pan_xfb_base(present) + o->output_buffer,
2386
 
                                format, (o->dst_offset * 4) + fixup_offset);
2387
 
        } else if (loc == VARYING_SLOT_POS) {
2388
 
                pan_emit_vary_special(dev, out, present, pos_varying);
2389
 
        } else if (loc == VARYING_SLOT_PSIZ) {
2390
 
                pan_emit_vary_special(dev, out, present, PAN_VARY_PSIZ);
2391
 
        } else if (loc == VARYING_SLOT_FACE) {
2392
 
                pan_emit_vary_special(dev, out, present, PAN_VARY_FACE);
2393
 
        } else if (offset < 0) {
2394
 
                pan_emit_vary(dev, out, 0, (MALI_CONSTANT << 12), 0);
2395
 
        } else {
2396
 
                STATIC_ASSERT(PAN_VARY_GENERAL == 0);
2397
 
                pan_emit_vary(dev, out, 0, format, offset);
2398
 
        }
2399
 
}
2400
 
 
2401
 
/* Links varyings and uploads ATTRIBUTE descriptors. Can execute at link time,
2402
 
 * rather than draw time (under good conditions). */
2403
 
 
2404
 
static void
2405
 
panfrost_emit_varying_descs(
2406
 
                struct panfrost_pool *pool,
2407
 
                struct panfrost_shader_state *producer,
2408
 
                struct panfrost_shader_state *consumer,
2409
 
                struct panfrost_streamout *xfb,
2410
 
                uint16_t point_coord_mask,
2411
 
                struct pan_linkage *out)
2412
 
{
2413
 
        struct panfrost_device *dev = pool->base.dev;
2414
 
        struct pipe_stream_output_info *xfb_info = &producer->stream_output;
2415
 
        unsigned producer_count = producer->info.varyings.output_count;
2416
 
        unsigned consumer_count = consumer->info.varyings.input_count;
2417
 
 
2418
 
        /* Offsets within the general varying buffer, indexed by location */
2419
 
        signed offsets[PAN_MAX_VARYINGS];
2420
 
        assert(producer_count <= ARRAY_SIZE(offsets));
2421
 
        assert(consumer_count <= ARRAY_SIZE(offsets));
2422
 
 
2423
 
        /* Allocate enough descriptors for both shader stages */
2424
 
        struct panfrost_ptr T =
2425
 
                pan_pool_alloc_desc_array(&pool->base,
2426
 
                                          producer_count + consumer_count,
2427
 
                                          ATTRIBUTE);
2428
 
 
2429
 
        /* Take a reference if we're being put on the CSO */
2430
 
        if (!pool->owned) {
2431
 
                out->bo = pool->transient_bo;
2432
 
                panfrost_bo_reference(out->bo);
2433
 
        }
2434
 
 
2435
 
        struct mali_attribute_packed *descs = T.cpu;
2436
 
        out->producer = producer_count ? T.gpu : 0;
2437
 
        out->consumer = consumer_count ? T.gpu +
2438
 
                (pan_size(ATTRIBUTE) * producer_count) : 0;
2439
 
 
2440
 
        /* Lay out the varyings. Must use producer to lay out, in order to
2441
 
         * respect transform feedback precisions. */
2442
 
        out->present = pan_varying_present(dev, &producer->info,
2443
 
                        &consumer->info, point_coord_mask);
2444
 
 
2445
 
        out->stride = pan_assign_varyings(dev, &producer->info,
2446
 
                        &consumer->info, offsets);
2447
 
 
2448
 
        unsigned xfb_offsets[PIPE_MAX_SO_BUFFERS];
2449
 
 
2450
 
        for (unsigned i = 0; i < xfb->num_targets; ++i) {
2451
 
                xfb_offsets[i] = panfrost_xfb_offset(xfb_info->stride[i] * 4,
2452
 
                                xfb->targets[i]);
2453
 
        }
2454
 
 
2455
 
        for (unsigned i = 0; i < producer_count; ++i) {
2456
 
                signed j = pan_find_vary(consumer->info.varyings.input,
2457
 
                                consumer->info.varyings.input_count,
2458
 
                                producer->info.varyings.output[i].location);
2459
 
 
2460
 
                enum pipe_format format = (j >= 0) ?
2461
 
                        consumer->info.varyings.input[j].format :
2462
 
                        producer->info.varyings.output[i].format;
2463
 
 
2464
 
                panfrost_emit_varying(dev, descs + i,
2465
 
                                producer->info.varyings.output[i], format,
2466
 
                                out->present, 0, &producer->stream_output,
2467
 
                                producer->so_mask, xfb->num_targets,
2468
 
                                xfb_offsets, offsets[i], PAN_VARY_POSITION);
2469
 
        }
2470
 
 
2471
 
        for (unsigned i = 0; i < consumer_count; ++i) {
2472
 
                signed j = pan_find_vary(producer->info.varyings.output,
2473
 
                                producer->info.varyings.output_count,
2474
 
                                consumer->info.varyings.input[i].location);
2475
 
 
2476
 
                signed offset = (j >= 0) ? offsets[j] : -1;
2477
 
 
2478
 
                panfrost_emit_varying(dev, descs + producer_count + i,
2479
 
                                consumer->info.varyings.input[i],
2480
 
                                consumer->info.varyings.input[i].format,
2481
 
                                out->present, point_coord_mask,
2482
 
                                &producer->stream_output, producer->so_mask,
2483
 
                                xfb->num_targets, xfb_offsets, offset,
2484
 
                                PAN_VARY_FRAGCOORD);
2485
 
        }
2486
 
}
2487
 
 
2488
 
#if PAN_ARCH <= 5
2489
 
static void
2490
 
pan_emit_special_input(struct mali_attribute_buffer_packed *out,
2491
 
                unsigned present,
2492
 
                enum pan_special_varying v,
2493
 
                unsigned special)
2494
 
{
2495
 
        if (present & BITFIELD_BIT(v)) {
2496
 
                unsigned idx = pan_varying_index(present, v);
2497
 
 
2498
 
                pan_pack(out + idx, ATTRIBUTE_BUFFER, cfg) {
2499
 
                        cfg.special = special;
2500
 
                        cfg.type = 0;
2501
 
                }
2502
 
        }
2503
 
}
2504
 
#endif
2505
 
 
2506
 
static void
2507
 
panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
2508
 
                                 unsigned vertex_count,
2509
 
                                 mali_ptr *vs_attribs,
2510
 
                                 mali_ptr *fs_attribs,
2511
 
                                 mali_ptr *buffers,
2512
 
                                 unsigned *buffer_count,
2513
 
                                 mali_ptr *position,
2514
 
                                 mali_ptr *psiz,
2515
 
                                 bool point_coord_replace)
2516
 
{
2517
 
        /* Load the shaders */
2518
 
        struct panfrost_context *ctx = batch->ctx;
2519
 
        struct panfrost_shader_state *vs, *fs;
2520
 
 
2521
 
        vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
2522
 
        fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
2523
 
 
2524
 
        uint16_t point_coord_mask = 0;
2525
 
 
2526
 
#if PAN_ARCH <= 5
2527
 
        /* Point sprites are lowered on Bifrost and newer */
2528
 
        if (point_coord_replace)
2529
 
                point_coord_mask = ctx->rasterizer->base.sprite_coord_enable;
2530
 
#endif
2531
 
 
2532
 
        /* In good conditions, we only need to link varyings once */
2533
 
        bool prelink =
2534
 
                (point_coord_mask == 0) &&
2535
 
                (ctx->streamout.num_targets == 0) &&
2536
 
                !vs->info.separable &&
2537
 
                !fs->info.separable;
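        /* For example, a typical GLES draw with no point sprites, no
         * transform feedback, and a linked (non-separable) program takes the
         * prelink path, so the ATTRIBUTE descriptors built below are cached
         * in vs->linkage and reused by later draws. */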
2538
 
 
2539
 
        /* Try to reduce copies */
2540
 
        struct pan_linkage _linkage;
2541
 
        struct pan_linkage *linkage = prelink ? &vs->linkage : &_linkage;
2542
 
 
2543
 
        /* Emit ATTRIBUTE descriptors if needed */
2544
 
        if (!prelink || vs->linkage.bo == NULL) {
2545
 
                struct panfrost_pool *pool =
2546
 
                        prelink ? &ctx->descs : &batch->pool;
2547
 
 
2548
 
                panfrost_emit_varying_descs(pool, vs, fs, &ctx->streamout, point_coord_mask, linkage);
2549
 
        }
2550
 
 
2551
 
        struct pipe_stream_output_info *so = &vs->stream_output;
2552
 
        unsigned present = linkage->present, stride = linkage->stride;
2553
 
        unsigned xfb_base = pan_xfb_base(present);
2554
 
        struct panfrost_ptr T =
2555
 
                pan_pool_alloc_desc_array(&batch->pool.base,
2556
 
                                          xfb_base +
2557
 
                                          ctx->streamout.num_targets + 1,
2558
 
                                          ATTRIBUTE_BUFFER);
2559
 
        struct mali_attribute_buffer_packed *varyings =
2560
 
                (struct mali_attribute_buffer_packed *) T.cpu;
2561
 
 
2562
 
        if (buffer_count)
2563
 
                *buffer_count = xfb_base + ctx->streamout.num_targets;
2564
 
 
2565
 
#if PAN_ARCH >= 6
2566
 
        /* Suppress prefetch on Bifrost */
2567
 
        memset(varyings + (xfb_base + ctx->streamout.num_targets), 0, sizeof(*varyings));
2568
 
#endif
2569
 
 
2570
 
        /* Emit the stream out buffers. We need enough room for all the
2571
 
         * vertices we emit across all instances */
2572
 
 
2573
 
        unsigned out_count = ctx->instance_count *
2574
 
                u_stream_outputs_for_vertices(ctx->active_prim, ctx->vertex_count);
2575
 
 
2576
 
        for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
2577
 
                panfrost_emit_streamout(batch, &varyings[xfb_base + i],
2578
 
                                        so->stride[i] * 4,
2579
 
                                        out_count,
2580
 
                                        ctx->streamout.targets[i]);
2581
 
        }
2582
 
 
2583
 
        if (stride) {
2584
 
                panfrost_emit_varyings(batch,
2585
 
                                &varyings[pan_varying_index(present, PAN_VARY_GENERAL)],
2586
 
                                stride, vertex_count);
2587
 
        }
2588
 
 
2589
 
        /* fp32 vec4 gl_Position */
2590
 
        *position = panfrost_emit_varyings(batch,
2591
 
                        &varyings[pan_varying_index(present, PAN_VARY_POSITION)],
2592
 
                        sizeof(float) * 4, vertex_count);
2593
 
 
2594
 
        if (present & BITFIELD_BIT(PAN_VARY_PSIZ)) {
2595
 
                *psiz = panfrost_emit_varyings(batch,
2596
 
                                &varyings[pan_varying_index(present, PAN_VARY_PSIZ)],
2597
 
                                2, vertex_count);
2598
 
        }
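        /* Note: the 2-byte per-vertex stride above matches the fp16 point
         * size array format selected in panfrost_draw_emit_tiler() when the
         * vertex shader writes gl_PointSize. */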
2599
 
 
2600
 
#if PAN_ARCH <= 5
2601
 
        pan_emit_special_input(varyings, present,
2602
 
                        PAN_VARY_PNTCOORD, MALI_ATTRIBUTE_SPECIAL_POINT_COORD);
2603
 
        pan_emit_special_input(varyings, present, PAN_VARY_FACE,
2604
 
                        MALI_ATTRIBUTE_SPECIAL_FRONT_FACING);
2605
 
        pan_emit_special_input(varyings, present, PAN_VARY_FRAGCOORD,
2606
 
                        MALI_ATTRIBUTE_SPECIAL_FRAG_COORD);
2607
 
#endif
2608
 
 
2609
 
        *buffers = T.gpu;
2610
 
        *vs_attribs = linkage->producer;
2611
 
        *fs_attribs = linkage->consumer;
2612
 
}
2613
 
 
2614
 
static void
2615
 
panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
2616
 
                                const struct panfrost_ptr *vertex_job,
2617
 
                                const struct panfrost_ptr *tiler_job)
2618
 
{
2619
 
        struct panfrost_context *ctx = batch->ctx;
2620
 
 
2621
 
        /* If rasterizer discard is enabled, only submit the vertex job. XXX -
         * set job_barrier in case buffers get ping-ponged and we need to
         * enforce ordering; this has a perf hit! See
         * KHR-GLES31.core.vertex_attrib_binding.advanced-iterations */
2625
 
 
2626
 
        unsigned vertex = panfrost_add_job(&batch->pool.base, &batch->scoreboard,
2627
 
                                           MALI_JOB_TYPE_VERTEX, true, false,
2628
 
                                           ctx->indirect_draw ?
2629
 
                                           batch->indirect_draw_job_id : 0,
2630
 
                                           0, vertex_job, false);
2631
 
 
2632
 
        if (panfrost_batch_skip_rasterization(batch))
2633
 
                return;
2634
 
 
2635
 
        panfrost_add_job(&batch->pool.base, &batch->scoreboard,
2636
 
                         MALI_JOB_TYPE_TILER, false, false,
2637
 
                         vertex, 0, tiler_job, false);
2638
 
}
2639
 
#endif
2640
 
 
2641
 
static void
2642
 
emit_tls(struct panfrost_batch *batch)
2643
 
{
2644
 
        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
2645
 
 
2646
 
        /* Emitted with the FB descriptor on Midgard. */
2647
 
        if (PAN_ARCH <= 5 && batch->framebuffer.gpu)
2648
 
                return;
2649
 
 
2650
 
        struct panfrost_bo *tls_bo =
2651
 
                batch->stack_size ?
2652
 
                panfrost_batch_get_scratchpad(batch,
2653
 
                                              batch->stack_size,
2654
 
                                              dev->thread_tls_alloc,
2655
 
                                              dev->core_count):
2656
 
                NULL;
2657
 
        struct pan_tls_info tls = {
2658
 
                .tls = {
2659
 
                        .ptr = tls_bo ? tls_bo->ptr.gpu : 0,
2660
 
                        .size = batch->stack_size,
2661
 
                },
2662
 
        };
2663
 
 
2664
 
        assert(batch->tls.cpu);
2665
 
        GENX(pan_emit_tls)(&tls, batch->tls.cpu);
2666
 
}
2667
 
 
2668
 
static void
2669
 
emit_fbd(struct panfrost_batch *batch, const struct pan_fb_info *fb)
2670
 
{
2671
 
        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
2672
 
        struct panfrost_bo *tls_bo =
2673
 
                batch->stack_size ?
2674
 
                panfrost_batch_get_scratchpad(batch,
2675
 
                                              batch->stack_size,
2676
 
                                              dev->thread_tls_alloc,
2677
 
                                              dev->core_count):
2678
 
                NULL;
2679
 
        struct pan_tls_info tls = {
2680
 
                .tls = {
2681
 
                        .ptr = tls_bo ? tls_bo->ptr.gpu : 0,
2682
 
                        .size = batch->stack_size,
2683
 
                },
2684
 
        };
2685
 
 
2686
 
        batch->framebuffer.gpu |=
2687
 
                GENX(pan_emit_fbd)(dev, fb, &tls, &batch->tiler_ctx,
2688
 
                                   batch->framebuffer.cpu);
2689
 
}
2690
 
 
2691
 
/* Mark a surface as written */
2692
 
 
2693
 
static void
2694
 
panfrost_initialize_surface(struct panfrost_batch *batch,
2695
 
                            struct pipe_surface *surf)
2696
 
{
2697
 
        if (surf) {
2698
 
                struct panfrost_resource *rsrc = pan_resource(surf->texture);
2699
 
                BITSET_SET(rsrc->valid.data, surf->u.tex.level);
2700
 
        }
2701
 
}
2702
 
 
2703
 
/* Generate a fragment job. This should be called once per frame. (Usually,
2704
 
 * this corresponds to eglSwapBuffers or one of glFlush, glFinish)
2705
 
 */
2706
 
static mali_ptr
2707
 
emit_fragment_job(struct panfrost_batch *batch, const struct pan_fb_info *pfb)
2708
 
{
2709
 
        /* Mark the affected buffers as initialized, since we're writing to
         * them. Also, add the surfaces we're writing to the batch */
2711
 
 
2712
 
        struct pipe_framebuffer_state *fb = &batch->key;
2713
 
 
2714
 
        for (unsigned i = 0; i < fb->nr_cbufs; ++i)
2715
 
                panfrost_initialize_surface(batch, fb->cbufs[i]);
2716
 
 
2717
 
        panfrost_initialize_surface(batch, fb->zsbuf);
2718
 
 
2719
 
        /* The passed tile coords can be out of range in some cases, so we need
2720
 
         * to clamp them to the framebuffer size to avoid a TILE_RANGE_FAULT.
2721
 
         * Theoretically we also need to clamp the coordinates positive, but we
2722
 
         * avoid that edge case as all four values are unsigned. Also,
2723
 
         * theoretically we could clamp the minima, but if that has to happen
2724
 
         * the asserts would fail anyway (since the maxima would get clamped
2725
 
         * and then be smaller than the minima). An edge case of sorts occurs
2726
 
         * when no scissors are added to draw, so by default min=~0 and max=0.
2727
 
         * But that can't happen if any actual drawing occurs (beyond a
2728
 
         * wallpaper reload), so this is again irrelevant in practice. */
2729
 
 
2730
 
        batch->maxx = MIN2(batch->maxx, fb->width);
2731
 
        batch->maxy = MIN2(batch->maxy, fb->height);
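        /* For illustration: if a scissored draw left batch->maxx at 2048 on a
         * 1920x1080 framebuffer, the MIN2s above clamp the render region to
         * 1920x1080, avoiding the TILE_RANGE_FAULT described above. */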
2732
 
 
2733
 
        /* Rendering region must be at least 1x1; otherwise, there is nothing
2734
 
         * to do and the whole job chain should have been discarded. */
2735
 
 
2736
 
        assert(batch->maxx > batch->minx);
2737
 
        assert(batch->maxy > batch->miny);
2738
 
 
2739
 
        struct panfrost_ptr transfer =
2740
 
                pan_pool_alloc_desc(&batch->pool.base, FRAGMENT_JOB);
2741
 
 
2742
 
        GENX(pan_emit_fragment_job)(pfb, batch->framebuffer.gpu,
2743
 
                                    transfer.cpu);
2744
 
 
2745
 
        return transfer.gpu;
2746
 
}
2747
 
 
2748
 
#define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_DRAW_MODE_##c;
2749
 
 
2750
 
static uint8_t
2751
 
pan_draw_mode(enum pipe_prim_type mode)
2752
 
{
2753
 
        switch (mode) {
2754
 
                DEFINE_CASE(POINTS);
2755
 
                DEFINE_CASE(LINES);
2756
 
                DEFINE_CASE(LINE_LOOP);
2757
 
                DEFINE_CASE(LINE_STRIP);
2758
 
                DEFINE_CASE(TRIANGLES);
2759
 
                DEFINE_CASE(TRIANGLE_STRIP);
2760
 
                DEFINE_CASE(TRIANGLE_FAN);
2761
 
                DEFINE_CASE(QUADS);
2762
 
                DEFINE_CASE(POLYGON);
2763
 
#if PAN_ARCH <= 6
2764
 
                DEFINE_CASE(QUAD_STRIP);
2765
 
#endif
2766
 
 
2767
 
        default:
2768
 
                unreachable("Invalid draw mode");
2769
 
        }
2770
 
}
2771
 
 
2772
 
#undef DEFINE_CASE
2773
 
 
2774
 
/* Count generated primitives (when there is no geom/tess shaders) for
2775
 
 * transform feedback */
2776
 
 
2777
 
static void
2778
 
panfrost_statistics_record(
2779
 
                struct panfrost_context *ctx,
2780
 
                const struct pipe_draw_info *info,
2781
 
                const struct pipe_draw_start_count_bias *draw)
2782
 
{
2783
 
        if (!ctx->active_queries)
2784
 
                return;
2785
 
 
2786
 
        uint32_t prims = u_prims_for_vertices(info->mode, draw->count);
2787
 
        ctx->prims_generated += prims;
2788
 
 
2789
 
        if (!ctx->streamout.num_targets)
2790
 
                return;
2791
 
 
2792
 
        ctx->tf_prims_generated += prims;
2793
 
}
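/* For illustration: a 9-vertex draw counts as 3 primitives for
 * PIPE_PRIM_TRIANGLES and 7 for PIPE_PRIM_TRIANGLE_STRIP; the transform
 * feedback counter is only bumped when stream output targets are bound. */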
2794
 
 
2795
 
static void
2796
 
panfrost_update_streamout_offsets(struct panfrost_context *ctx)
2797
 
{
2798
 
        for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
2799
 
                unsigned count;
2800
 
 
2801
 
                count = u_stream_outputs_for_vertices(ctx->active_prim,
2802
 
                                                      ctx->vertex_count);
2803
 
                pan_so_target(ctx->streamout.targets[i])->offset += count;
2804
 
        }
2805
 
}
2806
 
 
2807
 
static inline enum mali_index_type
2808
 
panfrost_translate_index_size(unsigned size)
2809
 
{
2810
 
        STATIC_ASSERT(MALI_INDEX_TYPE_NONE  == 0);
2811
 
        STATIC_ASSERT(MALI_INDEX_TYPE_UINT8  == 1);
2812
 
        STATIC_ASSERT(MALI_INDEX_TYPE_UINT16 == 2);
2813
 
 
2814
 
        return (size == 4) ? MALI_INDEX_TYPE_UINT32 : size;
2815
 
}
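/* With the STATIC_ASSERTs above, the byte size maps directly onto the
 * hardware enum: panfrost_translate_index_size(0/1/2/4) returns
 * MALI_INDEX_TYPE_NONE/_UINT8/_UINT16/_UINT32 respectively. */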
2816
 
 
2817
 
#if PAN_ARCH <= 7
2818
 
static inline void
2819
 
pan_emit_draw_descs(struct panfrost_batch *batch,
2820
 
                struct MALI_DRAW *d, enum pipe_shader_type st)
2821
 
{
2822
 
        d->offset_start = batch->ctx->offset_start;
2823
 
        d->instance_size = batch->ctx->instance_count > 1 ?
2824
 
                           batch->ctx->padded_count : 1;
2825
 
 
2826
 
        d->uniform_buffers = batch->uniform_buffers[st];
2827
 
        d->push_uniforms = batch->push_uniforms[st];
2828
 
        d->textures = batch->textures[st];
2829
 
        d->samplers = batch->samplers[st];
2830
 
}
2831
 
 
2832
 
static void
2833
 
panfrost_draw_emit_vertex_section(struct panfrost_batch *batch,
2834
 
                                  mali_ptr vs_vary, mali_ptr varyings,
2835
 
                                  mali_ptr attribs, mali_ptr attrib_bufs,
2836
 
                                  void *section)
2837
 
{
2838
 
        pan_pack(section, DRAW, cfg) {
2839
 
                cfg.draw_descriptor_is_64b = true;
2840
 
                cfg.state = batch->rsd[PIPE_SHADER_VERTEX];
2841
 
                cfg.attributes = attribs;
2842
 
                cfg.attribute_buffers = attrib_bufs;
2843
 
                cfg.varyings = vs_vary;
2844
 
                cfg.varying_buffers = vs_vary ? varyings : 0;
2845
 
                cfg.thread_storage = batch->tls.gpu;
2846
 
                pan_emit_draw_descs(batch, &cfg, PIPE_SHADER_VERTEX);
2847
 
        }
2848
 
}
2849
 
 
2850
 
static void
2851
 
panfrost_draw_emit_vertex(struct panfrost_batch *batch,
2852
 
                          const struct pipe_draw_info *info,
2853
 
                          void *invocation_template,
2854
 
                          mali_ptr vs_vary, mali_ptr varyings,
2855
 
                          mali_ptr attribs, mali_ptr attrib_bufs,
2856
 
                          void *job)
2857
 
{
2858
 
        void *section =
2859
 
                pan_section_ptr(job, COMPUTE_JOB, INVOCATION);
2860
 
        memcpy(section, invocation_template, pan_size(INVOCATION));
2861
 
 
2862
 
        pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) {
2863
 
                cfg.job_task_split = 5;
2864
 
        }
2865
 
 
2866
 
        section = pan_section_ptr(job, COMPUTE_JOB, DRAW);
2867
 
        panfrost_draw_emit_vertex_section(batch, vs_vary, varyings,
2868
 
                                          attribs, attrib_bufs, section);
2869
 
}
2870
 
#endif
2871
 
 
2872
 
static void
2873
 
panfrost_emit_primitive_size(struct panfrost_context *ctx,
2874
 
                             bool points, mali_ptr size_array,
2875
 
                             void *prim_size)
2876
 
{
2877
 
        struct panfrost_rasterizer *rast = ctx->rasterizer;
2878
 
 
2879
 
        pan_pack(prim_size, PRIMITIVE_SIZE, cfg) {
2880
 
                if (panfrost_writes_point_size(ctx)) {
2881
 
                        cfg.size_array = size_array;
2882
 
                } else {
2883
 
                        cfg.constant = points ?
2884
 
                                       rast->base.point_size :
2885
 
                                       rast->base.line_width;
2886
 
                }
2887
 
        }
2888
 
}
2889
 
 
2890
 
static bool
2891
 
panfrost_is_implicit_prim_restart(const struct pipe_draw_info *info)
2892
 
{
2893
 
        unsigned implicit_index = BITFIELD_MASK(info->index_size * 8);
2894
 
        bool implicit = info->restart_index == implicit_index;
2895
 
        return info->primitive_restart && implicit;
2896
 
}
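/* For example, with 16-bit indices BITFIELD_MASK(16) == 0xffff, so primitive
 * restart with restart_index == 0xffff takes the implicit path, while a
 * nonstandard index like 0x1234 falls back to
 * MALI_PRIMITIVE_RESTART_EXPLICIT in panfrost_draw_emit_tiler(). */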
2897
 
 
2898
 
static inline void
2899
 
panfrost_update_state_tex(struct panfrost_batch *batch,
2900
 
                          enum pipe_shader_type st)
2901
 
{
2902
 
        struct panfrost_context *ctx = batch->ctx;
2903
 
        struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
2904
 
 
2905
 
        unsigned dirty_3d = ctx->dirty;
2906
 
        unsigned dirty = ctx->dirty_shader[st];
2907
 
 
2908
 
        if (dirty & PAN_DIRTY_STAGE_TEXTURE) {
2909
 
                batch->textures[st] =
2910
 
                        panfrost_emit_texture_descriptors(batch, st);
2911
 
        }
2912
 
 
2913
 
        if (dirty & PAN_DIRTY_STAGE_SAMPLER) {
2914
 
                batch->samplers[st] =
2915
 
                        panfrost_emit_sampler_descriptors(batch, st);
2916
 
        }
2917
 
 
2918
 
        if ((dirty & ss->dirty_shader) || (dirty_3d & ss->dirty_3d)) {
2919
 
                batch->uniform_buffers[st] = panfrost_emit_const_buf(batch, st,
2920
 
                                &batch->push_uniforms[st]);
2921
 
        }
2922
 
}
2923
 
 
2924
 
static inline void
2925
 
panfrost_update_state_3d(struct panfrost_batch *batch)
2926
 
{
2927
 
        unsigned dirty = batch->ctx->dirty;
2928
 
 
2929
 
        if (dirty & PAN_DIRTY_TLS_SIZE)
2930
 
                panfrost_batch_adjust_stack_size(batch);
2931
 
}
2932
 
 
2933
 
static void
2934
 
panfrost_update_state_vs(struct panfrost_batch *batch)
2935
 
{
2936
 
        enum pipe_shader_type st = PIPE_SHADER_VERTEX;
2937
 
        unsigned dirty = batch->ctx->dirty_shader[st];
2938
 
 
2939
 
        if (dirty & PAN_DIRTY_STAGE_RENDERER)
2940
 
                batch->rsd[st] = panfrost_emit_compute_shader_meta(batch, st);
2941
 
 
2942
 
        panfrost_update_state_tex(batch, st);
2943
 
}
2944
 
 
2945
 
static void
2946
 
panfrost_update_state_fs(struct panfrost_batch *batch)
2947
 
{
2948
 
        enum pipe_shader_type st = PIPE_SHADER_FRAGMENT;
2949
 
        unsigned dirty = batch->ctx->dirty_shader[st];
2950
 
 
2951
 
        if (dirty & PAN_DIRTY_STAGE_RENDERER)
2952
 
                batch->rsd[st] = panfrost_emit_frag_shader_meta(batch);
2953
 
 
2954
 
        if (dirty & PAN_DIRTY_STAGE_IMAGE) {
2955
 
                batch->attribs[st] = panfrost_emit_image_attribs(batch,
2956
 
                                &batch->attrib_bufs[st], st);
2957
 
        }
2958
 
 
2959
 
        panfrost_update_state_tex(batch, st);
2960
 
}
2961
 
 
2962
 
#if PAN_ARCH >= 6
2963
 
static mali_ptr
2964
 
panfrost_batch_get_bifrost_tiler(struct panfrost_batch *batch, unsigned vertex_count)
2965
 
{
2966
 
        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
2967
 
 
2968
 
        if (!vertex_count)
2969
 
                return 0;
2970
 
 
2971
 
        if (batch->tiler_ctx.bifrost)
2972
 
                return batch->tiler_ctx.bifrost;
2973
 
 
2974
 
        struct panfrost_ptr t =
2975
 
                pan_pool_alloc_desc(&batch->pool.base, TILER_HEAP);
2976
 
 
2977
 
        GENX(pan_emit_tiler_heap)(dev, t.cpu);
2978
 
 
2979
 
        mali_ptr heap = t.gpu;
2980
 
 
2981
 
        t = pan_pool_alloc_desc(&batch->pool.base, TILER_CONTEXT);
2982
 
        GENX(pan_emit_tiler_ctx)(dev, batch->key.width, batch->key.height,
2983
 
                                 util_framebuffer_get_num_samples(&batch->key),
2984
 
                                 heap, t.cpu);
2985
 
 
2986
 
        batch->tiler_ctx.bifrost = t.gpu;
2987
 
        return batch->tiler_ctx.bifrost;
2988
 
}
2989
 
#endif
2990
 
 
2991
 
static void
2992
 
panfrost_draw_emit_tiler(struct panfrost_batch *batch,
2993
 
                         const struct pipe_draw_info *info,
2994
 
                         const struct pipe_draw_start_count_bias *draw,
2995
 
                         void *invocation_template,
2996
 
                         mali_ptr indices, mali_ptr fs_vary, mali_ptr varyings,
2997
 
                         mali_ptr pos, mali_ptr psiz, bool secondary_shader,
2998
 
                         void *job)
2999
 
{
3000
 
        struct panfrost_context *ctx = batch->ctx;
3001
 
        struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
3002
 
 
3003
 
        void *section = pan_section_ptr(job, TILER_JOB, INVOCATION);
3004
 
        memcpy(section, invocation_template, pan_size(INVOCATION));
3005
 
 
3006
 
        section = pan_section_ptr(job, TILER_JOB, PRIMITIVE);
3007
 
        pan_pack(section, PRIMITIVE, cfg) {
3008
 
                cfg.draw_mode = pan_draw_mode(info->mode);
3009
 
                if (panfrost_writes_point_size(ctx))
3010
 
                        cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;
3011
 
 
3012
 
                /* For line primitives, PRIMITIVE.first_provoking_vertex must
3013
 
                 * be set to true and the provoking vertex is selected with
3014
 
                 * DRAW.flat_shading_vertex.
3015
 
                 */
3016
 
                if (info->mode == PIPE_PRIM_LINES ||
3017
 
                    info->mode == PIPE_PRIM_LINE_LOOP ||
3018
 
                    info->mode == PIPE_PRIM_LINE_STRIP)
3019
 
                        cfg.first_provoking_vertex = true;
3020
 
                else
3021
 
                        cfg.first_provoking_vertex = rast->flatshade_first;
3022
 
 
3023
 
                if (panfrost_is_implicit_prim_restart(info)) {
3024
 
                        cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
3025
 
                } else if (info->primitive_restart) {
3026
 
                        cfg.primitive_restart = MALI_PRIMITIVE_RESTART_EXPLICIT;
3027
 
                        cfg.primitive_restart_index = info->restart_index;
3028
 
                }
3029
 
 
3030
 
                cfg.job_task_split = 6;
3031
 
 
3032
 
                cfg.index_count = ctx->indirect_draw ? 1 : draw->count;
3033
 
                cfg.index_type = panfrost_translate_index_size(info->index_size);
3034
 
 
3035
 
                if (cfg.index_type) {
3036
 
                        cfg.indices = indices;
3037
 
                        cfg.base_vertex_offset = draw->index_bias - ctx->offset_start;
3038
 
                }
3039
 
 
3040
 
#if PAN_ARCH >= 6
3041
 
                cfg.secondary_shader = secondary_shader;
3042
 
#endif
3043
 
        }
3044
 
 
3045
 
        enum pipe_prim_type prim = u_reduced_prim(info->mode);
3046
 
        bool polygon = (prim == PIPE_PRIM_TRIANGLES);
3047
 
        void *prim_size = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE);
3048
 
 
3049
 
#if PAN_ARCH >= 6
3050
 
        pan_section_pack(job, TILER_JOB, TILER, cfg) {
3051
 
                cfg.address = panfrost_batch_get_bifrost_tiler(batch, ~0);
3052
 
        }
3053
 
 
3054
 
        pan_section_pack(job, TILER_JOB, PADDING, cfg);
3055
 
#endif
3056
 
 
3057
 
        section = pan_section_ptr(job, TILER_JOB, DRAW);
3058
 
        pan_pack(section, DRAW, cfg) {
3059
 
                /*
3060
 
                 * From the Gallium documentation,
3061
 
                 * pipe_rasterizer_state::cull_face "indicates which faces of
3062
 
                 * polygons to cull". Points and lines are not considered
3063
 
                 * polygons and should be drawn even if all faces are culled.
3064
 
                 * The hardware does not take primitive type into account when
3065
 
                 * culling, so we need to do that check ourselves.
3066
 
                 */
3067
 
                cfg.cull_front_face = polygon && (rast->cull_face & PIPE_FACE_FRONT);
3068
 
                cfg.cull_back_face = polygon && (rast->cull_face & PIPE_FACE_BACK);
3069
 
                cfg.front_face_ccw = rast->front_ccw;
3070
 
                cfg.position = pos;
3071
 
                cfg.state = batch->rsd[PIPE_SHADER_FRAGMENT];
3072
 
                cfg.attributes = batch->attribs[PIPE_SHADER_FRAGMENT];
3073
 
                cfg.attribute_buffers = batch->attrib_bufs[PIPE_SHADER_FRAGMENT];
3074
 
                cfg.viewport = batch->viewport;
3075
 
                cfg.varyings = fs_vary;
3076
 
                cfg.varying_buffers = fs_vary ? varyings : 0;
3077
 
                cfg.thread_storage = batch->tls.gpu;
3078
 
 
3079
 
                /* For all primitives but lines DRAW.flat_shading_vertex must
3080
 
                 * be set to 0 and the provoking vertex is selected with the
3081
 
                 * PRIMITIVE.first_provoking_vertex field.
3082
 
                 */
3083
 
                if (prim == PIPE_PRIM_LINES) {
3084
 
                        /* The logic is inverted across arches. */
3085
 
                        cfg.flat_shading_vertex = rast->flatshade_first
3086
 
                                                ^ (PAN_ARCH <= 5);
3087
 
                }
3088
 
 
3089
 
                pan_emit_draw_descs(batch, &cfg, PIPE_SHADER_FRAGMENT);
3090
 
 
3091
 
                if (ctx->occlusion_query && ctx->active_queries) {
3092
 
                        if (ctx->occlusion_query->type == PIPE_QUERY_OCCLUSION_COUNTER)
3093
 
                                cfg.occlusion_query = MALI_OCCLUSION_MODE_COUNTER;
3094
 
                        else
3095
 
                                cfg.occlusion_query = MALI_OCCLUSION_MODE_PREDICATE;
3096
 
 
3097
 
                        struct panfrost_resource *rsrc = pan_resource(ctx->occlusion_query->rsrc);
3098
 
                        cfg.occlusion = rsrc->image.data.bo->ptr.gpu;
3099
 
                        panfrost_batch_write_rsrc(ctx->batch, rsrc,
3100
 
                                              PIPE_SHADER_FRAGMENT);
3101
 
                }
3102
 
        }
3103
 
 
3104
 
        panfrost_emit_primitive_size(ctx, prim == PIPE_PRIM_POINTS, psiz, prim_size);
3105
 
}
3106
 
 
3107
 
static void
3108
 
panfrost_direct_draw(struct panfrost_batch *batch,
3109
 
                     const struct pipe_draw_info *info,
3110
 
                     unsigned drawid_offset,
3111
 
                     const struct pipe_draw_start_count_bias *draw)
3112
 
{
3113
 
        if (!draw->count || !info->instance_count)
3114
 
                return;
3115
 
 
3116
 
        struct panfrost_context *ctx = batch->ctx;
3117
 
 
3118
 
        /* Take into account a negative bias */
3119
 
        ctx->indirect_draw = false;
3120
 
        ctx->vertex_count = draw->count + (info->index_size ? abs(draw->index_bias) : 0);
3121
 
        ctx->instance_count = info->instance_count;
3122
 
        ctx->base_vertex = info->index_size ? draw->index_bias : 0;
3123
 
        ctx->base_instance = info->start_instance;
3124
 
        ctx->active_prim = info->mode;
3125
 
        ctx->drawid = drawid_offset;
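        /* For illustration: an indexed draw of 6 indices with
         * index_bias == -2 sets ctx->vertex_count to 6 + abs(-2) = 8, per the
         * negative-bias adjustment above. */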
3126
 
 
3127
 
        struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
3128
 
 
3129
 
        bool idvs = vs->info.vs.idvs;
3130
 
        bool secondary_shader = vs->info.vs.secondary_enable;
3131
 
 
3132
 
        struct panfrost_ptr tiler, vertex;
3133
 
 
3134
 
        if (idvs) {
3135
 
#if PAN_ARCH >= 6
3136
 
                tiler = pan_pool_alloc_desc(&batch->pool.base, INDEXED_VERTEX_JOB);
3137
 
#else
3138
 
                unreachable("IDVS is unsupported on Midgard");
3139
 
#endif
3140
 
        } else {
3141
 
                vertex = pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB);
3142
 
                tiler = pan_pool_alloc_desc(&batch->pool.base, TILER_JOB);
3143
 
        }
3144
 
 
3145
 
        unsigned vertex_count = ctx->vertex_count;
3146
 
 
3147
 
        unsigned min_index = 0, max_index = 0;
3148
 
        mali_ptr indices = 0;
3149
 
 
3150
 
        if (info->index_size) {
3151
 
                indices = panfrost_get_index_buffer_bounded(batch, info, draw,
3152
 
                                                            &min_index,
3153
 
                                                            &max_index);
3154
 
 
3155
 
                /* Use the corresponding values */
3156
 
                vertex_count = max_index - min_index + 1;
3157
 
                ctx->offset_start = min_index + draw->index_bias;
3158
 
        } else {
3159
 
                ctx->offset_start = draw->start;
3160
 
        }
3161
 
 
3162
 
        if (info->instance_count > 1) {
3163
 
                unsigned count = vertex_count;
3164
 
 
3165
 
                /* Index-Driven Vertex Shading requires different instances to
3166
 
                 * have different cache lines for position results. Each vertex
3167
 
                 * position is 16 bytes and the Mali cache line is 64 bytes, so
3168
 
                 * the instance count must be aligned to 4 vertices.
3169
 
                 */
3170
 
                if (idvs)
3171
 
                        count = ALIGN_POT(count, 4);
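                /* Worked example: four fp32 vec4 positions are 4 * 16 = 64
                 * bytes, exactly one cache line, so a 10-vertex instance is
                 * padded to ALIGN_POT(10, 4) = 12 before computing the padded
                 * vertex count below. */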
3172
 
 
3173
 
                ctx->padded_count = panfrost_padded_vertex_count(count);
3174
 
        } else
3175
 
                ctx->padded_count = vertex_count;
3176
 
 
3177
 
        panfrost_statistics_record(ctx, info, draw);
3178
 
 
3179
 
        struct mali_invocation_packed invocation;
3180
 
        if (info->instance_count > 1) {
3181
 
                panfrost_pack_work_groups_compute(&invocation,
3182
 
                                                  1, vertex_count, info->instance_count,
3183
 
                                                  1, 1, 1, true, false);
3184
 
        } else {
3185
 
                pan_pack(&invocation, INVOCATION, cfg) {
3186
 
                        cfg.invocations = MALI_POSITIVE(vertex_count);
3187
 
                        cfg.size_y_shift = 0;
3188
 
                        cfg.size_z_shift = 0;
3189
 
                        cfg.workgroups_x_shift = 0;
3190
 
                        cfg.workgroups_y_shift = 0;
3191
 
                        cfg.workgroups_z_shift = 32;
3192
 
                        cfg.thread_group_split = MALI_SPLIT_MIN_EFFICIENT;
3193
 
                }
3194
 
        }
3195
 
 
3196
 
        /* Emit all sorts of descriptors. */
3197
 
        mali_ptr varyings = 0, vs_vary = 0, fs_vary = 0, pos = 0, psiz = 0;
3198
 
 
3199
 
        panfrost_emit_varying_descriptor(batch,
3200
 
                                         ctx->padded_count *
3201
 
                                         ctx->instance_count,
3202
 
                                         &vs_vary, &fs_vary, &varyings,
3203
 
                                         NULL, &pos, &psiz,
3204
 
                                         info->mode == PIPE_PRIM_POINTS);
3205
 
 
3206
 
        mali_ptr attribs, attrib_bufs;
3207
 
        attribs = panfrost_emit_vertex_data(batch, &attrib_bufs);
3208
 
 
3209
 
        panfrost_update_state_3d(batch);
3210
 
        panfrost_update_state_vs(batch);
3211
 
        panfrost_update_state_fs(batch);
3212
 
        panfrost_clean_state_3d(ctx);
3213
 
 
3214
 
        /* Fire off the draw itself */
3215
 
        panfrost_draw_emit_tiler(batch, info, draw, &invocation, indices,
3216
 
                                 fs_vary, varyings, pos, psiz, secondary_shader,
3217
 
                                 tiler.cpu);
3218
 
 
3219
 
        if (idvs) {
3220
 
#if PAN_ARCH >= 6
3221
 
                panfrost_draw_emit_vertex_section(batch,
3222
 
                                  vs_vary, varyings,
3223
 
                                  attribs, attrib_bufs,
3224
 
                                  pan_section_ptr(tiler.cpu, INDEXED_VERTEX_JOB, VERTEX_DRAW));
3225
 
 
3226
 
                panfrost_add_job(&batch->pool.base, &batch->scoreboard,
3227
 
                                 MALI_JOB_TYPE_INDEXED_VERTEX, false, false,
3228
 
                                 0, 0, &tiler, false);
3229
 
#endif
3230
 
        } else {
3231
 
                panfrost_draw_emit_vertex(batch, info, &invocation,
3232
 
                                          vs_vary, varyings, attribs, attrib_bufs, vertex.cpu);
3233
 
                panfrost_emit_vertex_tiler_jobs(batch, &vertex, &tiler);
3234
 
        }
3235
 
 
3236
 
        /* Increment transform feedback offsets */
3237
 
        panfrost_update_streamout_offsets(ctx);
3238
 
}
3239
 
 
3240
 
#if PAN_GPU_INDIRECTS
3241
 
static void
3242
 
panfrost_indirect_draw(struct panfrost_batch *batch,
3243
 
                       const struct pipe_draw_info *info,
3244
 
                       unsigned drawid_offset,
3245
 
                       const struct pipe_draw_indirect_info *indirect,
3246
 
                       const struct pipe_draw_start_count_bias *draw)
3247
 
{
3248
 
        /* Indirect draw count and multi-draw not supported. */
3249
 
        assert(indirect->draw_count == 1 && !indirect->indirect_draw_count);
3250
 
 
3251
 
        struct panfrost_context *ctx = batch->ctx;
3252
 
        struct panfrost_device *dev = pan_device(ctx->base.screen);
3253
 
 
3254
 
        /* TODO: update statistics (see panfrost_statistics_record()) */
3255
 
        /* TODO: Increment transform feedback offsets */
3256
 
        assert(ctx->streamout.num_targets == 0);
3257
 
 
3258
 
        ctx->active_prim = info->mode;
3259
 
        ctx->drawid = drawid_offset;
3260
 
        ctx->indirect_draw = true;
3261
 
 
3262
 
        struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
3263
 
 
3264
 
        bool idvs = vs->info.vs.idvs;
3265
 
        bool secondary_shader = vs->info.vs.secondary_enable;
3266
 
 
3267
 
        struct panfrost_ptr tiler = { 0 }, vertex = { 0 };
3268
 
 
3269
 
        if (idvs) {
3270
 
#if PAN_ARCH >= 6
3271
 
                tiler = pan_pool_alloc_desc(&batch->pool.base, INDEXED_VERTEX_JOB);
3272
 
#else
3273
 
                unreachable("IDVS is unsupported on Midgard");
3274
 
#endif
3275
 
        } else {
3276
 
                vertex = pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB);
3277
 
                tiler = pan_pool_alloc_desc(&batch->pool.base, TILER_JOB);
3278
 
        }
3279
 
 
3280
 
        struct panfrost_bo *index_buf = NULL;
3281
 
 
3282
 
        if (info->index_size) {
3283
 
                assert(!info->has_user_indices);
3284
 
                struct panfrost_resource *rsrc = pan_resource(info->index.resource);
3285
 
                index_buf = rsrc->image.data.bo;
3286
 
                panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
3287
 
        }
3288
 
 
3289
 
        mali_ptr varyings = 0, vs_vary = 0, fs_vary = 0, pos = 0, psiz = 0;
3290
 
        unsigned varying_buf_count;
3291
 
 
3292
 
        /* We want to create templates, so set all count fields to 0 to
         * reflect that.
         */
3295
 
        ctx->instance_count = ctx->vertex_count = ctx->padded_count = 0;
3296
 
        ctx->offset_start = 0;
3297
 
 
3298
 
        /* Set the {first,base}_vertex sysvals to NULL. Will be updated if the
3299
 
         * vertex shader uses gl_VertexID or gl_BaseVertex.
3300
 
         */
3301
 
        ctx->first_vertex_sysval_ptr = 0;
3302
 
        ctx->base_vertex_sysval_ptr = 0;
3303
 
        ctx->base_instance_sysval_ptr = 0;
3304
 
 
3305
 
        panfrost_update_state_3d(batch);
3306
 
        panfrost_update_state_vs(batch);
3307
 
        panfrost_update_state_fs(batch);
3308
 
        panfrost_clean_state_3d(ctx);
3309
 
 
3310
 
        bool point_coord_replace = (info->mode == PIPE_PRIM_POINTS);
3311
 
 
3312
 
        panfrost_emit_varying_descriptor(batch, 0,
3313
 
                                         &vs_vary, &fs_vary, &varyings,
3314
 
                                         &varying_buf_count, &pos, &psiz,
3315
 
                                         point_coord_replace);
3316
 
 
3317
 
        mali_ptr attribs, attrib_bufs;
3318
 
        attribs = panfrost_emit_vertex_data(batch, &attrib_bufs);
3319
 
 
3320
 
        /* Zero-ed invocation, the compute job will update it. */
3321
 
        static struct mali_invocation_packed invocation;
3322
 
 
3323
 
        /* Fire off the draw itself */
3324
 
        panfrost_draw_emit_tiler(batch, info, draw, &invocation,
3325
 
                                 index_buf ? index_buf->ptr.gpu : 0,
3326
 
                                 fs_vary, varyings, pos, psiz, secondary_shader,
3327
 
                                 tiler.cpu);
3328
 
        if (idvs) {
3329
 
#if PAN_ARCH >= 6
3330
 
                panfrost_draw_emit_vertex_section(batch,
3331
 
                                  vs_vary, varyings,
3332
 
                                  attribs, attrib_bufs,
3333
 
                                  pan_section_ptr(tiler.cpu, INDEXED_VERTEX_JOB, VERTEX_DRAW));
3334
 
#endif
3335
 
        } else {
3336
 
                panfrost_draw_emit_vertex(batch, info, &invocation,
3337
 
                                          vs_vary, varyings, attribs, attrib_bufs, vertex.cpu);
3338
 
        }
3339
 
 
3340
 
        /* Add the varying heap BO to the batch if we're allocating varyings. */
3341
 
        if (varyings) {
3342
 
                panfrost_batch_add_bo(batch,
3343
 
                                      dev->indirect_draw_shaders.varying_heap,
3344
 
                                      PIPE_SHADER_VERTEX);
3345
 
        }
3346
 
 
3347
 
        assert(indirect->buffer);
3348
 
 
3349
 
        struct panfrost_resource *draw_buf = pan_resource(indirect->buffer);
3350
 
 
3351
 
        /* Don't count images: those attributes don't need to be patched. */
3352
 
        unsigned attrib_count =
3353
 
                vs->info.attribute_count -
3354
 
                util_bitcount(ctx->image_mask[PIPE_SHADER_VERTEX]);
3355
 
 
3356
 
        panfrost_batch_read_rsrc(batch, draw_buf, PIPE_SHADER_VERTEX);
3357
 
 
3358
 
        struct pan_indirect_draw_info draw_info = {
3359
 
                .last_indirect_draw = batch->indirect_draw_job_id,
3360
 
                .draw_buf = draw_buf->image.data.bo->ptr.gpu + indirect->offset,
3361
 
                .index_buf = index_buf ? index_buf->ptr.gpu : 0,
3362
 
                .first_vertex_sysval = ctx->first_vertex_sysval_ptr,
3363
 
                .base_vertex_sysval = ctx->base_vertex_sysval_ptr,
3364
 
                .base_instance_sysval = ctx->base_instance_sysval_ptr,
3365
 
                .vertex_job = vertex.gpu,
3366
 
                .tiler_job = tiler.gpu,
3367
 
                .attrib_bufs = attrib_bufs,
3368
 
                .attribs = attribs,
3369
 
                .attrib_count = attrib_count,
3370
 
                .varying_bufs = varyings,
3371
 
                .index_size = info->index_size,
3372
 
        };
3373
 
 
3374
 
        if (panfrost_writes_point_size(ctx))
3375
 
                draw_info.flags |= PAN_INDIRECT_DRAW_UPDATE_PRIM_SIZE;
3376
 
 
3377
 
        if (vs->info.vs.writes_point_size)
3378
 
                draw_info.flags |= PAN_INDIRECT_DRAW_HAS_PSIZ;
3379
 
 
3380
 
        if (idvs)
3381
 
                draw_info.flags |= PAN_INDIRECT_DRAW_IDVS;
3382
 
 
3383
 
        if (info->primitive_restart) {
3384
 
                draw_info.restart_index = info->restart_index;
3385
 
                draw_info.flags |= PAN_INDIRECT_DRAW_PRIMITIVE_RESTART;
3386
 
        }
3387
 
 
3388
 
        batch->indirect_draw_job_id =
3389
 
                GENX(panfrost_emit_indirect_draw)(&batch->pool.base,
3390
 
                                                  &batch->scoreboard,
3391
 
                                                  &draw_info,
3392
 
                                                  &batch->indirect_draw_ctx);
3393
 
 
3394
 
        if (idvs) {
3395
 
                panfrost_add_job(&batch->pool.base, &batch->scoreboard,
3396
 
                                 MALI_JOB_TYPE_INDEXED_VERTEX, false, false,
3397
 
                                 0, 0, &tiler, false);
3398
 
        } else {
3399
 
                panfrost_emit_vertex_tiler_jobs(batch, &vertex, &tiler);
3400
 
        }
3401
 
}
3402
 
#endif
3403
 
 
3404
 
static void
3405
 
panfrost_draw_vbo(struct pipe_context *pipe,
3406
 
                  const struct pipe_draw_info *info,
3407
 
                  unsigned drawid_offset,
3408
 
                  const struct pipe_draw_indirect_info *indirect,
3409
 
                  const struct pipe_draw_start_count_bias *draws,
3410
 
                  unsigned num_draws)
3411
 
{
3412
 
        struct panfrost_context *ctx = pan_context(pipe);
3413
 
        struct panfrost_device *dev = pan_device(pipe->screen);
3414
 
 
3415
 
        if (!panfrost_render_condition_check(ctx))
3416
 
                return;
3417
 
 
3418
 
        /* Emulate indirect draws unless we're using the experimental path */
3419
 
        if ((!(dev->debug & PAN_DBG_INDIRECT) || !PAN_GPU_INDIRECTS) && indirect && indirect->buffer) {
3420
 
                assert(num_draws == 1);
3421
 
                util_draw_indirect(pipe, info, indirect);
3422
 
                return;
3423
 
        }
3424
 
 
3425
 
        /* Do some common setup */
3426
 
        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
3427
 
 
3428
 
        /* Don't add too many jobs to a single batch. Hardware has a hard limit
3429
 
         * of 65536 jobs, but we choose a smaller soft limit (arbitrary) to
3430
 
         * avoid the risk of timeouts. This might not be a good idea. */
3431
 
        if (unlikely(batch->scoreboard.job_index > 10000))
3432
 
                batch = panfrost_get_fresh_batch_for_fbo(ctx, "Too many draws");
3433
 
 
3434
 
        /* panfrost_batch_skip_rasterization reads
3435
 
         * batch->scissor_culls_everything, which is set by
3436
 
         * panfrost_emit_viewport, so call that first.
3437
 
         */
3438
 
        if (ctx->dirty & (PAN_DIRTY_VIEWPORT | PAN_DIRTY_SCISSOR))
3439
 
                batch->viewport = panfrost_emit_viewport(batch);
3440
 
 
3441
 
        /* If rasterization discard is enabled but the vertex shader does not
3442
 
         * have side effects (including transform feedback), skip the draw
3443
 
         * altogether. This is always an optimization. Additionally, this is
3444
 
         * required for Index-Driven Vertex Shading, since IDVS always
3445
 
         * rasterizes. The compiler will not use IDVS if the vertex shader has
3446
 
         * side effects. So the only problem case is rasterizer discard with a
3447
 
         * shader without side effects -- but these draws are useless.
3448
 
         */
3449
 
        if (panfrost_batch_skip_rasterization(batch)) {
3450
 
                struct panfrost_shader_state *vs =
3451
 
                        panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
3452
 
 
3453
 
                if (!vs->info.writes_global)
3454
 
                        return;
3455
 
        }
3456
 
 
3457
 
        unsigned zs_draws = ctx->depth_stencil->draws;
3458
 
        batch->draws |= zs_draws;
3459
 
        batch->resolve |= zs_draws;
3460
 
 
3461
 
        /* Mark everything dirty when debugging */
3462
 
        if (unlikely(dev->debug & PAN_DBG_DIRTY))
3463
 
                panfrost_dirty_state_all(ctx);
3464
 
 
3465
 
        /* Conservatively assume draw parameters always change */
3466
 
        ctx->dirty |= PAN_DIRTY_PARAMS | PAN_DIRTY_DRAWID;
3467
 
 
3468
 
        if (indirect) {
3469
 
                assert(num_draws == 1);
3470
 
                assert(PAN_GPU_INDIRECTS);
3471
 
 
3472
 
#if PAN_GPU_INDIRECTS
3473
 
                if (indirect->count_from_stream_output) {
3474
 
                        struct pipe_draw_start_count_bias tmp_draw = *draws;
3475
 
                        struct panfrost_streamout_target *so =
3476
 
                                pan_so_target(indirect->count_from_stream_output);
3477
 
 
3478
 
                        tmp_draw.start = 0;
3479
 
                        tmp_draw.count = so->offset;
3480
 
                        tmp_draw.index_bias = 0;
3481
 
                        panfrost_direct_draw(batch, info, drawid_offset, &tmp_draw);
3482
 
                        return;
3483
 
                }
3484
 
 
3485
 
                panfrost_indirect_draw(batch, info, drawid_offset, indirect, &draws[0]);
3486
 
                return;
3487
 
#endif
3488
 
        }
3489
 
 
3490
 
        struct pipe_draw_info tmp_info = *info;
3491
 
        unsigned drawid = drawid_offset;
3492
 
 
3493
 
        for (unsigned i = 0; i < num_draws; i++) {
3494
 
                panfrost_direct_draw(batch, &tmp_info, drawid, &draws[i]);
3495
 
 
3496
 
                if (tmp_info.increment_draw_id) {
3497
 
                        ctx->dirty |= PAN_DIRTY_DRAWID;
3498
 
                        drawid++;
3499
 
                }
3500
 
        }
3501
 
 
3502
 
}
3503
 
 
3504
 
/* Launch grid is the compute equivalent of draw_vbo, so in this routine, we
3505
 
 * construct the COMPUTE job and some of its payload.
3506
 
 */
3507
 
 
3508
 
static void
3509
 
panfrost_launch_grid(struct pipe_context *pipe,
3510
 
                const struct pipe_grid_info *info)
3511
 
{
3512
 
        struct panfrost_context *ctx = pan_context(pipe);
3513
 
 
3514
 
        /* XXX - shouldn't be necessary with working memory barriers. Affected
3515
 
         * test: KHR-GLES31.core.compute_shader.pipeline-post-xfb */
3516
 
        panfrost_flush_all_batches(ctx, "Launch grid pre-barrier");
3517
 
 
3518
 
        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
3519
 
 
3520
 
        struct panfrost_shader_state *cs =
3521
 
                &ctx->shader[PIPE_SHADER_COMPUTE]->variants[0];
3522
 
 
3523
 
        /* Indirect dispatch can't handle workgroup local storage since that
3524
 
         * would require dynamic memory allocation. Bail in this case. */
3525
 
        if (info->indirect && ((cs->info.wls_size != 0) || !PAN_GPU_INDIRECTS)) {
3526
 
                struct pipe_transfer *transfer;
3527
 
                uint32_t *params = pipe_buffer_map_range(pipe, info->indirect,
3528
 
                                info->indirect_offset,
3529
 
                                3 * sizeof(uint32_t),
3530
 
                                PIPE_MAP_READ,
3531
 
                                &transfer);
3532
 
 
3533
 
                struct pipe_grid_info direct = *info;
3534
 
                direct.indirect = NULL;
3535
 
                direct.grid[0] = params[0];
3536
 
                direct.grid[1] = params[1];
3537
 
                direct.grid[2] = params[2];
3538
 
                pipe_buffer_unmap(pipe, transfer);
3539
 
 
3540
 
                if (params[0] && params[1] && params[2])
3541
 
                        panfrost_launch_grid(pipe, &direct);
3542
 
 
3543
 
                return;
3544
 
        }
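        /* For illustration: the three uint32 values mapped above are the
         * x/y/z workgroup counts of the indirect dispatch, so a buffer
         * holding { 4, 2, 1 } re-enters this function as a direct 4x2x1
         * grid, while an all-zero buffer launches nothing. */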
3545
 
 
3546
 
        ctx->compute_grid = info;
3547
 
 
3548
 
        struct panfrost_ptr t =
3549
 
                pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB);
3550
 
 
3551
 
        /* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so
3552
 
         * reuse the graphics path for this by lowering to Gallium */
3553
 
 
3554
 
        struct pipe_constant_buffer ubuf = {
3555
 
                .buffer = NULL,
3556
 
                .buffer_offset = 0,
3557
 
                .buffer_size = ctx->shader[PIPE_SHADER_COMPUTE]->cbase.req_input_mem,
3558
 
                .user_buffer = info->input
3559
 
        };
3560
 
 
3561
 
        if (info->input)
3562
 
                pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, false, &ubuf);
3563
 
 
3564
 
        /* Invoke according to the grid info */
3565
 
 
3566
 
        void *invocation =
3567
 
                pan_section_ptr(t.cpu, COMPUTE_JOB, INVOCATION);
3568
 
        unsigned num_wg[3] = { info->grid[0], info->grid[1], info->grid[2] };
3569
 
 
3570
 
        if (info->indirect)
3571
 
                num_wg[0] = num_wg[1] = num_wg[2] = 1;
3572
 
 
3573
 
        panfrost_pack_work_groups_compute(invocation,
3574
 
                                          num_wg[0], num_wg[1], num_wg[2],
3575
 
                                          info->block[0], info->block[1],
3576
 
                                          info->block[2],
3577
 
                                          false, info->indirect != NULL);
3578
 
 
3579
 
        pan_section_pack(t.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
3580
 
                cfg.job_task_split =
3581
 
                        util_logbase2_ceil(info->block[0] + 1) +
3582
 
                        util_logbase2_ceil(info->block[1] + 1) +
3583
 
                        util_logbase2_ceil(info->block[2] + 1);
3584
 
        }
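        /* Worked example: an 8x8x1 local workgroup size packs
         * util_logbase2_ceil(9) + util_logbase2_ceil(9) + util_logbase2_ceil(2)
         * = 4 + 4 + 1 = 9 into job_task_split. */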
3585
 
 
3586
 
        pan_section_pack(t.cpu, COMPUTE_JOB, DRAW, cfg) {
3587
 
                cfg.draw_descriptor_is_64b = true;
3588
 
                cfg.state = panfrost_emit_compute_shader_meta(batch, PIPE_SHADER_COMPUTE);
3589
 
                cfg.attributes = panfrost_emit_image_attribs(batch, &cfg.attribute_buffers, PIPE_SHADER_COMPUTE);
3590
 
                cfg.thread_storage = panfrost_emit_shared_memory(batch, info);
3591
 
                cfg.uniform_buffers = panfrost_emit_const_buf(batch,
3592
 
                                PIPE_SHADER_COMPUTE, &cfg.push_uniforms);
3593
 
                cfg.textures = panfrost_emit_texture_descriptors(batch,
3594
 
                                PIPE_SHADER_COMPUTE);
3595
 
                cfg.samplers = panfrost_emit_sampler_descriptors(batch,
3596
 
                                PIPE_SHADER_COMPUTE);
3597
 
        }
3598
 
 
3599
 
        unsigned indirect_dep = 0;
3600
 
#if PAN_GPU_INDIRECTS
3601
 
        if (info->indirect) {
3602
 
                struct pan_indirect_dispatch_info indirect = {
3603
 
                        .job = t.gpu,
3604
 
                        .indirect_dim = pan_resource(info->indirect)->image.data.bo->ptr.gpu +
3605
 
                                        info->indirect_offset,
3606
 
                        .num_wg_sysval = {
3607
 
                                batch->num_wg_sysval[0],
3608
 
                                batch->num_wg_sysval[1],
3609
 
                                batch->num_wg_sysval[2],
3610
 
                        },
3611
 
                };
3612
 
 
3613
 
                indirect_dep = GENX(pan_indirect_dispatch_emit)(&batch->pool.base,
3614
 
                                                                &batch->scoreboard,
3615
 
                                                                &indirect);
3616
 
        }
3617
 
#endif
3618
 
 
3619
 
        panfrost_add_job(&batch->pool.base, &batch->scoreboard,
3620
 
                         MALI_JOB_TYPE_COMPUTE, true, false,
3621
 
                         indirect_dep, 0, &t, false);
3622
 
        panfrost_flush_all_batches(ctx, "Launch grid post-barrier");
3623
 
}
3624
 
 
3625
 
static void *
3626
 
panfrost_create_rasterizer_state(
3627
 
        struct pipe_context *pctx,
3628
 
        const struct pipe_rasterizer_state *cso)
3629
 
{
3630
 
        struct panfrost_rasterizer *so = CALLOC_STRUCT(panfrost_rasterizer);
3631
 
 
3632
 
        so->base = *cso;
3633
 
 
3634
 
        /* Guaranteed with the core GL call, so don't expose ARB_polygon_offset */
3635
 
        assert(cso->offset_clamp == 0.0);
3636
 
 
3637
 
        pan_pack(&so->multisample, MULTISAMPLE_MISC, cfg) {
3638
 
                cfg.multisample_enable = cso->multisample;
3639
 
                cfg.fixed_function_near_discard = cso->depth_clip_near;
3640
 
                cfg.fixed_function_far_discard = cso->depth_clip_far;
3641
 
                cfg.shader_depth_range_fixed = true;
3642
 
        }
3643
 
 
3644
 
        pan_pack(&so->stencil_misc, STENCIL_MASK_MISC, cfg) {
3645
 
                cfg.front_facing_depth_bias = cso->offset_tri;
3646
 
                cfg.back_facing_depth_bias = cso->offset_tri;
3647
 
                cfg.single_sampled_lines = !cso->multisample;
3648
 
        }
3649
 
 
3650
 
        return so;
3651
 
}

static void *
panfrost_create_vertex_elements_state(
        struct pipe_context *pctx,
        unsigned num_elements,
        const struct pipe_vertex_element *elements)
{
        struct panfrost_vertex_state *so = CALLOC_STRUCT(panfrost_vertex_state);
        struct panfrost_device *dev = pan_device(pctx->screen);

        so->num_elements = num_elements;
        memcpy(so->pipe, elements, sizeof(*elements) * num_elements);

        /* Assign attribute buffers corresponding to the vertex buffers, keyed
         * for a particular divisor since that's how instancing works on Mali */
        for (unsigned i = 0; i < num_elements; ++i) {
                so->element_buffer[i] = pan_assign_vertex_buffer(
                                so->buffers, &so->nr_bufs,
                                elements[i].vertex_buffer_index,
                                elements[i].instance_divisor);
        }

        for (int i = 0; i < num_elements; ++i) {
                enum pipe_format fmt = elements[i].src_format;
                const struct util_format_description *desc = util_format_description(fmt);
                so->formats[i] = dev->formats[desc->format].hw;
                assert(so->formats[i]);
        }

        /* Let's also prepare vertex builtins */
        so->formats[PAN_VERTEX_ID] = dev->formats[PIPE_FORMAT_R32_UINT].hw;
        so->formats[PAN_INSTANCE_ID] = dev->formats[PIPE_FORMAT_R32_UINT].hw;

        return so;
}
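
/*
 * Illustrative sketch (not the actual pan_assign_vertex_buffer
 * implementation): the deduplication is keyed on the
 * (vertex buffer index, instance divisor) pair, since instancing on Mali
 * is configured per attribute buffer rather than per element. The struct
 * and helper below are hypothetical stand-ins showing the keying only.
 */
#if 0
struct example_buffer_key {
        unsigned vbi;     /* Gallium vertex buffer index */
        unsigned divisor; /* instance divisor */
};

static unsigned
example_assign_buffer(struct example_buffer_key *bufs, unsigned *nr_bufs,
                      unsigned vbi, unsigned divisor)
{
        /* Reuse an existing slot with the same (vbi, divisor) key... */
        for (unsigned i = 0; i < *nr_bufs; ++i) {
                if (bufs[i].vbi == vbi && bufs[i].divisor == divisor)
                        return i;
        }

        /* ...or allocate a fresh one. */
        unsigned slot = (*nr_bufs)++;
        bufs[slot].vbi = vbi;
        bufs[slot].divisor = divisor;
        return slot;
}
#endif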

static inline unsigned
pan_pipe_to_stencil_op(enum pipe_stencil_op in)
{
        switch (in) {
        case PIPE_STENCIL_OP_KEEP: return MALI_STENCIL_OP_KEEP;
        case PIPE_STENCIL_OP_ZERO: return MALI_STENCIL_OP_ZERO;
        case PIPE_STENCIL_OP_REPLACE: return MALI_STENCIL_OP_REPLACE;
        case PIPE_STENCIL_OP_INCR: return MALI_STENCIL_OP_INCR_SAT;
        case PIPE_STENCIL_OP_DECR: return MALI_STENCIL_OP_DECR_SAT;
        case PIPE_STENCIL_OP_INCR_WRAP: return MALI_STENCIL_OP_INCR_WRAP;
        case PIPE_STENCIL_OP_DECR_WRAP: return MALI_STENCIL_OP_DECR_WRAP;
        case PIPE_STENCIL_OP_INVERT: return MALI_STENCIL_OP_INVERT;
        default: unreachable("Invalid stencil op");
        }
}

static inline void
pan_pipe_to_stencil(const struct pipe_stencil_state *in,
                    struct mali_stencil_packed *out)
{
        pan_pack(out, STENCIL, s) {
                s.mask = in->valuemask;
                s.compare_function = (enum mali_func) in->func;
                s.stencil_fail = pan_pipe_to_stencil_op(in->fail_op);
                s.depth_fail = pan_pipe_to_stencil_op(in->zfail_op);
                s.depth_pass = pan_pipe_to_stencil_op(in->zpass_op);
        }
}
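
/*
 * Illustrative sketch (for exposition only): translating a common
 * "pass if equal, replace on depth+stencil pass" configuration with the
 * helpers above. pipe_stencil_state and pan_pipe_to_stencil() are real;
 * the wrapper function is hypothetical.
 */
#if 0
static void
example_pack_stencil(struct mali_stencil_packed *out)
{
        struct pipe_stencil_state in = {
                .enabled = 1,
                .func = PIPE_FUNC_EQUAL,
                .valuemask = 0xff,
                .writemask = 0xff,
                .fail_op = PIPE_STENCIL_OP_KEEP,
                .zfail_op = PIPE_STENCIL_OP_KEEP,
                .zpass_op = PIPE_STENCIL_OP_REPLACE,
        };

        /* Yields a prepacked STENCIL word ready to be copied into the RSD */
        pan_pipe_to_stencil(&in, out);
}
#endif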

static void *
panfrost_create_depth_stencil_state(struct pipe_context *pipe,
                                    const struct pipe_depth_stencil_alpha_state *zsa)
{
        struct panfrost_zsa_state *so = CALLOC_STRUCT(panfrost_zsa_state);
        so->base = *zsa;

        /* Normalize (there's no separate enable) */
        if (!zsa->alpha_enabled)
                so->base.alpha_func = MALI_FUNC_ALWAYS;

        /* Prepack relevant parts of the Renderer State Descriptor. They will
         * be ORed in at draw-time */
        pan_pack(&so->rsd_depth, MULTISAMPLE_MISC, cfg) {
                cfg.depth_function = zsa->depth_enabled ?
                        (enum mali_func) zsa->depth_func : MALI_FUNC_ALWAYS;

                cfg.depth_write_mask = zsa->depth_writemask;
        }

        pan_pack(&so->rsd_stencil, STENCIL_MASK_MISC, cfg) {
                cfg.stencil_enable = zsa->stencil[0].enabled;

                cfg.stencil_mask_front = zsa->stencil[0].writemask;
                cfg.stencil_mask_back = zsa->stencil[1].enabled ?
                        zsa->stencil[1].writemask : zsa->stencil[0].writemask;

#if PAN_ARCH <= 5
                cfg.alpha_test_compare_function =
                        (enum mali_func) so->base.alpha_func;
#endif
        }

        /* Stencil tests have their own words in the RSD */
        pan_pipe_to_stencil(&zsa->stencil[0], &so->stencil_front);

        if (zsa->stencil[1].enabled)
                pan_pipe_to_stencil(&zsa->stencil[1], &so->stencil_back);
        else
                so->stencil_back = so->stencil_front;

        so->enabled = zsa->stencil[0].enabled ||
                (zsa->depth_enabled && zsa->depth_func != PIPE_FUNC_ALWAYS);

        /* Write masks need tracking together */
        if (zsa->depth_writemask)
                so->draws |= PIPE_CLEAR_DEPTH;

        if (zsa->stencil[0].enabled)
                so->draws |= PIPE_CLEAR_STENCIL;

        /* TODO: Bounds test should be easy */
        assert(!zsa->depth_bounds_test);

        return so;
}
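
/*
 * Illustrative sketch (exposition only): a typical depth-test-only CSO.
 * With this input, the code above prepacks the depth function from
 * PIPE_FUNC_LESS, sets so->enabled (the test is not ALWAYS) and records
 * PIPE_CLEAR_DEPTH in so->draws because depth writes are enabled. The
 * initializer below is an example, not driver code.
 */
#if 0
static const struct pipe_depth_stencil_alpha_state example_depth_only = {
        .depth_enabled = 1,
        .depth_writemask = 1,
        .depth_func = PIPE_FUNC_LESS,
        /* no stencil, no alpha test, no bounds test */
};
#endif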

static struct pipe_sampler_view *
panfrost_create_sampler_view(
        struct pipe_context *pctx,
        struct pipe_resource *texture,
        const struct pipe_sampler_view *template)
{
        struct panfrost_context *ctx = pan_context(pctx);
        struct panfrost_sampler_view *so = rzalloc(pctx, struct panfrost_sampler_view);

        pan_legalize_afbc_format(ctx, pan_resource(texture), template->format);

        pipe_reference(NULL, &texture->reference);

        so->base = *template;
        so->base.texture = texture;
        so->base.reference.count = 1;
        so->base.context = pctx;

        panfrost_create_sampler_view_bo(so, pctx, texture);

        return (struct pipe_sampler_view *) so;
}

/* A given Gallium blend state can be encoded to the hardware in numerous,
 * dramatically divergent ways due to the interactions of blending with
 * framebuffer formats. Conceptually, there are two modes:
 *
 * - Fixed-function blending (for suitable framebuffer formats, suitable blend
 *   state, and suitable blend constant)
 *
 * - Blend shaders (for everything else)
 *
 * A given Gallium blend configuration will compile to exactly one
 * fixed-function blend state, if it compiles to any, although the constant
 * will vary across runs as that is tracked outside of the Gallium CSO.
 *
 * However, that same blend configuration will compile to many different blend
 * shaders, depending on the framebuffer formats active. The rationale is that
 * blend shaders override not just fixed-function blending but also
 * fixed-function format conversion, so blend shaders are keyed to a particular
 * framebuffer format. As an example, the tilebuffer format is identical for
 * RG16F and RG16UI -- both are simply 32-bit raw pixels -- so both require
 * blend shaders.
 *
 * All of this state is encapsulated in the panfrost_blend_state struct
 * (our subclass of pipe_blend_state).
 */

/* Create a blend CSO. Essentially, try to compile a fixed-function
 * expression and initialize blend shaders */

static void *
panfrost_create_blend_state(struct pipe_context *pipe,
                            const struct pipe_blend_state *blend)
{
        struct panfrost_blend_state *so = CALLOC_STRUCT(panfrost_blend_state);
        so->base = *blend;

        so->pan.logicop_enable = blend->logicop_enable;
        so->pan.logicop_func = blend->logicop_func;
        so->pan.rt_count = blend->max_rt + 1;

        for (unsigned c = 0; c < so->pan.rt_count; ++c) {
                unsigned g = blend->independent_blend_enable ? c : 0;
                const struct pipe_rt_blend_state pipe = blend->rt[g];
                struct pan_blend_equation equation = {0};

                equation.color_mask = pipe.colormask;
                equation.blend_enable = pipe.blend_enable;

                if (pipe.blend_enable) {
                        equation.rgb_func = util_blend_func_to_shader(pipe.rgb_func);
                        equation.rgb_src_factor = util_blend_factor_to_shader(pipe.rgb_src_factor);
                        equation.rgb_invert_src_factor = util_blend_factor_is_inverted(pipe.rgb_src_factor);
                        equation.rgb_dst_factor = util_blend_factor_to_shader(pipe.rgb_dst_factor);
                        equation.rgb_invert_dst_factor = util_blend_factor_is_inverted(pipe.rgb_dst_factor);
                        equation.alpha_func = util_blend_func_to_shader(pipe.alpha_func);
                        equation.alpha_src_factor = util_blend_factor_to_shader(pipe.alpha_src_factor);
                        equation.alpha_invert_src_factor = util_blend_factor_is_inverted(pipe.alpha_src_factor);
                        equation.alpha_dst_factor = util_blend_factor_to_shader(pipe.alpha_dst_factor);
                        equation.alpha_invert_dst_factor = util_blend_factor_is_inverted(pipe.alpha_dst_factor);
                }

                /* Determine some common properties */
                unsigned constant_mask = pan_blend_constant_mask(equation);
                const bool supports_2src = pan_blend_supports_2src(PAN_ARCH);
                so->info[c] = (struct pan_blend_info) {
                        .no_colour = (equation.color_mask == 0),
                        .opaque = pan_blend_is_opaque(equation),
                        .constant_mask = constant_mask,

                        /* TODO: check the dest for the logicop */
                        .load_dest = blend->logicop_enable ||
                                pan_blend_reads_dest(equation),

                        /* Could this possibly be fixed-function? */
                        .fixed_function = !blend->logicop_enable &&
                                pan_blend_can_fixed_function(equation,
                                                             supports_2src) &&
                                (!constant_mask ||
                                 pan_blend_supports_constant(PAN_ARCH, c)),

                        .alpha_zero_nop = pan_blend_alpha_zero_nop(equation),
                        .alpha_one_store = pan_blend_alpha_one_store(equation),
                };

                so->pan.rts[c].equation = equation;

                /* Bifrost needs to know if any render target loads its
                 * destination in the hot draw path, so precompute this */
                if (so->info[c].load_dest)
                        so->load_dest_mask |= BITFIELD_BIT(c);

                /* Converting equations to Mali style is expensive, do it at
                 * CSO create time instead of draw-time */
                if (so->info[c].fixed_function) {
                        so->equation[c] = pan_pack_blend(equation);
                }
        }

        return so;
}
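
/*
 * Illustrative sketch (exposition only): the classic source-alpha blend.
 * With no logic op and a full colour mask, this is the kind of equation
 * pan_blend_can_fixed_function() is expected to accept, so the packed
 * fixed-function word is precomputed at CSO create time above. Whether a
 * blend shader is still needed at draw time depends on the framebuffer
 * format, as described in the comment before panfrost_create_blend_state.
 */
#if 0
static const struct pipe_blend_state example_alpha_blend = {
        .independent_blend_enable = false,
        .max_rt = 0,
        .rt[0] = {
                .blend_enable = 1,
                .colormask = PIPE_MASK_RGBA,
                .rgb_func = PIPE_BLEND_ADD,
                .rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA,
                .rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA,
                .alpha_func = PIPE_BLEND_ADD,
                .alpha_src_factor = PIPE_BLENDFACTOR_ONE,
                .alpha_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA,
        },
};
#endif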

static void
prepare_shader(struct panfrost_shader_state *state,
            struct panfrost_pool *pool, bool upload)
{
        struct mali_renderer_state_packed *out =
                (struct mali_renderer_state_packed *)&state->partial_rsd;

        if (upload) {
                struct panfrost_ptr ptr =
                        pan_pool_alloc_desc(&pool->base, RENDERER_STATE);

                state->state = panfrost_pool_take_ref(pool, ptr.gpu);
                out = ptr.cpu;
        }

        pan_pack(out, RENDERER_STATE, cfg) {
                pan_shader_prepare_rsd(&state->info, state->bin.gpu, &cfg);
        }
}
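
/*
 * Illustrative sketch (exposition only): the two ways prepare_shader() is
 * driven. The calls are examples of the calling convention, not code
 * lifted from elsewhere in the driver.
 */
#if 0
static void
example_prepare(struct panfrost_shader_state *state,
                struct panfrost_pool *pool)
{
        /* Pack only into the CSO's partial_rsd words; draw-time code can
         * later merge in the remaining (blend/ZSA/rasterizer) state. */
        prepare_shader(state, pool, false);

        /* Or additionally allocate a full RENDERER_STATE descriptor from
         * the pool and keep a reference to it in state->state. */
        prepare_shader(state, pool, true);
}
#endif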

static void
panfrost_get_sample_position(struct pipe_context *context,
                             unsigned sample_count,
                             unsigned sample_index,
                             float *out_value)
{
        panfrost_query_sample_position(
                        panfrost_sample_pattern(sample_count),
                        sample_index,
                        out_value);
}

static void
screen_destroy(struct pipe_screen *pscreen)
{
        struct panfrost_device *dev = pan_device(pscreen);
        GENX(pan_blitter_cleanup)(dev);

#if PAN_GPU_INDIRECTS
        GENX(panfrost_cleanup_indirect_draw_shaders)(dev);
        GENX(pan_indirect_dispatch_cleanup)(dev);
#endif
}

static void
preload(struct panfrost_batch *batch, struct pan_fb_info *fb)
{
        GENX(pan_preload_fb)(&batch->pool.base, &batch->scoreboard, fb, batch->tls.gpu,
                             PAN_ARCH >= 6 ? batch->tiler_ctx.bifrost : 0, NULL);
}

static void
init_batch(struct panfrost_batch *batch)
{
        /* Reserve the framebuffer and local storage descriptors */
        batch->framebuffer =
#if PAN_ARCH == 4
                pan_pool_alloc_desc(&batch->pool.base, FRAMEBUFFER);
#else
                pan_pool_alloc_desc_aggregate(&batch->pool.base,
                                              PAN_DESC(FRAMEBUFFER),
                                              PAN_DESC(ZS_CRC_EXTENSION),
                                              PAN_DESC_ARRAY(MAX2(batch->key.nr_cbufs, 1), RENDER_TARGET));

                batch->framebuffer.gpu |= MALI_FBD_TAG_IS_MFBD;
#endif

#if PAN_ARCH >= 6
        batch->tls = pan_pool_alloc_desc(&batch->pool.base, LOCAL_STORAGE);
#else
        /* On Midgard, the TLS is embedded in the FB descriptor */
        batch->tls = batch->framebuffer;
#endif
}

static void
panfrost_sampler_view_destroy(
        struct pipe_context *pctx,
        struct pipe_sampler_view *pview)
{
        struct panfrost_sampler_view *view = (struct panfrost_sampler_view *) pview;

        pipe_resource_reference(&pview->texture, NULL);
        panfrost_bo_unreference(view->state.bo);
        ralloc_free(view);
}

static void
context_init(struct pipe_context *pipe)
{
        pipe->draw_vbo           = panfrost_draw_vbo;
        pipe->launch_grid        = panfrost_launch_grid;

        pipe->create_vertex_elements_state = panfrost_create_vertex_elements_state;
        pipe->create_rasterizer_state = panfrost_create_rasterizer_state;
        pipe->create_depth_stencil_alpha_state = panfrost_create_depth_stencil_state;
        pipe->create_sampler_view = panfrost_create_sampler_view;
        pipe->sampler_view_destroy = panfrost_sampler_view_destroy;
        pipe->create_sampler_state = panfrost_create_sampler_state;
        pipe->create_blend_state = panfrost_create_blend_state;

        pipe->get_sample_position = panfrost_get_sample_position;
}

#if PAN_ARCH <= 5

/* Returns the polygon list's GPU address if available, or otherwise allocates
 * the polygon list. It's perfectly fast to allocate/free the BO directly,
 * since we'll hit the BO cache and this is one-per-batch anyway. */

static mali_ptr
batch_get_polygon_list(struct panfrost_batch *batch)
{
        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);

        if (!batch->tiler_ctx.midgard.polygon_list) {
                bool has_draws = batch->scoreboard.first_tiler != NULL;
                unsigned size =
                        panfrost_tiler_get_polygon_list_size(dev,
                                                             batch->key.width,
                                                             batch->key.height,
                                                             has_draws);
                size = util_next_power_of_two(size);

                /* Create the BO as invisible if we can. In the non-hierarchical tiler case,
                 * we need to write the polygon list manually because there's no WRITE_VALUE
                 * job in the chain (maybe we should add one...). */
                bool init_polygon_list = !has_draws && dev->model->quirks.no_hierarchical_tiling;
                batch->tiler_ctx.midgard.polygon_list =
                        panfrost_batch_create_bo(batch, size,
                                                 init_polygon_list ? 0 : PAN_BO_INVISIBLE,
                                                 PIPE_SHADER_VERTEX,
                                                 "Polygon list");
                panfrost_batch_add_bo(batch, batch->tiler_ctx.midgard.polygon_list,
                                PIPE_SHADER_FRAGMENT);

                if (init_polygon_list) {
                        assert(batch->tiler_ctx.midgard.polygon_list->ptr.cpu);
                        uint32_t *polygon_list_body =
                                batch->tiler_ctx.midgard.polygon_list->ptr.cpu +
                                MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE;

                        /* Magic for Mali T720 */
                        polygon_list_body[0] = 0xa0000000;
                }

                batch->tiler_ctx.midgard.disable = !has_draws;
        }

        return batch->tiler_ctx.midgard.polygon_list->ptr.gpu;
}
#endif

static void
init_polygon_list(struct panfrost_batch *batch)
{
#if PAN_ARCH <= 5
        mali_ptr polygon_list = batch_get_polygon_list(batch);
        panfrost_scoreboard_initialize_tiler(&batch->pool.base,
                                             &batch->scoreboard,
                                             polygon_list);
#endif
}

void
GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen)
{
        struct panfrost_device *dev = &screen->dev;

        screen->vtbl.prepare_shader = prepare_shader;
        screen->vtbl.emit_tls    = emit_tls;
        screen->vtbl.emit_fbd    = emit_fbd;
        screen->vtbl.emit_fragment_job = emit_fragment_job;
        screen->vtbl.screen_destroy = screen_destroy;
        screen->vtbl.preload     = preload;
        screen->vtbl.context_init = context_init;
        screen->vtbl.init_batch = init_batch;
        screen->vtbl.get_blend_shader = GENX(pan_blend_get_shader_locked);
        screen->vtbl.init_polygon_list = init_polygon_list;
        screen->vtbl.get_compiler_options = GENX(pan_shader_get_compiler_options);
        screen->vtbl.compile_shader = GENX(pan_shader_compile);

        GENX(pan_blitter_init)(dev, &screen->blitter.bin_pool.base,
                               &screen->blitter.desc_pool.base);
#if PAN_GPU_INDIRECTS
        GENX(pan_indirect_dispatch_init)(dev);
        GENX(panfrost_init_indirect_draw_shaders)(dev, &screen->indirect_draw.bin_pool.base);
#endif
}