/*
 * Copyright (C) 2018 Alyssa Rosenzweig
 * Copyright (C) 2020 Collabora Ltd.
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "util/macros.h"
#include "util/u_prim.h"
#include "util/u_vbuf.h"
#include "util/u_helpers.h"
#include "util/u_draw.h"
#include "util/u_memory.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "gallium/auxiliary/util/u_blend.h"

#include "genxml/gen_macros.h"

#include "pan_blend.h"
#include "pan_context.h"
#include "pan_shader.h"
#include "pan_texture.h"
#include "pan_indirect_draw.h"
#include "pan_indirect_dispatch.h"
#include "pan_blitter.h"

#define PAN_GPU_INDIRECTS (PAN_ARCH == 7)
struct panfrost_rasterizer {
        struct pipe_rasterizer_state base;

        /* Partially packed RSD words */
        struct mali_multisample_misc_packed multisample;
        struct mali_stencil_mask_misc_packed stencil_misc;
};

struct panfrost_zsa_state {
        struct pipe_depth_stencil_alpha_state base;

        /* Is any depth, stencil, or alpha testing enabled? */
        bool enabled;

        /* Mask of PIPE_CLEAR_{DEPTH,STENCIL} written */
        unsigned draws;

        /* Prepacked words from the RSD */
        struct mali_multisample_misc_packed rsd_depth;
        struct mali_stencil_mask_misc_packed rsd_stencil;
        struct mali_stencil_packed stencil_front, stencil_back;
};

struct panfrost_sampler_state {
        struct pipe_sampler_state base;
        struct mali_sampler_packed hw;
};

/* Misnomer: Sampler view corresponds to textures, not samplers */
struct panfrost_sampler_view {
        struct pipe_sampler_view base;
        struct panfrost_pool_ref state;
        struct mali_texture_packed bifrost_descriptor;
        mali_ptr texture_bo;
        uint64_t modifier;

        /* Pool used to allocate the descriptor. If NULL, defaults to the global
         * descriptor pool. Can be set for short lived descriptors, useful for
         * shader images on Valhall.
         */
        struct panfrost_pool *pool;
};

struct panfrost_vertex_state {
        unsigned num_elements;
        struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];

        /* Packed attribute descriptor. All fields are set at CSO create time
         * except for stride, which must be ORed in at draw time
         */
        struct mali_attribute_packed attributes[PIPE_MAX_ATTRIBS];

        /* buffers corresponds to attribute buffer, element_buffers corresponds
         * to an index in buffers for each vertex element */
        struct pan_vertex_buffer buffers[PIPE_MAX_ATTRIBS];
        unsigned element_buffer[PIPE_MAX_ATTRIBS];
        unsigned nr_bufs;

        unsigned formats[PIPE_MAX_ATTRIBS];
};

/* Statically assert that PIPE_* enums match the hardware enums.
 * (As long as they match, we don't need to translate them.) */

#define PIPE_ASSERT(x) STATIC_ASSERT((int)x)
/* Compare functions are natural in both Gallium and Mali */
PIPE_ASSERT(PIPE_FUNC_NEVER == MALI_FUNC_NEVER);
PIPE_ASSERT(PIPE_FUNC_LESS == MALI_FUNC_LESS);
PIPE_ASSERT(PIPE_FUNC_EQUAL == MALI_FUNC_EQUAL);
PIPE_ASSERT(PIPE_FUNC_LEQUAL == MALI_FUNC_LEQUAL);
PIPE_ASSERT(PIPE_FUNC_GREATER == MALI_FUNC_GREATER);
PIPE_ASSERT(PIPE_FUNC_NOTEQUAL == MALI_FUNC_NOT_EQUAL);
PIPE_ASSERT(PIPE_FUNC_GEQUAL == MALI_FUNC_GEQUAL);
PIPE_ASSERT(PIPE_FUNC_ALWAYS == MALI_FUNC_ALWAYS);
static inline enum mali_sample_pattern
panfrost_sample_pattern(unsigned samples)
{
        switch (samples) {
        case 1:  return MALI_SAMPLE_PATTERN_SINGLE_SAMPLED;
        case 4:  return MALI_SAMPLE_PATTERN_ROTATED_4X_GRID;
        case 8:  return MALI_SAMPLE_PATTERN_D3D_8X_GRID;
        case 16: return MALI_SAMPLE_PATTERN_D3D_16X_GRID;
        default: unreachable("Unsupported sample count");
        }
}
static enum mali_wrap_mode
translate_tex_wrap(enum pipe_tex_wrap w, bool using_nearest)
{
        /* CLAMP is only supported on Midgard, where it is broken for nearest
         * filtering. Use CLAMP_TO_EDGE in that case.
         */

        switch (w) {
        case PIPE_TEX_WRAP_REPEAT: return MALI_WRAP_MODE_REPEAT;
        case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE;
        case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER;
        case PIPE_TEX_WRAP_MIRROR_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT;
        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE;
        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_BORDER;

#if PAN_ARCH <= 5
        case PIPE_TEX_WRAP_CLAMP:
                return using_nearest ? MALI_WRAP_MODE_CLAMP_TO_EDGE :
                                       MALI_WRAP_MODE_CLAMP;
        case PIPE_TEX_WRAP_MIRROR_CLAMP:
                return using_nearest ? MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE :
                                       MALI_WRAP_MODE_MIRRORED_CLAMP;
#endif

        default: unreachable("Invalid wrap");
        }
}
/* The hardware compares in the wrong order, so we have to flip before
 * encoding. Yes, really. */
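/* Illustrative consequence of the flip: with operands swapped, FUNC_LESS
 * behaves as FUNC_GREATER and FUNC_LEQUAL as FUNC_GEQUAL, while NEVER,
 * ALWAYS, EQUAL and NOT_EQUAL are symmetric and map to themselves. */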
static enum mali_func
panfrost_sampler_compare_func(const struct pipe_sampler_state *cso)
{
        return !cso->compare_mode ? MALI_FUNC_NEVER :
                panfrost_flip_compare_func((enum mali_func) cso->compare_func);
}
static enum mali_mipmap_mode
pan_pipe_to_mipmode(enum pipe_tex_mipfilter f)
{
        switch (f) {
        case PIPE_TEX_MIPFILTER_NEAREST: return MALI_MIPMAP_MODE_NEAREST;
        case PIPE_TEX_MIPFILTER_LINEAR: return MALI_MIPMAP_MODE_TRILINEAR;
#if PAN_ARCH >= 6
        case PIPE_TEX_MIPFILTER_NONE: return MALI_MIPMAP_MODE_NONE;
#else
        case PIPE_TEX_MIPFILTER_NONE: return MALI_MIPMAP_MODE_NEAREST;
#endif
        default: unreachable("Invalid");
        }
}
static void *
panfrost_create_sampler_state(
        struct pipe_context *pctx,
        const struct pipe_sampler_state *cso)
{
        struct panfrost_sampler_state *so = CALLOC_STRUCT(panfrost_sampler_state);
        so->base = *cso;

        bool using_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;

        pan_pack(&so->hw, SAMPLER, cfg) {
                cfg.magnify_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
                cfg.minify_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;

                cfg.normalized_coordinates = cso->normalized_coords;
                cfg.lod_bias = FIXED_16(cso->lod_bias, true);
                cfg.minimum_lod = FIXED_16(cso->min_lod, false);
                cfg.maximum_lod = FIXED_16(cso->max_lod, false);

                cfg.wrap_mode_s = translate_tex_wrap(cso->wrap_s, using_nearest);
                cfg.wrap_mode_t = translate_tex_wrap(cso->wrap_t, using_nearest);
                cfg.wrap_mode_r = translate_tex_wrap(cso->wrap_r, using_nearest);

                cfg.mipmap_mode = pan_pipe_to_mipmode(cso->min_mip_filter);
                cfg.compare_function = panfrost_sampler_compare_func(cso);
                cfg.seamless_cube_map = cso->seamless_cube_map;

                cfg.border_color_r = cso->border_color.ui[0];
                cfg.border_color_g = cso->border_color.ui[1];
                cfg.border_color_b = cso->border_color.ui[2];
                cfg.border_color_a = cso->border_color.ui[3];

                if (cso->max_anisotropy > 1) {
                        cfg.maximum_anisotropy = cso->max_anisotropy;
                        cfg.lod_algorithm = MALI_LOD_ALGORITHM_ANISOTROPIC;
                }

                /* Emulate disabled mipmapping by clamping the LOD as tight as
                 * possible (from 0 to epsilon = 1/256) */
                if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
                        cfg.maximum_lod = cfg.minimum_lod + 1;
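                /* One ULP of the 16-bit fixed-point LOD is exactly the 1/256
                 * epsilon mentioned above, assuming FIXED_16 carries 8
                 * fractional bits. */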
        }

        return so;
}

static bool
panfrost_fs_required(
        struct panfrost_shader_state *fs,
        struct panfrost_blend_state *blend,
        struct pipe_framebuffer_state *state,
        const struct panfrost_zsa_state *zsa)
{
        /* If we generally have side effects. This includes use of discard,
         * which can affect the results of an occlusion query. */
        if (fs->info.fs.sidefx)
                return true;

        /* Using an empty FS requires early-z to be enabled, but alpha test
         * needs it disabled. Alpha test is only native on Midgard, so only
         * check there.
         */
        if (PAN_ARCH <= 5 && zsa->base.alpha_func != PIPE_FUNC_ALWAYS)
                return true;

        /* If colour is written we need to execute */
        for (unsigned i = 0; i < state->nr_cbufs; ++i) {
                if (state->cbufs[i] && !blend->info[i].no_colour)
                        return true;
        }

        /* If depth is written and not implied we need to execute.
         * TODO: Predicate on Z/S writes being enabled */
        return (fs->info.fs.writes_depth || fs->info.fs.writes_stencil);
}
/* Get pointers to the blend shaders bound to each active render target. Used
 * to emit the blend descriptors, as well as the fragment renderer state
 * descriptor. */

static void
panfrost_get_blend_shaders(struct panfrost_batch *batch,
                           mali_ptr *blend_shaders)
{
        unsigned shader_offset = 0;
        struct panfrost_bo *shader_bo = NULL;

        for (unsigned c = 0; c < batch->key.nr_cbufs; ++c) {
                if (batch->key.cbufs[c]) {
                        blend_shaders[c] = panfrost_get_blend(batch,
                                        c, &shader_bo, &shader_offset);
                }
        }
}
UNUSED static uint16_t
pack_blend_constant(enum pipe_format format, float cons)
{
        const struct util_format_description *format_desc =
                util_format_description(format);

        unsigned chan_size = 0;

        for (unsigned i = 0; i < format_desc->nr_channels; i++)
                chan_size = MAX2(format_desc->channel[i].size, chan_size);

        uint16_t unorm = (cons * ((1 << chan_size) - 1));
        return unorm << (16 - chan_size);
}
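/* Worked example for pack_blend_constant (illustrative): with an 8-bit UNORM
 * render target, chan_size = 8, so a blend constant of 0.5 packs as
 * unorm = (uint16_t) (0.5 * 255) = 127 = 0x7f, shifted into the top bits:
 * 0x7f << (16 - 8) = 0x7f00. */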
static void
panfrost_emit_blend(struct panfrost_batch *batch, void *rts, mali_ptr *blend_shaders)
{
        unsigned rt_count = batch->key.nr_cbufs;
        struct panfrost_context *ctx = batch->ctx;
        const struct panfrost_blend_state *so = ctx->blend;
        bool dithered = so->base.dither;

        /* Always have at least one render target for depth-only passes */
        for (unsigned i = 0; i < MAX2(rt_count, 1); ++i) {
                struct mali_blend_packed *packed = rts + (i * pan_size(BLEND));

                /* Disable blending for unbacked render targets */
                if (rt_count == 0 || !batch->key.cbufs[i] || so->info[i].no_colour) {
                        pan_pack(rts + i * pan_size(BLEND), BLEND, cfg) {
                                cfg.internal.mode = MALI_BLEND_MODE_OFF;
                        }

                        continue;
                }

                struct pan_blend_info info = so->info[i];
                enum pipe_format format = batch->key.cbufs[i]->format;
                float cons = pan_blend_get_constant(info.constant_mask,
                                ctx->blend_color.color);

                /* Word 0: Flags and constant */
                pan_pack(packed, BLEND, cfg) {
                        cfg.srgb = util_format_is_srgb(format);
                        cfg.load_destination = info.load_dest;
                        cfg.round_to_fb_precision = !dithered;
                        cfg.alpha_to_one = ctx->blend->base.alpha_to_one;
#if PAN_ARCH >= 6
                        if (!blend_shaders[i])
                                cfg.constant = pack_blend_constant(format, cons);
#else
                        cfg.blend_shader = (blend_shaders[i] != 0);

                        if (blend_shaders[i])
                                cfg.shader_pc = blend_shaders[i];
#endif
                }

                if (!blend_shaders[i]) {
                        /* Word 1: Blend Equation */
                        STATIC_ASSERT(pan_size(BLEND_EQUATION) == 4);
                        packed->opaque[PAN_ARCH >= 6 ? 1 : 2] = so->equation[i];
                }

#if PAN_ARCH >= 6
                const struct panfrost_device *dev = pan_device(ctx->base.screen);
                struct panfrost_shader_state *fs =
                        panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);

                /* Words 2 and 3: Internal blend */
                if (blend_shaders[i]) {
                        /* The blend shader's address needs to be at
                         * the same top 32 bit as the fragment shader.
                         * TODO: Ensure that's always the case.
                         */
                        assert(!fs->bin.bo ||
                               (blend_shaders[i] & (0xffffffffull << 32)) ==
                               (fs->bin.gpu & (0xffffffffull << 32)));

                        pan_pack(&packed->opaque[2], INTERNAL_BLEND, cfg) {
                                cfg.mode = MALI_BLEND_MODE_SHADER;
                                cfg.shader.pc = (u32) blend_shaders[i];

                                unsigned ret_offset = fs->info.bifrost.blend[i].return_offset;
                                assert(!(ret_offset & 0x7));

                                cfg.shader.return_value = ret_offset ?
                                        fs->bin.gpu + ret_offset : 0;
                        }
                } else {
                        pan_pack(&packed->opaque[2], INTERNAL_BLEND, cfg) {
                                cfg.mode = info.opaque ?
                                           MALI_BLEND_MODE_OPAQUE :
                                           MALI_BLEND_MODE_FIXED_FUNCTION;

                                /* If we want the conversion to work properly,
                                 * num_comps must be set to 4
                                 */
                                cfg.fixed_function.num_comps = 4;
                                cfg.fixed_function.conversion.memory_format =
                                        panfrost_format_to_bifrost_blend(dev, format, dithered);
                                cfg.fixed_function.conversion.register_format =
                                        fs->info.bifrost.blend[i].format;
                                cfg.fixed_function.rt = i;

                                cfg.fixed_function.alpha_zero_nop = info.alpha_zero_nop;
                                cfg.fixed_function.alpha_one_store = info.alpha_one_store;
                        }
                }
#endif
        }

        for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
                if (!so->info[i].no_colour && batch->key.cbufs[i]) {
                        batch->draws |= (PIPE_CLEAR_COLOR0 << i);
                        batch->resolve |= (PIPE_CLEAR_COLOR0 << i);
                }
        }
}
static bool
pan_allow_forward_pixel_to_kill(struct panfrost_context *ctx, struct panfrost_shader_state *fs)
{
        /* Track if any colour buffer is reused across draws, either
         * from reading it directly, or from failing to write it */
        unsigned rt_mask = ctx->fb_rt_mask;
        uint64_t rt_written = (fs->info.outputs_written >> FRAG_RESULT_DATA0);
        bool blend_reads_dest = (ctx->blend->load_dest_mask & rt_mask);
        bool alpha_to_coverage = ctx->blend->base.alpha_to_coverage;

        return fs->info.fs.can_fpk &&
               !(rt_mask & ~rt_written) &&
               !alpha_to_coverage &&
               !blend_reads_dest;
}
static mali_ptr
panfrost_emit_compute_shader_meta(struct panfrost_batch *batch, enum pipe_shader_type stage)
{
        struct panfrost_shader_state *ss = panfrost_get_shader_state(batch->ctx, stage);

        panfrost_batch_add_bo(batch, ss->bin.bo, PIPE_SHADER_VERTEX);
        panfrost_batch_add_bo(batch, ss->state.bo, PIPE_SHADER_VERTEX);

        return ss->state.gpu;
}
/* Construct a partial RSD corresponding to no executed fragment shader, and
 * merge with the existing partial RSD. */

static void
pan_merge_empty_fs(struct mali_renderer_state_packed *rsd)
{
        struct mali_renderer_state_packed empty_rsd;

        pan_pack(&empty_rsd, RENDERER_STATE, cfg) {
#if PAN_ARCH >= 6
                cfg.properties.shader_modifies_coverage = true;
                cfg.properties.allow_forward_pixel_to_kill = true;
                cfg.properties.allow_forward_pixel_to_be_killed = true;
                cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
#else
                cfg.shader.shader = 0x1;
                cfg.properties.work_register_count = 1;
                cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
                cfg.properties.force_early_z = true;
#endif
        }

        pan_merge((*rsd), empty_rsd, RENDERER_STATE);
}
static void
panfrost_prepare_fs_state(struct panfrost_context *ctx,
                          mali_ptr *blend_shaders,
                          struct mali_renderer_state_packed *rsd)
{
        struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
        const struct panfrost_zsa_state *zsa = ctx->depth_stencil;
        struct panfrost_shader_state *fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
        struct panfrost_blend_state *so = ctx->blend;
        bool alpha_to_coverage = ctx->blend->base.alpha_to_coverage;
        bool msaa = rast->multisample;

        unsigned rt_count = ctx->pipe_framebuffer.nr_cbufs;

        bool has_blend_shader = false;

        for (unsigned c = 0; c < rt_count; ++c)
                has_blend_shader |= (blend_shaders[c] != 0);

        pan_pack(rsd, RENDERER_STATE, cfg) {
                if (panfrost_fs_required(fs, so, &ctx->pipe_framebuffer, zsa)) {
                        cfg.properties.allow_forward_pixel_to_kill =
                                pan_allow_forward_pixel_to_kill(ctx, fs);

                        cfg.properties.force_early_z =
                                fs->info.fs.can_early_z && !alpha_to_coverage &&
                                ((enum mali_func) zsa->base.alpha_func == MALI_FUNC_ALWAYS);

                        /* TODO: Reduce this limit? */
                        if (has_blend_shader)
                                cfg.properties.work_register_count = MAX2(fs->info.work_reg_count, 8);
                        else
                                cfg.properties.work_register_count = fs->info.work_reg_count;

                        /* Hardware quirks around early-zs forcing without a
                         * depth buffer. Note this breaks occlusion queries. */
                        bool has_oq = ctx->occlusion_query && ctx->active_queries;
                        bool force_ez_with_discard = !zsa->enabled && !has_oq;

                        cfg.properties.shader_reads_tilebuffer =
                                force_ez_with_discard && fs->info.fs.can_discard;
                        cfg.properties.shader_contains_discard =
                                !force_ez_with_discard && fs->info.fs.can_discard;
                }

                if (rt_count > 0) {
                        cfg.multisample_misc.load_destination = so->info[0].load_dest;
                        cfg.multisample_misc.blend_shader = (blend_shaders[0] != 0);
                        cfg.stencil_mask_misc.write_enable = !so->info[0].no_colour;
                        cfg.stencil_mask_misc.srgb = util_format_is_srgb(ctx->pipe_framebuffer.cbufs[0]->format);
                        cfg.stencil_mask_misc.dither_disable = !so->base.dither;
                        cfg.stencil_mask_misc.alpha_to_one = so->base.alpha_to_one;

                        if (blend_shaders[0]) {
                                cfg.blend_shader = blend_shaders[0];
                        } else {
                                cfg.blend_constant = pan_blend_get_constant(
                                                so->info[0].constant_mask,
                                                ctx->blend_color.color);
                        }
                } else {
                        /* If there is no colour buffer, leaving fields default is
                         * fine, except for blending which is nonnullable */
                        cfg.blend_equation.color_mask = 0xf;
                        cfg.blend_equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
                        cfg.blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
                        cfg.blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
                        cfg.blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
                        cfg.blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
                        cfg.blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
                }

                cfg.legacy_blend_shader = panfrost_last_nonnull(blend_shaders, rt_count);

                cfg.multisample_misc.sample_mask = msaa ? ctx->sample_mask : 0xFFFF;

                cfg.multisample_misc.evaluate_per_sample =
                        msaa && (ctx->min_samples > 1);

                /* MSAA blend shaders need to pass their sample ID to
                 * LD_TILE/ST_TILE, so we must preload it. Additionally, we
                 * need per-sample shading for the blend shader, accomplished
                 * by forcing per-sample shading for the whole program. */
                if (msaa && has_blend_shader) {
                        cfg.multisample_misc.evaluate_per_sample = true;
                        cfg.preload.fragment.sample_mask_id = true;
                }

                cfg.stencil_mask_misc.alpha_to_coverage = alpha_to_coverage;
                cfg.depth_units = rast->offset_units * 2.0f;
                cfg.depth_factor = rast->offset_scale;

                bool back_enab = zsa->base.stencil[1].enabled;
                cfg.stencil_front.reference_value = ctx->stencil_ref.ref_value[0];
                cfg.stencil_back.reference_value = ctx->stencil_ref.ref_value[back_enab ? 1 : 0];

                /* v6+ fits register preload here, no alpha testing */
                cfg.alpha_reference = zsa->base.alpha_ref_value;
        }
}
static void
panfrost_emit_frag_shader(struct panfrost_context *ctx,
                          struct mali_renderer_state_packed *fragmeta,
                          mali_ptr *blend_shaders)
{
        const struct panfrost_zsa_state *zsa = ctx->depth_stencil;
        const struct panfrost_rasterizer *rast = ctx->rasterizer;
        struct panfrost_shader_state *fs =
                panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);

        /* We need to merge several partial renderer state descriptors,
         * so stage to temporary storage rather than reading back write-combine
         * memory, which will trash performance. */
        struct mali_renderer_state_packed rsd;
        panfrost_prepare_fs_state(ctx, blend_shaders, &rsd);

        if (ctx->pipe_framebuffer.nr_cbufs > 0 && !blend_shaders[0]) {
                /* Word 14: SFBD Blend Equation */
                STATIC_ASSERT(pan_size(BLEND_EQUATION) == 4);
                rsd.opaque[14] = ctx->blend->equation[0];
        }

        /* Merge with CSO state and upload */
        if (panfrost_fs_required(fs, ctx->blend, &ctx->pipe_framebuffer, zsa)) {
                struct mali_renderer_state_packed *partial_rsd =
                        (struct mali_renderer_state_packed *)&fs->partial_rsd;
                STATIC_ASSERT(sizeof(fs->partial_rsd) == sizeof(*partial_rsd));
                pan_merge(rsd, *partial_rsd, RENDERER_STATE);
        } else {
                pan_merge_empty_fs(&rsd);
        }

        /* Word 8, 9: Misc state */
        rsd.opaque[8] |= zsa->rsd_depth.opaque[0]
                      | rast->multisample.opaque[0];

        rsd.opaque[9] |= zsa->rsd_stencil.opaque[0]
                      | rast->stencil_misc.opaque[0];

        /* Word 10, 11: Stencil Front and Back */
        rsd.opaque[10] |= zsa->stencil_front.opaque[0];
        rsd.opaque[11] |= zsa->stencil_back.opaque[0];

        memcpy(fragmeta, &rsd, sizeof(rsd));
}
static mali_ptr
panfrost_emit_frag_shader_meta(struct panfrost_batch *batch)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);

        panfrost_batch_add_bo(batch, ss->bin.bo, PIPE_SHADER_FRAGMENT);

        struct panfrost_ptr xfer;

#if PAN_ARCH == 4
        xfer = pan_pool_alloc_desc(&batch->pool.base, RENDERER_STATE);
#else
        unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);

        xfer = pan_pool_alloc_desc_aggregate(&batch->pool.base,
                                             PAN_DESC(RENDERER_STATE),
                                             PAN_DESC_ARRAY(rt_count, BLEND));
#endif

        mali_ptr blend_shaders[PIPE_MAX_COLOR_BUFS] = { 0 };
        panfrost_get_blend_shaders(batch, blend_shaders);

        panfrost_emit_frag_shader(ctx, (struct mali_renderer_state_packed *) xfer.cpu, blend_shaders);

#if PAN_ARCH >= 5
        panfrost_emit_blend(batch, xfer.cpu + pan_size(RENDERER_STATE), blend_shaders);
#else
        batch->draws |= PIPE_CLEAR_COLOR0;
        batch->resolve |= PIPE_CLEAR_COLOR0;
#endif

        if (ctx->depth_stencil->base.depth_enabled)
                batch->read |= PIPE_CLEAR_DEPTH;

        if (ctx->depth_stencil->base.stencil[0].enabled)
                batch->read |= PIPE_CLEAR_STENCIL;

        return xfer.gpu;
}
static void
panfrost_emit_viewport(struct panfrost_batch *batch)
{
        struct panfrost_context *ctx = batch->ctx;
        const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
        const struct pipe_scissor_state *ss = &ctx->scissor;
        const struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;

        /* Derive min/max from translate/scale. Note since |x| >= 0 by
         * definition, we have that -|x| <= |x| hence translate - |scale| <=
         * translate + |scale|, so the ordering is correct here. */
        float vp_minx = vp->translate[0] - fabsf(vp->scale[0]);
        float vp_maxx = vp->translate[0] + fabsf(vp->scale[0]);
        float vp_miny = vp->translate[1] - fabsf(vp->scale[1]);
        float vp_maxy = vp->translate[1] + fabsf(vp->scale[1]);
        float minz = (vp->translate[2] - fabsf(vp->scale[2]));
        float maxz = (vp->translate[2] + fabsf(vp->scale[2]));

        /* Scissor to the intersection of the viewport and the scissor state,
         * clamped to the framebuffer */
        unsigned minx = MIN2(batch->key.width, MAX2((int) vp_minx, 0));
        unsigned maxx = MIN2(batch->key.width, MAX2((int) vp_maxx, 0));
        unsigned miny = MIN2(batch->key.height, MAX2((int) vp_miny, 0));
        unsigned maxy = MIN2(batch->key.height, MAX2((int) vp_maxy, 0));

        if (ss && rast->scissor) {
                minx = MAX2(ss->minx, minx);
                miny = MAX2(ss->miny, miny);
                maxx = MIN2(ss->maxx, maxx);
                maxy = MIN2(ss->maxy, maxy);
        }

        /* Set the range to [1, 1) so max values don't wrap round */
        if (maxx == 0 || maxy == 0)
                maxx = maxy = minx = miny = 1;

        panfrost_batch_union_scissor(batch, minx, miny, maxx, maxy);
        batch->scissor_culls_everything = (minx >= maxx || miny >= maxy);

        /* [minx, maxx) and [miny, maxy) are exclusive ranges in the hardware */
        maxx--;
        maxy--;

        batch->minimum_z = rast->depth_clip_near ? minz : -INFINITY;
        batch->maximum_z = rast->depth_clip_far ? maxz : +INFINITY;

#if PAN_ARCH <= 7
        struct panfrost_ptr T = pan_pool_alloc_desc(&batch->pool.base, VIEWPORT);

        pan_pack(T.cpu, VIEWPORT, cfg) {
                cfg.scissor_minimum_x = minx;
                cfg.scissor_minimum_y = miny;
                cfg.scissor_maximum_x = maxx;
                cfg.scissor_maximum_y = maxy;

                cfg.minimum_z = batch->minimum_z;
                cfg.maximum_z = batch->maximum_z;
        }

        batch->viewport = T.gpu;
#else
        pan_pack(&batch->scissor, SCISSOR, cfg) {
                cfg.scissor_minimum_x = minx;
                cfg.scissor_minimum_y = miny;
                cfg.scissor_maximum_x = maxx;
                cfg.scissor_maximum_y = maxy;
        }
#endif
}
/**
 * Emit a Valhall depth/stencil descriptor at draw-time. The bulk of the
 * descriptor corresponds to a pipe_depth_stencil_alpha CSO and is packed at
 * CSO create time. However, the stencil reference values and shader
 * interactions are dynamic state. Pack only the dynamic state here and OR
 * it with the prepacked CSO state.
 */
static mali_ptr
panfrost_emit_depth_stencil(struct panfrost_batch *batch)
{
        struct panfrost_context *ctx = batch->ctx;
        const struct panfrost_zsa_state *zsa = ctx->depth_stencil;
        struct panfrost_rasterizer *rast = ctx->rasterizer;
        struct panfrost_shader_state *fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
        bool back_enab = zsa->base.stencil[1].enabled;

        struct panfrost_ptr T = pan_pool_alloc_desc(&batch->pool.base, DEPTH_STENCIL);
        struct mali_depth_stencil_packed dynamic;

        pan_pack(&dynamic, DEPTH_STENCIL, cfg) {
                cfg.front_reference_value = ctx->stencil_ref.ref_value[0];
                cfg.back_reference_value = ctx->stencil_ref.ref_value[back_enab ? 1 : 0];

                cfg.stencil_from_shader = fs->info.fs.writes_stencil;
                cfg.depth_source = pan_depth_source(&fs->info);

                cfg.depth_bias_enable = rast->base.offset_tri;
                cfg.depth_units = rast->base.offset_units * 2.0f;
                cfg.depth_factor = rast->base.offset_scale;
                cfg.depth_bias_clamp = rast->base.offset_clamp;
        }

        pan_merge(dynamic, zsa->desc, DEPTH_STENCIL);
        memcpy(T.cpu, &dynamic, pan_size(DEPTH_STENCIL));

        return T.gpu;
}
/**
 * Emit Valhall blend descriptor at draw-time. The descriptor itself is shared
 * with Bifrost, but the container data structure is simplified.
 */
static mali_ptr
panfrost_emit_blend_valhall(struct panfrost_batch *batch)
{
        unsigned rt_count = MAX2(batch->key.nr_cbufs, 1);

        struct panfrost_ptr T = pan_pool_alloc_desc_array(&batch->pool.base, rt_count, BLEND);

        mali_ptr blend_shaders[PIPE_MAX_COLOR_BUFS] = { 0 };
        panfrost_get_blend_shaders(batch, blend_shaders);

        panfrost_emit_blend(batch, T.cpu, blend_shaders);

        /* Precalculate for the per-draw path */
        bool has_blend_shader = false;

        for (unsigned i = 0; i < rt_count; ++i)
                has_blend_shader |= !!blend_shaders[i];

        batch->ctx->valhall_has_blend_shader = has_blend_shader;

        return T.gpu;
}
/**
 * Emit Valhall buffer descriptors for bound vertex buffers at draw-time.
 */
static mali_ptr
panfrost_emit_vertex_buffers(struct panfrost_batch *batch)
{
        struct panfrost_context *ctx = batch->ctx;
        unsigned buffer_count = util_last_bit(ctx->vb_mask);
        struct panfrost_ptr T = pan_pool_alloc_desc_array(&batch->pool.base,
                                                          buffer_count, BUFFER);
        struct mali_buffer_packed *buffers = T.cpu;

        u_foreach_bit(i, ctx->vb_mask) {
                struct pipe_vertex_buffer vb = ctx->vertex_buffers[i];
                struct pipe_resource *prsrc = vb.buffer.resource;
                struct panfrost_resource *rsrc = pan_resource(prsrc);
                assert(!vb.is_user_buffer);

                panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);

                pan_pack(buffers + i, BUFFER, cfg) {
                        cfg.address = rsrc->image.data.bo->ptr.gpu +
                                      vb.buffer_offset;

                        cfg.size = prsrc->width0 - vb.buffer_offset;
                }
        }

        return T.gpu;
}
/**
 * Emit Valhall attribute descriptors and associated (vertex) buffer
 * descriptors at draw-time. The attribute descriptors are packed at CSO
 * create time except for the stride field, which must be ORed in at draw
 * time. The buffer descriptors are packed here, though that could be moved
 * into panfrost_set_vertex_buffers if needed.
 */
static mali_ptr
panfrost_emit_vertex_data(struct panfrost_batch *batch)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_vertex_state *vtx = ctx->vertex;
        struct panfrost_ptr T = pan_pool_alloc_desc_array(&batch->pool.base,
                                                          vtx->num_elements,
                                                          ATTRIBUTE);
        struct mali_attribute_packed *attributes = T.cpu;

        for (unsigned i = 0; i < vtx->num_elements; ++i) {
                struct mali_attribute_packed packed;
                unsigned vbi = vtx->pipe[i].vertex_buffer_index;

                pan_pack(&packed, ATTRIBUTE, cfg) {
                        cfg.stride = ctx->vertex_buffers[vbi].stride;
                }

                pan_merge(packed, vtx->attributes[i], ATTRIBUTE);
                attributes[i] = packed;
        }

        return T.gpu;
}
/**
 * Emit Valhall descriptors for shader images. Unlike previous generations,
 * Valhall does not have a special descriptor for images. Standard texture
 * descriptors are used. The binding is different in Gallium, however, so we
 * translate.
 */
static struct pipe_sampler_view
panfrost_pipe_image_to_sampler_view(struct pipe_image_view *v)
{
        struct pipe_sampler_view out = {
                .format = v->format,
                .texture = v->resource,
                .target = v->resource->target,
                .swizzle_r = PIPE_SWIZZLE_X,
                .swizzle_g = PIPE_SWIZZLE_Y,
                .swizzle_b = PIPE_SWIZZLE_Z,
                .swizzle_a = PIPE_SWIZZLE_W
        };

        if (out.target == PIPE_BUFFER) {
                out.u.buf.offset = v->u.buf.offset;
                out.u.buf.size = v->u.buf.size;
        } else {
                out.u.tex.first_layer = v->u.tex.first_layer;
                out.u.tex.last_layer = v->u.tex.last_layer;

                /* Single level only */
                out.u.tex.first_level = v->u.tex.level;
                out.u.tex.last_level = v->u.tex.level;
        }

        return out;
}
static void
panfrost_update_sampler_view(struct panfrost_sampler_view *view,
                             struct pipe_context *pctx);

static mali_ptr
panfrost_emit_images(struct panfrost_batch *batch, enum pipe_shader_type stage)
{
        struct panfrost_context *ctx = batch->ctx;
        unsigned last_bit = util_last_bit(ctx->image_mask[stage]);

        struct panfrost_ptr T =
                pan_pool_alloc_desc_array(&batch->pool.base, last_bit, TEXTURE);

        struct mali_texture_packed *out = (struct mali_texture_packed *) T.cpu;

        for (int i = 0; i < last_bit; ++i) {
                struct pipe_image_view *image = &ctx->images[stage][i];

                if (!(ctx->image_mask[stage] & BITFIELD_BIT(i))) {
                        memset(&out[i], 0, sizeof(out[i]));
                        continue;
                }

                /* Construct a synthetic sampler view so we can use our usual
                 * sampler view code for the actual descriptor packing.
                 *
                 * Use the batch pool for a transient allocation, rather than
                 * allocating a long-lived descriptor.
                 */
                struct panfrost_sampler_view view = {
                        .base = panfrost_pipe_image_to_sampler_view(image),
                        .pool = &batch->pool
                };

                /* If we specify a cube map, the hardware internally treats it
                 * as a 2D array. Since cube maps as images can confuse our
                 * common texturing code, explicitly use a 2D array.
                 *
                 * Similar concerns apply to 3D textures.
                 */
                if (view.base.target == PIPE_BUFFER)
                        view.base.target = PIPE_BUFFER;
                else
                        view.base.target = PIPE_TEXTURE_2D_ARRAY;

                /* Hardware limitation */
                if (view.base.u.tex.first_level != 0)
                        unreachable("TODO: mipmaps special handling");

                panfrost_update_sampler_view(&view, &ctx->base);
                out[i] = view.bifrost_descriptor;

                panfrost_track_image_access(batch, stage, image);
        }

        return T.gpu;
}
static mali_ptr
panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
                                 enum pipe_shader_type st,
                                 struct panfrost_constant_buffer *buf,
                                 unsigned index)
{
        struct pipe_constant_buffer *cb = &buf->cb[index];
        struct panfrost_resource *rsrc = pan_resource(cb->buffer);

        if (rsrc) {
                panfrost_batch_read_rsrc(batch, rsrc, st);

                /* Alignment guaranteed by
                 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
                return rsrc->image.data.bo->ptr.gpu + cb->buffer_offset;
        } else if (cb->user_buffer) {
                return pan_pool_upload_aligned(&batch->pool.base,
                                               cb->user_buffer +
                                               cb->buffer_offset,
                                               cb->buffer_size, 16);
        } else {
                unreachable("No constant buffer");
        }
}
struct sysval_uniform {
        union {
                float f[4];
                int32_t i[4];
                uint32_t u[4];
                uint64_t du[2];
        };
};

static void
panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
                                      struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        const struct pipe_viewport_state *vp = &ctx->pipe_viewport;

        uniform->f[0] = vp->scale[0];
        uniform->f[1] = vp->scale[1];
        uniform->f[2] = vp->scale[2];
}

static void
panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
                                       struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        const struct pipe_viewport_state *vp = &ctx->pipe_viewport;

        uniform->f[0] = vp->translate[0];
        uniform->f[1] = vp->translate[1];
        uniform->f[2] = vp->translate[2];
}
static void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
                                       enum pipe_shader_type st,
                                       unsigned int sysvalid,
                                       struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
        unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
        bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
        struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;

        if (tex->target == PIPE_BUFFER) {
                uniform->i[0] =
                        tex->u.buf.size / util_format_get_blocksize(tex->format);
                return;
        }

        uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);

        if (dim > 1)
                uniform->i[1] = u_minify(tex->texture->height0,
                                         tex->u.tex.first_level);

        if (dim > 2)
                uniform->i[2] = u_minify(tex->texture->depth0,
                                         tex->u.tex.first_level);

        if (is_array) {
                unsigned size = tex->texture->array_size;

                /* Internally, we store the number of 2D images (faces * array
                 * size). Externally, we report the array size in terms of
                 * complete cubes. So divide by the # of faces per cube.
                 */
                if (tex->target == PIPE_TEXTURE_CUBE_ARRAY)
                        size /= 6;

                uniform->i[dim] = size;
        }
}
static void panfrost_upload_image_size_sysval(struct panfrost_batch *batch,
                                              enum pipe_shader_type st,
                                              unsigned int sysvalid,
                                              struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        unsigned idx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
        unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
        unsigned is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);

        assert(dim && dim < 4);

        struct pipe_image_view *image = &ctx->images[st][idx];

        if (image->resource->target == PIPE_BUFFER) {
                unsigned blocksize = util_format_get_blocksize(image->format);
                uniform->i[0] = image->resource->width0 / blocksize;
                return;
        }

        uniform->i[0] = u_minify(image->resource->width0,
                                 image->u.tex.level);

        if (dim > 1)
                uniform->i[1] = u_minify(image->resource->height0,
                                         image->u.tex.level);

        if (dim > 2)
                uniform->i[2] = u_minify(image->resource->depth0,
                                         image->u.tex.level);

        if (is_array)
                uniform->i[dim] = image->resource->array_size;
}
static void
panfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
                            enum pipe_shader_type st,
                            unsigned ssbo_id,
                            struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;

        assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
        struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];

        /* Compute address */
        struct panfrost_resource *rsrc = pan_resource(sb.buffer);
        struct panfrost_bo *bo = rsrc->image.data.bo;

        panfrost_batch_write_rsrc(batch, rsrc, st);

        util_range_add(&rsrc->base, &rsrc->valid_buffer_range,
                       sb.buffer_offset, sb.buffer_size);

        /* Upload address and size as sysval */
        uniform->du[0] = bo->ptr.gpu + sb.buffer_offset;
        uniform->u[2] = sb.buffer_size;
}

static void
panfrost_upload_sampler_sysval(struct panfrost_batch *batch,
                               enum pipe_shader_type st,
                               unsigned samp_idx,
                               struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;

        uniform->f[0] = sampl->min_lod;
        uniform->f[1] = sampl->max_lod;
        uniform->f[2] = sampl->lod_bias;

        /* Even without any errata, Midgard represents "no mipmapping" as
         * fixing the LOD with the clamps; keep behaviour consistent. c.f.
         * panfrost_create_sampler_state which also explains our choice of
         * epsilon value (again to keep behaviour consistent) */
        if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
                uniform->f[1] = uniform->f[0] + (1.0/256.0);
}
static void
panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
                                       struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;

        uniform->u[0] = ctx->compute_grid->grid[0];
        uniform->u[1] = ctx->compute_grid->grid[1];
        uniform->u[2] = ctx->compute_grid->grid[2];
}

static void
panfrost_upload_local_group_size_sysval(struct panfrost_batch *batch,
                                        struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;

        uniform->u[0] = ctx->compute_grid->block[0];
        uniform->u[1] = ctx->compute_grid->block[1];
        uniform->u[2] = ctx->compute_grid->block[2];
}

static void
panfrost_upload_work_dim_sysval(struct panfrost_batch *batch,
                                struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;

        uniform->u[0] = ctx->compute_grid->work_dim;
}
/* Sample positions are pushed in a Bifrost specific format on Bifrost. On
 * Midgard, we emulate the Bifrost path with some extra arithmetic in the
 * shader, to keep the code as unified as possible. */

static void
panfrost_upload_sample_positions_sysval(struct panfrost_batch *batch,
                                        struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_device *dev = pan_device(ctx->base.screen);

        unsigned samples = util_framebuffer_get_num_samples(&batch->key);
        uniform->du[0] = panfrost_sample_positions(dev, panfrost_sample_pattern(samples));
}

static void
panfrost_upload_multisampled_sysval(struct panfrost_batch *batch,
                                    struct sysval_uniform *uniform)
{
        unsigned samples = util_framebuffer_get_num_samples(&batch->key);
        uniform->u[0] = samples > 1;
}
#if PAN_ARCH >= 6
static void
panfrost_upload_rt_conversion_sysval(struct panfrost_batch *batch,
                unsigned size_and_rt, struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_device *dev = pan_device(ctx->base.screen);
        unsigned rt = size_and_rt & 0xF;
        unsigned size = size_and_rt >> 4;

        if (rt < batch->key.nr_cbufs && batch->key.cbufs[rt]) {
                enum pipe_format format = batch->key.cbufs[rt]->format;
                uniform->u[0] =
                        GENX(pan_blend_get_internal_desc)(dev, format, rt, size, false) >> 32;
        } else {
                pan_pack(&uniform->u[0], INTERNAL_CONVERSION, cfg)
                        cfg.memory_format = dev->formats[PIPE_FORMAT_NONE].hw;
        }
}
#endif
static void
panfrost_upload_sysvals(struct panfrost_batch *batch,
                        const struct panfrost_ptr *ptr,
                        struct panfrost_shader_state *ss,
                        enum pipe_shader_type st)
{
        struct sysval_uniform *uniforms = ptr->cpu;

        for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) {
                int sysval = ss->info.sysvals.sysvals[i];

                switch (PAN_SYSVAL_TYPE(sysval)) {
                case PAN_SYSVAL_VIEWPORT_SCALE:
                        panfrost_upload_viewport_scale_sysval(batch,
                                                              &uniforms[i]);
                        break;
                case PAN_SYSVAL_VIEWPORT_OFFSET:
                        panfrost_upload_viewport_offset_sysval(batch,
                                                               &uniforms[i]);
                        break;
                case PAN_SYSVAL_TEXTURE_SIZE:
                        panfrost_upload_txs_sysval(batch, st,
                                                   PAN_SYSVAL_ID(sysval),
                                                   &uniforms[i]);
                        break;
                case PAN_SYSVAL_SSBO:
                        panfrost_upload_ssbo_sysval(batch, st,
                                                    PAN_SYSVAL_ID(sysval),
                                                    &uniforms[i]);
                        break;
                case PAN_SYSVAL_NUM_WORK_GROUPS:
                        for (unsigned j = 0; j < 3; j++) {
                                batch->num_wg_sysval[j] =
                                        ptr->gpu + (i * sizeof(*uniforms)) + (j * 4);
                        }
                        panfrost_upload_num_work_groups_sysval(batch,
                                                               &uniforms[i]);
                        break;
                case PAN_SYSVAL_LOCAL_GROUP_SIZE:
                        panfrost_upload_local_group_size_sysval(batch,
                                                                &uniforms[i]);
                        break;
                case PAN_SYSVAL_WORK_DIM:
                        panfrost_upload_work_dim_sysval(batch,
                                                        &uniforms[i]);
                        break;
                case PAN_SYSVAL_SAMPLER:
                        panfrost_upload_sampler_sysval(batch, st,
                                                       PAN_SYSVAL_ID(sysval),
                                                       &uniforms[i]);
                        break;
                case PAN_SYSVAL_IMAGE_SIZE:
                        panfrost_upload_image_size_sysval(batch, st,
                                                          PAN_SYSVAL_ID(sysval),
                                                          &uniforms[i]);
                        break;
                case PAN_SYSVAL_SAMPLE_POSITIONS:
                        panfrost_upload_sample_positions_sysval(batch,
                                                                &uniforms[i]);
                        break;
                case PAN_SYSVAL_MULTISAMPLED:
                        panfrost_upload_multisampled_sysval(batch,
                                                            &uniforms[i]);
                        break;
#if PAN_ARCH >= 6
                case PAN_SYSVAL_RT_CONVERSION:
                        panfrost_upload_rt_conversion_sysval(batch,
                                        PAN_SYSVAL_ID(sysval), &uniforms[i]);
                        break;
#endif
                case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
                        batch->ctx->first_vertex_sysval_ptr =
                                ptr->gpu + (i * sizeof(*uniforms));
                        batch->ctx->base_vertex_sysval_ptr =
                                batch->ctx->first_vertex_sysval_ptr + 4;
                        batch->ctx->base_instance_sysval_ptr =
                                batch->ctx->first_vertex_sysval_ptr + 8;

                        uniforms[i].u[0] = batch->ctx->offset_start;
                        uniforms[i].u[1] = batch->ctx->base_vertex;
                        uniforms[i].u[2] = batch->ctx->base_instance;
                        break;
                case PAN_SYSVAL_DRAWID:
                        uniforms[i].u[0] = batch->ctx->drawid;
                        break;
                default:
                        assert(0);
                }
        }
}
static const void *
panfrost_map_constant_buffer_cpu(struct panfrost_context *ctx,
                                 struct panfrost_constant_buffer *buf,
                                 unsigned index)
{
        struct pipe_constant_buffer *cb = &buf->cb[index];
        struct panfrost_resource *rsrc = pan_resource(cb->buffer);

        if (rsrc) {
                panfrost_bo_mmap(rsrc->image.data.bo);
                panfrost_flush_writer(ctx, rsrc, "CPU constant buffer mapping");
                panfrost_bo_wait(rsrc->image.data.bo, INT64_MAX, false);

                return rsrc->image.data.bo->ptr.cpu + cb->buffer_offset;
        } else if (cb->user_buffer) {
                return cb->user_buffer + cb->buffer_offset;
        } else {
                unreachable("No constant buffer");
        }
}
static mali_ptr
panfrost_emit_const_buf(struct panfrost_batch *batch,
                        enum pipe_shader_type stage,
                        mali_ptr *push_constants)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_shader_variants *all = ctx->shader[stage];

        if (!all)
                return 0;

        struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];
        struct panfrost_shader_state *ss = &all->variants[all->active_variant];

        /* Allocate room for the sysval and the uniforms */
        size_t sys_size = sizeof(float) * 4 * ss->info.sysvals.sysval_count;
        struct panfrost_ptr transfer =
                pan_pool_alloc_aligned(&batch->pool.base, sys_size, 16);

        /* Upload sysvals requested by the shader */
        panfrost_upload_sysvals(batch, &transfer, ss, stage);

        /* Next up, attach UBOs. UBO count includes gaps but no sysval UBO */
        struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, stage);
        unsigned ubo_count = shader->info.ubo_count - (sys_size ? 1 : 0);
        unsigned sysval_ubo = sys_size ? ubo_count : ~0;

        struct panfrost_ptr ubos =
                pan_pool_alloc_desc_array(&batch->pool.base,
                                          ubo_count + 1,
                                          UNIFORM_BUFFER);

        uint64_t *ubo_ptr = (uint64_t *) ubos.cpu;

        /* Upload sysval as a final UBO */
        if (sys_size) {
                pan_pack(ubo_ptr + ubo_count, UNIFORM_BUFFER, cfg) {
                        cfg.entries = DIV_ROUND_UP(sys_size, 16);
                        cfg.pointer = transfer.gpu;
                }
        }

        /* The rest are honest-to-goodness UBOs */
        u_foreach_bit(ubo, ss->info.ubo_mask & buf->enabled_mask) {
                size_t usz = buf->cb[ubo].buffer_size;

                if (usz == 0) {
                        ubo_ptr[ubo] = 0;
                        continue;
                }

                /* Issue (57) for the ARB_uniform_buffer_object spec says that
                 * the buffer can be larger than the uniform data inside it,
                 * so clamp ubo size to what hardware supports. */
                pan_pack(ubo_ptr + ubo, UNIFORM_BUFFER, cfg) {
                        cfg.entries = MIN2(DIV_ROUND_UP(usz, 16), 1 << 12);
                        cfg.pointer = panfrost_map_constant_buffer_gpu(batch,
                                        stage, buf, ubo);
                }
        }

        if (ss->info.push.count == 0)
                return ubos.gpu;

        /* Copy push constants required by the shader */
        struct panfrost_ptr push_transfer =
                pan_pool_alloc_aligned(&batch->pool.base,
                                       ss->info.push.count * 4, 16);

        uint32_t *push_cpu = (uint32_t *) push_transfer.cpu;
        *push_constants = push_transfer.gpu;

        for (unsigned i = 0; i < ss->info.push.count; ++i) {
                struct panfrost_ubo_word src = ss->info.push.words[i];

                if (src.ubo == sysval_ubo) {
                        unsigned sysval_idx = src.offset / 16;
                        unsigned sysval_comp = (src.offset % 16) / 4;
                        unsigned sysval_type = PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[sysval_idx]);
                        mali_ptr ptr = push_transfer.gpu + (4 * i);

                        switch (sysval_type) {
                        case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
                                switch (sysval_comp) {
                                case 0:
                                        batch->ctx->first_vertex_sysval_ptr = ptr;
                                        break;
                                case 1:
                                        batch->ctx->base_vertex_sysval_ptr = ptr;
                                        break;
                                case 2:
                                        batch->ctx->base_instance_sysval_ptr = ptr;
                                        break;
                                case 3:
                                        /* Spurious (Midgard doesn't pack) */
                                        break;
                                default:
                                        unreachable("Invalid vertex/instance offset component\n");
                                }
                                break;
                        case PAN_SYSVAL_NUM_WORK_GROUPS:
                                batch->num_wg_sysval[sysval_comp] = ptr;
                                break;
                        }
                }

                /* Map the UBO, this should be cheap. However this is reading
                 * from write-combine memory which is _very_ slow. It might pay
                 * off to upload sysvals to a staging buffer on the CPU on the
                 * assumption sysvals will get pushed (TODO) */
                const void *mapped_ubo = (src.ubo == sysval_ubo) ? transfer.cpu :
                        panfrost_map_constant_buffer_cpu(ctx, buf, src.ubo);

                /* TODO: Is there any benefit to combining ranges */
                memcpy(push_cpu + i, (uint8_t *) mapped_ubo + src.offset, 4);
        }

        return ubos.gpu;
}
static mali_ptr
panfrost_emit_shared_memory(struct panfrost_batch *batch,
                            const struct pipe_grid_info *info)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_device *dev = pan_device(ctx->base.screen);
        struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
        struct panfrost_shader_state *ss = &all->variants[all->active_variant];
        struct panfrost_ptr t =
                pan_pool_alloc_desc(&batch->pool.base, LOCAL_STORAGE);

        pan_pack(t.cpu, LOCAL_STORAGE, ls) {
                unsigned wls_single_size =
                        util_next_power_of_two(MAX2(ss->info.wls_size, 128));

                if (ss->info.wls_size) {
                        ls.wls_instances =
                                util_next_power_of_two(info->grid[0]) *
                                util_next_power_of_two(info->grid[1]) *
                                util_next_power_of_two(info->grid[2]);

                        ls.wls_size_scale = util_logbase2(wls_single_size) + 1;
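                        /* The encoded scale is log2(size) + 1; e.g. for the
                         * minimum wls_single_size of 128 bytes,
                         * util_logbase2(128) = 7, so we pack a scale of 8. */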
                        unsigned wls_size = wls_single_size * ls.wls_instances * dev->core_count;

                        ls.wls_base_pointer =
                                (panfrost_batch_get_shared_memory(batch,
                                                                  wls_size,
                                                                  1))->ptr.gpu;
                } else {
                        ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
                }

                if (ss->info.tls_size) {
                        unsigned shift =
                                panfrost_get_stack_shift(ss->info.tls_size);
                        struct panfrost_bo *bo =
                                panfrost_batch_get_scratchpad(batch,
                                                              ss->info.tls_size,
                                                              dev->thread_tls_alloc,
                                                              dev->core_count);
                        ls.tls_size = shift;
                        ls.tls_base_pointer = bo->ptr.gpu;
                }
        }

        return t.gpu;
}
static mali_ptr
panfrost_get_tex_desc(struct panfrost_batch *batch,
                      enum pipe_shader_type st,
                      struct panfrost_sampler_view *view)
{
        if (!view)
                return (mali_ptr) 0;

        struct pipe_sampler_view *pview = &view->base;
        struct panfrost_resource *rsrc = pan_resource(pview->texture);

        panfrost_batch_read_rsrc(batch, rsrc, st);
        panfrost_batch_add_bo(batch, view->state.bo, st);

        return view->state.gpu;
}
static void
panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so,
                                struct pipe_context *pctx,
                                struct pipe_resource *texture)
{
        struct panfrost_device *device = pan_device(pctx->screen);
        struct panfrost_context *ctx = pan_context(pctx);
        struct panfrost_resource *prsrc = (struct panfrost_resource *)texture;
        enum pipe_format format = so->base.format;
        assert(prsrc->image.data.bo);

        /* Format to access the stencil/depth portion of a Z32_S8 texture */
        if (format == PIPE_FORMAT_X32_S8X24_UINT) {
                assert(prsrc->separate_stencil);
                texture = &prsrc->separate_stencil->base;
                prsrc = (struct panfrost_resource *)texture;
                format = texture->format;
        } else if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
                format = PIPE_FORMAT_Z32_FLOAT;
        }

        const struct util_format_description *desc = util_format_description(format);

        bool fake_rgtc = !panfrost_supports_compressed_format(device, MALI_BC4_UNORM);

        if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC && fake_rgtc) {
                if (desc->is_snorm)
                        format = PIPE_FORMAT_R8G8B8A8_SNORM;
                else
                        format = PIPE_FORMAT_R8G8B8A8_UNORM;
                desc = util_format_description(format);
        }

        so->texture_bo = prsrc->image.data.bo->ptr.gpu;
        so->modifier = prsrc->image.layout.modifier;

        /* MSAA only supported for 2D textures */
        assert(texture->nr_samples <= 1 ||
               so->base.target == PIPE_TEXTURE_2D ||
               so->base.target == PIPE_TEXTURE_2D_ARRAY);

        enum mali_texture_dimension type =
                panfrost_translate_texture_dimension(so->base.target);

        bool is_buffer = (so->base.target == PIPE_BUFFER);

        unsigned first_level = is_buffer ? 0 : so->base.u.tex.first_level;
        unsigned last_level = is_buffer ? 0 : so->base.u.tex.last_level;
        unsigned first_layer = is_buffer ? 0 : so->base.u.tex.first_layer;
        unsigned last_layer = is_buffer ? 0 : so->base.u.tex.last_layer;
        unsigned buf_offset = is_buffer ? so->base.u.buf.offset : 0;
        unsigned buf_size = (is_buffer ? so->base.u.buf.size : 0) /
                            util_format_get_blocksize(format);

        if (so->base.target == PIPE_TEXTURE_3D) {
                first_layer /= prsrc->image.layout.depth;
                last_layer /= prsrc->image.layout.depth;
                assert(!first_layer && !last_layer);
        }

        struct pan_image_view iview = {
                .format = format,
                .dim = type,
                .first_level = first_level,
                .last_level = last_level,
                .first_layer = first_layer,
                .last_layer = last_layer,
                .swizzle = {
                        so->base.swizzle_r,
                        so->base.swizzle_g,
                        so->base.swizzle_b,
                        so->base.swizzle_a,
                },
                .image = &prsrc->image,

                .buf.offset = buf_offset,
                .buf.size = buf_size,
        };

        unsigned size =
                (PAN_ARCH <= 5 ? pan_size(TEXTURE) : 0) +
                GENX(panfrost_estimate_texture_payload_size)(&iview);

        struct panfrost_pool *pool = so->pool ?: &ctx->descs;
        struct panfrost_ptr payload = pan_pool_alloc_aligned(&pool->base, size, 64);
        so->state = panfrost_pool_take_ref(&ctx->descs, payload.gpu);

        void *tex = (PAN_ARCH >= 6) ? &so->bifrost_descriptor : payload.cpu;

        if (PAN_ARCH <= 5) {
                payload.cpu += pan_size(TEXTURE);
                payload.gpu += pan_size(TEXTURE);
        }

        GENX(panfrost_new_texture)(device, &iview, tex, &payload);
}
static void
panfrost_update_sampler_view(struct panfrost_sampler_view *view,
                             struct pipe_context *pctx)
{
        struct panfrost_resource *rsrc = pan_resource(view->base.texture);
        if (view->texture_bo != rsrc->image.data.bo->ptr.gpu ||
            view->modifier != rsrc->image.layout.modifier) {
                panfrost_bo_unreference(view->state.bo);
                panfrost_create_sampler_view_bo(view, pctx, &rsrc->base);
        }
}

static mali_ptr
panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
                                  enum pipe_shader_type stage)
{
        struct panfrost_context *ctx = batch->ctx;

        if (!ctx->sampler_view_count[stage])
                return 0;

#if PAN_ARCH >= 6
        struct panfrost_ptr T =
                pan_pool_alloc_desc_array(&batch->pool.base,
                                          ctx->sampler_view_count[stage],
                                          TEXTURE);
        struct mali_texture_packed *out =
                (struct mali_texture_packed *) T.cpu;

        for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
                struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];

                if (!view) {
                        memset(&out[i], 0, sizeof(out[i]));
                        continue;
                }

                struct pipe_sampler_view *pview = &view->base;
                struct panfrost_resource *rsrc = pan_resource(pview->texture);

                panfrost_update_sampler_view(view, &ctx->base);
                out[i] = view->bifrost_descriptor;

                panfrost_batch_read_rsrc(batch, rsrc, stage);
                panfrost_batch_add_bo(batch, view->state.bo, stage);
        }

        return T.gpu;
#else
        uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];

        for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
                struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];

                if (!view) {
                        trampolines[i] = 0;
                        continue;
                }

                panfrost_update_sampler_view(view, &ctx->base);

                trampolines[i] = panfrost_get_tex_desc(batch, stage, view);
        }

        return pan_pool_upload_aligned(&batch->pool.base, trampolines,
                                       sizeof(uint64_t) *
                                       ctx->sampler_view_count[stage],
                                       sizeof(uint64_t));
#endif
}
static mali_ptr
panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
                                  enum pipe_shader_type stage)
{
        struct panfrost_context *ctx = batch->ctx;

        if (!ctx->sampler_count[stage])
                return 0;

        struct panfrost_ptr T =
                pan_pool_alloc_desc_array(&batch->pool.base,
                                          ctx->sampler_count[stage],
                                          SAMPLER);
        struct mali_sampler_packed *out = (struct mali_sampler_packed *) T.cpu;

        for (unsigned i = 0; i < ctx->sampler_count[stage]; ++i) {
                struct panfrost_sampler_state *st = ctx->samplers[stage][i];

                out[i] = st ? st->hw : (struct mali_sampler_packed){0};
        }

        return T.gpu;
}
/* Packs all image attribute descs and attribute buffer descs.
 * `first_image_buf_index` must be the index of the first image attribute
 * buffer descriptor. */
static void
emit_image_attribs(struct panfrost_context *ctx, enum pipe_shader_type shader,
                   struct mali_attribute_packed *attribs, unsigned first_buf)
{
        struct panfrost_device *dev = pan_device(ctx->base.screen);
        unsigned last_bit = util_last_bit(ctx->image_mask[shader]);

        for (unsigned i = 0; i < last_bit; ++i) {
                enum pipe_format format = ctx->images[shader][i].format;

                pan_pack(attribs + i, ATTRIBUTE, cfg) {
                        /* Continuation record means 2 buffers per image */
                        cfg.buffer_index = first_buf + (i * 2);
                        cfg.offset_enable = (PAN_ARCH <= 5);
                        cfg.format = dev->formats[format].hw;
                }
        }
}
static enum mali_attribute_type
pan_modifier_to_attr_type(uint64_t modifier)
{
        switch (modifier) {
        case DRM_FORMAT_MOD_LINEAR:
                return MALI_ATTRIBUTE_TYPE_3D_LINEAR;
        case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED:
                return MALI_ATTRIBUTE_TYPE_3D_INTERLEAVED;
        default:
                unreachable("Invalid modifier for attribute record");
        }
}
static void
emit_image_bufs(struct panfrost_batch *batch, enum pipe_shader_type shader,
                struct mali_attribute_buffer_packed *bufs,
                unsigned first_image_buf_index)
{
        struct panfrost_context *ctx = batch->ctx;
        unsigned last_bit = util_last_bit(ctx->image_mask[shader]);

        for (unsigned i = 0; i < last_bit; ++i) {
                struct pipe_image_view *image = &ctx->images[shader][i];

                if (!(ctx->image_mask[shader] & (1 << i)) ||
                    !(image->shader_access & PIPE_IMAGE_ACCESS_READ_WRITE)) {
                        /* Unused image bindings */
                        pan_pack(bufs + (i * 2), ATTRIBUTE_BUFFER, cfg);
                        pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER, cfg);
                        continue;
                }

                struct panfrost_resource *rsrc = pan_resource(image->resource);

                assert(image->resource->nr_samples <= 1 && "MSAA'd images not supported");

                bool is_3d = rsrc->base.target == PIPE_TEXTURE_3D;
                bool is_buffer = rsrc->base.target == PIPE_BUFFER;

                unsigned offset = is_buffer ? image->u.buf.offset :
                        panfrost_texture_offset(&rsrc->image.layout,
                                                image->u.tex.level,
                                                is_3d ? 0 : image->u.tex.first_layer,
                                                is_3d ? image->u.tex.first_layer : 0);

                panfrost_track_image_access(batch, shader, image);

                pan_pack(bufs + (i * 2), ATTRIBUTE_BUFFER, cfg) {
                        cfg.type = pan_modifier_to_attr_type(rsrc->image.layout.modifier);
                        cfg.pointer = rsrc->image.data.bo->ptr.gpu + offset;
                        cfg.stride = util_format_get_blocksize(image->format);
                        cfg.size = rsrc->image.data.bo->size - offset;
                }

                if (is_buffer) {
                        pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) {
                                cfg.s_dimension = rsrc->base.width0 /
                                                  util_format_get_blocksize(image->format);
                                cfg.t_dimension = cfg.r_dimension = 1;
                        }

                        continue;
                }

                pan_pack(bufs + (i * 2) + 1, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) {
                        unsigned level = image->u.tex.level;

                        cfg.s_dimension = u_minify(rsrc->base.width0, level);
                        cfg.t_dimension = u_minify(rsrc->base.height0, level);
                        cfg.r_dimension = is_3d ?
                                u_minify(rsrc->base.depth0, level) :
                                image->u.tex.last_layer - image->u.tex.first_layer + 1;

                        cfg.row_stride =
                                rsrc->image.layout.slices[level].row_stride;

                        if (rsrc->base.target != PIPE_TEXTURE_2D) {
                                cfg.slice_stride =
                                        panfrost_get_layer_stride(&rsrc->image.layout,
                                                                  level);
                        }
                }
        }
}
static mali_ptr
panfrost_emit_image_attribs(struct panfrost_batch *batch,
                            mali_ptr *buffers,
                            enum pipe_shader_type type)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, type);

        if (!shader->info.attribute_count) {
                *buffers = 0;
                return 0;
        }

        /* Images always need a MALI_ATTRIBUTE_BUFFER_CONTINUATION_3D */
        unsigned attr_count = shader->info.attribute_count;
        unsigned buf_count = (attr_count * 2) + (PAN_ARCH >= 6 ? 1 : 0);

        struct panfrost_ptr bufs =
                pan_pool_alloc_desc_array(&batch->pool.base, buf_count, ATTRIBUTE_BUFFER);

        struct panfrost_ptr attribs =
                pan_pool_alloc_desc_array(&batch->pool.base, attr_count, ATTRIBUTE);

        emit_image_attribs(ctx, type, attribs.cpu, 0);
        emit_image_bufs(batch, type, bufs.cpu, 0);

        /* We need an empty attrib buf to stop the prefetching on Bifrost */
#if PAN_ARCH >= 6
        pan_pack(bufs.cpu + ((buf_count - 1) * pan_size(ATTRIBUTE_BUFFER)),
                 ATTRIBUTE_BUFFER, cfg);
#endif

        *buffers = bufs.gpu;
        return attribs.gpu;
}
1888
static mali_ptr
panfrost_emit_vertex_data(struct panfrost_batch *batch,
                          mali_ptr *buffers)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_vertex_state *so = ctx->vertex;
        struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
        bool instanced = ctx->indirect_draw || ctx->instance_count > 1;
        uint32_t image_mask = ctx->image_mask[PIPE_SHADER_VERTEX];
        unsigned nr_images = util_last_bit(image_mask);

        /* Worst case: everything is NPOT, which is only possible if instancing
         * is enabled. Otherwise a single record is guaranteed. Note that we
         * over-allocate here whenever instancing is enabled or images are
         * present; this can be improved. */
        unsigned bufs_per_attrib = (instanced || nr_images > 0) ? 2 : 1;
        unsigned nr_bufs = ((so->nr_bufs + nr_images) * bufs_per_attrib) +
                           (PAN_ARCH >= 6 ? 1 : 0);

#if PAN_ARCH <= 5
        /* Midgard needs vertexid/instanceid handled specially */
        bool special_vbufs = vs->info.attribute_count >= PAN_VERTEX_ID;

        if (special_vbufs)
                nr_bufs += 2;
#endif

        if (!nr_bufs) {
                *buffers = 0;
                return 0;
        }

        struct panfrost_ptr S =
                pan_pool_alloc_desc_array(&batch->pool.base, nr_bufs,
                                          ATTRIBUTE_BUFFER);
        struct panfrost_ptr T =
                pan_pool_alloc_desc_array(&batch->pool.base,
                                          vs->info.attribute_count,
                                          ATTRIBUTE);

        struct mali_attribute_buffer_packed *bufs =
                (struct mali_attribute_buffer_packed *) S.cpu;

        struct mali_attribute_packed *out =
                (struct mali_attribute_packed *) T.cpu;

        unsigned attrib_to_buffer[PIPE_MAX_ATTRIBS] = { 0 };
        unsigned k = 0;

        for (unsigned i = 0; i < so->nr_bufs; ++i) {
                unsigned vbi = so->buffers[i].vbi;
                unsigned divisor = so->buffers[i].divisor;
                attrib_to_buffer[i] = k;

                if (!(ctx->vb_mask & (1 << vbi)))
                        continue;

                struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
                struct panfrost_resource *rsrc;

                rsrc = pan_resource(buf->buffer.resource);
                if (!rsrc)
                        continue;

                panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);

                /* Mask off lower bits, see offset fixup below */
                mali_ptr raw_addr = rsrc->image.data.bo->ptr.gpu + buf->buffer_offset;
                mali_ptr addr = raw_addr & ~63;

                /* Since we advanced the base pointer, we shrink the buffer
                 * size, but add the offset we subtracted */
                unsigned size = rsrc->base.width0 + (raw_addr - addr)
                                - buf->buffer_offset;

                /* When there is a divisor, the hardware-level divisor is
                 * the product of the instance divisor and the padded count */
                unsigned stride = buf->stride;

                if (ctx->indirect_draw) {
                        /* We allocated 2 records for each attribute buffer */
                        assert((k & 1) == 0);

                        /* With indirect draws we can't guess the vertex_count.
                         * Pre-set the address, stride and size fields; the
                         * compute shader does the rest.
                         */
                        pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
                                cfg.type = MALI_ATTRIBUTE_TYPE_1D;
                                cfg.pointer = addr;
                                cfg.stride = stride;
                                cfg.size = size;
                        }

                        /* We store the unmodified divisor in the continuation
                         * slot so the compute shader can retrieve it.
                         */
                        pan_pack(bufs + k + 1, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
                                cfg.divisor = divisor;
                        }

                        k += 2;
                        continue;
                }

                unsigned hw_divisor = ctx->padded_count * divisor;
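
                /* Illustrative: an instance divisor of 3 with a padded_count
                 * of 4 gives hw_divisor = 12, which is not a power of two, so
                 * the NPOT magic-divisor path below is taken. */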
                if (ctx->instance_count <= 1) {
                        /* Per-instance would be every attribute equal */
                        if (divisor)
                                stride = 0;

                        pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
                                cfg.pointer = addr;
                                cfg.stride = stride;
                                cfg.size = size;
                        }
                } else if (!divisor) {
                        pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
                                cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
                                cfg.pointer = addr;
                                cfg.stride = stride;
                                cfg.size = size;
                                cfg.divisor = ctx->padded_count;
                        }
                } else if (util_is_power_of_two_or_zero(hw_divisor)) {
                        pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
                                cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR;
                                cfg.pointer = addr;
                                cfg.stride = stride;
                                cfg.size = size;
                                cfg.divisor_r = __builtin_ctz(hw_divisor);
                        }
                } else {
                        unsigned shift = 0, extra_flags = 0;

                        unsigned magic_divisor =
                                panfrost_compute_magic_divisor(hw_divisor, &shift, &extra_flags);

                        /* Records with continuations must be aligned */
                        k = ALIGN_POT(k, 2);
                        attrib_to_buffer[i] = k;

                        pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
                                cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR;
                                cfg.pointer = addr;
                                cfg.stride = stride;
                                cfg.size = size;

                                cfg.divisor_r = shift;
                                cfg.divisor_e = extra_flags;
                        }

                        pan_pack(bufs + k + 1, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
                                cfg.divisor_numerator = magic_divisor;
                                cfg.divisor = divisor;
                        }

                        ++k;
                }

                ++k;
        }

#if PAN_ARCH <= 5
        /* Add special gl_VertexID/gl_InstanceID buffers */
        if (special_vbufs) {
                panfrost_vertex_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1);

                pan_pack(out + PAN_VERTEX_ID, ATTRIBUTE, cfg) {
                        cfg.buffer_index = k++;
                        cfg.format = so->formats[PAN_VERTEX_ID];
                }

                panfrost_instance_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1);

                pan_pack(out + PAN_INSTANCE_ID, ATTRIBUTE, cfg) {
                        cfg.buffer_index = k++;
                        cfg.format = so->formats[PAN_INSTANCE_ID];
                }
        }
#endif

        if (nr_images) {
                k = ALIGN_POT(k, 2);
                emit_image_attribs(ctx, PIPE_SHADER_VERTEX, out + so->num_elements, k);
                emit_image_bufs(batch, PIPE_SHADER_VERTEX, bufs + k, k);
                k += (util_last_bit(ctx->image_mask[PIPE_SHADER_VERTEX]) * 2);
        }

#if PAN_ARCH >= 6
        /* We need an empty attrib buf to stop the prefetching on Bifrost */
        pan_pack(&bufs[k], ATTRIBUTE_BUFFER, cfg);
#endif

        /* Attribute addresses require 64-byte alignment, so let:
         *
         *      base' = base & ~63 = base - (base & 63)
         *      offset' = offset + (base & 63)
         *
         * Since base' + offset' = base + offset, these are equivalent
         * addressing modes and now base is 64 aligned.
         */
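
        /* Worked example (illustrative): base = 0x1005, offset = 0x10 gives
         * base' = 0x1000 and offset' = 0x15; both pairs sum to 0x1015. */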
        for (unsigned i = 0; i < so->num_elements; ++i) {
                unsigned vbi = so->pipe[i].vertex_buffer_index;
                struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];

                /* BOs are aligned; just fixup for buffer_offset */
                signed src_offset = so->pipe[i].src_offset;
                src_offset += (buf->buffer_offset & 63);

                /* Base instance offset */
                if (ctx->base_instance && so->pipe[i].instance_divisor) {
                        src_offset += (ctx->base_instance * buf->stride) /
                                      so->pipe[i].instance_divisor;
                }

                /* Also, somewhat obscurely, per-instance data needs to be
                 * offset in response to a delayed start in an indexed draw */

                if (so->pipe[i].instance_divisor && ctx->instance_count > 1)
                        src_offset -= buf->stride * ctx->offset_start;

                pan_pack(out + i, ATTRIBUTE, cfg) {
                        cfg.buffer_index = attrib_to_buffer[so->element_buffer[i]];
                        cfg.format = so->formats[i];
                        cfg.offset = src_offset;
                }
        }

        *buffers = S.gpu;
        return T.gpu;
}
static mali_ptr
panfrost_emit_varyings(struct panfrost_batch *batch,
                       struct mali_attribute_buffer_packed *slot,
                       unsigned stride, unsigned count)
{
        unsigned size = stride * count;
        mali_ptr ptr =
                batch->ctx->indirect_draw ? 0 :
                pan_pool_alloc_aligned(&batch->invisible_pool.base, size, 64).gpu;

        pan_pack(slot, ATTRIBUTE_BUFFER, cfg) {
                cfg.stride = stride;
                cfg.size = size;
                cfg.pointer = ptr;
        }

        return ptr;
}
static unsigned
panfrost_xfb_offset(unsigned stride, struct pipe_stream_output_target *target)
{
        return target->buffer_offset + (pan_so_target(target)->offset * stride);
}
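
/* E.g. (illustrative) a target bound at buffer_offset 256 that has already
 * captured 10 vertices of a 16-byte-stride varying resumes writing at byte
 * 256 + 10 * 16 = 416. */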

static void
panfrost_emit_streamout(struct panfrost_batch *batch,
                        struct mali_attribute_buffer_packed *slot,
                        unsigned stride, unsigned count,
                        struct pipe_stream_output_target *target)
{
        unsigned max_size = target->buffer_size;
        unsigned expected_size = stride * count;

        /* Grab the BO and bind it to the batch */
        struct panfrost_resource *rsrc = pan_resource(target->buffer);
        struct panfrost_bo *bo = rsrc->image.data.bo;

        panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
        panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_FRAGMENT);

        unsigned offset = panfrost_xfb_offset(stride, target);

        pan_pack(slot, ATTRIBUTE_BUFFER, cfg) {
                cfg.pointer = bo->ptr.gpu + (offset & ~63);
                cfg.stride = stride;
                cfg.size = MIN2(max_size, expected_size) + (offset & 63);

                util_range_add(&rsrc->base, &rsrc->valid_buffer_range,
                               offset, offset + cfg.size);
        }
}
/* Helpers for manipulating stream out information so we can pack varyings
 * accordingly. Compute the src_offset for a given captured varying */

static struct pipe_stream_output *
pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
{
        for (unsigned i = 0; i < info->num_outputs; ++i) {
                if (info->output[i].register_index == loc)
                        return &info->output[i];
        }

        unreachable("Varying not captured");
}
/* Given a varying, figure out which index it corresponds to */

static inline unsigned
pan_varying_index(unsigned present, enum pan_special_varying v)
{
        return util_bitcount(present & BITFIELD_MASK(v));
}
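
/* E.g. (illustrative) with present = GENERAL | POSITION | PSIZ, the PSIZ
 * record compacts to index 2, since exactly two enabled bits (GENERAL and
 * POSITION) sit below it. */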

/* Get the base offset for XFB buffers, which by convention come after
 * everything else. Wrapper function for semantic reasons; by construction this
 * is just popcount. */

static inline unsigned
pan_xfb_base(unsigned present)
{
        return util_bitcount(present);
}
/* Determines which varying buffers are required */

static inline unsigned
pan_varying_present(const struct panfrost_device *dev,
                    struct pan_shader_info *producer,
                    struct pan_shader_info *consumer,
                    uint16_t point_coord_mask)
{
        /* At the moment we always emit general and position buffers. Not
         * strictly necessary but usually harmless */

        unsigned present = BITFIELD_BIT(PAN_VARY_GENERAL) | BITFIELD_BIT(PAN_VARY_POSITION);

        /* Enable special buffers by the shader info */

        if (producer->vs.writes_point_size)
                present |= BITFIELD_BIT(PAN_VARY_PSIZ);

#if PAN_ARCH <= 5
        /* On Midgard, these exist as real varyings. Later architectures use
         * LD_VAR_SPECIAL reads instead. */

        if (consumer->fs.reads_point_coord)
                present |= BITFIELD_BIT(PAN_VARY_PNTCOORD);

        if (consumer->fs.reads_face)
                present |= BITFIELD_BIT(PAN_VARY_FACE);

        if (consumer->fs.reads_frag_coord)
                present |= BITFIELD_BIT(PAN_VARY_FRAGCOORD);

        /* Also, if we have a point sprite, we need a point coord buffer */

        for (unsigned i = 0; i < consumer->varyings.input_count; i++) {
                gl_varying_slot loc = consumer->varyings.input[i].location;

                if (util_varying_is_point_coord(loc, point_coord_mask))
                        present |= BITFIELD_BIT(PAN_VARY_PNTCOORD);
        }
#endif

        return present;
}
/* Emitters for varying records */

static void
pan_emit_vary(const struct panfrost_device *dev,
              struct mali_attribute_packed *out,
              unsigned buffer_index,
              mali_pixel_format format, unsigned offset)
{
        pan_pack(out, ATTRIBUTE, cfg) {
                cfg.buffer_index = buffer_index;
                cfg.offset_enable = (PAN_ARCH <= 5);
                cfg.format = format;
                cfg.offset = offset;
        }
}
/* Special records */

static const struct {
        unsigned components;
        enum mali_format format;
} pan_varying_formats[PAN_VARY_MAX] = {
        [PAN_VARY_POSITION]  = { 4, MALI_SNAP_4 },
        [PAN_VARY_PSIZ]      = { 1, MALI_R16F },
        [PAN_VARY_PNTCOORD]  = { 1, MALI_R16F },
        [PAN_VARY_FACE]      = { 1, MALI_R32I },
        [PAN_VARY_FRAGCOORD] = { 4, MALI_RGBA32F },
};
static mali_pixel_format
pan_special_format(const struct panfrost_device *dev,
                   enum pan_special_varying buf)
{
        assert(buf < PAN_VARY_MAX);
        mali_pixel_format format = (pan_varying_formats[buf].format << 12);

#if PAN_ARCH <= 6
        unsigned nr = pan_varying_formats[buf].components;
        format |= panfrost_get_default_swizzle(nr);
#endif

        return format;
}
static void
pan_emit_vary_special(const struct panfrost_device *dev,
                      struct mali_attribute_packed *out,
                      unsigned present, enum pan_special_varying buf)
{
        pan_emit_vary(dev, out, pan_varying_index(present, buf),
                      pan_special_format(dev, buf), 0);
}
/* Negative indicates a varying is not found */

static signed
pan_find_vary(const struct pan_shader_varying *vary,
              unsigned vary_count, unsigned loc)
{
        for (unsigned i = 0; i < vary_count; ++i) {
                if (vary[i].location == loc)
                        return i;
        }

        return -1;
}
/* Assign varying locations for the general buffer. Returns the calculated
 * per-vertex stride, and outputs offsets into the passed array. Negative
 * offset indicates a varying is not used. */

static unsigned
pan_assign_varyings(const struct panfrost_device *dev,
                    struct pan_shader_info *producer,
                    struct pan_shader_info *consumer,
                    signed *offsets)
{
        unsigned producer_count = producer->varyings.output_count;
        unsigned consumer_count = consumer->varyings.input_count;

        const struct pan_shader_varying *producer_vars = producer->varyings.output;
        const struct pan_shader_varying *consumer_vars = consumer->varyings.input;

        unsigned stride = 0;

        for (unsigned i = 0; i < producer_count; ++i) {
                signed loc = pan_find_vary(consumer_vars, consumer_count,
                                           producer_vars[i].location);

                if (loc >= 0) {
                        offsets[i] = stride;

                        enum pipe_format format = consumer_vars[loc].format;
                        stride += util_format_get_blocksize(format);
                } else {
                        offsets[i] = -1;
                }
        }

        return stride;
}
/* Emitter for a single varying (attribute) descriptor */

static void
panfrost_emit_varying(const struct panfrost_device *dev,
                      struct mali_attribute_packed *out,
                      const struct pan_shader_varying varying,
                      enum pipe_format pipe_format,
                      unsigned present,
                      uint16_t point_sprite_mask,
                      struct pipe_stream_output_info *xfb,
                      uint64_t xfb_loc_mask,
                      unsigned max_xfb,
                      unsigned *xfb_offsets,
                      signed offset,
                      enum pan_special_varying pos_varying)
{
        /* Note: varying.format != pipe_format in some obscure cases due to a
         * limitation of the NIR linker. This should be fixed in the future to
         * eliminate the additional lookups. See:
         * dEQP-GLES3.functional.shaders.conditionals.if.sequence_statements_vertex
         */
        gl_varying_slot loc = varying.location;
        mali_pixel_format format = dev->formats[pipe_format].hw;

        struct pipe_stream_output *o = (xfb_loc_mask & BITFIELD64_BIT(loc)) ?
                pan_get_so(xfb, loc) : NULL;

        if (util_varying_is_point_coord(loc, point_sprite_mask)) {
                pan_emit_vary_special(dev, out, present, PAN_VARY_PNTCOORD);
        } else if (o && o->output_buffer < max_xfb) {
                unsigned fixup_offset = xfb_offsets[o->output_buffer] & 63;

                pan_emit_vary(dev, out,
                              pan_xfb_base(present) + o->output_buffer,
                              format, (o->dst_offset * 4) + fixup_offset);
        } else if (loc == VARYING_SLOT_POS) {
                pan_emit_vary_special(dev, out, present, pos_varying);
        } else if (loc == VARYING_SLOT_PSIZ) {
                pan_emit_vary_special(dev, out, present, PAN_VARY_PSIZ);
        } else if (loc == VARYING_SLOT_FACE) {
                pan_emit_vary_special(dev, out, present, PAN_VARY_FACE);
        } else if (offset < 0) {
                pan_emit_vary(dev, out, 0, (MALI_CONSTANT << 12), 0);
        } else {
                STATIC_ASSERT(PAN_VARY_GENERAL == 0);
                pan_emit_vary(dev, out, 0, format, offset);
        }
}
/* Links varyings and uploads ATTRIBUTE descriptors. Can execute at link time,
 * rather than draw time (under good conditions). */

static void
panfrost_emit_varying_descs(
        struct panfrost_pool *pool,
        struct panfrost_shader_state *producer,
        struct panfrost_shader_state *consumer,
        struct panfrost_streamout *xfb,
        uint16_t point_coord_mask,
        struct pan_linkage *out)
{
        struct panfrost_device *dev = pool->base.dev;
        struct pipe_stream_output_info *xfb_info = &producer->stream_output;
        unsigned producer_count = producer->info.varyings.output_count;
        unsigned consumer_count = consumer->info.varyings.input_count;

        /* Offsets within the general varying buffer, indexed by location */
        signed offsets[PAN_MAX_VARYINGS];
        assert(producer_count <= ARRAY_SIZE(offsets));
        assert(consumer_count <= ARRAY_SIZE(offsets));

        /* Allocate enough descriptors for both shader stages */
        struct panfrost_ptr T =
                pan_pool_alloc_desc_array(&pool->base,
                                          producer_count + consumer_count,
                                          ATTRIBUTE);

        /* Take a reference if we're being put on the CSO */
        if (!pool->owned) {
                out->bo = pool->transient_bo;
                panfrost_bo_reference(out->bo);
        }

        struct mali_attribute_packed *descs = T.cpu;
        out->producer = producer_count ? T.gpu : 0;
        out->consumer = consumer_count ? T.gpu +
                        (pan_size(ATTRIBUTE) * producer_count) : 0;

        /* Lay out the varyings. Must use producer to lay out, in order to
         * respect transform feedback precisions. */
        out->present = pan_varying_present(dev, &producer->info,
                                           &consumer->info, point_coord_mask);

        out->stride = pan_assign_varyings(dev, &producer->info,
                                          &consumer->info, offsets);

        unsigned xfb_offsets[PIPE_MAX_SO_BUFFERS];

        for (unsigned i = 0; i < xfb->num_targets; ++i) {
                xfb_offsets[i] = panfrost_xfb_offset(xfb_info->stride[i] * 4,
                                                     xfb->targets[i]);
        }

        for (unsigned i = 0; i < producer_count; ++i) {
                signed j = pan_find_vary(consumer->info.varyings.input,
                                         consumer->info.varyings.input_count,
                                         producer->info.varyings.output[i].location);

                enum pipe_format format = (j >= 0) ?
                        consumer->info.varyings.input[j].format :
                        producer->info.varyings.output[i].format;

                panfrost_emit_varying(dev, descs + i,
                                      producer->info.varyings.output[i], format,
                                      out->present, 0, &producer->stream_output,
                                      producer->so_mask, xfb->num_targets,
                                      xfb_offsets, offsets[i], PAN_VARY_POSITION);
        }

        for (unsigned i = 0; i < consumer_count; ++i) {
                signed j = pan_find_vary(producer->info.varyings.output,
                                         producer->info.varyings.output_count,
                                         consumer->info.varyings.input[i].location);

                signed offset = (j >= 0) ? offsets[j] : -1;

                panfrost_emit_varying(dev, descs + producer_count + i,
                                      consumer->info.varyings.input[i],
                                      consumer->info.varyings.input[i].format,
                                      out->present, point_coord_mask,
                                      &producer->stream_output, producer->so_mask,
                                      xfb->num_targets, xfb_offsets, offset,
                                      PAN_VARY_FRAGCOORD);
        }
}
static void
pan_emit_special_input(struct mali_attribute_buffer_packed *out,
                       unsigned present,
                       enum pan_special_varying v,
                       unsigned special)
{
        if (present & BITFIELD_BIT(v)) {
                unsigned idx = pan_varying_index(present, v);

                pan_pack(out + idx, ATTRIBUTE_BUFFER, cfg) {
                        cfg.special = special;
                        cfg.type = 0;
                }
        }
}
static void
panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
                                 unsigned vertex_count,
                                 mali_ptr *vs_attribs,
                                 mali_ptr *fs_attribs,
                                 mali_ptr *buffers,
                                 unsigned *buffer_count,
                                 mali_ptr *position,
                                 mali_ptr *psiz,
                                 bool point_coord_replace)
{
        /* Load the shaders */
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_shader_state *vs, *fs;

        vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
        fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);

        uint16_t point_coord_mask = 0;

#if PAN_ARCH <= 5
        /* Point sprites are lowered on Bifrost and newer */
        if (point_coord_replace)
                point_coord_mask = ctx->rasterizer->base.sprite_coord_enable;
#endif

        /* In good conditions, we only need to link varyings once */
        bool prelink =
                (point_coord_mask == 0) &&
                (ctx->streamout.num_targets == 0) &&
                !vs->info.separable &&
                !fs->info.separable;
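
        /* Illustrative: a non-separable VS/FS pair with no point sprites and
         * no transform feedback meets all four conditions, so the linkage is
         * computed once and cached on the vertex shader CSO instead of being
         * re-emitted every draw. */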

        /* Try to reduce copies */
        struct pan_linkage _linkage;
        struct pan_linkage *linkage = prelink ? &vs->linkage : &_linkage;

        /* Emit ATTRIBUTE descriptors if needed */
        if (!prelink || vs->linkage.bo == NULL) {
                struct panfrost_pool *pool =
                        prelink ? &ctx->descs : &batch->pool;

                panfrost_emit_varying_descs(pool, vs, fs, &ctx->streamout, point_coord_mask, linkage);
        }

        struct pipe_stream_output_info *so = &vs->stream_output;
        unsigned present = linkage->present, stride = linkage->stride;
        unsigned xfb_base = pan_xfb_base(present);
        struct panfrost_ptr T =
                pan_pool_alloc_desc_array(&batch->pool.base,
                                          xfb_base +
                                          ctx->streamout.num_targets + 1,
                                          ATTRIBUTE_BUFFER);
        struct mali_attribute_buffer_packed *varyings =
                (struct mali_attribute_buffer_packed *) T.cpu;

        if (buffer_count)
                *buffer_count = xfb_base + ctx->streamout.num_targets;

#if PAN_ARCH >= 6
        /* Suppress prefetch on Bifrost */
        memset(varyings + (xfb_base + ctx->streamout.num_targets), 0, sizeof(*varyings));
#endif

        /* Emit the stream out buffers. We need enough room for all the
         * vertices we emit across all instances */

        unsigned out_count = ctx->instance_count *
                u_stream_outputs_for_vertices(ctx->active_prim, ctx->vertex_count);

        for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
                panfrost_emit_streamout(batch, &varyings[xfb_base + i],
                                        so->stride[i] * 4, out_count,
                                        ctx->streamout.targets[i]);
        }

        if (stride) {
                panfrost_emit_varyings(batch,
                                       &varyings[pan_varying_index(present, PAN_VARY_GENERAL)],
                                       stride, vertex_count);
        }

        /* fp32 vec4 gl_Position */
        *position = panfrost_emit_varyings(batch,
                                           &varyings[pan_varying_index(present, PAN_VARY_POSITION)],
                                           sizeof(float) * 4, vertex_count);

        if (present & BITFIELD_BIT(PAN_VARY_PSIZ)) {
                *psiz = panfrost_emit_varyings(batch,
                                               &varyings[pan_varying_index(present, PAN_VARY_PSIZ)],
                                               2, vertex_count);
        }

#if PAN_ARCH <= 5
        pan_emit_special_input(varyings, present,
                               PAN_VARY_PNTCOORD, MALI_ATTRIBUTE_SPECIAL_POINT_COORD);
        pan_emit_special_input(varyings, present, PAN_VARY_FACE,
                               MALI_ATTRIBUTE_SPECIAL_FRONT_FACING);
        pan_emit_special_input(varyings, present, PAN_VARY_FRAGCOORD,
                               MALI_ATTRIBUTE_SPECIAL_FRAG_COORD);
#endif

        *buffers = T.gpu;
        *vs_attribs = linkage->producer;
        *fs_attribs = linkage->consumer;
}
static void
panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
                                const struct panfrost_ptr *vertex_job,
                                const struct panfrost_ptr *tiler_job)
{
        struct panfrost_context *ctx = batch->ctx;

        /* If rasterizer discard is enabled, only submit the vertex job. XXX -
         * set job_barrier in case buffers get ping-ponged and we need to
         * enforce ordering, this has a perf hit! See
         * KHR-GLES31.core.vertex_attrib_binding.advanced-iterations */

        unsigned vertex = panfrost_add_job(&batch->pool.base, &batch->scoreboard,
                                           MALI_JOB_TYPE_VERTEX, true, false,
                                           ctx->indirect_draw ?
                                           batch->indirect_draw_job_id : 0,
                                           0, vertex_job, false);

        if (panfrost_batch_skip_rasterization(batch))
                return;

        panfrost_add_job(&batch->pool.base, &batch->scoreboard,
                         MALI_JOB_TYPE_TILER, false, false,
                         vertex, 0, tiler_job, false);
}
static void
emit_tls(struct panfrost_batch *batch)
{
        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);

        /* Emitted with the FB descriptor on Midgard. */
        if (PAN_ARCH <= 5 && batch->framebuffer.gpu)
                return;

        struct panfrost_bo *tls_bo =
                batch->stack_size ?
                panfrost_batch_get_scratchpad(batch,
                                              batch->stack_size,
                                              dev->thread_tls_alloc,
                                              dev->core_count) :
                NULL;
        struct pan_tls_info tls = {
                .tls = {
                        .ptr = tls_bo ? tls_bo->ptr.gpu : 0,
                        .size = batch->stack_size,
                },
        };

        assert(batch->tls.cpu);
        GENX(pan_emit_tls)(&tls, batch->tls.cpu);
}
static void
emit_fbd(struct panfrost_batch *batch, const struct pan_fb_info *fb)
{
        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
        struct panfrost_bo *tls_bo =
                batch->stack_size ?
                panfrost_batch_get_scratchpad(batch,
                                              batch->stack_size,
                                              dev->thread_tls_alloc,
                                              dev->core_count) :
                NULL;
        struct pan_tls_info tls = {
                .tls = {
                        .ptr = tls_bo ? tls_bo->ptr.gpu : 0,
                        .size = batch->stack_size,
                },
        };

        batch->framebuffer.gpu |=
                GENX(pan_emit_fbd)(dev, fb, &tls, &batch->tiler_ctx,
                                   batch->framebuffer.cpu);
}
/* Mark a surface as written */

static void
panfrost_initialize_surface(struct panfrost_batch *batch,
                            struct pipe_surface *surf)
{
        if (surf) {
                struct panfrost_resource *rsrc = pan_resource(surf->texture);
                BITSET_SET(rsrc->valid.data, surf->u.tex.level);
        }
}
/* Generate a fragment job. This should be called once per frame. (Usually,
 * this corresponds to eglSwapBuffers or one of glFlush, glFinish) */

static mali_ptr
emit_fragment_job(struct panfrost_batch *batch, const struct pan_fb_info *pfb)
{
        /* Mark the affected buffers as initialized, since we're writing to
         * them. Also, add the written surfaces to the batch. */

        struct pipe_framebuffer_state *fb = &batch->key;

        for (unsigned i = 0; i < fb->nr_cbufs; ++i)
                panfrost_initialize_surface(batch, fb->cbufs[i]);

        panfrost_initialize_surface(batch, fb->zsbuf);

        /* The passed tile coords can be out of range in some cases, so we need
         * to clamp them to the framebuffer size to avoid a TILE_RANGE_FAULT.
         * Theoretically we also need to clamp the coordinates positive, but we
         * avoid that edge case as all four values are unsigned. Also,
         * theoretically we could clamp the minima, but if that has to happen
         * the asserts would fail anyway (since the maxima would get clamped
         * and then be smaller than the minima). An edge case of sorts occurs
         * when no scissors are added to draw, so by default min=~0 and max=0.
         * But that can't happen if any actual drawing occurs (beyond a
         * wallpaper reload), so this is again irrelevant in practice. */

        batch->maxx = MIN2(batch->maxx, fb->width);
        batch->maxy = MIN2(batch->maxy, fb->height);

        /* Rendering region must be at least 1x1; otherwise, there is nothing
         * to do and the whole job chain should have been discarded. */

        assert(batch->maxx > batch->minx);
        assert(batch->maxy > batch->miny);

        struct panfrost_ptr transfer =
                pan_pool_alloc_desc(&batch->pool.base, FRAGMENT_JOB);

        GENX(pan_emit_fragment_job)(pfb, batch->framebuffer.gpu,
                                    transfer.cpu);

        return transfer.gpu;
}
#define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_DRAW_MODE_##c;

static uint8_t
pan_draw_mode(enum pipe_prim_type mode)
{
        switch (mode) {
                DEFINE_CASE(POINTS);
                DEFINE_CASE(LINES);
                DEFINE_CASE(LINE_LOOP);
                DEFINE_CASE(LINE_STRIP);
                DEFINE_CASE(TRIANGLES);
                DEFINE_CASE(TRIANGLE_STRIP);
                DEFINE_CASE(TRIANGLE_FAN);
                DEFINE_CASE(QUADS);
                DEFINE_CASE(POLYGON);
#if PAN_ARCH <= 6
                DEFINE_CASE(QUAD_STRIP);
#endif

        default:
                unreachable("Invalid draw mode");
        }
}

#undef DEFINE_CASE
/* Count generated primitives (when there are no geometry/tessellation
 * shaders) for transform feedback */

static void
panfrost_statistics_record(
                struct panfrost_context *ctx,
                const struct pipe_draw_info *info,
                const struct pipe_draw_start_count_bias *draw)
{
        if (!ctx->active_queries)
                return;

        uint32_t prims = u_prims_for_vertices(info->mode, draw->count);
        ctx->prims_generated += prims;

        if (!ctx->streamout.num_targets)
                return;

        ctx->tf_prims_generated += prims;
}
static void
panfrost_update_streamout_offsets(struct panfrost_context *ctx)
{
        for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
                unsigned count;

                count = u_stream_outputs_for_vertices(ctx->active_prim,
                                                      ctx->vertex_count);
                pan_so_target(ctx->streamout.targets[i])->offset += count;
        }
}
static inline enum mali_index_type
panfrost_translate_index_size(unsigned size)
{
        STATIC_ASSERT(MALI_INDEX_TYPE_NONE == 0);
        STATIC_ASSERT(MALI_INDEX_TYPE_UINT8 == 1);
        STATIC_ASSERT(MALI_INDEX_TYPE_UINT16 == 2);

        return (size == 4) ? MALI_INDEX_TYPE_UINT32 : size;
}
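
/* Sizes 0, 1 and 2 coincide with the corresponding hardware enum values
 * (hence the static asserts above), so only the 4-byte case needs an
 * explicit translation. */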

static void
pan_emit_draw_descs(struct panfrost_batch *batch,
                    struct MALI_DRAW *d, enum pipe_shader_type st)
{
        d->offset_start = batch->ctx->offset_start;
        d->instance_size = batch->ctx->instance_count > 1 ?
                           batch->ctx->padded_count : 1;

        d->uniform_buffers = batch->uniform_buffers[st];
        d->push_uniforms = batch->push_uniforms[st];
        d->textures = batch->textures[st];
        d->samplers = batch->samplers[st];
}
static void
panfrost_draw_emit_vertex_section(struct panfrost_batch *batch,
                                  mali_ptr vs_vary, mali_ptr varyings,
                                  mali_ptr attribs, mali_ptr attrib_bufs,
                                  void *section)
{
        pan_pack(section, DRAW, cfg) {
                cfg.draw_descriptor_is_64b = true;
                cfg.state = batch->rsd[PIPE_SHADER_VERTEX];
                cfg.attributes = attribs;
                cfg.attribute_buffers = attrib_bufs;
                cfg.varyings = vs_vary;
                cfg.varying_buffers = vs_vary ? varyings : 0;
                cfg.thread_storage = batch->tls.gpu;
                pan_emit_draw_descs(batch, &cfg, PIPE_SHADER_VERTEX);
        }
}
static void
panfrost_draw_emit_vertex(struct panfrost_batch *batch,
                          const struct pipe_draw_info *info,
                          void *invocation_template,
                          mali_ptr vs_vary, mali_ptr varyings,
                          mali_ptr attribs, mali_ptr attrib_bufs,
                          void *job)
{
        void *section =
                pan_section_ptr(job, COMPUTE_JOB, INVOCATION);
        memcpy(section, invocation_template, pan_size(INVOCATION));

        pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) {
                cfg.job_task_split = 5;
        }

        section = pan_section_ptr(job, COMPUTE_JOB, DRAW);
        panfrost_draw_emit_vertex_section(batch, vs_vary, varyings,
                                          attribs, attrib_bufs, section);
}
static void
panfrost_emit_primitive_size(struct panfrost_context *ctx,
                             bool points, mali_ptr size_array,
                             void *prim_size)
{
        struct panfrost_rasterizer *rast = ctx->rasterizer;

        pan_pack(prim_size, PRIMITIVE_SIZE, cfg) {
                if (panfrost_writes_point_size(ctx)) {
                        cfg.size_array = size_array;
                } else {
                        cfg.constant = points ?
                                       rast->base.point_size :
                                       rast->base.line_width;
                }
        }
}
static bool
panfrost_is_implicit_prim_restart(const struct pipe_draw_info *info)
{
        unsigned implicit_index = BITFIELD_MASK(info->index_size * 8);
        bool implicit = info->restart_index == implicit_index;
        return info->primitive_restart && implicit;
}
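
/* E.g. a 16-bit index buffer has an implicit restart index of
 * BITFIELD_MASK(16) = 0xFFFF; a draw restarting on exactly that value can
 * take the cheaper implicit hardware mode. */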

static void
panfrost_update_state_tex(struct panfrost_batch *batch,
                          enum pipe_shader_type st)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);

        unsigned dirty_3d = ctx->dirty;
        unsigned dirty = ctx->dirty_shader[st];

        if (dirty & PAN_DIRTY_STAGE_TEXTURE) {
                batch->textures[st] =
                        panfrost_emit_texture_descriptors(batch, st);
        }

        if (dirty & PAN_DIRTY_STAGE_SAMPLER) {
                batch->samplers[st] =
                        panfrost_emit_sampler_descriptors(batch, st);
        }

        if ((dirty & ss->dirty_shader) || (dirty_3d & ss->dirty_3d)) {
                batch->uniform_buffers[st] = panfrost_emit_const_buf(batch, st,
                                &batch->push_uniforms[st]);
        }
}
static inline void
panfrost_update_state_3d(struct panfrost_batch *batch)
{
        unsigned dirty = batch->ctx->dirty;

        if (dirty & PAN_DIRTY_TLS_SIZE)
                panfrost_batch_adjust_stack_size(batch);
}
static void
panfrost_update_state_vs(struct panfrost_batch *batch)
{
        enum pipe_shader_type st = PIPE_SHADER_VERTEX;
        unsigned dirty = batch->ctx->dirty_shader[st];

        if (dirty & PAN_DIRTY_STAGE_RENDERER)
                batch->rsd[st] = panfrost_emit_compute_shader_meta(batch, st);

        panfrost_update_state_tex(batch, st);
}
static void
panfrost_update_state_fs(struct panfrost_batch *batch)
{
        enum pipe_shader_type st = PIPE_SHADER_FRAGMENT;
        unsigned dirty = batch->ctx->dirty_shader[st];

        if (dirty & PAN_DIRTY_STAGE_RENDERER)
                batch->rsd[st] = panfrost_emit_frag_shader_meta(batch);

        if (dirty & PAN_DIRTY_STAGE_IMAGE) {
                batch->attribs[st] = panfrost_emit_image_attribs(batch,
                                &batch->attrib_bufs[st], st);
        }

        panfrost_update_state_tex(batch, st);
}
#if PAN_ARCH >= 6
static mali_ptr
panfrost_batch_get_bifrost_tiler(struct panfrost_batch *batch, unsigned vertex_count)
{
        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);

        if (!vertex_count)
                return 0;

        if (batch->tiler_ctx.bifrost)
                return batch->tiler_ctx.bifrost;

        struct panfrost_ptr t =
                pan_pool_alloc_desc(&batch->pool.base, TILER_HEAP);

        GENX(pan_emit_tiler_heap)(dev, t.cpu);

        mali_ptr heap = t.gpu;

        t = pan_pool_alloc_desc(&batch->pool.base, TILER_CONTEXT);
        GENX(pan_emit_tiler_ctx)(dev, batch->key.width, batch->key.height,
                                 util_framebuffer_get_num_samples(&batch->key),
                                 heap, t.cpu);

        batch->tiler_ctx.bifrost = t.gpu;
        return batch->tiler_ctx.bifrost;
}
#endif
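
/* Note: the heap and context descriptors above are allocated lazily and
 * cached on the batch, so every draw within a batch shares a single tiler
 * context. */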

static void
panfrost_draw_emit_tiler(struct panfrost_batch *batch,
                         const struct pipe_draw_info *info,
                         const struct pipe_draw_start_count_bias *draw,
                         void *invocation_template,
                         mali_ptr indices, mali_ptr fs_vary, mali_ptr varyings,
                         mali_ptr pos, mali_ptr psiz, bool secondary_shader,
                         void *job)
{
        struct panfrost_context *ctx = batch->ctx;
        struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;

        void *section = pan_section_ptr(job, TILER_JOB, INVOCATION);
        memcpy(section, invocation_template, pan_size(INVOCATION));

        section = pan_section_ptr(job, TILER_JOB, PRIMITIVE);
        pan_pack(section, PRIMITIVE, cfg) {
                cfg.draw_mode = pan_draw_mode(info->mode);
                if (panfrost_writes_point_size(ctx))
                        cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;

                /* For line primitives, PRIMITIVE.first_provoking_vertex must
                 * be set to true and the provoking vertex is selected with
                 * DRAW.flat_shading_vertex.
                 */
                if (info->mode == PIPE_PRIM_LINES ||
                    info->mode == PIPE_PRIM_LINE_LOOP ||
                    info->mode == PIPE_PRIM_LINE_STRIP)
                        cfg.first_provoking_vertex = true;
                else
                        cfg.first_provoking_vertex = rast->flatshade_first;

                if (panfrost_is_implicit_prim_restart(info)) {
                        cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
                } else if (info->primitive_restart) {
                        cfg.primitive_restart = MALI_PRIMITIVE_RESTART_EXPLICIT;
                        cfg.primitive_restart_index = info->restart_index;
                }

                cfg.job_task_split = 6;

                cfg.index_count = ctx->indirect_draw ? 1 : draw->count;
                cfg.index_type = panfrost_translate_index_size(info->index_size);

                if (cfg.index_type) {
                        cfg.indices = indices;
                        cfg.base_vertex_offset = draw->index_bias - ctx->offset_start;
                }

#if PAN_ARCH >= 6
                cfg.secondary_shader = secondary_shader;
#endif
        }

        enum pipe_prim_type prim = u_reduced_prim(info->mode);
        bool polygon = (prim == PIPE_PRIM_TRIANGLES);
        void *prim_size = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE);

#if PAN_ARCH >= 6
        pan_section_pack(job, TILER_JOB, TILER, cfg) {
                cfg.address = panfrost_batch_get_bifrost_tiler(batch, ~0);
        }

        pan_section_pack(job, TILER_JOB, PADDING, cfg);
#endif

        section = pan_section_ptr(job, TILER_JOB, DRAW);
        pan_pack(section, DRAW, cfg) {
                /*
                 * From the Gallium documentation,
                 * pipe_rasterizer_state::cull_face "indicates which faces of
                 * polygons to cull". Points and lines are not considered
                 * polygons and should be drawn even if all faces are culled.
                 * The hardware does not take primitive type into account when
                 * culling, so we need to do that check ourselves.
                 */
                cfg.cull_front_face = polygon && (rast->cull_face & PIPE_FACE_FRONT);
                cfg.cull_back_face = polygon && (rast->cull_face & PIPE_FACE_BACK);
                cfg.front_face_ccw = rast->front_ccw;

                cfg.state = batch->rsd[PIPE_SHADER_FRAGMENT];
                cfg.attributes = batch->attribs[PIPE_SHADER_FRAGMENT];
                cfg.attribute_buffers = batch->attrib_bufs[PIPE_SHADER_FRAGMENT];
                cfg.viewport = batch->viewport;
                cfg.varyings = fs_vary;
                cfg.varying_buffers = fs_vary ? varyings : 0;
                cfg.thread_storage = batch->tls.gpu;

                /* For all primitives but lines DRAW.flat_shading_vertex must
                 * be set to 0 and the provoking vertex is selected with the
                 * PRIMITIVE.first_provoking_vertex field.
                 */
                if (prim == PIPE_PRIM_LINES) {
                        /* The logic is inverted across arches. */
                        cfg.flat_shading_vertex = rast->flatshade_first
                                                  ^ (PAN_ARCH <= 5);
                }

                pan_emit_draw_descs(batch, &cfg, PIPE_SHADER_FRAGMENT);

                if (ctx->occlusion_query && ctx->active_queries) {
                        if (ctx->occlusion_query->type == PIPE_QUERY_OCCLUSION_COUNTER)
                                cfg.occlusion_query = MALI_OCCLUSION_MODE_COUNTER;
                        else
                                cfg.occlusion_query = MALI_OCCLUSION_MODE_PREDICATE;

                        struct panfrost_resource *rsrc = pan_resource(ctx->occlusion_query->rsrc);
                        cfg.occlusion = rsrc->image.data.bo->ptr.gpu;
                        panfrost_batch_write_rsrc(ctx->batch, rsrc,
                                                  PIPE_SHADER_FRAGMENT);
                }
        }

        panfrost_emit_primitive_size(ctx, prim == PIPE_PRIM_POINTS, psiz, prim_size);
}
static void
panfrost_direct_draw(struct panfrost_batch *batch,
                     const struct pipe_draw_info *info,
                     unsigned drawid_offset,
                     const struct pipe_draw_start_count_bias *draw)
{
        if (!draw->count || !info->instance_count)
                return;

        struct panfrost_context *ctx = batch->ctx;

        /* Take into account a negative bias */
        ctx->indirect_draw = false;
        ctx->vertex_count = draw->count + (info->index_size ? abs(draw->index_bias) : 0);
        ctx->instance_count = info->instance_count;
        ctx->base_vertex = info->index_size ? draw->index_bias : 0;
        ctx->base_instance = info->start_instance;
        ctx->active_prim = info->mode;
        ctx->drawid = drawid_offset;

        struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);

        bool idvs = vs->info.vs.idvs;
        bool secondary_shader = vs->info.vs.secondary_enable;

        struct panfrost_ptr tiler, vertex;

        if (idvs) {
#if PAN_ARCH >= 6
                tiler = pan_pool_alloc_desc(&batch->pool.base, INDEXED_VERTEX_JOB);
#else
                unreachable("IDVS is unsupported on Midgard");
#endif
        } else {
                vertex = pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB);
                tiler = pan_pool_alloc_desc(&batch->pool.base, TILER_JOB);
        }

        unsigned vertex_count = ctx->vertex_count;

        unsigned min_index = 0, max_index = 0;
        mali_ptr indices = 0;

        if (info->index_size) {
                indices = panfrost_get_index_buffer_bounded(batch, info, draw,
                                                            &min_index,
                                                            &max_index);

                /* Use the corresponding values */
                vertex_count = max_index - min_index + 1;
                ctx->offset_start = min_index + draw->index_bias;
        } else {
                ctx->offset_start = draw->start;
        }
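
        /* Illustrative: an indexed draw whose indices span {5, ..., 9} yields
         * min_index = 5 and max_index = 9, so only 5 vertices are shaded and
         * offset_start rebases attribute fetches to index 5 plus the bias. */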

        if (info->instance_count > 1) {
                unsigned count = vertex_count;

                /* Index-Driven Vertex Shading requires different instances to
                 * have different cache lines for position results. Each vertex
                 * position is 16 bytes and the Mali cache line is 64 bytes, so
                 * the instance count must be aligned to 4 vertices.
                 */
                if (idvs)
                        count = ALIGN_POT(count, 4);
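                /* i.e. 64-byte cache line / 16-byte position = 4 vertices. */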

                ctx->padded_count = panfrost_padded_vertex_count(count);
        } else {
                ctx->padded_count = vertex_count;
        }

        panfrost_statistics_record(ctx, info, draw);

        struct mali_invocation_packed invocation;
        if (info->instance_count > 1) {
                panfrost_pack_work_groups_compute(&invocation,
                                                  1, vertex_count, info->instance_count,
                                                  1, 1, 1, true, false);
        } else {
                pan_pack(&invocation, INVOCATION, cfg) {
                        cfg.invocations = MALI_POSITIVE(vertex_count);
                        cfg.size_y_shift = 0;
                        cfg.size_z_shift = 0;
                        cfg.workgroups_x_shift = 0;
                        cfg.workgroups_y_shift = 0;
                        cfg.workgroups_z_shift = 32;
                        cfg.thread_group_split = MALI_SPLIT_MIN_EFFICIENT;
                }
        }

        /* Emit all sorts of descriptors. */
        mali_ptr varyings = 0, vs_vary = 0, fs_vary = 0, pos = 0, psiz = 0;

        panfrost_emit_varying_descriptor(batch,
                                         ctx->padded_count *
                                         ctx->instance_count,
                                         &vs_vary, &fs_vary, &varyings,
                                         NULL, &pos, &psiz,
                                         info->mode == PIPE_PRIM_POINTS);

        mali_ptr attribs, attrib_bufs;
        attribs = panfrost_emit_vertex_data(batch, &attrib_bufs);

        panfrost_update_state_3d(batch);
        panfrost_update_state_vs(batch);
        panfrost_update_state_fs(batch);
        panfrost_clean_state_3d(ctx);

        /* Fire off the draw itself */
        panfrost_draw_emit_tiler(batch, info, draw, &invocation, indices,
                                 fs_vary, varyings, pos, psiz, secondary_shader,
                                 tiler.cpu);
        if (idvs) {
#if PAN_ARCH >= 6
                panfrost_draw_emit_vertex_section(batch,
                                                  vs_vary, varyings,
                                                  attribs, attrib_bufs,
                                                  pan_section_ptr(tiler.cpu, INDEXED_VERTEX_JOB, VERTEX_DRAW));

                panfrost_add_job(&batch->pool.base, &batch->scoreboard,
                                 MALI_JOB_TYPE_INDEXED_VERTEX, false, false,
                                 0, 0, &tiler, false);
#endif
        } else {
                panfrost_draw_emit_vertex(batch, info, &invocation,
                                          vs_vary, varyings, attribs, attrib_bufs, vertex.cpu);
                panfrost_emit_vertex_tiler_jobs(batch, &vertex, &tiler);
        }

        /* Increment transform feedback offsets */
        panfrost_update_streamout_offsets(ctx);
}
#if PAN_GPU_INDIRECTS
static void
panfrost_indirect_draw(struct panfrost_batch *batch,
                       const struct pipe_draw_info *info,
                       unsigned drawid_offset,
                       const struct pipe_draw_indirect_info *indirect,
                       const struct pipe_draw_start_count_bias *draw)
{
        /* Indirect draw count and multi-draw not supported. */
        assert(indirect->draw_count == 1 && !indirect->indirect_draw_count);

        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_device *dev = pan_device(ctx->base.screen);

        /* TODO: update statistics (see panfrost_statistics_record()) */
        /* TODO: Increment transform feedback offsets */
        assert(ctx->streamout.num_targets == 0);

        ctx->active_prim = info->mode;
        ctx->drawid = drawid_offset;
        ctx->indirect_draw = true;

        struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);

        bool idvs = vs->info.vs.idvs;
        bool secondary_shader = vs->info.vs.secondary_enable;

        struct panfrost_ptr tiler = { 0 }, vertex = { 0 };

        if (idvs) {
#if PAN_ARCH >= 6
                tiler = pan_pool_alloc_desc(&batch->pool.base, INDEXED_VERTEX_JOB);
#else
                unreachable("IDVS is unsupported on Midgard");
#endif
        } else {
                vertex = pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB);
                tiler = pan_pool_alloc_desc(&batch->pool.base, TILER_JOB);
        }

        struct panfrost_bo *index_buf = NULL;

        if (info->index_size) {
                assert(!info->has_user_indices);
                struct panfrost_resource *rsrc = pan_resource(info->index.resource);
                index_buf = rsrc->image.data.bo;
                panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
        }

        mali_ptr varyings = 0, vs_vary = 0, fs_vary = 0, pos = 0, psiz = 0;
        unsigned varying_buf_count;

        /* We want to create templates, set all count fields to 0 to reflect
         * that. */
        ctx->instance_count = ctx->vertex_count = ctx->padded_count = 0;
        ctx->offset_start = 0;

        /* Set the {first,base}_vertex sysvals to NULL. Will be updated if the
         * vertex shader uses gl_VertexID or gl_BaseVertex.
         */
        ctx->first_vertex_sysval_ptr = 0;
        ctx->base_vertex_sysval_ptr = 0;
        ctx->base_instance_sysval_ptr = 0;

        panfrost_update_state_3d(batch);
        panfrost_update_state_vs(batch);
        panfrost_update_state_fs(batch);
        panfrost_clean_state_3d(ctx);

        bool point_coord_replace = (info->mode == PIPE_PRIM_POINTS);

        panfrost_emit_varying_descriptor(batch, 0,
                                         &vs_vary, &fs_vary, &varyings,
                                         &varying_buf_count, &pos, &psiz,
                                         point_coord_replace);

        mali_ptr attribs, attrib_bufs;
        attribs = panfrost_emit_vertex_data(batch, &attrib_bufs);

        /* Zero-ed invocation, the compute job will update it. */
        static struct mali_invocation_packed invocation;

        /* Fire off the draw itself */
        panfrost_draw_emit_tiler(batch, info, draw, &invocation,
                                 index_buf ? index_buf->ptr.gpu : 0,
                                 fs_vary, varyings, pos, psiz, secondary_shader,
                                 tiler.cpu);
        if (idvs) {
#if PAN_ARCH >= 6
                panfrost_draw_emit_vertex_section(batch,
                                                  vs_vary, varyings,
                                                  attribs, attrib_bufs,
                                                  pan_section_ptr(tiler.cpu, INDEXED_VERTEX_JOB, VERTEX_DRAW));
#endif
        } else {
                panfrost_draw_emit_vertex(batch, info, &invocation,
                                          vs_vary, varyings, attribs, attrib_bufs, vertex.cpu);
        }

        /* Add the varying heap BO to the batch if we're allocating varyings. */
        if (varyings) {
                panfrost_batch_add_bo(batch,
                                      dev->indirect_draw_shaders.varying_heap,
                                      PIPE_SHADER_VERTEX);
        }

        assert(indirect->buffer);

        struct panfrost_resource *draw_buf = pan_resource(indirect->buffer);

        /* Don't count images: those attributes don't need to be patched. */
        unsigned attrib_count =
                vs->info.attribute_count -
                util_bitcount(ctx->image_mask[PIPE_SHADER_VERTEX]);

        panfrost_batch_read_rsrc(batch, draw_buf, PIPE_SHADER_VERTEX);

        struct pan_indirect_draw_info draw_info = {
                .last_indirect_draw = batch->indirect_draw_job_id,
                .draw_buf = draw_buf->image.data.bo->ptr.gpu + indirect->offset,
                .index_buf = index_buf ? index_buf->ptr.gpu : 0,
                .first_vertex_sysval = ctx->first_vertex_sysval_ptr,
                .base_vertex_sysval = ctx->base_vertex_sysval_ptr,
                .base_instance_sysval = ctx->base_instance_sysval_ptr,
                .vertex_job = vertex.gpu,
                .tiler_job = tiler.gpu,
                .attrib_bufs = attrib_bufs,
                .attribs = attribs,
                .attrib_count = attrib_count,
                .varying_bufs = varyings,
                .index_size = info->index_size,
        };

        if (panfrost_writes_point_size(ctx))
                draw_info.flags |= PAN_INDIRECT_DRAW_UPDATE_PRIM_SIZE;

        if (vs->info.vs.writes_point_size)
                draw_info.flags |= PAN_INDIRECT_DRAW_HAS_PSIZ;

        if (idvs)
                draw_info.flags |= PAN_INDIRECT_DRAW_IDVS;

        if (info->primitive_restart) {
                draw_info.restart_index = info->restart_index;
                draw_info.flags |= PAN_INDIRECT_DRAW_PRIMITIVE_RESTART;
        }

        batch->indirect_draw_job_id =
                GENX(panfrost_emit_indirect_draw)(&batch->pool.base,
                                                  &batch->scoreboard,
                                                  &draw_info,
                                                  &batch->indirect_draw_ctx);

        if (idvs) {
                panfrost_add_job(&batch->pool.base, &batch->scoreboard,
                                 MALI_JOB_TYPE_INDEXED_VERTEX, false, false,
                                 0, 0, &tiler, false);
        } else {
                panfrost_emit_vertex_tiler_jobs(batch, &vertex, &tiler);
        }
}
#endif
static void
panfrost_draw_vbo(struct pipe_context *pipe,
                  const struct pipe_draw_info *info,
                  unsigned drawid_offset,
                  const struct pipe_draw_indirect_info *indirect,
                  const struct pipe_draw_start_count_bias *draws,
                  unsigned num_draws)
{
        struct panfrost_context *ctx = pan_context(pipe);
        struct panfrost_device *dev = pan_device(pipe->screen);

        if (!panfrost_render_condition_check(ctx))
                return;

        /* Emulate indirect draws unless we're using the experimental path */
        if ((!(dev->debug & PAN_DBG_INDIRECT) || !PAN_GPU_INDIRECTS) && indirect && indirect->buffer) {
                assert(num_draws == 1);
                util_draw_indirect(pipe, info, indirect);
                return;
        }

        /* Do some common setup */
        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);

        /* Don't add too many jobs to a single batch. The hardware has a hard
         * limit of 65536 jobs per job chain; we choose a smaller, arbitrary
         * soft limit to avoid the risk of timeouts. This might not be a good
         * idea. */
        if (unlikely(batch->scoreboard.job_index > 10000))
                batch = panfrost_get_fresh_batch_for_fbo(ctx, "Too many draws");

        /* panfrost_batch_skip_rasterization reads
         * batch->scissor_culls_everything, which is set by
         * panfrost_emit_viewport, so call that first.
         */
        if (ctx->dirty & (PAN_DIRTY_VIEWPORT | PAN_DIRTY_SCISSOR))
                batch->viewport = panfrost_emit_viewport(batch);

        /* If rasterization discard is enabled but the vertex shader does not
         * have side effects (including transform feedback), skip the draw
         * altogether. This is always an optimization. Additionally, this is
         * required for Index-Driven Vertex Shading, since IDVS always
         * rasterizes. The compiler will not use IDVS if the vertex shader has
         * side effects. So the only problem case is rasterizer discard with a
         * shader without side effects -- but these draws are useless.
         */
        if (panfrost_batch_skip_rasterization(batch)) {
                struct panfrost_shader_state *vs =
                        panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);

                if (!vs->info.writes_global)
                        return;
        }

        unsigned zs_draws = ctx->depth_stencil->draws;
        batch->draws |= zs_draws;
        batch->resolve |= zs_draws;

        /* Mark everything dirty when debugging */
        if (unlikely(dev->debug & PAN_DBG_DIRTY))
                panfrost_dirty_state_all(ctx);

        /* Conservatively assume draw parameters always change */
        ctx->dirty |= PAN_DIRTY_PARAMS | PAN_DIRTY_DRAWID;

        if (indirect) {
                assert(num_draws == 1);
                assert(PAN_GPU_INDIRECTS);

#if PAN_GPU_INDIRECTS
                if (indirect->count_from_stream_output) {
                        struct pipe_draw_start_count_bias tmp_draw = *draws;
                        struct panfrost_streamout_target *so =
                                pan_so_target(indirect->count_from_stream_output);

                        tmp_draw.start = 0;
                        tmp_draw.count = so->offset;
                        tmp_draw.index_bias = 0;
                        panfrost_direct_draw(batch, info, drawid_offset, &tmp_draw);
                        return;
                }

                panfrost_indirect_draw(batch, info, drawid_offset, indirect, &draws[0]);
                return;
#endif
        }

        struct pipe_draw_info tmp_info = *info;
        unsigned drawid = drawid_offset;

        for (unsigned i = 0; i < num_draws; i++) {
                panfrost_direct_draw(batch, &tmp_info, drawid, &draws[i]);

                if (tmp_info.increment_draw_id) {
                        ctx->dirty |= PAN_DIRTY_DRAWID;
                        drawid++;
                }
        }
}
/* Launch grid is the compute equivalent of draw_vbo, so in this routine, we
 * construct the COMPUTE job and some of its payload.
 */

static void
panfrost_launch_grid(struct pipe_context *pipe,
                     const struct pipe_grid_info *info)
{
        struct panfrost_context *ctx = pan_context(pipe);

        /* XXX - shouldn't be necessary with working memory barriers. Affected
         * test: KHR-GLES31.core.compute_shader.pipeline-post-xfb */
        panfrost_flush_all_batches(ctx, "Launch grid pre-barrier");

        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);

        struct panfrost_shader_state *cs =
                &ctx->shader[PIPE_SHADER_COMPUTE]->variants[0];

        /* Indirect dispatch can't handle workgroup local storage since that
         * would require dynamic memory allocation. Bail in this case. */
        if (info->indirect && ((cs->info.wls_size != 0) || !PAN_GPU_INDIRECTS)) {
                struct pipe_transfer *transfer;
                uint32_t *params = pipe_buffer_map_range(pipe, info->indirect,
                                                         info->indirect_offset,
                                                         3 * sizeof(uint32_t),
                                                         PIPE_MAP_READ,
                                                         &transfer);

                struct pipe_grid_info direct = *info;
                direct.indirect = NULL;
                direct.grid[0] = params[0];
                direct.grid[1] = params[1];
                direct.grid[2] = params[2];
                pipe_buffer_unmap(pipe, transfer);

                if (params[0] && params[1] && params[2])
                        panfrost_launch_grid(pipe, &direct);

                return;
        }
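
        /* The fallback above maps the indirect buffer on the CPU, which
         * stalls on any pending GPU writes; it only runs when GPU-side
         * indirect dispatch is unavailable or WLS is required. */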

        ctx->compute_grid = info;

        struct panfrost_ptr t =
                pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB);

        /* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so
         * reuse the graphics path for this by lowering to Gallium */
        if (info->input) {
                struct pipe_constant_buffer ubuf = {
                        .buffer = NULL,
                        .buffer_offset = 0,
                        .buffer_size = ctx->shader[PIPE_SHADER_COMPUTE]->cbase.req_input_mem,
                        .user_buffer = info->input
                };

                pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, false, &ubuf);
        }

        /* Invoke according to the grid info */

        void *invocation =
                pan_section_ptr(t.cpu, COMPUTE_JOB, INVOCATION);
        unsigned num_wg[3] = { info->grid[0], info->grid[1], info->grid[2] };

        if (info->indirect)
                num_wg[0] = num_wg[1] = num_wg[2] = 1;

        panfrost_pack_work_groups_compute(invocation,
                                          num_wg[0], num_wg[1], num_wg[2],
                                          info->block[0], info->block[1],
                                          info->block[2],
                                          false, info->indirect != NULL);

        pan_section_pack(t.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
                cfg.job_task_split =
                        util_logbase2_ceil(info->block[0] + 1) +
                        util_logbase2_ceil(info->block[1] + 1) +
                        util_logbase2_ceil(info->block[2] + 1);
        }
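
        /* Example (illustrative): an 8x8x1 block gives a job_task_split of
         * ceil(log2(9)) + ceil(log2(9)) + ceil(log2(2)) = 4 + 4 + 1 = 9. */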

        pan_section_pack(t.cpu, COMPUTE_JOB, DRAW, cfg) {
                cfg.draw_descriptor_is_64b = true;
                cfg.state = panfrost_emit_compute_shader_meta(batch, PIPE_SHADER_COMPUTE);
                cfg.attributes = panfrost_emit_image_attribs(batch, &cfg.attribute_buffers, PIPE_SHADER_COMPUTE);
                cfg.thread_storage = panfrost_emit_shared_memory(batch, info);
                cfg.uniform_buffers = panfrost_emit_const_buf(batch,
                                PIPE_SHADER_COMPUTE, &cfg.push_uniforms);
                cfg.textures = panfrost_emit_texture_descriptors(batch,
                                PIPE_SHADER_COMPUTE);
                cfg.samplers = panfrost_emit_sampler_descriptors(batch,
                                PIPE_SHADER_COMPUTE);
        }

        unsigned indirect_dep = 0;
#if PAN_GPU_INDIRECTS
        if (info->indirect) {
                struct pan_indirect_dispatch_info indirect = {
                        .job = t.gpu,
                        .indirect_dim = pan_resource(info->indirect)->image.data.bo->ptr.gpu +
                                        info->indirect_offset,
                        .num_wg_sysval = {
                                batch->num_wg_sysval[0],
                                batch->num_wg_sysval[1],
                                batch->num_wg_sysval[2],
                        },
                };

                indirect_dep = GENX(pan_indirect_dispatch_emit)(&batch->pool.base,
                                                                &batch->scoreboard,
                                                                &indirect);
        }
#endif

        panfrost_add_job(&batch->pool.base, &batch->scoreboard,
                         MALI_JOB_TYPE_COMPUTE, true, false,
                         indirect_dep, 0, &t, false);
        panfrost_flush_all_batches(ctx, "Launch grid post-barrier");
}
static void *
panfrost_create_rasterizer_state(
        struct pipe_context *pctx,
        const struct pipe_rasterizer_state *cso)
{
        struct panfrost_rasterizer *so = CALLOC_STRUCT(panfrost_rasterizer);

        so->base = *cso;

        /* Guaranteed with the core GL call, so don't expose ARB_polygon_offset */
        assert(cso->offset_clamp == 0.0);

        pan_pack(&so->multisample, MULTISAMPLE_MISC, cfg) {
                cfg.multisample_enable = cso->multisample;
                cfg.fixed_function_near_discard = cso->depth_clip_near;
                cfg.fixed_function_far_discard = cso->depth_clip_far;
                cfg.shader_depth_range_fixed = true;
        }

        pan_pack(&so->stencil_misc, STENCIL_MASK_MISC, cfg) {
                cfg.front_facing_depth_bias = cso->offset_tri;
                cfg.back_facing_depth_bias = cso->offset_tri;
                cfg.single_sampled_lines = !cso->multisample;
        }

        return so;
}
static void *
panfrost_create_vertex_elements_state(
        struct pipe_context *pctx,
        unsigned num_elements,
        const struct pipe_vertex_element *elements)
{
        struct panfrost_vertex_state *so = CALLOC_STRUCT(panfrost_vertex_state);
        struct panfrost_device *dev = pan_device(pctx->screen);

        so->num_elements = num_elements;
        memcpy(so->pipe, elements, sizeof(*elements) * num_elements);

        /* Assign attribute buffers corresponding to the vertex buffers, keyed
         * for a particular divisor since that's how instancing works on Mali */
        for (unsigned i = 0; i < num_elements; ++i) {
                so->element_buffer[i] = pan_assign_vertex_buffer(
                        so->buffers, &so->nr_bufs,
                        elements[i].vertex_buffer_index,
                        elements[i].instance_divisor);
        }

        for (int i = 0; i < num_elements; ++i) {
                enum pipe_format fmt = elements[i].src_format;
                const struct util_format_description *desc = util_format_description(fmt);
                so->formats[i] = dev->formats[desc->format].hw;
                assert(so->formats[i]);
        }

        /* Let's also prepare vertex builtins */
        so->formats[PAN_VERTEX_ID] = dev->formats[PIPE_FORMAT_R32_UINT].hw;
        so->formats[PAN_INSTANCE_ID] = dev->formats[PIPE_FORMAT_R32_UINT].hw;

        return so;
}
static inline unsigned
pan_pipe_to_stencil_op(enum pipe_stencil_op in)
{
        switch (in) {
        case PIPE_STENCIL_OP_KEEP: return MALI_STENCIL_OP_KEEP;
        case PIPE_STENCIL_OP_ZERO: return MALI_STENCIL_OP_ZERO;
        case PIPE_STENCIL_OP_REPLACE: return MALI_STENCIL_OP_REPLACE;
        case PIPE_STENCIL_OP_INCR: return MALI_STENCIL_OP_INCR_SAT;
        case PIPE_STENCIL_OP_DECR: return MALI_STENCIL_OP_DECR_SAT;
        case PIPE_STENCIL_OP_INCR_WRAP: return MALI_STENCIL_OP_INCR_WRAP;
        case PIPE_STENCIL_OP_DECR_WRAP: return MALI_STENCIL_OP_DECR_WRAP;
        case PIPE_STENCIL_OP_INVERT: return MALI_STENCIL_OP_INVERT;
        default: unreachable("Invalid stencil op");
        }
}
static inline void
pan_pipe_to_stencil(const struct pipe_stencil_state *in,
                    struct mali_stencil_packed *out)
{
        pan_pack(out, STENCIL, s) {
                s.mask = in->valuemask;
                s.compare_function = (enum mali_func) in->func;
                s.stencil_fail = pan_pipe_to_stencil_op(in->fail_op);
                s.depth_fail = pan_pipe_to_stencil_op(in->zfail_op);
                s.depth_pass = pan_pipe_to_stencil_op(in->zpass_op);
        }
}
static void *
panfrost_create_depth_stencil_state(struct pipe_context *pipe,
                                    const struct pipe_depth_stencil_alpha_state *zsa)
{
        struct panfrost_zsa_state *so = CALLOC_STRUCT(panfrost_zsa_state);
        so->base = *zsa;

        /* Normalize (there's no separate enable) */
        if (!zsa->alpha_enabled)
                so->base.alpha_func = MALI_FUNC_ALWAYS;

        /* Prepack relevant parts of the Renderer State Descriptor. They will
         * be ORed in at draw-time */
        pan_pack(&so->rsd_depth, MULTISAMPLE_MISC, cfg) {
                cfg.depth_function = zsa->depth_enabled ?
                        (enum mali_func) zsa->depth_func : MALI_FUNC_ALWAYS;

                cfg.depth_write_mask = zsa->depth_writemask;
        }

        pan_pack(&so->rsd_stencil, STENCIL_MASK_MISC, cfg) {
                cfg.stencil_enable = zsa->stencil[0].enabled;

                cfg.stencil_mask_front = zsa->stencil[0].writemask;
                cfg.stencil_mask_back = zsa->stencil[1].enabled ?
                        zsa->stencil[1].writemask : zsa->stencil[0].writemask;

#if PAN_ARCH <= 5
                cfg.alpha_test_compare_function =
                        (enum mali_func) so->base.alpha_func;
#endif
        }

        /* Stencil tests have their own words in the RSD */
        pan_pipe_to_stencil(&zsa->stencil[0], &so->stencil_front);

        if (zsa->stencil[1].enabled)
                pan_pipe_to_stencil(&zsa->stencil[1], &so->stencil_back);
        else
                so->stencil_back = so->stencil_front;

        so->enabled = zsa->stencil[0].enabled ||
                      (zsa->depth_enabled && zsa->depth_func != PIPE_FUNC_ALWAYS);

        /* Write masks need tracking together */
        if (zsa->depth_writemask)
                so->draws |= PIPE_CLEAR_DEPTH;

        if (zsa->stencil[0].enabled)
                so->draws |= PIPE_CLEAR_STENCIL;

        /* TODO: Bounds test should be easy */
        assert(!zsa->depth_bounds_test);

        return so;
}
static struct pipe_sampler_view *
panfrost_create_sampler_view(
        struct pipe_context *pctx,
        struct pipe_resource *texture,
        const struct pipe_sampler_view *template)
{
        struct panfrost_context *ctx = pan_context(pctx);
        struct panfrost_sampler_view *so = rzalloc(pctx, struct panfrost_sampler_view);

        pan_legalize_afbc_format(ctx, pan_resource(texture), template->format);

        pipe_reference(NULL, &texture->reference);

        so->base = *template;
        so->base.texture = texture;
        so->base.reference.count = 1;
        so->base.context = pctx;

        panfrost_create_sampler_view_bo(so, pctx, texture);

        return (struct pipe_sampler_view *) so;
}
/* A given Gallium blend state can be encoded to the hardware in numerous,
 * dramatically divergent ways due to the interactions of blending with
 * framebuffer formats. Conceptually, there are two modes:
 *
 * - Fixed-function blending (for suitable framebuffer formats, suitable blend
 *   state, and suitable blend constant)
 *
 * - Blend shaders (for everything else)
 *
 * A given Gallium blend configuration will compile to exactly one
 * fixed-function blend state, if it compiles to any, although the constant
 * will vary across runs as that is tracked outside of the Gallium CSO.
 *
 * However, that same blend configuration will compile to many different blend
 * shaders, depending on the framebuffer formats active. The rationale is that
 * blend shaders override not just fixed-function blending but also
 * fixed-function format conversion, so blend shaders are keyed to a particular
 * framebuffer format. As an example, the tilebuffer format is identical for
 * RG16F and RG16UI -- both are simply 32-bit raw pixels -- so both require
 * the same blend shader.
 *
 * All of this state is encapsulated in the panfrost_blend_state struct
 * (our subclass of pipe_blend_state).
 */
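
/* Illustrative consequence: enabling LogicOp forces the blend-shader path
 * (note .fixed_function below requires !logicop_enable), while ordinary
 * alpha blending on a supported format stays fixed-function with only the
 * blend constant patched at draw time. */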
/* Create a blend CSO. Essentially, try to compile a fixed-function
 * expression and initialize blend shaders */

static void *
panfrost_create_blend_state(struct pipe_context *pipe,
                            const struct pipe_blend_state *blend)
{
        struct panfrost_blend_state *so = CALLOC_STRUCT(panfrost_blend_state);
        so->base = *blend;

        so->pan.logicop_enable = blend->logicop_enable;
        so->pan.logicop_func = blend->logicop_func;
        so->pan.rt_count = blend->max_rt + 1;

        for (unsigned c = 0; c < so->pan.rt_count; ++c) {
                unsigned g = blend->independent_blend_enable ? c : 0;
                const struct pipe_rt_blend_state pipe = blend->rt[g];
                struct pan_blend_equation equation = {0};

                equation.color_mask = pipe.colormask;
                equation.blend_enable = pipe.blend_enable;

                if (pipe.blend_enable) {
                        equation.rgb_func = util_blend_func_to_shader(pipe.rgb_func);
                        equation.rgb_src_factor = util_blend_factor_to_shader(pipe.rgb_src_factor);
                        equation.rgb_invert_src_factor = util_blend_factor_is_inverted(pipe.rgb_src_factor);
                        equation.rgb_dst_factor = util_blend_factor_to_shader(pipe.rgb_dst_factor);
                        equation.rgb_invert_dst_factor = util_blend_factor_is_inverted(pipe.rgb_dst_factor);
                        equation.alpha_func = util_blend_func_to_shader(pipe.alpha_func);
                        equation.alpha_src_factor = util_blend_factor_to_shader(pipe.alpha_src_factor);
                        equation.alpha_invert_src_factor = util_blend_factor_is_inverted(pipe.alpha_src_factor);
                        equation.alpha_dst_factor = util_blend_factor_to_shader(pipe.alpha_dst_factor);
                        equation.alpha_invert_dst_factor = util_blend_factor_is_inverted(pipe.alpha_dst_factor);
                }

                /* Determine some common properties */
                unsigned constant_mask = pan_blend_constant_mask(equation);
                const bool supports_2src = pan_blend_supports_2src(PAN_ARCH);
                so->info[c] = (struct pan_blend_info) {
                        .no_colour = (equation.color_mask == 0),
                        .opaque = pan_blend_is_opaque(equation),
                        .constant_mask = constant_mask,

                        /* TODO: check the dest for the logicop */
                        .load_dest = blend->logicop_enable ||
                                     pan_blend_reads_dest(equation),

                        /* Could this possibly be fixed-function? */
                        .fixed_function = !blend->logicop_enable &&
                                          pan_blend_can_fixed_function(equation,
                                                                       supports_2src) &&
                                          (!constant_mask ||
                                           pan_blend_supports_constant(PAN_ARCH, c)),

                        .alpha_zero_nop = pan_blend_alpha_zero_nop(equation),
                        .alpha_one_store = pan_blend_alpha_one_store(equation),
                };

                so->pan.rts[c].equation = equation;

                /* Bifrost needs to know if any render target loads its
                 * destination in the hot draw path, so precompute this */
                if (so->info[c].load_dest)
                        so->load_dest_mask |= BITFIELD_BIT(c);

                /* Converting equations to Mali style is expensive, do it at
                 * CSO create time instead of draw-time */
                if (so->info[c].fixed_function) {
                        so->equation[c] = pan_pack_blend(equation);
                }
        }

        return so;
}
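
/* Editor's note (a hedged sketch of the consumer, not literal driver code):
 * per the model described above, draw-time blend emission conceptually
 * branches on the precomputed info, with `fmt` standing in for the bound
 * framebuffer format:
 *
 *    if (so->info[i].no_colour) {
 *            // masked-off target: nothing to blend
 *    } else if (so->info[i].fixed_function) {
 *            // emit the prepacked so->equation[i]; the blend constant is
 *            // tracked outside the CSO and supplied here
 *    } else {
 *            // fetch or compile a blend shader keyed on (equation, fmt),
 *            // since the shader also performs format conversion
 *    }
 */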

static void
prepare_shader(struct panfrost_shader_state *state,
               struct panfrost_pool *pool, bool upload)
{
        struct mali_renderer_state_packed *out =
                (struct mali_renderer_state_packed *)&state->partial_rsd;

        /* Pack into the CPU-side partial RSD, or directly into an uploaded
         * descriptor if requested */
        if (upload) {
                struct panfrost_ptr ptr =
                        pan_pool_alloc_desc(&pool->base, RENDERER_STATE);

                state->state = panfrost_pool_take_ref(pool, ptr.gpu);
                out = ptr.cpu;
        }

        pan_pack(out, RENDERER_STATE, cfg) {
                pan_shader_prepare_rsd(&state->info, state->bin.gpu, &cfg);
        }
}

static void
panfrost_get_sample_position(struct pipe_context *context,
                             unsigned sample_count,
                             unsigned sample_index,
                             float *out_value)
{
        panfrost_query_sample_position(
                        panfrost_sample_pattern(sample_count),
                        sample_index,
                        out_value);
}

static void
screen_destroy(struct pipe_screen *pscreen)
{
        struct panfrost_device *dev = pan_device(pscreen);
        GENX(pan_blitter_cleanup)(dev);

#if PAN_GPU_INDIRECTS
        GENX(panfrost_cleanup_indirect_draw_shaders)(dev);
        GENX(pan_indirect_dispatch_cleanup)(dev);
#endif
}

static void
preload(struct panfrost_batch *batch, struct pan_fb_info *fb)
{
        GENX(pan_preload_fb)(&batch->pool.base, &batch->scoreboard, fb, batch->tls.gpu,
                             PAN_ARCH >= 6 ? batch->tiler_ctx.bifrost : 0, NULL);
}

static void
init_batch(struct panfrost_batch *batch)
{
        /* Reserve the framebuffer and local storage descriptors */
        batch->framebuffer =
#if PAN_ARCH == 4
                pan_pool_alloc_desc(&batch->pool.base, FRAMEBUFFER);
#else
                pan_pool_alloc_desc_aggregate(&batch->pool.base,
                                              PAN_DESC(FRAMEBUFFER),
                                              PAN_DESC(ZS_CRC_EXTENSION),
                                              PAN_DESC_ARRAY(MAX2(batch->key.nr_cbufs, 1), RENDER_TARGET));

        batch->framebuffer.gpu |= MALI_FBD_TAG_IS_MFBD;
#endif

#if PAN_ARCH >= 6
        batch->tls = pan_pool_alloc_desc(&batch->pool.base, LOCAL_STORAGE);
#else
        /* On Midgard, the TLS is embedded in the FB descriptor */
        batch->tls = batch->framebuffer;
#endif
}

static void
panfrost_sampler_view_destroy(
        struct pipe_context *pctx,
        struct pipe_sampler_view *pview)
{
        struct panfrost_sampler_view *view = (struct panfrost_sampler_view *) pview;

        pipe_resource_reference(&pview->texture, NULL);
        panfrost_bo_unreference(view->state.bo);
        ralloc_free(view);
}

static void
context_init(struct pipe_context *pipe)
{
        pipe->draw_vbo = panfrost_draw_vbo;
        pipe->launch_grid = panfrost_launch_grid;

        pipe->create_vertex_elements_state = panfrost_create_vertex_elements_state;
        pipe->create_rasterizer_state = panfrost_create_rasterizer_state;
        pipe->create_depth_stencil_alpha_state = panfrost_create_depth_stencil_state;
        pipe->create_sampler_view = panfrost_create_sampler_view;
        pipe->sampler_view_destroy = panfrost_sampler_view_destroy;
        pipe->create_sampler_state = panfrost_create_sampler_state;
        pipe->create_blend_state = panfrost_create_blend_state;

        pipe->get_sample_position = panfrost_get_sample_position;
}

#if PAN_ARCH <= 5

/* Returns the polygon list's GPU address if available, or otherwise allocates
 * the polygon list. It's perfectly fast to allocate/free a BO directly,
 * since we'll hit the BO cache and this is one-per-batch anyway. */

static mali_ptr
batch_get_polygon_list(struct panfrost_batch *batch)
{
        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);

        if (!batch->tiler_ctx.midgard.polygon_list) {
                bool has_draws = batch->scoreboard.first_tiler != NULL;
                unsigned size =
                        panfrost_tiler_get_polygon_list_size(dev,
                                                             batch->key.width,
                                                             batch->key.height,
                                                             has_draws);
                size = util_next_power_of_two(size);

                /* Create the BO as invisible if we can. In the
                 * non-hierarchical tiler case, we need to write the polygon
                 * list manually because there's no WRITE_VALUE job in the
                 * chain (maybe we should add one...). */
                bool init_polygon_list = !has_draws &&
                        dev->model->quirks.no_hierarchical_tiling;
                batch->tiler_ctx.midgard.polygon_list =
                        panfrost_batch_create_bo(batch, size,
                                                 init_polygon_list ? 0 : PAN_BO_INVISIBLE,
                                                 PIPE_SHADER_VERTEX,
                                                 "Polygon list");
                panfrost_batch_add_bo(batch, batch->tiler_ctx.midgard.polygon_list,
                                      PIPE_SHADER_FRAGMENT);

                if (init_polygon_list) {
                        assert(batch->tiler_ctx.midgard.polygon_list->ptr.cpu);
                        uint32_t *polygon_list_body =
                                batch->tiler_ctx.midgard.polygon_list->ptr.cpu +
                                MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE;

                        /* Magic for Mali T720 */
                        polygon_list_body[0] = 0xa0000000;
                }

                batch->tiler_ctx.midgard.disable = !has_draws;
        }

        return batch->tiler_ctx.midgard.polygon_list->ptr.gpu;
}
#endif

static void
init_polygon_list(struct panfrost_batch *batch)
{
#if PAN_ARCH <= 5
        mali_ptr polygon_list = batch_get_polygon_list(batch);
        panfrost_scoreboard_initialize_tiler(&batch->pool.base,
                                             &batch->scoreboard,
                                             polygon_list);
#endif
}

void
GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen)
{
        struct panfrost_device *dev = &screen->dev;

        screen->vtbl.prepare_shader = prepare_shader;
        screen->vtbl.emit_tls = emit_tls;
        screen->vtbl.emit_fbd = emit_fbd;
        screen->vtbl.emit_fragment_job = emit_fragment_job;
        screen->vtbl.screen_destroy = screen_destroy;
        screen->vtbl.preload = preload;
        screen->vtbl.context_init = context_init;
        screen->vtbl.init_batch = init_batch;
        screen->vtbl.get_blend_shader = GENX(pan_blend_get_shader_locked);
        screen->vtbl.init_polygon_list = init_polygon_list;
        screen->vtbl.get_compiler_options = GENX(pan_shader_get_compiler_options);
        screen->vtbl.compile_shader = GENX(pan_shader_compile);

        GENX(pan_blitter_init)(dev, &screen->blitter.bin_pool.base,
                               &screen->blitter.desc_pool.base);
#if PAN_GPU_INDIRECTS
        GENX(pan_indirect_dispatch_init)(dev);
        GENX(panfrost_init_indirect_draw_shaders)(dev, &screen->indirect_draw.bin_pool.base);