2
* Copyright © 2006,2008,2011 Intel Corporation
3
* Copyright © 2007 Red Hat, Inc.
5
* Permission is hereby granted, free of charge, to any person obtaining a
6
* copy of this software and associated documentation files (the "Software"),
7
* to deal in the Software without restriction, including without limitation
8
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
* and/or sell copies of the Software, and to permit persons to whom the
10
* Software is furnished to do so, subject to the following conditions:
12
* The above copyright notice and this permission notice (including the next
13
* paragraph) shall be included in all copies or substantial portions of the
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
* Wang Zhenyu <zhenyu.z.wang@sna.com>
26
* Eric Anholt <eric@anholt.net>
27
* Carl Worth <cworth@redhat.com>
28
* Keith Packard <keithp@keithp.com>
29
* Chris Wilson <chris@chris-wilson.co.uk>
39
#include "sna_render.h"
40
#include "sna_render_inline.h"
41
#include "sna_video.h"
44
#include "gen4_common.h"
45
#include "gen4_render.h"
46
#include "gen4_source.h"
47
#include "gen4_vertex.h"
49
/* gen4 has a serious issue with its shaders that we need to flush
50
* after every rectangle... So until that is resolved, prefer
54
#define FORCE_NONRECTILINEAR_SPANS -1
55
#define FORCE_FLUSH 1 /* https://bugs.freedesktop.org/show_bug.cgi?id=55500 */
57
#define ALWAYS_FLUSH 1
59
#define NO_COMPOSITE 0
60
#define NO_COMPOSITE_SPANS 0
62
#define NO_COPY_BOXES 0
65
#define NO_FILL_BOXES 0
68
#define MAX_FLUSH_VERTICES 1 /* was 6, https://bugs.freedesktop.org/show_bug.cgi?id=55500 */
70
#define GEN4_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
72
/* Set up a default static partitioning of the URB, which is supposed to
73
* allow anything we would want to do, at potentially lower performance.
75
#define URB_CS_ENTRY_SIZE 1
76
#define URB_CS_ENTRIES 0
78
#define URB_VS_ENTRY_SIZE 1
79
#define URB_VS_ENTRIES 32
81
#define URB_GS_ENTRY_SIZE 0
82
#define URB_GS_ENTRIES 0
84
#define URB_CL_ENTRY_SIZE 0
85
#define URB_CL_ENTRIES 0
87
#define URB_SF_ENTRY_SIZE 2
88
#define URB_SF_ENTRIES 64
91
* this program computes dA/dx and dA/dy for the texture coordinates along
92
* with the base texture coordinate. It was extracted from the Mesa driver
95
#define SF_KERNEL_NUM_GRF 16
96
#define PS_KERNEL_NUM_GRF 32
98
#define GEN4_MAX_SF_THREADS 24
99
#define GEN4_MAX_WM_THREADS 32
100
#define G4X_MAX_WM_THREADS 50
102
static const uint32_t ps_kernel_packed_static[][4] = {
103
#include "exa_wm_xy.g4b"
104
#include "exa_wm_src_affine.g4b"
105
#include "exa_wm_src_sample_argb.g4b"
106
#include "exa_wm_yuv_rgb.g4b"
107
#include "exa_wm_write.g4b"
110
static const uint32_t ps_kernel_planar_static[][4] = {
111
#include "exa_wm_xy.g4b"
112
#include "exa_wm_src_affine.g4b"
113
#include "exa_wm_src_sample_planar.g4b"
114
#include "exa_wm_yuv_rgb.g4b"
115
#include "exa_wm_write.g4b"
118
#define NOKERNEL(kernel_enum, func, masked) \
119
[kernel_enum] = {func, 0, masked}
120
#define KERNEL(kernel_enum, kernel, masked) \
121
[kernel_enum] = {&kernel, sizeof(kernel), masked}
122
static const struct wm_kernel_info {
127
NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
128
NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),
130
NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
131
NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),
133
NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
134
NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),
136
NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
137
NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),
139
NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
140
NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),
142
KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, false),
143
KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, false),
147
static const struct blendinfo {
151
} gen4_blend_op[] = {
152
/* Clear */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ZERO},
153
/* Src */ {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ZERO},
154
/* Dst */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ONE},
155
/* Over */ {1, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
156
/* OverReverse */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ONE},
157
/* In */ {0, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
158
/* InReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_SRC_ALPHA},
159
/* Out */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
160
/* OutReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
161
/* Atop */ {1, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
162
/* AtopReverse */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_SRC_ALPHA},
163
/* Xor */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
164
/* Add */ {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ONE},
168
* Highest-valued BLENDFACTOR used in gen4_blend_op.
170
* This leaves out GEN4_BLENDFACTOR_INV_DST_COLOR,
171
* GEN4_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
172
* GEN4_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
174
#define GEN4_BLENDFACTOR_COUNT (GEN4_BLENDFACTOR_INV_DST_ALPHA + 1)
176
#define BLEND_OFFSET(s, d) \
177
(((s) * GEN4_BLENDFACTOR_COUNT + (d)) * 64)
179
#define SAMPLER_OFFSET(sf, se, mf, me, k) \
180
((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
183
gen4_emit_pipelined_pointers(struct sna *sna,
184
const struct sna_composite_op *op,
185
int blend, int kernel);
187
#define OUT_BATCH(v) batch_emit(sna, v)
188
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
189
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
191
#define GEN4_MAX_3D_SIZE 8192

/* Returns true when either dimension exceeds the gen4 3D pipeline's
 * maximum renderable surface size (GEN4_MAX_3D_SIZE in each axis),
 * in which case the caller must fall back or tile the operation. */
static inline bool too_large(int width, int height)
{
	return width > GEN4_MAX_3D_SIZE || height > GEN4_MAX_3D_SIZE;
}
199
gen4_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
205
if (gen4_blend_op[op].src_alpha)
206
base = WM_KERNEL_MASKSA;
208
base = WM_KERNEL_MASKCA;
210
base = WM_KERNEL_MASK;
214
return base + !is_affine;
217
static bool gen4_magic_ca_pass(struct sna *sna,
218
const struct sna_composite_op *op)
220
struct gen4_render_state *state = &sna->render_state.gen4;
222
if (!op->need_magic_ca_pass)
225
assert(sna->render.vertex_index > sna->render.vertex_start);
227
DBG(("%s: CA fixup\n", __FUNCTION__));
228
assert(op->mask.bo != NULL);
229
assert(op->has_component_alpha);
231
gen4_emit_pipelined_pointers(sna, op, PictOpAdd,
232
gen4_choose_composite_kernel(PictOpAdd,
233
true, true, op->is_affine));
235
OUT_BATCH(GEN4_3DPRIMITIVE |
236
GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
237
(_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
240
OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
241
OUT_BATCH(sna->render.vertex_start);
242
OUT_BATCH(1); /* single instance */
243
OUT_BATCH(0); /* start instance location */
244
OUT_BATCH(0); /* index buffer offset, ignored */
246
state->last_primitive = sna->kgem.nbatch;
250
static uint32_t gen4_get_blend(int op,
251
bool has_component_alpha,
256
src = gen4_blend_op[op].src_blend;
257
dst = gen4_blend_op[op].dst_blend;
259
/* If there's no dst alpha channel, adjust the blend op so that we'll treat
262
if (PICT_FORMAT_A(dst_format) == 0) {
263
if (src == GEN4_BLENDFACTOR_DST_ALPHA)
264
src = GEN4_BLENDFACTOR_ONE;
265
else if (src == GEN4_BLENDFACTOR_INV_DST_ALPHA)
266
src = GEN4_BLENDFACTOR_ZERO;
269
/* If the source alpha is being used, then we should only be in a
270
* case where the source blend factor is 0, and the source blend
271
* value is the mask channels multiplied by the source picture's alpha.
273
if (has_component_alpha && gen4_blend_op[op].src_alpha) {
274
if (dst == GEN4_BLENDFACTOR_SRC_ALPHA)
275
dst = GEN4_BLENDFACTOR_SRC_COLOR;
276
else if (dst == GEN4_BLENDFACTOR_INV_SRC_ALPHA)
277
dst = GEN4_BLENDFACTOR_INV_SRC_COLOR;
280
DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
281
op, dst_format, PICT_FORMAT_A(dst_format),
282
src, dst, BLEND_OFFSET(src, dst)));
283
return BLEND_OFFSET(src, dst);
286
static uint32_t gen4_get_card_format(PictFormat format)
292
return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
294
return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM;
296
return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
298
return GEN4_SURFACEFORMAT_R8G8B8X8_UNORM;
299
#ifdef PICT_a2r10g10b10
300
case PICT_a2r10g10b10:
301
return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
302
case PICT_x2r10g10b10:
303
return GEN4_SURFACEFORMAT_B10G10R10X2_UNORM;
306
return GEN4_SURFACEFORMAT_R8G8B8_UNORM;
308
return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
310
return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
312
return GEN4_SURFACEFORMAT_A8_UNORM;
314
return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
318
static uint32_t gen4_get_dest_format(PictFormat format)
325
return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
328
return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
329
#ifdef PICT_a2r10g10b10
330
case PICT_a2r10g10b10:
331
case PICT_x2r10g10b10:
332
return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
335
return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
338
return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
340
return GEN4_SURFACEFORMAT_A8_UNORM;
343
return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
347
static bool gen4_check_dst_format(PictFormat format)
349
if (gen4_get_dest_format(format) != -1)
352
DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
356
static bool gen4_check_format(uint32_t format)
358
if (gen4_get_card_format(format) != -1)
361
DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
365
typedef struct gen4_surface_state_padded {
366
struct gen4_surface_state state;
367
char pad[32 - sizeof(struct gen4_surface_state)];
368
} gen4_surface_state_padded;
370
static void null_create(struct sna_static_stream *stream)
372
/* A bunch of zeros useful for legacy border color and depth-stencil */
373
sna_static_stream_map(stream, 64, 64);
377
sampler_state_init(struct gen4_sampler_state *sampler_state,
378
sampler_filter_t filter,
379
sampler_extend_t extend)
381
sampler_state->ss0.lod_preclamp = 1; /* GL mode */
383
/* We use the legacy mode to get the semantics specified by
384
* the Render extension. */
385
sampler_state->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY;
389
case SAMPLER_FILTER_NEAREST:
390
sampler_state->ss0.min_filter = GEN4_MAPFILTER_NEAREST;
391
sampler_state->ss0.mag_filter = GEN4_MAPFILTER_NEAREST;
393
case SAMPLER_FILTER_BILINEAR:
394
sampler_state->ss0.min_filter = GEN4_MAPFILTER_LINEAR;
395
sampler_state->ss0.mag_filter = GEN4_MAPFILTER_LINEAR;
401
case SAMPLER_EXTEND_NONE:
402
sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
403
sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
404
sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
406
case SAMPLER_EXTEND_REPEAT:
407
sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
408
sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
409
sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
411
case SAMPLER_EXTEND_PAD:
412
sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
413
sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
414
sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
416
case SAMPLER_EXTEND_REFLECT:
417
sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
418
sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
419
sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
424
static uint32_t gen4_filter(uint32_t filter)
429
case PictFilterNearest:
430
return SAMPLER_FILTER_NEAREST;
431
case PictFilterBilinear:
432
return SAMPLER_FILTER_BILINEAR;
436
static uint32_t gen4_check_filter(PicturePtr picture)
438
switch (picture->filter) {
439
case PictFilterNearest:
440
case PictFilterBilinear:
443
DBG(("%s: unknown filter: %s [%d]\n",
445
PictureGetFilterName(picture->filter),
451
static uint32_t gen4_repeat(uint32_t repeat)
457
return SAMPLER_EXTEND_NONE;
459
return SAMPLER_EXTEND_REPEAT;
461
return SAMPLER_EXTEND_PAD;
463
return SAMPLER_EXTEND_REFLECT;
467
static bool gen4_check_repeat(PicturePtr picture)
469
if (!picture->repeat)
472
switch (picture->repeatType) {
479
DBG(("%s: unknown repeat: %d\n",
480
__FUNCTION__, picture->repeatType));
486
gen4_tiling_bits(uint32_t tiling)
490
case I915_TILING_NONE: return 0;
491
case I915_TILING_X: return GEN4_SURFACE_TILED;
492
case I915_TILING_Y: return GEN4_SURFACE_TILED | GEN4_SURFACE_TILED_Y;
497
* Sets up the common fields for a surface state buffer for the given
498
* picture in the given surface state buffer.
501
gen4_bind_bo(struct sna *sna,
512
assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo));
514
/* After the first bind, we manage the cache domains within the batch */
515
offset = kgem_bo_get_binding(bo, format | is_dst << 31);
517
assert(offset >= sna->kgem.surface);
519
kgem_bo_mark_dirty(bo);
520
return offset * sizeof(uint32_t);
523
offset = sna->kgem.surface -=
524
sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
525
ss = sna->kgem.batch + offset;
527
ss[0] = (GEN4_SURFACE_2D << GEN4_SURFACE_TYPE_SHIFT |
528
GEN4_SURFACE_BLEND_ENABLED |
529
format << GEN4_SURFACE_FORMAT_SHIFT);
532
ss[0] |= GEN4_SURFACE_RC_READ_WRITE;
533
domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
535
domains = I915_GEM_DOMAIN_SAMPLER << 16;
536
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
538
ss[2] = ((width - 1) << GEN4_SURFACE_WIDTH_SHIFT |
539
(height - 1) << GEN4_SURFACE_HEIGHT_SHIFT);
540
ss[3] = (gen4_tiling_bits(bo->tiling) |
541
(bo->pitch - 1) << GEN4_SURFACE_PITCH_SHIFT);
545
kgem_bo_set_binding(bo, format | is_dst << 31, offset);
547
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
548
offset, bo->handle, ss[1],
549
format, width, height, bo->pitch, bo->tiling,
550
domains & 0xffff ? "render" : "sampler"));
552
return offset * sizeof(uint32_t);
555
static void gen4_emit_vertex_buffer(struct sna *sna,
556
const struct sna_composite_op *op)
558
int id = op->u.gen4.ve_id;
560
assert((sna->render.vb_id & (1 << id)) == 0);
562
OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3);
563
OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
564
(4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
565
assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
566
sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
571
sna->render.vb_id |= 1 << id;
575
gen4_emit_pipe_flush(struct sna *sna)
578
OUT_BATCH(GEN4_PIPE_CONTROL |
579
GEN4_PIPE_CONTROL_WC_FLUSH |
585
OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
590
gen4_emit_pipe_break(struct sna *sna)
593
OUT_BATCH(GEN4_PIPE_CONTROL | (4 - 2));
598
OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
603
gen4_emit_pipe_invalidate(struct sna *sna)
606
OUT_BATCH(GEN4_PIPE_CONTROL |
607
GEN4_PIPE_CONTROL_WC_FLUSH |
608
(sna->kgem.gen >= 045 ? GEN4_PIPE_CONTROL_TC_FLUSH : 0) |
618
static void gen4_emit_primitive(struct sna *sna)
620
if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) {
621
sna->render.vertex_offset = sna->kgem.nbatch - 5;
625
OUT_BATCH(GEN4_3DPRIMITIVE |
626
GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
627
(_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
630
sna->render.vertex_offset = sna->kgem.nbatch;
631
OUT_BATCH(0); /* vertex count, to be filled in later */
632
OUT_BATCH(sna->render.vertex_index);
633
OUT_BATCH(1); /* single instance */
634
OUT_BATCH(0); /* start instance location */
635
OUT_BATCH(0); /* index buffer offset, ignored */
636
sna->render.vertex_start = sna->render.vertex_index;
638
sna->render_state.gen4.last_primitive = sna->kgem.nbatch;
641
static bool gen4_rectangle_begin(struct sna *sna,
642
const struct sna_composite_op *op)
644
unsigned int id = 1 << op->u.gen4.ve_id;
647
if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
650
/* 7xpipelined pointers + 6xprimitive + 1xflush */
651
ndwords = op->need_magic_ca_pass? 19 : 6;
652
if ((sna->render.vb_id & id) == 0)
654
ndwords += 8*FORCE_FLUSH;
656
if (!kgem_check_batch(&sna->kgem, ndwords))
659
if ((sna->render.vb_id & id) == 0)
660
gen4_emit_vertex_buffer(sna, op);
661
if (sna->render.vertex_offset == 0)
662
gen4_emit_primitive(sna);
667
static int gen4_get_rectangles__flush(struct sna *sna,
668
const struct sna_composite_op *op)
670
/* Preventing discarding new vbo after lock contention */
671
if (sna_vertex_wait__locked(&sna->render)) {
672
int rem = vertex_space(sna);
673
if (rem > op->floats_per_rect)
677
if (!kgem_check_batch(&sna->kgem,
678
8*FORCE_FLUSH + (op->need_magic_ca_pass ? 2*19+6 : 6)))
680
if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
683
if (sna->render.vertex_offset) {
684
gen4_vertex_flush(sna);
685
if (gen4_magic_ca_pass(sna, op))
686
gen4_emit_pipelined_pointers(sna, op, op->op,
687
op->u.gen4.wm_kernel);
690
return gen4_vertex_finish(sna);
693
inline static int gen4_get_rectangles(struct sna *sna,
694
const struct sna_composite_op *op,
696
void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
702
rem = sna->render.vertex_offset;
703
if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive)
704
rem = sna->kgem.nbatch - 5;
706
rem = MAX_FLUSH_VERTICES - (sna->render.vertex_index - sna->render.vertex_start) / 3;
708
if (sna->render.vertex_offset) {
709
gen4_vertex_flush(sna);
710
if (gen4_magic_ca_pass(sna, op)) {
711
if (kgem_check_batch(&sna->kgem, 19+6))
712
gen4_emit_pipelined_pointers(sna, op, op->op,
713
op->u.gen4.wm_kernel);
716
gen4_emit_pipe_break(sna);
717
rem = MAX_FLUSH_VERTICES;
720
rem = MAX_FLUSH_VERTICES;
726
rem = vertex_space(sna);
727
if (unlikely(rem < op->floats_per_rect)) {
728
DBG(("flushing vbo for %s: %d < %d\n",
729
__FUNCTION__, rem, op->floats_per_rect));
730
rem = gen4_get_rectangles__flush(sna, op);
731
if (unlikely(rem == 0))
735
if (unlikely(sna->render.vertex_offset == 0)) {
736
if (!gen4_rectangle_begin(sna, op))
742
assert(rem <= vertex_space(sna));
743
assert(op->floats_per_rect <= rem);
744
if (want > 1 && want * op->floats_per_rect > rem)
745
want = rem / op->floats_per_rect;
747
sna->render.vertex_index += 3*want;
751
if (sna->render.vertex_offset) {
752
gen4_vertex_flush(sna);
753
gen4_magic_ca_pass(sna, op);
755
sna_vertex_wait__locked(&sna->render);
756
_kgem_submit(&sna->kgem);
762
gen4_composite_get_binding_table(struct sna *sna, uint16_t *offset)
765
sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
767
DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
769
/* Clear all surplus entries to zero in case of prefetch */
770
*offset = sna->kgem.surface;
771
return memset(sna->kgem.batch + sna->kgem.surface,
772
0, sizeof(struct gen4_surface_state_padded));
776
gen4_emit_urb(struct sna *sna)
784
if (!sna->render_state.gen4.needs_urb)
787
urb_vs_end = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
788
urb_gs_end = urb_vs_end + URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
789
urb_cl_end = urb_gs_end + URB_CL_ENTRIES * URB_CL_ENTRY_SIZE;
790
urb_sf_end = urb_cl_end + URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
791
urb_cs_end = urb_sf_end + URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
792
assert(urb_cs_end <= 256);
794
while ((sna->kgem.nbatch & 15) > 12)
797
OUT_BATCH(GEN4_URB_FENCE |
804
OUT_BATCH(urb_cl_end << UF1_CLIP_FENCE_SHIFT |
805
urb_gs_end << UF1_GS_FENCE_SHIFT |
806
urb_vs_end << UF1_VS_FENCE_SHIFT);
807
OUT_BATCH(urb_cs_end << UF2_CS_FENCE_SHIFT |
808
urb_sf_end << UF2_SF_FENCE_SHIFT);
810
/* Constant buffer state */
811
OUT_BATCH(GEN4_CS_URB_STATE | 0);
812
OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);
814
sna->render_state.gen4.needs_urb = false;
818
gen4_emit_state_base_address(struct sna *sna)
820
assert(sna->render_state.gen4.general_bo->proxy == NULL);
821
OUT_BATCH(GEN4_STATE_BASE_ADDRESS | 4);
822
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
824
sna->render_state.gen4.general_bo,
825
I915_GEM_DOMAIN_INSTRUCTION << 16,
826
BASE_ADDRESS_MODIFY));
827
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
830
I915_GEM_DOMAIN_INSTRUCTION << 16,
831
BASE_ADDRESS_MODIFY));
832
OUT_BATCH(0); /* media */
834
/* upper bounds, all disabled */
835
OUT_BATCH(BASE_ADDRESS_MODIFY);
840
gen4_emit_invariant(struct sna *sna)
842
assert(sna->kgem.surface == sna->kgem.batch_size);
844
if (sna->kgem.gen >= 045)
845
OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
847
OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D);
849
gen4_emit_state_base_address(sna);
851
sna->render_state.gen4.needs_invariant = false;
855
gen4_get_batch(struct sna *sna, const struct sna_composite_op *op)
857
kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
859
if (!kgem_check_batch_with_surfaces(&sna->kgem, 150 + 50*FORCE_FLUSH, 4)) {
860
DBG(("%s: flushing batch: %d < %d+%d\n",
861
__FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
863
kgem_submit(&sna->kgem);
864
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
867
if (sna->render_state.gen4.needs_invariant)
868
gen4_emit_invariant(sna);
872
gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op)
874
assert(op->floats_per_rect == 3*op->floats_per_vertex);
875
if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) {
876
DBG(("aligning vertex: was %d, now %d floats per vertex\n",
877
sna->render_state.gen4.floats_per_vertex,
878
op->floats_per_vertex));
879
gen4_vertex_align(sna, op);
880
sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex;
885
gen4_emit_binding_table(struct sna *sna, uint16_t offset)
887
if (sna->render_state.gen4.surface_table == offset)
890
sna->render_state.gen4.surface_table = offset;
892
/* Binding table pointers */
893
OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | 4);
894
OUT_BATCH(0); /* vs */
895
OUT_BATCH(0); /* gs */
896
OUT_BATCH(0); /* clip */
897
OUT_BATCH(0); /* sf */
898
/* Only the PS uses the binding table */
903
gen4_emit_pipelined_pointers(struct sna *sna,
904
const struct sna_composite_op *op,
905
int blend, int kernel)
910
DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d),kernel=%d, blend=%d, ca=%d, format=%x\n",
911
__FUNCTION__, op->u.gen4.ve_id & 2,
912
op->src.filter, op->src.repeat,
913
op->mask.filter, op->mask.repeat,
914
kernel, blend, op->has_component_alpha, (int)op->dst.format));
916
sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
917
op->mask.filter, op->mask.repeat,
919
bp = gen4_get_blend(blend, op->has_component_alpha, op->dst.format);
921
DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
922
key = sp | (uint32_t)bp << 16;
923
if (key == sna->render_state.gen4.last_pipelined_pointers)
926
OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | 5);
927
OUT_BATCH(sna->render_state.gen4.vs);
928
OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */
929
OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */
930
OUT_BATCH(sna->render_state.gen4.sf);
931
OUT_BATCH(sna->render_state.gen4.wm + sp);
932
OUT_BATCH(sna->render_state.gen4.cc + bp);
934
sna->render_state.gen4.last_pipelined_pointers = key;
939
gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
941
uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
942
uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
944
assert(!too_large(abs(op->dst.x), abs(op->dst.y)));
945
assert(!too_large(op->dst.width, op->dst.height));
947
if (sna->render_state.gen4.drawrect_limit == limit &&
948
sna->render_state.gen4.drawrect_offset == offset)
951
sna->render_state.gen4.drawrect_offset = offset;
952
sna->render_state.gen4.drawrect_limit = limit;
954
OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
962
gen4_emit_vertex_elements(struct sna *sna,
963
const struct sna_composite_op *op)
966
* vertex data in vertex buffer
968
* texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
969
* texture coordinate 1 if (has_mask is true): same as above
971
struct gen4_render_state *render = &sna->render_state.gen4;
972
uint32_t src_format, dw;
973
int id = op->u.gen4.ve_id;
975
if (render->ve_id == id)
980
* dword 0-3: position (x, y, 1.0, 1.0),
981
* dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0)
982
* [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0)
984
OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + 2) - 1));
987
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
988
GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
989
0 << VE0_OFFSET_SHIFT);
990
OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
991
VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
992
VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
993
VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
994
(1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
998
DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
999
dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
1004
src_format = GEN4_SURFACEFORMAT_R16G16_SSCALED;
1005
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
1006
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
1007
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
1010
src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
1011
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
1012
dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
1013
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
1016
src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
1017
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
1018
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
1019
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
1022
src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
1023
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
1024
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
1025
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
1028
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
1029
src_format << VE0_FORMAT_SHIFT |
1030
4 << VE0_OFFSET_SHIFT);
1031
OUT_BATCH(dw | 8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
1035
unsigned src_offset = 4 + ((id & 3) ?: 1) * sizeof(float);
1036
DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__,
1037
id >> 2, src_offset));
1038
dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
1041
src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
1042
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
1043
dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
1044
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
1049
src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
1050
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
1051
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
1052
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
1055
src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
1056
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
1057
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
1058
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
1061
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
1062
src_format << VE0_FORMAT_SHIFT |
1063
src_offset << VE0_OFFSET_SHIFT);
1064
OUT_BATCH(dw | 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
1066
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
1067
GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
1068
0 << VE0_OFFSET_SHIFT);
1069
OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
1070
VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
1071
VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
1072
VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
1073
12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
1078
gen4_emit_state(struct sna *sna,
1079
const struct sna_composite_op *op,
1080
uint16_t wm_binding_table)
1084
assert(op->dst.bo->exec);
1086
flush = wm_binding_table & 1;
1087
wm_binding_table &= ~1;
1089
if (ALWAYS_FLUSH || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
1090
DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__,
1091
kgem_bo_is_dirty(op->src.bo),
1092
kgem_bo_is_dirty(op->mask.bo),
1094
gen4_emit_pipe_invalidate(sna);
1095
kgem_clear_dirty(&sna->kgem);
1096
kgem_bo_mark_dirty(op->dst.bo);
1099
flush &= gen4_emit_drawing_rectangle(sna, op);
1100
if (flush && op->op > PictOpSrc)
1101
gen4_emit_pipe_flush(sna);
1103
gen4_emit_binding_table(sna, wm_binding_table);
1104
gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel);
1105
gen4_emit_vertex_elements(sna, op);
1109
gen4_bind_surfaces(struct sna *sna,
1110
const struct sna_composite_op *op)
1112
uint32_t *binding_table;
1113
uint16_t offset, dirty;
1115
gen4_get_batch(sna, op);
1116
dirty = kgem_bo_is_dirty(op->dst.bo);
1118
binding_table = gen4_composite_get_binding_table(sna, &offset);
1122
op->dst.bo, op->dst.width, op->dst.height,
1123
gen4_get_dest_format(op->dst.format),
1127
op->src.bo, op->src.width, op->src.height,
1128
op->src.card_format,
1131
assert(op->u.gen4.ve_id >> 2);
1137
op->mask.card_format,
1141
if (sna->kgem.surface == offset &&
1142
*(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table &&
1143
(op->mask.bo == NULL ||
1144
sna->kgem.batch[sna->render_state.gen4.surface_table+2] == binding_table[2])) {
1145
sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
1146
offset = sna->render_state.gen4.surface_table;
1149
if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0])
1152
gen4_emit_state(sna, op, offset | dirty);
1155
fastcall static void
1156
gen4_render_composite_blt(struct sna *sna,
1157
const struct sna_composite_op *op,
1158
const struct sna_composite_rectangles *r)
1160
DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
1162
r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
1163
r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
1164
r->dst.x, r->dst.y, op->dst.x, op->dst.y,
1165
r->width, r->height));
1167
gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
1168
op->prim_emit(sna, op, r);
1171
fastcall static void
1172
gen4_render_composite_box(struct sna *sna,
1173
const struct sna_composite_op *op,
1176
struct sna_composite_rectangles r;
1178
DBG((" %s: (%d, %d), (%d, %d)\n",
1180
box->x1, box->y1, box->x2, box->y2));
1182
gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
1186
r.width = box->x2 - box->x1;
1187
r.height = box->y2 - box->y1;
1188
r.mask = r.src = r.dst;
1190
op->prim_emit(sna, op, &r);
1194
gen4_render_composite_boxes__blt(struct sna *sna,
1195
const struct sna_composite_op *op,
1196
const BoxRec *box, int nbox)
1198
DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
1199
__FUNCTION__, nbox, op->dst.x, op->dst.y,
1200
op->src.offset[0], op->src.offset[1],
1201
op->src.width, op->src.height,
1202
op->mask.offset[0], op->mask.offset[1],
1203
op->mask.width, op->mask.height));
1208
nbox_this_time = gen4_get_rectangles(sna, op, nbox,
1209
gen4_bind_surfaces);
1210
nbox -= nbox_this_time;
1213
struct sna_composite_rectangles r;
1215
DBG((" %s: (%d, %d), (%d, %d)\n",
1217
box->x1, box->y1, box->x2, box->y2));
1221
r.width = box->x2 - box->x1;
1222
r.height = box->y2 - box->y1;
1223
r.mask = r.src = r.dst;
1224
op->prim_emit(sna, op, &r);
1226
} while (--nbox_this_time);
1231
gen4_render_composite_boxes(struct sna *sna,
1232
const struct sna_composite_op *op,
1233
const BoxRec *box, int nbox)
1235
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1241
nbox_this_time = gen4_get_rectangles(sna, op, nbox,
1242
gen4_bind_surfaces);
1243
assert(nbox_this_time);
1244
nbox -= nbox_this_time;
1246
v = sna->render.vertices + sna->render.vertex_used;
1247
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
1249
op->emit_boxes(op, box, nbox_this_time, v);
1250
box += nbox_this_time;
1256
gen4_render_composite_boxes__thread(struct sna *sna,
1257
const struct sna_composite_op *op,
1258
const BoxRec *box, int nbox)
1260
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1262
sna_vertex_lock(&sna->render);
1267
nbox_this_time = gen4_get_rectangles(sna, op, nbox,
1268
gen4_bind_surfaces);
1269
assert(nbox_this_time);
1270
nbox -= nbox_this_time;
1272
v = sna->render.vertices + sna->render.vertex_used;
1273
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
1275
sna_vertex_acquire__locked(&sna->render);
1276
sna_vertex_unlock(&sna->render);
1278
op->emit_boxes(op, box, nbox_this_time, v);
1279
box += nbox_this_time;
1281
sna_vertex_lock(&sna->render);
1282
sna_vertex_release__locked(&sna->render);
1284
sna_vertex_unlock(&sna->render);
1289
#define MAX(a,b) ((a) > (b) ? (a) : (b))
1292
static uint32_t gen4_bind_video_source(struct sna *sna,
1293
struct kgem_bo *src_bo,
1294
uint32_t src_offset,
1298
uint32_t src_surf_format)
1300
struct gen4_surface_state *ss;
1302
sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
1304
ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
1305
ss->ss0.surface_type = GEN4_SURFACE_2D;
1306
ss->ss0.surface_format = src_surf_format;
1307
ss->ss0.color_blend = 1;
1310
kgem_add_reloc(&sna->kgem,
1311
sna->kgem.surface + 1,
1313
I915_GEM_DOMAIN_SAMPLER << 16,
1316
ss->ss2.width = src_width - 1;
1317
ss->ss2.height = src_height - 1;
1318
ss->ss3.pitch = src_pitch - 1;
1320
return sna->kgem.surface * sizeof(uint32_t);
1323
static void gen4_video_bind_surfaces(struct sna *sna,
1324
const struct sna_composite_op *op)
1326
struct sna_video_frame *frame = op->priv;
1327
uint32_t src_surf_format;
1328
uint32_t src_surf_base[6];
1332
uint32_t *binding_table;
1333
uint16_t offset, dirty;
1336
src_surf_base[0] = 0;
1337
src_surf_base[1] = 0;
1338
src_surf_base[2] = frame->VBufOffset;
1339
src_surf_base[3] = frame->VBufOffset;
1340
src_surf_base[4] = frame->UBufOffset;
1341
src_surf_base[5] = frame->UBufOffset;
1343
if (is_planar_fourcc(frame->id)) {
1344
src_surf_format = GEN4_SURFACEFORMAT_R8_UNORM;
1345
src_width[1] = src_width[0] = frame->width;
1346
src_height[1] = src_height[0] = frame->height;
1347
src_pitch[1] = src_pitch[0] = frame->pitch[1];
1348
src_width[4] = src_width[5] = src_width[2] = src_width[3] =
1350
src_height[4] = src_height[5] = src_height[2] = src_height[3] =
1352
src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
1356
if (frame->id == FOURCC_UYVY)
1357
src_surf_format = GEN4_SURFACEFORMAT_YCRCB_SWAPY;
1359
src_surf_format = GEN4_SURFACEFORMAT_YCRCB_NORMAL;
1361
src_width[0] = frame->width;
1362
src_height[0] = frame->height;
1363
src_pitch[0] = frame->pitch[0];
1367
gen4_get_batch(sna, op);
1368
dirty = kgem_bo_is_dirty(op->dst.bo);
1370
binding_table = gen4_composite_get_binding_table(sna, &offset);
1373
op->dst.bo, op->dst.width, op->dst.height,
1374
gen4_get_dest_format(op->dst.format),
1376
for (n = 0; n < n_src; n++) {
1377
binding_table[1+n] =
1378
gen4_bind_video_source(sna,
1387
if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0])
1390
gen4_emit_state(sna, op, offset | dirty);
1394
gen4_render_video(struct sna *sna,
1395
struct sna_video *video,
1396
struct sna_video_frame *frame,
1397
RegionPtr dstRegion,
1400
struct sna_composite_op tmp;
1401
struct sna_pixmap *priv = sna_pixmap(pixmap);
1402
int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
1403
int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
1404
int src_width = frame->src.x2 - frame->src.x1;
1405
int src_height = frame->src.y2 - frame->src.y1;
1406
float src_offset_x, src_offset_y;
1407
float src_scale_x, src_scale_y;
1411
DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
1412
src_width, src_height, dst_width, dst_height));
1414
assert(priv->gpu_bo);
1415
memset(&tmp, 0, sizeof(tmp));
1418
tmp.dst.pixmap = pixmap;
1419
tmp.dst.width = pixmap->drawable.width;
1420
tmp.dst.height = pixmap->drawable.height;
1421
tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
1422
tmp.dst.bo = priv->gpu_bo;
1424
if (src_width == dst_width && src_height == dst_height)
1425
tmp.src.filter = SAMPLER_FILTER_NEAREST;
1427
tmp.src.filter = SAMPLER_FILTER_BILINEAR;
1428
tmp.src.repeat = SAMPLER_EXTEND_PAD;
1429
tmp.src.bo = frame->bo;
1431
tmp.u.gen4.wm_kernel =
1432
is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
1433
tmp.u.gen4.ve_id = 2;
1434
tmp.is_affine = true;
1435
tmp.floats_per_vertex = 3;
1436
tmp.floats_per_rect = 9;
1439
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
1440
kgem_submit(&sna->kgem);
1441
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
1445
gen4_align_vertex(sna, &tmp);
1446
gen4_video_bind_surfaces(sna, &tmp);
1448
src_scale_x = (float)src_width / dst_width / frame->width;
1449
src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
1451
src_scale_y = (float)src_height / dst_height / frame->height;
1452
src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
1454
box = region_rects(dstRegion);
1455
nbox = region_num_rects(dstRegion);
1459
n = gen4_get_rectangles(sna, &tmp, nbox,
1460
gen4_video_bind_surfaces);
1465
OUT_VERTEX(box->x2, box->y2);
1466
OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
1467
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
1469
OUT_VERTEX(box->x1, box->y2);
1470
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
1471
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
1473
OUT_VERTEX(box->x1, box->y1);
1474
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
1475
OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
1480
gen4_vertex_flush(sna);
1482
if (!DAMAGE_IS_ALL(priv->gpu_damage))
1483
sna_damage_add(&priv->gpu_damage, dstRegion);
1489
gen4_composite_picture(struct sna *sna,
1491
struct sna_composite_channel *channel,
1494
int dst_x, int dst_y,
1501
DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
1502
__FUNCTION__, x, y, w, h, dst_x, dst_y));
1504
channel->is_solid = false;
1505
channel->card_format = -1;
1507
if (sna_picture_is_solid(picture, &color))
1508
return gen4_channel_init_solid(sna, channel, color);
1510
if (picture->pDrawable == NULL) {
1513
if (picture->pSourcePict->type == SourcePictTypeLinear)
1514
return gen4_channel_init_linear(sna, picture, channel,
1519
DBG(("%s -- fixup, gradient\n", __FUNCTION__));
1522
ret = sna_render_picture_approximate_gradient(sna, picture, channel,
1523
x, y, w, h, dst_x, dst_y);
1525
ret = sna_render_picture_fixup(sna, picture, channel,
1526
x, y, w, h, dst_x, dst_y);
1530
if (picture->alphaMap) {
1531
DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
1532
return sna_render_picture_fixup(sna, picture, channel,
1533
x, y, w, h, dst_x, dst_y);
1536
if (!gen4_check_repeat(picture)) {
1537
DBG(("%s: unknown repeat mode fixup\n", __FUNCTION__));
1538
return sna_render_picture_fixup(sna, picture, channel,
1539
x, y, w, h, dst_x, dst_y);
1542
if (!gen4_check_filter(picture)) {
1543
DBG(("%s: unhandled filter fixup\n", __FUNCTION__));
1544
return sna_render_picture_fixup(sna, picture, channel,
1545
x, y, w, h, dst_x, dst_y);
1548
channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
1549
channel->filter = picture->filter;
1551
pixmap = get_drawable_pixmap(picture->pDrawable);
1552
get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
1554
x += dx + picture->pDrawable->x;
1555
y += dy + picture->pDrawable->y;
1557
channel->is_affine = sna_transform_is_affine(picture->transform);
1558
if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
1559
DBG(("%s: integer translation (%d, %d), removing\n",
1560
__FUNCTION__, dx, dy));
1563
channel->transform = NULL;
1564
channel->filter = PictFilterNearest;
1566
if (channel->repeat &&
1569
x + w <= pixmap->drawable.width &&
1570
y + h <= pixmap->drawable.height)) {
1571
struct sna_pixmap *priv = sna_pixmap(pixmap);
1572
if (priv && priv->clear) {
1573
DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
1574
return gen4_channel_init_solid(sna, channel,
1575
solid_color(picture->format,
1576
priv->clear_color));
1580
channel->transform = picture->transform;
1582
channel->pict_format = picture->format;
1583
channel->card_format = gen4_get_card_format(picture->format);
1584
if (channel->card_format == -1)
1585
return sna_render_picture_convert(sna, picture, channel, pixmap,
1586
x, y, w, h, dst_x, dst_y,
1589
if (too_large(pixmap->drawable.width, pixmap->drawable.height))
1590
return sna_render_picture_extract(sna, picture, channel,
1591
x, y, w, h, dst_x, dst_y);
1593
return sna_render_pixmap_bo(sna, channel, pixmap,
1594
x, y, w, h, dst_x, dst_y);
1597
static void gen4_composite_channel_convert(struct sna_composite_channel *channel)
1599
DBG(("%s: repeat %d -> %d, filter %d -> %d\n",
1601
channel->repeat, gen4_repeat(channel->repeat),
1602
channel->filter, gen4_repeat(channel->filter)));
1603
channel->repeat = gen4_repeat(channel->repeat);
1604
channel->filter = gen4_filter(channel->filter);
1605
if (channel->card_format == (unsigned)-1)
1606
channel->card_format = gen4_get_card_format(channel->pict_format);
1610
gen4_render_composite_done(struct sna *sna,
1611
const struct sna_composite_op *op)
1613
DBG(("%s()\n", __FUNCTION__));
1615
if (sna->render.vertex_offset) {
1616
gen4_vertex_flush(sna);
1617
gen4_magic_ca_pass(sna, op);
1621
kgem_bo_destroy(&sna->kgem, op->mask.bo);
1623
kgem_bo_destroy(&sna->kgem, op->src.bo);
1625
sna_render_composite_redirect_done(sna, op);
1629
gen4_composite_set_target(struct sna *sna,
1630
struct sna_composite_op *op,
1632
int x, int y, int w, int h,
1638
op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
1639
op->dst.width = op->dst.pixmap->drawable.width;
1640
op->dst.height = op->dst.pixmap->drawable.height;
1641
op->dst.format = dst->format;
1648
sna_render_picture_extents(dst, &box);
1650
hint = PREFER_GPU | FORCE_GPU | RENDER_GPU;
1652
hint |= IGNORE_DAMAGE;
1653
if (w == op->dst.width && h == op->dst.height)
1657
op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
1658
if (op->dst.bo == NULL)
1661
if (hint & REPLACES) {
1662
struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
1663
kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
1666
get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
1667
&op->dst.x, &op->dst.y);
1669
DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
1671
op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
1672
op->dst.width, op->dst.height,
1674
op->dst.x, op->dst.y,
1675
op->damage ? *op->damage : (void *)-1));
1677
assert(op->dst.bo->proxy == NULL);
1679
if (too_large(op->dst.width, op->dst.height) &&
1680
!sna_render_composite_redirect(sna, op, x, y, w, h, partial))
1687
check_gradient(PicturePtr picture, bool precise)
1689
switch (picture->pSourcePict->type) {
1690
case SourcePictTypeSolidFill:
1691
case SourcePictTypeLinear:
1699
has_alphamap(PicturePtr p)
1701
return p->alphaMap != NULL;
1705
need_upload(struct sna *sna, PicturePtr p)
1707
return p->pDrawable && untransformed(p) &&
1708
!is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER);
1712
source_is_busy(PixmapPtr pixmap)
1714
struct sna_pixmap *priv = sna_pixmap(pixmap);
1721
if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
1724
if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
1727
return priv->gpu_damage && !priv->cpu_damage;
1731
source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise)
1733
if (sna_picture_is_solid(p, NULL))
1737
return check_gradient(p, precise);
1739
if (!gen4_check_repeat(p) || !gen4_check_format(p->format))
1742
/* soft errors: perfer to upload/compute rather than readback */
1743
if (pixmap && source_is_busy(pixmap))
1746
return has_alphamap(p) || !gen4_check_filter(p) || need_upload(sna, p);
1750
gen4_composite_fallback(struct sna *sna,
1755
PixmapPtr src_pixmap;
1756
PixmapPtr mask_pixmap;
1757
PixmapPtr dst_pixmap;
1758
bool src_fallback, mask_fallback;
1760
if (!gen4_check_dst_format(dst->format)) {
1761
DBG(("%s: unknown destination format: %d\n",
1762
__FUNCTION__, dst->format));
1766
dst_pixmap = get_drawable_pixmap(dst->pDrawable);
1768
src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
1769
src_fallback = source_fallback(sna, src, src_pixmap,
1770
dst->polyMode == PolyModePrecise);
1773
mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
1774
mask_fallback = source_fallback(sna, mask, mask_pixmap,
1775
dst->polyMode == PolyModePrecise);
1778
mask_fallback = false;
1781
/* If we are using the destination as a source and need to
1782
* readback in order to upload the source, do it all
1785
if (src_pixmap == dst_pixmap && src_fallback) {
1786
DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
1789
if (mask_pixmap == dst_pixmap && mask_fallback) {
1790
DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
1794
/* If anything is on the GPU, push everything out to the GPU */
1795
if (dst_use_gpu(dst_pixmap)) {
1796
DBG(("%s: dst is already on the GPU, try to use GPU\n",
1801
if (src_pixmap && !src_fallback) {
1802
DBG(("%s: src is already on the GPU, try to use GPU\n",
1806
if (mask_pixmap && !mask_fallback) {
1807
DBG(("%s: mask is already on the GPU, try to use GPU\n",
1812
/* However if the dst is not on the GPU and we need to
1813
* render one of the sources using the CPU, we may
1814
* as well do the entire operation in place onthe CPU.
1817
DBG(("%s: dst is on the CPU and src will fallback\n",
1822
if (mask_fallback) {
1823
DBG(("%s: dst is on the CPU and mask will fallback\n",
1828
if (too_large(dst_pixmap->drawable.width,
1829
dst_pixmap->drawable.height) &&
1830
dst_is_cpu(dst_pixmap)) {
1831
DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
1835
DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
1837
return dst_use_cpu(dst_pixmap);
1841
reuse_source(struct sna *sna,
1842
PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
1843
PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
1847
if (src_x != msk_x || src_y != msk_y)
1851
DBG(("%s: mask is source\n", __FUNCTION__));
1853
mc->bo = kgem_bo_reference(mc->bo);
1857
if (sna_picture_is_solid(mask, &color))
1858
return gen4_channel_init_solid(sna, mc, color);
1863
if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
1866
DBG(("%s: mask reuses source drawable\n", __FUNCTION__));
1868
if (!sna_transform_equal(src->transform, mask->transform))
1871
if (!sna_picture_alphamap_equal(src, mask))
1874
if (!gen4_check_repeat(mask))
1877
if (!gen4_check_filter(mask))
1880
if (!gen4_check_format(mask->format))
1883
DBG(("%s: reusing source channel for mask with a twist\n",
1887
mc->repeat = gen4_repeat(mask->repeat ? mask->repeatType : RepeatNone);
1888
mc->filter = gen4_filter(mask->filter);
1889
mc->pict_format = mask->format;
1890
mc->card_format = gen4_get_card_format(mask->format);
1891
mc->bo = kgem_bo_reference(mc->bo);
1896
gen4_render_composite(struct sna *sna,
1901
int16_t src_x, int16_t src_y,
1902
int16_t msk_x, int16_t msk_y,
1903
int16_t dst_x, int16_t dst_y,
1904
int16_t width, int16_t height,
1906
struct sna_composite_op *tmp)
1908
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
1909
width, height, sna->kgem.mode));
1911
if (op >= ARRAY_SIZE(gen4_blend_op))
1915
sna_blt_composite(sna, op,
1923
if (gen4_composite_fallback(sna, src, mask, dst))
1926
if (need_tiling(sna, width, height))
1927
return sna_tiling_composite(op, src, mask, dst,
1934
if (!gen4_composite_set_target(sna, tmp, dst,
1935
dst_x, dst_y, width, height,
1936
flags & COMPOSITE_PARTIAL || op > PictOpSrc)) {
1937
DBG(("%s: failed to set composite target\n", __FUNCTION__));
1942
switch (gen4_composite_picture(sna, src, &tmp->src,
1946
dst->polyMode == PolyModePrecise)) {
1948
DBG(("%s: failed to prepare source\n", __FUNCTION__));
1951
if (!gen4_channel_init_solid(sna, &tmp->src, 0))
1953
/* fall through to fixup */
1956
sna_blt_composite__convert(sna,
1957
dst_x, dst_y, width, height,
1961
gen4_composite_channel_convert(&tmp->src);
1965
tmp->is_affine = tmp->src.is_affine;
1966
tmp->has_component_alpha = false;
1967
tmp->need_magic_ca_pass = false;
1970
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
1971
tmp->has_component_alpha = true;
1973
/* Check if it's component alpha that relies on a source alpha and on
1974
* the source value. We can only get one of those into the single
1975
* source value that we get to blend with.
1977
if (gen4_blend_op[op].src_alpha &&
1978
(gen4_blend_op[op].src_blend != GEN4_BLENDFACTOR_ZERO)) {
1979
if (op != PictOpOver) {
1980
DBG(("%s -- fallback: unhandled component alpha blend\n",
1986
tmp->need_magic_ca_pass = true;
1987
tmp->op = PictOpOutReverse;
1991
if (!reuse_source(sna,
1992
src, &tmp->src, src_x, src_y,
1993
mask, &tmp->mask, msk_x, msk_y)) {
1994
switch (gen4_composite_picture(sna, mask, &tmp->mask,
1998
dst->polyMode == PolyModePrecise)) {
2000
DBG(("%s: failed to prepare mask\n", __FUNCTION__));
2003
if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
2005
/* fall through to fixup */
2007
gen4_composite_channel_convert(&tmp->mask);
2012
tmp->is_affine &= tmp->mask.is_affine;
2015
tmp->u.gen4.wm_kernel =
2016
gen4_choose_composite_kernel(tmp->op,
2017
tmp->mask.bo != NULL,
2018
tmp->has_component_alpha,
2020
tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp);
2022
tmp->blt = gen4_render_composite_blt;
2023
tmp->box = gen4_render_composite_box;
2024
tmp->boxes = gen4_render_composite_boxes__blt;
2025
if (tmp->emit_boxes) {
2026
tmp->boxes = gen4_render_composite_boxes;
2028
tmp->thread_boxes = gen4_render_composite_boxes__thread;
2031
tmp->done = gen4_render_composite_done;
2033
if (!kgem_check_bo(&sna->kgem,
2034
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
2036
kgem_submit(&sna->kgem);
2037
if (!kgem_check_bo(&sna->kgem,
2038
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
2043
gen4_align_vertex(sna, tmp);
2044
gen4_bind_surfaces(sna, tmp);
2049
kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
2050
tmp->mask.bo = NULL;
2054
kgem_bo_destroy(&sna->kgem, tmp->src.bo);
2058
if (tmp->redirect.real_bo) {
2059
kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
2060
tmp->redirect.real_bo = NULL;
2063
return (mask == NULL &&
2064
sna_blt_composite(sna, op,
2069
flags | COMPOSITE_FALLBACK, tmp));
2072
#if !NO_COMPOSITE_SPANS
2073
fastcall static void
2074
gen4_render_composite_spans_box(struct sna *sna,
2075
const struct sna_composite_spans_op *op,
2076
const BoxRec *box, float opacity)
2078
DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
2080
op->base.src.offset[0], op->base.src.offset[1],
2082
op->base.dst.x, op->base.dst.y,
2085
box->y2 - box->y1));
2087
gen4_get_rectangles(sna, &op->base, 1, gen4_bind_surfaces);
2088
op->prim_emit(sna, op, box, opacity);
2092
gen4_render_composite_spans_boxes(struct sna *sna,
2093
const struct sna_composite_spans_op *op,
2094
const BoxRec *box, int nbox,
2097
DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
2099
op->base.src.offset[0], op->base.src.offset[1],
2101
op->base.dst.x, op->base.dst.y));
2106
nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox,
2107
gen4_bind_surfaces);
2108
nbox -= nbox_this_time;
2111
DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
2114
box->y2 - box->y1));
2116
op->prim_emit(sna, op, box++, opacity);
2117
} while (--nbox_this_time);
2121
fastcall static void
2122
gen4_render_composite_spans_boxes__thread(struct sna *sna,
2123
const struct sna_composite_spans_op *op,
2124
const struct sna_opacity_box *box,
2127
DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
2129
op->base.src.offset[0], op->base.src.offset[1],
2130
op->base.dst.x, op->base.dst.y));
2133
sna_vertex_lock(&sna->render);
2138
nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox,
2139
gen4_bind_surfaces);
2140
assert(nbox_this_time);
2141
nbox -= nbox_this_time;
2143
v = sna->render.vertices + sna->render.vertex_used;
2144
sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
2146
sna_vertex_acquire__locked(&sna->render);
2147
sna_vertex_unlock(&sna->render);
2149
op->emit_boxes(op, box, nbox_this_time, v);
2150
box += nbox_this_time;
2152
sna_vertex_lock(&sna->render);
2153
sna_vertex_release__locked(&sna->render);
2155
sna_vertex_unlock(&sna->render);
2158
fastcall static void
2159
gen4_render_composite_spans_done(struct sna *sna,
2160
const struct sna_composite_spans_op *op)
2162
if (sna->render.vertex_offset)
2163
gen4_vertex_flush(sna);
2165
DBG(("%s()\n", __FUNCTION__));
2167
kgem_bo_destroy(&sna->kgem, op->base.src.bo);
2168
sna_render_composite_redirect_done(sna, &op->base);
2172
gen4_check_composite_spans(struct sna *sna,
2173
uint8_t op, PicturePtr src, PicturePtr dst,
2174
int16_t width, int16_t height,
2177
DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
2178
__FUNCTION__, op, width, height, flags));
2180
if (op >= ARRAY_SIZE(gen4_blend_op))
2183
if (gen4_composite_fallback(sna, src, NULL, dst)) {
2184
DBG(("%s: operation would fallback\n", __FUNCTION__));
2188
if (need_tiling(sna, width, height) &&
2189
!is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
2190
DBG(("%s: fallback, tiled operation not on GPU\n",
2196
return FORCE_SPANS > 0;
2198
if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
2199
struct sna_pixmap *priv;
2201
if (FORCE_NONRECTILINEAR_SPANS)
2202
return FORCE_NONRECTILINEAR_SPANS > 0;
2204
if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0)
2207
priv = sna_pixmap_from_drawable(dst->pDrawable);
2211
__kgem_bo_is_busy(&sna->kgem, priv->cpu_bo))
2214
if (flags & COMPOSITE_SPANS_INPLACE_HINT)
2217
return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
2224
gen4_render_composite_spans(struct sna *sna,
2228
int16_t src_x, int16_t src_y,
2229
int16_t dst_x, int16_t dst_y,
2230
int16_t width, int16_t height,
2232
struct sna_composite_spans_op *tmp)
2234
DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
2235
width, height, flags, sna->kgem.ring));
2237
assert(gen4_check_composite_spans(sna, op, src, dst, width, height, flags));
2239
if (need_tiling(sna, width, height)) {
2240
DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
2241
__FUNCTION__, width, height));
2242
return sna_tiling_composite_spans(op, src, dst,
2243
src_x, src_y, dst_x, dst_y,
2244
width, height, flags, tmp);
2248
if (!gen4_composite_set_target(sna, &tmp->base, dst,
2249
dst_x, dst_y, width, height, true))
2252
switch (gen4_composite_picture(sna, src, &tmp->base.src,
2256
dst->polyMode == PolyModePrecise)) {
2260
if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
2262
/* fall through to fixup */
2264
gen4_composite_channel_convert(&tmp->base.src);
2268
tmp->base.mask.bo = NULL;
2269
tmp->base.mask.filter = SAMPLER_FILTER_NEAREST;
2270
tmp->base.mask.repeat = SAMPLER_EXTEND_NONE;
2272
tmp->base.is_affine = tmp->base.src.is_affine;
2273
tmp->base.has_component_alpha = false;
2274
tmp->base.need_magic_ca_pass = false;
2276
tmp->base.u.gen4.ve_id = gen4_choose_spans_emitter(sna, tmp);
2277
tmp->base.u.gen4.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine;
2279
tmp->box = gen4_render_composite_spans_box;
2280
tmp->boxes = gen4_render_composite_spans_boxes;
2281
if (tmp->emit_boxes)
2282
tmp->thread_boxes = gen4_render_composite_spans_boxes__thread;
2283
tmp->done = gen4_render_composite_spans_done;
2285
if (!kgem_check_bo(&sna->kgem,
2286
tmp->base.dst.bo, tmp->base.src.bo,
2288
kgem_submit(&sna->kgem);
2289
if (!kgem_check_bo(&sna->kgem,
2290
tmp->base.dst.bo, tmp->base.src.bo,
2295
gen4_align_vertex(sna, &tmp->base);
2296
gen4_bind_surfaces(sna, &tmp->base);
2300
if (tmp->base.src.bo)
2301
kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
2303
if (tmp->base.redirect.real_bo)
2304
kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
2310
gen4_copy_bind_surfaces(struct sna *sna, const struct sna_composite_op *op)
2312
uint32_t *binding_table;
2313
uint16_t offset, dirty;
2315
gen4_get_batch(sna, op);
2316
dirty = kgem_bo_is_dirty(op->dst.bo);
2318
binding_table = gen4_composite_get_binding_table(sna, &offset);
2322
op->dst.bo, op->dst.width, op->dst.height,
2323
gen4_get_dest_format(op->dst.format),
2327
op->src.bo, op->src.width, op->src.height,
2328
op->src.card_format,
2331
if (sna->kgem.surface == offset &&
2332
*(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table) {
2333
sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
2334
offset = sna->render_state.gen4.surface_table;
2337
if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0])
2340
gen4_emit_state(sna, op, offset | dirty);
2344
gen4_render_copy_one(struct sna *sna,
2345
const struct sna_composite_op *op,
2350
gen4_get_rectangles(sna, op, 1, gen4_copy_bind_surfaces);
2352
OUT_VERTEX(dx+w, dy+h);
2353
OUT_VERTEX_F((sx+w)*op->src.scale[0]);
2354
OUT_VERTEX_F((sy+h)*op->src.scale[1]);
2356
OUT_VERTEX(dx, dy+h);
2357
OUT_VERTEX_F(sx*op->src.scale[0]);
2358
OUT_VERTEX_F((sy+h)*op->src.scale[1]);
2361
OUT_VERTEX_F(sx*op->src.scale[0]);
2362
OUT_VERTEX_F(sy*op->src.scale[1]);
2366
gen4_render_copy_boxes(struct sna *sna, uint8_t alu,
2367
const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
2368
const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
2369
const BoxRec *box, int n, unsigned flags)
2371
struct sna_composite_op tmp;
2373
DBG(("%s x %d\n", __FUNCTION__, n));
2375
if (sna_blt_compare_depth(src, dst) &&
2376
sna_blt_copy_boxes(sna, alu,
2377
src_bo, src_dx, src_dy,
2378
dst_bo, dst_dx, dst_dy,
2383
if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) {
2385
if (!sna_blt_compare_depth(src, dst))
2388
return sna_blt_copy_boxes_fallback(sna, alu,
2389
src, src_bo, src_dx, src_dy,
2390
dst, dst_bo, dst_dx, dst_dy,
2394
memset(&tmp, 0, sizeof(tmp));
2396
DBG(("%s (%d, %d)->(%d, %d) x %d\n",
2397
__FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
2399
if (dst->depth == src->depth) {
2400
tmp.dst.format = sna_render_format_for_depth(dst->depth);
2401
tmp.src.pict_format = tmp.dst.format;
2403
tmp.dst.format = sna_format_for_depth(dst->depth);
2404
tmp.src.pict_format = sna_format_for_depth(src->depth);
2406
if (!gen4_check_format(tmp.src.pict_format))
2409
tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
2411
tmp.dst.pixmap = (PixmapPtr)dst;
2412
tmp.dst.width = dst->width;
2413
tmp.dst.height = dst->height;
2414
tmp.dst.x = tmp.dst.y = 0;
2415
tmp.dst.bo = dst_bo;
2418
sna_render_composite_redirect_init(&tmp);
2419
if (too_large(tmp.dst.width, tmp.dst.height)) {
2420
BoxRec extents = box[0];
2423
for (i = 1; i < n; i++) {
2424
if (box[i].x1 < extents.x1)
2425
extents.x1 = box[i].x1;
2426
if (box[i].y1 < extents.y1)
2427
extents.y1 = box[i].y1;
2429
if (box[i].x2 > extents.x2)
2430
extents.x2 = box[i].x2;
2431
if (box[i].y2 > extents.y2)
2432
extents.y2 = box[i].y2;
2434
if (!sna_render_composite_redirect(sna, &tmp,
2435
extents.x1 + dst_dx,
2436
extents.y1 + dst_dy,
2437
extents.x2 - extents.x1,
2438
extents.y2 - extents.y1,
2440
goto fallback_tiled;
2443
tmp.src.filter = SAMPLER_FILTER_NEAREST;
2444
tmp.src.repeat = SAMPLER_EXTEND_NONE;
2445
tmp.src.card_format = gen4_get_card_format(tmp.src.pict_format);
2446
if (too_large(src->width, src->height)) {
2447
BoxRec extents = box[0];
2450
for (i = 1; i < n; i++) {
2451
if (box[i].x1 < extents.x1)
2452
extents.x1 = box[i].x1;
2453
if (box[i].y1 < extents.y1)
2454
extents.y1 = box[i].y1;
2456
if (box[i].x2 > extents.x2)
2457
extents.x2 = box[i].x2;
2458
if (box[i].y2 > extents.y2)
2459
extents.y2 = box[i].y2;
2462
if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
2463
extents.x1 + src_dx,
2464
extents.y1 + src_dy,
2465
extents.x2 - extents.x1,
2466
extents.y2 - extents.y1))
2467
goto fallback_tiled_dst;
2469
tmp.src.bo = kgem_bo_reference(src_bo);
2470
tmp.src.width = src->width;
2471
tmp.src.height = src->height;
2472
tmp.src.offset[0] = tmp.src.offset[1] = 0;
2473
tmp.src.scale[0] = 1.f/src->width;
2474
tmp.src.scale[1] = 1.f/src->height;
2477
tmp.is_affine = true;
2478
tmp.floats_per_vertex = 3;
2479
tmp.floats_per_rect = 9;
2480
tmp.u.gen4.wm_kernel = WM_KERNEL;
2481
tmp.u.gen4.ve_id = 2;
2483
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
2484
kgem_submit(&sna->kgem);
2485
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
2486
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2487
if (tmp.redirect.real_bo)
2488
kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
2494
dst_dx += tmp.dst.x;
2495
dst_dy += tmp.dst.y;
2496
tmp.dst.x = tmp.dst.y = 0;
2498
src_dx += tmp.src.offset[0];
2499
src_dy += tmp.src.offset[1];
2501
gen4_align_vertex(sna, &tmp);
2502
gen4_copy_bind_surfaces(sna, &tmp);
2505
gen4_render_copy_one(sna, &tmp,
2506
box->x1 + src_dx, box->y1 + src_dy,
2507
box->x2 - box->x1, box->y2 - box->y1,
2508
box->x1 + dst_dx, box->y1 + dst_dy);
2512
gen4_vertex_flush(sna);
2513
sna_render_composite_redirect_done(sna, &tmp);
2514
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2518
if (tmp.redirect.real_bo)
2519
kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
2521
if (sna_blt_compare_depth(src, dst) &&
2522
sna_blt_copy_boxes(sna, alu,
2523
src_bo, src_dx, src_dy,
2524
dst_bo, dst_dx, dst_dy,
2529
return sna_tiling_copy_boxes(sna, alu,
2530
src, src_bo, src_dx, src_dy,
2531
dst, dst_bo, dst_dx, dst_dy,
2536
gen4_render_copy_blt(struct sna *sna,
2537
const struct sna_copy_op *op,
2538
int16_t sx, int16_t sy,
2539
int16_t w, int16_t h,
2540
int16_t dx, int16_t dy)
2542
gen4_render_copy_one(sna, &op->base, sx, sy, w, h, dx, dy);
2546
gen4_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
2548
if (sna->render.vertex_offset)
2549
gen4_vertex_flush(sna);
2553
gen4_render_copy(struct sna *sna, uint8_t alu,
2554
PixmapPtr src, struct kgem_bo *src_bo,
2555
PixmapPtr dst, struct kgem_bo *dst_bo,
2556
struct sna_copy_op *op)
2558
DBG(("%s: src=%ld, dst=%ld, alu=%d\n",
2560
src->drawable.serialNumber,
2561
dst->drawable.serialNumber,
2564
if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2565
sna_blt_copy(sna, alu,
2567
dst->drawable.bitsPerPixel,
2571
if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
2572
too_large(src->drawable.width, src->drawable.height) ||
2573
too_large(dst->drawable.width, dst->drawable.height)) {
2575
if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
2578
return sna_blt_copy(sna, alu, src_bo, dst_bo,
2579
dst->drawable.bitsPerPixel,
2583
if (dst->drawable.depth == src->drawable.depth) {
2584
op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
2585
op->base.src.pict_format = op->base.dst.format;
2587
op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
2588
op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
2590
if (!gen4_check_format(op->base.src.pict_format))
2593
op->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;
2595
op->base.dst.pixmap = dst;
2596
op->base.dst.width = dst->drawable.width;
2597
op->base.dst.height = dst->drawable.height;
2598
op->base.dst.bo = dst_bo;
2600
op->base.src.bo = src_bo;
2601
op->base.src.card_format =
2602
gen4_get_card_format(op->base.src.pict_format);
2603
op->base.src.width = src->drawable.width;
2604
op->base.src.height = src->drawable.height;
2605
op->base.src.scale[0] = 1.f/src->drawable.width;
2606
op->base.src.scale[1] = 1.f/src->drawable.height;
2607
op->base.src.filter = SAMPLER_FILTER_NEAREST;
2608
op->base.src.repeat = SAMPLER_EXTEND_NONE;
2610
op->base.is_affine = true;
2611
op->base.floats_per_vertex = 3;
2612
op->base.floats_per_rect = 9;
2613
op->base.u.gen4.wm_kernel = WM_KERNEL;
2614
op->base.u.gen4.ve_id = 2;
2616
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
2617
kgem_submit(&sna->kgem);
2618
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
2622
if (kgem_bo_is_dirty(src_bo)) {
2623
if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2624
sna_blt_copy(sna, alu,
2626
dst->drawable.bitsPerPixel,
2631
gen4_align_vertex(sna, &op->base);
2632
gen4_copy_bind_surfaces(sna, &op->base);
2634
op->blt = gen4_render_copy_blt;
2635
op->done = gen4_render_copy_done;
2640
gen4_render_fill_rectangle(struct sna *sna,
2641
const struct sna_composite_op *op,
2642
int x, int y, int w, int h)
2644
gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
2646
OUT_VERTEX(x+w, y+h);
2657
gen4_render_fill_boxes(struct sna *sna,
2660
const xRenderColor *color,
2661
const DrawableRec *dst, struct kgem_bo *dst_bo,
2662
const BoxRec *box, int n)
2664
struct sna_composite_op tmp;
2667
if (op >= ARRAY_SIZE(gen4_blend_op)) {
2668
DBG(("%s: fallback due to unhandled blend op: %d\n",
2673
if (op <= PictOpSrc) {
2674
uint8_t alu = GXinvalid;
2677
if (op == PictOpClear)
2679
else if (sna_get_pixel_from_rgba(&pixel,
2687
if (alu != GXinvalid &&
2688
sna_blt_fill_boxes(sna, alu,
2689
dst_bo, dst->bitsPerPixel,
2693
if (!gen4_check_dst_format(format))
2696
if (too_large(dst->width, dst->height))
2697
return sna_tiling_fill_boxes(sna, op, format, color,
2698
dst, dst_bo, box, n);
2701
if (op == PictOpClear) {
2704
} else if (!sna_get_pixel_from_rgba(&pixel,
2712
DBG(("%s(%08x x %d)\n", __FUNCTION__, pixel, n));
2714
memset(&tmp, 0, sizeof(tmp));
2718
tmp.dst.pixmap = (PixmapPtr)dst;
2719
tmp.dst.width = dst->width;
2720
tmp.dst.height = dst->height;
2721
tmp.dst.format = format;
2722
tmp.dst.bo = dst_bo;
2724
sna_render_composite_redirect_init(&tmp);
2725
if (too_large(dst->width, dst->height)) {
2728
boxes_extents(box, n, &extents);
2729
if (!sna_render_composite_redirect(sna, &tmp,
2730
extents.x1, extents.y1,
2731
extents.x2 - extents.x1,
2732
extents.y2 - extents.y1,
2734
return sna_tiling_fill_boxes(sna, op, format, color,
2735
dst, dst_bo, box, n);
2738
gen4_channel_init_solid(sna, &tmp.src, pixel);
2740
tmp.is_affine = true;
2741
tmp.floats_per_vertex = 2;
2742
tmp.floats_per_rect = 6;
2743
tmp.u.gen4.wm_kernel = WM_KERNEL;
2744
tmp.u.gen4.ve_id = 1;
2746
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
2747
kgem_submit(&sna->kgem);
2748
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
2749
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2754
gen4_align_vertex(sna, &tmp);
2755
gen4_bind_surfaces(sna, &tmp);
2758
gen4_render_fill_rectangle(sna, &tmp,
2765
gen4_vertex_flush(sna);
2766
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2767
sna_render_composite_redirect_done(sna, &tmp);
2772
gen4_render_fill_op_blt(struct sna *sna, const struct sna_fill_op *op,
2773
int16_t x, int16_t y, int16_t w, int16_t h)
2775
gen4_render_fill_rectangle(sna, &op->base, x, y, w, h);
2778
fastcall static void
2779
gen4_render_fill_op_box(struct sna *sna,
2780
const struct sna_fill_op *op,
2783
gen4_render_fill_rectangle(sna, &op->base,
2785
box->x2-box->x1, box->y2-box->y1);
2788
fastcall static void
2789
gen4_render_fill_op_boxes(struct sna *sna,
2790
const struct sna_fill_op *op,
2795
gen4_render_fill_rectangle(sna, &op->base,
2797
box->x2-box->x1, box->y2-box->y1);
2803
gen4_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
2805
if (sna->render.vertex_offset)
2806
gen4_vertex_flush(sna);
2807
kgem_bo_destroy(&sna->kgem, op->base.src.bo);
2811
gen4_render_fill(struct sna *sna, uint8_t alu,
2812
PixmapPtr dst, struct kgem_bo *dst_bo,
2813
uint32_t color, unsigned flags,
2814
struct sna_fill_op *op)
2816
if (sna_blt_fill(sna, alu,
2817
dst_bo, dst->drawable.bitsPerPixel,
2822
if (!(alu == GXcopy || alu == GXclear) ||
2823
too_large(dst->drawable.width, dst->drawable.height))
2824
return sna_blt_fill(sna, alu,
2825
dst_bo, dst->drawable.bitsPerPixel,
2832
op->base.op = color == 0 ? PictOpClear : PictOpSrc;
2834
op->base.dst.pixmap = dst;
2835
op->base.dst.width = dst->drawable.width;
2836
op->base.dst.height = dst->drawable.height;
2837
op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
2838
op->base.dst.bo = dst_bo;
2839
op->base.dst.x = op->base.dst.y = 0;
2841
op->base.need_magic_ca_pass = 0;
2842
op->base.has_component_alpha = 0;
2844
gen4_channel_init_solid(sna, &op->base.src,
2845
sna_rgba_for_color(color,
2846
dst->drawable.depth));
2847
op->base.mask.bo = NULL;
2849
op->base.is_affine = true;
2850
op->base.floats_per_vertex = 2;
2851
op->base.floats_per_rect = 6;
2852
op->base.u.gen4.wm_kernel = WM_KERNEL;
2853
op->base.u.gen4.ve_id = 1;
2855
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
2856
kgem_submit(&sna->kgem);
2857
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
2858
kgem_bo_destroy(&sna->kgem, op->base.src.bo);
2863
gen4_align_vertex(sna, &op->base);
2864
gen4_bind_surfaces(sna, &op->base);
2866
op->blt = gen4_render_fill_op_blt;
2867
op->box = gen4_render_fill_op_box;
2868
op->boxes = gen4_render_fill_op_boxes;
2870
op->done = gen4_render_fill_op_done;
2875
gen4_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
2877
int16_t x1, int16_t y1, int16_t x2, int16_t y2,
2887
return sna_blt_fill_boxes(sna, alu,
2888
bo, dst->drawable.bitsPerPixel,
2893
gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
2895
int16_t x1, int16_t y1,
2896
int16_t x2, int16_t y2,
2899
struct sna_composite_op tmp;
2901
DBG(("%s: color=%08x\n", __FUNCTION__, color));
2903
if (gen4_render_fill_one_try_blt(sna, dst, bo, color,
2904
x1, y1, x2, y2, alu))
2907
/* Must use the BLT if we can't RENDER... */
2908
if (!(alu == GXcopy || alu == GXclear) ||
2909
too_large(dst->drawable.width, dst->drawable.height))
2915
tmp.op = color == 0 ? PictOpClear : PictOpSrc;
2917
tmp.dst.pixmap = dst;
2918
tmp.dst.width = dst->drawable.width;
2919
tmp.dst.height = dst->drawable.height;
2920
tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
2922
tmp.dst.x = tmp.dst.y = 0;
2924
gen4_channel_init_solid(sna, &tmp.src,
2925
sna_rgba_for_color(color,
2926
dst->drawable.depth));
2928
tmp.mask.filter = SAMPLER_FILTER_NEAREST;
2929
tmp.mask.repeat = SAMPLER_EXTEND_NONE;
2931
tmp.is_affine = true;
2932
tmp.floats_per_vertex = 2;
2933
tmp.floats_per_rect = 6;
2934
tmp.has_component_alpha = false;
2935
tmp.need_magic_ca_pass = false;
2937
tmp.u.gen4.wm_kernel = WM_KERNEL;
2938
tmp.u.gen4.ve_id = 1;
2940
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
2941
kgem_submit(&sna->kgem);
2942
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
2943
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2948
gen4_align_vertex(sna, &tmp);
2949
gen4_bind_surfaces(sna, &tmp);
2951
gen4_render_fill_rectangle(sna, &tmp, x1, y1, x2 - x1, y2 - y1);
2953
gen4_vertex_flush(sna);
2954
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2959
static void gen4_render_reset(struct sna *sna)
2961
sna->render_state.gen4.needs_invariant = true;
2962
sna->render_state.gen4.needs_urb = true;
2963
sna->render_state.gen4.ve_id = -1;
2964
sna->render_state.gen4.last_primitive = -1;
2965
sna->render_state.gen4.last_pipelined_pointers = -1;
2967
sna->render_state.gen4.drawrect_offset = -1;
2968
sna->render_state.gen4.drawrect_limit = -1;
2969
sna->render_state.gen4.surface_table = 0;
2971
if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
2972
DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
2976
sna->render.vertex_offset = 0;
2977
sna->render.nvertex_reloc = 0;
2978
sna->render.vb_id = 0;
2981
static void gen4_render_fini(struct sna *sna)
2983
kgem_bo_destroy(&sna->kgem, sna->render_state.gen4.general_bo);
2986
static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream)
2988
struct gen4_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32);
2990
/* Set up the vertex shader to be disabled (passthrough) */
2991
vs->thread4.nr_urb_entries = URB_VS_ENTRIES;
2992
vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
2993
vs->vs6.vs_enable = 0;
2994
vs->vs6.vert_cache_disable = 1;
2996
return sna_static_stream_offsetof(stream, vs);
2999
static uint32_t gen4_create_sf_state(struct sna_static_stream *stream,
3002
struct gen4_sf_unit_state *sf;
3004
sf = sna_static_stream_map(stream, sizeof(*sf), 32);
3006
sf->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
3007
sf->thread0.kernel_start_pointer = kernel >> 6;
3008
sf->thread3.const_urb_entry_read_length = 0; /* no const URBs */
3009
sf->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
3010
sf->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
3011
/* don't smash vertex header, read start from dw8 */
3012
sf->thread3.urb_entry_read_offset = 1;
3013
sf->thread3.dispatch_grf_start_reg = 3;
3014
sf->thread4.max_threads = GEN4_MAX_SF_THREADS - 1;
3015
sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
3016
sf->thread4.nr_urb_entries = URB_SF_ENTRIES;
3017
sf->sf5.viewport_transform = false; /* skip viewport */
3018
sf->sf6.cull_mode = GEN4_CULLMODE_NONE;
3019
sf->sf6.scissor = 0;
3020
sf->sf7.trifan_pv = 2;
3021
sf->sf6.dest_org_vbias = 0x8;
3022
sf->sf6.dest_org_hbias = 0x8;
3024
return sna_static_stream_offsetof(stream, sf);
3027
static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream,
3028
sampler_filter_t src_filter,
3029
sampler_extend_t src_extend,
3030
sampler_filter_t mask_filter,
3031
sampler_extend_t mask_extend)
3033
struct gen4_sampler_state *sampler_state;
3035
sampler_state = sna_static_stream_map(stream,
3036
sizeof(struct gen4_sampler_state) * 2,
3038
sampler_state_init(&sampler_state[0], src_filter, src_extend);
3039
sampler_state_init(&sampler_state[1], mask_filter, mask_extend);
3041
return sna_static_stream_offsetof(stream, sampler_state);
3044
static void gen4_init_wm_state(struct gen4_wm_unit_state *wm,
3050
assert((kernel & 63) == 0);
3051
wm->thread0.kernel_start_pointer = kernel >> 6;
3052
wm->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
3054
wm->thread1.single_program_flow = 0;
3056
wm->thread3.const_urb_entry_read_length = 0;
3057
wm->thread3.const_urb_entry_read_offset = 0;
3059
wm->thread3.urb_entry_read_offset = 0;
3060
wm->thread3.dispatch_grf_start_reg = 3;
3062
assert((sampler & 31) == 0);
3063
wm->wm4.sampler_state_pointer = sampler >> 5;
3064
wm->wm4.sampler_count = 1;
3066
wm->wm5.max_threads = gen >= 045 ? G4X_MAX_WM_THREADS - 1 : GEN4_MAX_WM_THREADS - 1;
3067
wm->wm5.transposed_urb_read = 0;
3068
wm->wm5.thread_dispatch_enable = 1;
3069
/* just use 16-pixel dispatch (4 subspans), don't need to change kernel
3072
wm->wm5.enable_16_pix = 1;
3073
wm->wm5.enable_8_pix = 0;
3074
wm->wm5.early_depth_test = 1;
3076
/* Each pair of attributes (src/mask coords) is two URB entries */
3078
wm->thread1.binding_table_entry_count = 3;
3079
wm->thread3.urb_entry_read_length = 4;
3081
wm->thread1.binding_table_entry_count = 2;
3082
wm->thread3.urb_entry_read_length = 2;
3086
static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream)
3088
uint8_t *ptr, *base;
3092
sna_static_stream_map(stream,
3093
GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64,
3096
for (i = 0; i < GEN4_BLENDFACTOR_COUNT; i++) {
3097
for (j = 0; j < GEN4_BLENDFACTOR_COUNT; j++) {
3098
struct gen4_cc_unit_state *state =
3099
(struct gen4_cc_unit_state *)ptr;
3101
state->cc3.blend_enable =
3102
!(j == GEN4_BLENDFACTOR_ZERO && i == GEN4_BLENDFACTOR_ONE);
3104
state->cc5.logicop_func = 0xc; /* COPY */
3105
state->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD;
3107
/* Fill in alpha blend factors same as color, for the future. */
3108
state->cc5.ia_src_blend_factor = i;
3109
state->cc5.ia_dest_blend_factor = j;
3111
state->cc6.blend_function = GEN4_BLENDFUNCTION_ADD;
3112
state->cc6.clamp_post_alpha_blend = 1;
3113
state->cc6.clamp_pre_alpha_blend = 1;
3114
state->cc6.src_blend_factor = i;
3115
state->cc6.dest_blend_factor = j;
3121
return sna_static_stream_offsetof(stream, base);
3124
static bool gen4_render_setup(struct sna *sna)
3126
struct gen4_render_state *state = &sna->render_state.gen4;
3127
struct sna_static_stream general;
3128
struct gen4_wm_unit_state_padded *wm_state;
3129
uint32_t sf, wm[KERNEL_COUNT];
3132
sna_static_stream_init(&general);
3134
/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
3135
* dumps, you know it points to zero.
3137
null_create(&general);
3139
sf = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);
3140
for (m = 0; m < KERNEL_COUNT; m++) {
3141
if (wm_kernels[m].size) {
3142
wm[m] = sna_static_stream_add(&general,
3147
wm[m] = sna_static_stream_compile_wm(sna, &general,
3153
state->vs = gen4_create_vs_unit_state(&general);
3154
state->sf = gen4_create_sf_state(&general, sf);
3156
wm_state = sna_static_stream_map(&general,
3157
sizeof(*wm_state) * KERNEL_COUNT *
3158
FILTER_COUNT * EXTEND_COUNT *
3159
FILTER_COUNT * EXTEND_COUNT,
3161
state->wm = sna_static_stream_offsetof(&general, wm_state);
3162
for (i = 0; i < FILTER_COUNT; i++) {
3163
for (j = 0; j < EXTEND_COUNT; j++) {
3164
for (k = 0; k < FILTER_COUNT; k++) {
3165
for (l = 0; l < EXTEND_COUNT; l++) {
3166
uint32_t sampler_state;
3169
gen4_create_sampler_state(&general,
3173
for (m = 0; m < KERNEL_COUNT; m++) {
3174
gen4_init_wm_state(&wm_state->state,
3176
wm_kernels[m].has_mask,
3177
wm[m], sampler_state);
3185
state->cc = gen4_create_cc_unit_state(&general);
3187
state->general_bo = sna_static_stream_fini(sna, &general);
3188
return state->general_bo != NULL;
3191
const char *gen4_render_init(struct sna *sna, const char *backend)
3193
if (!gen4_render_setup(sna))
3196
sna->kgem.retire = gen4_render_retire;
3197
sna->kgem.expire = gen4_render_expire;
3200
sna->render.composite = gen4_render_composite;
3201
sna->render.prefer_gpu |= PREFER_GPU_RENDER;
3203
#if !NO_COMPOSITE_SPANS
3204
sna->render.check_composite_spans = gen4_check_composite_spans;
3205
sna->render.composite_spans = gen4_render_composite_spans;
3207
sna->render.prefer_gpu |= PREFER_GPU_SPANS;
3211
sna->render.video = gen4_render_video;
3215
sna->render.copy_boxes = gen4_render_copy_boxes;
3218
sna->render.copy = gen4_render_copy;
3222
sna->render.fill_boxes = gen4_render_fill_boxes;
3225
sna->render.fill = gen4_render_fill;
3228
sna->render.fill_one = gen4_render_fill_one;
3231
sna->render.flush = gen4_render_flush;
3232
sna->render.reset = gen4_render_reset;
3233
sna->render.fini = gen4_render_fini;
3235
sna->render.max_3d_size = GEN4_MAX_3D_SIZE;
3236
sna->render.max_3d_pitch = 1 << 18;
3237
return sna->kgem.gen >= 045 ? "Eaglelake (gen4.5)" : "Broadwater (gen4)";