~ubuntu-branches/ubuntu/trusty/xserver-xorg-video-intel-lts-xenial/trusty-updates

« back to all changes in this revision

Viewing changes to src/sna/gen4_render.c

  • Committer: Package Import Robot
  • Author(s): Timo Aaltonen
  • Date: 2016-05-03 14:02:35 UTC
  • Revision ID: package-import@ubuntu.com-20160503140235-syaq8uojka8imy1a
Tags: upstream-2.99.917+git20160325
Import upstream version 2.99.917+git20160325

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 * Copyright © 2006,2008,2011 Intel Corporation
 
3
 * Copyright © 2007 Red Hat, Inc.
 
4
 *
 
5
 * Permission is hereby granted, free of charge, to any person obtaining a
 
6
 * copy of this software and associated documentation files (the "Software"),
 
7
 * to deal in the Software without restriction, including without limitation
 
8
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 
9
 * and/or sell copies of the Software, and to permit persons to whom the
 
10
 * Software is furnished to do so, subject to the following conditions:
 
11
 *
 
12
 * The above copyright notice and this permission notice (including the next
 
13
 * paragraph) shall be included in all copies or substantial portions of the
 
14
 * Software.
 
15
 *
 
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 
22
 * SOFTWARE.
 
23
 *
 
24
 * Authors:
 
25
 *    Wang Zhenyu <zhenyu.z.wang@sna.com>
 
26
 *    Eric Anholt <eric@anholt.net>
 
27
 *    Carl Worth <cworth@redhat.com>
 
28
 *    Keith Packard <keithp@keithp.com>
 
29
 *    Chris Wilson <chris@chris-wilson.co.uk>
 
30
 *
 
31
 */
 
32
 
 
33
#ifdef HAVE_CONFIG_H
 
34
#include "config.h"
 
35
#endif
 
36
 
 
37
#include "sna.h"
 
38
#include "sna_reg.h"
 
39
#include "sna_render.h"
 
40
#include "sna_render_inline.h"
 
41
#include "sna_video.h"
 
42
 
 
43
#include "brw/brw.h"
 
44
#include "gen4_common.h"
 
45
#include "gen4_render.h"
 
46
#include "gen4_source.h"
 
47
#include "gen4_vertex.h"
 
48
 
 
49
/* gen4 has a serious issue with its shaders that we need to flush
 
50
 * after every rectangle... So until that is resolved, prefer
 
51
 * the BLT engine.
 
52
 */
 
53
#define FORCE_SPANS 0
 
54
#define FORCE_NONRECTILINEAR_SPANS -1
 
55
#define FORCE_FLUSH 1 /* https://bugs.freedesktop.org/show_bug.cgi?id=55500 */
 
56
 
 
57
#define ALWAYS_FLUSH 1
 
58
 
 
59
#define NO_COMPOSITE 0
 
60
#define NO_COMPOSITE_SPANS 0
 
61
#define NO_COPY 0
 
62
#define NO_COPY_BOXES 0
 
63
#define NO_FILL 0
 
64
#define NO_FILL_ONE 0
 
65
#define NO_FILL_BOXES 0
 
66
#define NO_VIDEO 0
 
67
 
 
68
#define MAX_FLUSH_VERTICES 1 /* was 6, https://bugs.freedesktop.org/show_bug.cgi?id=55500 */
 
69
 
 
70
#define GEN4_GRF_BLOCKS(nreg)    ((nreg + 15) / 16 - 1)
 
71
 
 
72
/* Set up a default static partitioning of the URB, which is supposed to
 
73
 * allow anything we would want to do, at potentially lower performance.
 
74
 */
 
75
#define URB_CS_ENTRY_SIZE     1
 
76
#define URB_CS_ENTRIES        0
 
77
 
 
78
#define URB_VS_ENTRY_SIZE     1
 
79
#define URB_VS_ENTRIES        32
 
80
 
 
81
#define URB_GS_ENTRY_SIZE     0
 
82
#define URB_GS_ENTRIES        0
 
83
 
 
84
#define URB_CL_ENTRY_SIZE   0
 
85
#define URB_CL_ENTRIES      0
 
86
 
 
87
#define URB_SF_ENTRY_SIZE     2
 
88
#define URB_SF_ENTRIES        64
 
89
 
 
90
/*
 
91
 * this program computes dA/dx and dA/dy for the texture coordinates along
 
92
 * with the base texture coordinate. It was extracted from the Mesa driver
 
93
 */
 
94
 
 
95
#define SF_KERNEL_NUM_GRF 16
 
96
#define PS_KERNEL_NUM_GRF 32
 
97
 
 
98
#define GEN4_MAX_SF_THREADS 24
 
99
#define GEN4_MAX_WM_THREADS 32
 
100
#define G4X_MAX_WM_THREADS 50
 
101
 
 
102
/* Pre-assembled pixel shader for packed-YUV video: compute xy, sample the
 * frame as ARGB, convert YUV->RGB, write the result. The .g4b files are
 * generated shader binaries included as initializer data. */
static const uint32_t ps_kernel_packed_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_yuv_rgb.g4b"
#include "exa_wm_write.g4b"
};
 
109
 
 
110
/* Pre-assembled pixel shader for planar video (separate Y/U/V planes):
 * same pipeline as the packed variant but with a planar sampler stage. */
static const uint32_t ps_kernel_planar_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_planar.g4b"
#include "exa_wm_yuv_rgb.g4b"
#include "exa_wm_write.g4b"
};
 
117
 
 
118
/* NOKERNEL: entry with size 0 and `data` pointing at a brw_wm_kernel__*
 * symbol — NOTE(review): size==0 appears to mark kernels assembled at
 * runtime by the brw/ assembler rather than static binaries; confirm at
 * the point where wm_kernels[].size is consumed. */
#define NOKERNEL(kernel_enum, func, masked) \
    [kernel_enum] = {func, 0, masked}
/* KERNEL: entry backed by a static pre-assembled kernel blob. */
#define KERNEL(kernel_enum, kernel, masked) \
    [kernel_enum] = {&kernel, sizeof(kernel), masked}
/* Table of WM (pixel shader) kernels, indexed by the WM_KERNEL_* enum.
 * has_mask records whether the kernel samples a mask picture. */
static const struct wm_kernel_info {
        const void *data;
        unsigned int size;
        bool has_mask;
} wm_kernels[] = {
        NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
        NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),

        NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
        NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),

        NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
        NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),

        NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
        NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),

        NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
        NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),

        KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, false),
        KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, false),
};
#undef KERNEL
 
146
 
 
147
/* Hardware blend factors for each Render PictOp, indexed by operator.
 * src_alpha marks operators whose destination factor references source
 * alpha and therefore needs the component-alpha fixup in gen4_get_blend(). */
static const struct blendinfo {
        bool src_alpha;
        uint32_t src_blend;
        uint32_t dst_blend;
} gen4_blend_op[] = {
        /* Clear */     {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ZERO},
        /* Src */       {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ZERO},
        /* Dst */       {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ONE},
        /* Over */      {1, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
        /* OverReverse */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ONE},
        /* In */        {0, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
        /* InReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_SRC_ALPHA},
        /* Out */       {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
        /* OutReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
        /* Atop */      {1, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
        /* AtopReverse */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_SRC_ALPHA},
        /* Xor */       {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
        /* Add */       {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ONE},
};
 
166
 
 
167
/**
 
168
 * Highest-valued BLENDFACTOR used in gen4_blend_op.
 
169
 *
 
170
 * This leaves out GEN4_BLENDFACTOR_INV_DST_COLOR,
 
171
 * GEN4_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 
172
 * GEN4_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 
173
 */
 
174
#define GEN4_BLENDFACTOR_COUNT (GEN4_BLENDFACTOR_INV_DST_ALPHA + 1)
 
175
 
 
176
#define BLEND_OFFSET(s, d) \
 
177
        (((s) * GEN4_BLENDFACTOR_COUNT + (d)) * 64)
 
178
 
 
179
#define SAMPLER_OFFSET(sf, se, mf, me, k) \
 
180
        ((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
 
181
 
 
182
static void
 
183
gen4_emit_pipelined_pointers(struct sna *sna,
 
184
                             const struct sna_composite_op *op,
 
185
                             int blend, int kernel);
 
186
 
 
187
#define OUT_BATCH(v) batch_emit(sna, v)
 
188
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
 
189
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
 
190
 
 
191
#define GEN4_MAX_3D_SIZE 8192
 
192
 
 
193
static inline bool too_large(int width, int height)
 
194
{
 
195
        return width > GEN4_MAX_3D_SIZE || height > GEN4_MAX_3D_SIZE;
 
196
}
 
197
 
 
198
static int
 
199
gen4_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
 
200
{
 
201
        int base;
 
202
 
 
203
        if (has_mask) {
 
204
                if (is_ca) {
 
205
                        if (gen4_blend_op[op].src_alpha)
 
206
                                base = WM_KERNEL_MASKSA;
 
207
                        else
 
208
                                base = WM_KERNEL_MASKCA;
 
209
                } else
 
210
                        base = WM_KERNEL_MASK;
 
211
        } else
 
212
                base = WM_KERNEL;
 
213
 
 
214
        return base + !is_affine;
 
215
}
 
216
 
 
217
/* Second pass for component-alpha rendering: replay the rectangles already
 * accumulated in the vertex buffer (vertex_start..vertex_index) with a
 * PictOpAdd blend and the CA kernel variant.  Returns true if a pass was
 * emitted, false if the op does not need the fixup. */
static bool gen4_magic_ca_pass(struct sna *sna,
                               const struct sna_composite_op *op)
{
        struct gen4_render_state *state = &sna->render_state.gen4;

        if (!op->need_magic_ca_pass)
                return false;

        /* There must be vertices pending to replay. */
        assert(sna->render.vertex_index > sna->render.vertex_start);

        DBG(("%s: CA fixup\n", __FUNCTION__));
        assert(op->mask.bo != NULL);
        assert(op->has_component_alpha);

        /* Switch the pipeline to the Add blend with the CA kernel. */
        gen4_emit_pipelined_pointers(sna, op, PictOpAdd,
                                     gen4_choose_composite_kernel(PictOpAdd,
                                                                  true, true, op->is_affine));

        /* Re-issue the rectlist over the previously emitted vertex range. */
        OUT_BATCH(GEN4_3DPRIMITIVE |
                  GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
                  (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
                  (0 << 9) |
                  4);
        OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
        OUT_BATCH(sna->render.vertex_start);
        OUT_BATCH(1);   /* single instance */
        OUT_BATCH(0);   /* start instance location */
        OUT_BATCH(0);   /* index buffer offset, ignored */

        state->last_primitive = sna->kgem.nbatch;
        return true;
}
 
249
 
 
250
/* Compute the offset of the cached COLOR_CALC/blend state for the given
 * operator, after adjusting the blend factors for missing destination
 * alpha and for component-alpha sources.  The return value indexes the
 * pre-generated blend state array via BLEND_OFFSET(src, dst). */
static uint32_t gen4_get_blend(int op,
                               bool has_component_alpha,
                               uint32_t dst_format)
{
        uint32_t src, dst;

        src = gen4_blend_op[op].src_blend;
        dst = gen4_blend_op[op].dst_blend;

        /* If there's no dst alpha channel, adjust the blend op so that we'll treat
         * it as always 1.
         */
        if (PICT_FORMAT_A(dst_format) == 0) {
                if (src == GEN4_BLENDFACTOR_DST_ALPHA)
                        src = GEN4_BLENDFACTOR_ONE;
                else if (src == GEN4_BLENDFACTOR_INV_DST_ALPHA)
                        src = GEN4_BLENDFACTOR_ZERO;
        }

        /* If the source alpha is being used, then we should only be in a
         * case where the source blend factor is 0, and the source blend
         * value is the mask channels multiplied by the source picture's alpha.
         */
        if (has_component_alpha && gen4_blend_op[op].src_alpha) {
                if (dst == GEN4_BLENDFACTOR_SRC_ALPHA)
                        dst = GEN4_BLENDFACTOR_SRC_COLOR;
                else if (dst == GEN4_BLENDFACTOR_INV_SRC_ALPHA)
                        dst = GEN4_BLENDFACTOR_INV_SRC_COLOR;
        }

        DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
             op, dst_format, PICT_FORMAT_A(dst_format),
             src, dst, BLEND_OFFSET(src, dst)));
        return BLEND_OFFSET(src, dst);
}
 
285
 
 
286
/* Map a Render picture format to the gen4 sampler surface format, or
 * (uint32_t)-1 when the format has no hardware equivalent. */
static uint32_t gen4_get_card_format(PictFormat format)
{
        switch (format) {
        case PICT_a8r8g8b8:     return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
        case PICT_x8r8g8b8:     return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM;
        case PICT_a8b8g8r8:     return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
        case PICT_x8b8g8r8:     return GEN4_SURFACEFORMAT_R8G8B8X8_UNORM;
#ifdef PICT_a2r10g10b10
        case PICT_a2r10g10b10:  return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
        case PICT_x2r10g10b10:  return GEN4_SURFACEFORMAT_B10G10R10X2_UNORM;
#endif
        case PICT_r8g8b8:       return GEN4_SURFACEFORMAT_R8G8B8_UNORM;
        case PICT_r5g6b5:       return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
        case PICT_a1r5g5b5:     return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
        case PICT_a8:           return GEN4_SURFACEFORMAT_A8_UNORM;
        case PICT_a4r4g4b4:     return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
        default:                return -1;
        }
}
 
317
 
 
318
/* Map a Render picture format to the gen4 render-target surface format
 * (x-formats share the alpha-capable variant), or (uint32_t)-1 when the
 * format cannot be a destination. */
static uint32_t gen4_get_dest_format(PictFormat format)
{
        switch (format) {
        case PICT_a8r8g8b8:
        case PICT_x8r8g8b8:     return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
        case PICT_a8b8g8r8:
        case PICT_x8b8g8r8:     return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
#ifdef PICT_a2r10g10b10
        case PICT_a2r10g10b10:
        case PICT_x2r10g10b10:  return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
#endif
        case PICT_r5g6b5:       return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
        case PICT_x1r5g5b5:
        case PICT_a1r5g5b5:     return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
        case PICT_a8:           return GEN4_SURFACEFORMAT_A8_UNORM;
        case PICT_a4r4g4b4:
        case PICT_x4r4g4b4:     return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
        default:                return -1;
        }
}
 
346
 
 
347
/* True if the format is usable as a render target on gen4. */
static bool gen4_check_dst_format(PictFormat format)
{
        if (gen4_get_dest_format(format) == -1) {
                DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
                return false;
        }

        return true;
}
 
355
 
 
356
/* True if the format is usable as a sampler source on gen4. */
static bool gen4_check_format(uint32_t format)
{
        if (gen4_get_card_format(format) == -1) {
                DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
                return false;
        }

        return true;
}
 
364
 
 
365
/* Surface state entries are allocated from the batch at a fixed 32-byte
 * stride; pad the hardware struct so consecutive entries stay aligned. */
typedef struct gen4_surface_state_padded {
        struct gen4_surface_state state;
        char pad[32 - sizeof(struct gen4_surface_state)];
} gen4_surface_state_padded;
 
369
 
 
370
/* Reserve a zeroed, 64-byte-aligned 64-byte block in the static state
 * stream at stream offset 0. */
static void null_create(struct sna_static_stream *stream)
{
        /* A bunch of zeros useful for legacy border color and depth-stencil */
        sna_static_stream_map(stream, 64, 64);
}
 
375
 
 
376
static void
 
377
sampler_state_init(struct gen4_sampler_state *sampler_state,
 
378
                   sampler_filter_t filter,
 
379
                   sampler_extend_t extend)
 
380
{
 
381
        sampler_state->ss0.lod_preclamp = 1;    /* GL mode */
 
382
 
 
383
        /* We use the legacy mode to get the semantics specified by
 
384
         * the Render extension. */
 
385
        sampler_state->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY;
 
386
 
 
387
        switch (filter) {
 
388
        default:
 
389
        case SAMPLER_FILTER_NEAREST:
 
390
                sampler_state->ss0.min_filter = GEN4_MAPFILTER_NEAREST;
 
391
                sampler_state->ss0.mag_filter = GEN4_MAPFILTER_NEAREST;
 
392
                break;
 
393
        case SAMPLER_FILTER_BILINEAR:
 
394
                sampler_state->ss0.min_filter = GEN4_MAPFILTER_LINEAR;
 
395
                sampler_state->ss0.mag_filter = GEN4_MAPFILTER_LINEAR;
 
396
                break;
 
397
        }
 
398
 
 
399
        switch (extend) {
 
400
        default:
 
401
        case SAMPLER_EXTEND_NONE:
 
402
                sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
 
403
                sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
 
404
                sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
 
405
                break;
 
406
        case SAMPLER_EXTEND_REPEAT:
 
407
                sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
 
408
                sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
 
409
                sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
 
410
                break;
 
411
        case SAMPLER_EXTEND_PAD:
 
412
                sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
 
413
                sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
 
414
                sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
 
415
                break;
 
416
        case SAMPLER_EXTEND_REFLECT:
 
417
                sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
 
418
                sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
 
419
                sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
 
420
                break;
 
421
        }
 
422
}
 
423
 
 
424
/* Translate a Render filter to the internal sampler filter index. */
static uint32_t gen4_filter(uint32_t filter)
{
        if (filter == PictFilterBilinear)
                return SAMPLER_FILTER_BILINEAR;

        /* Callers must have validated via gen4_check_filter(). */
        assert(filter == PictFilterNearest);
        return SAMPLER_FILTER_NEAREST;
}
 
435
 
 
436
/* Is the picture's filter one we can program into the sampler? */
static uint32_t gen4_check_filter(PicturePtr picture)
{
        if (picture->filter == PictFilterNearest ||
            picture->filter == PictFilterBilinear)
                return true;

        DBG(("%s: unknown filter: %s [%d]\n",
             __FUNCTION__,
             PictureGetFilterName(picture->filter),
             picture->filter));
        return false;
}
 
450
 
 
451
/* Translate a Render repeat mode to the internal sampler extend index. */
static uint32_t gen4_repeat(uint32_t repeat)
{
        switch (repeat) {
        case RepeatNormal:
                return SAMPLER_EXTEND_REPEAT;
        case RepeatPad:
                return SAMPLER_EXTEND_PAD;
        case RepeatReflect:
                return SAMPLER_EXTEND_REFLECT;
        default:
                /* Callers must have validated via gen4_check_repeat(). */
                assert(0);
                /* fallthrough */
        case RepeatNone:
                return SAMPLER_EXTEND_NONE;
        }
}
 
466
 
 
467
/* Is the picture's repeat mode one we can program into the sampler?
 * Non-repeating pictures are always acceptable. */
static bool gen4_check_repeat(PicturePtr picture)
{
        if (!picture->repeat)
                return true;

        switch (picture->repeatType) {
        default:
                DBG(("%s: unknown repeat: %d\n",
                     __FUNCTION__, picture->repeatType));
                return false;
        case RepeatNone:
        case RepeatNormal:
        case RepeatPad:
        case RepeatReflect:
                return true;
        }
}
 
484
 
 
485
static uint32_t
 
486
gen4_tiling_bits(uint32_t tiling)
 
487
{
 
488
        switch (tiling) {
 
489
        default: assert(0);
 
490
        case I915_TILING_NONE: return 0;
 
491
        case I915_TILING_X: return GEN4_SURFACE_TILED;
 
492
        case I915_TILING_Y: return GEN4_SURFACE_TILED | GEN4_SURFACE_TILED_Y;
 
493
        }
 
494
}
 
495
 
 
496
/**
 
497
 * Sets up the common fields for a surface state buffer for the given
 
498
 * picture in the given surface state buffer.
 
499
 */
 
500
/* Emit (or reuse) a SURFACE_STATE entry binding `bo` as a 2D surface of
 * the given size/format.  Returns the byte offset of the entry within the
 * batch, suitable for the binding table.  is_dst selects render-target
 * setup (RC read/write, render domains) versus sampler-only setup. */
static uint32_t
gen4_bind_bo(struct sna *sna,
             struct kgem_bo *bo,
             uint32_t width,
             uint32_t height,
             uint32_t format,
             bool is_dst)
{
        uint32_t domains;
        uint16_t offset;
        uint32_t *ss;

        assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo));

        /* After the first bind, we manage the cache domains within the batch */
        offset = kgem_bo_get_binding(bo, format | is_dst << 31);
        if (offset) {
                /* Reuse the surface state emitted earlier in this batch. */
                assert(offset >= sna->kgem.surface);
                if (is_dst)
                        kgem_bo_mark_dirty(bo);
                return offset * sizeof(uint32_t);
        }

        /* Surface states grow downwards from the end of the batch. */
        offset = sna->kgem.surface -=
                sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
        ss = sna->kgem.batch + offset;

        ss[0] = (GEN4_SURFACE_2D << GEN4_SURFACE_TYPE_SHIFT |
                 GEN4_SURFACE_BLEND_ENABLED |
                 format << GEN4_SURFACE_FORMAT_SHIFT);

        if (is_dst) {
                ss[0] |= GEN4_SURFACE_RC_READ_WRITE;
                domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
        } else
                domains = I915_GEM_DOMAIN_SAMPLER << 16;
        /* Base address: patched by the kernel via this relocation. */
        ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);

        ss[2] = ((width - 1)  << GEN4_SURFACE_WIDTH_SHIFT |
                 (height - 1) << GEN4_SURFACE_HEIGHT_SHIFT);
        ss[3] = (gen4_tiling_bits(bo->tiling) |
                 (bo->pitch - 1) << GEN4_SURFACE_PITCH_SHIFT);
        ss[4] = 0;
        ss[5] = 0;

        /* Remember the binding so later binds in this batch can reuse it. */
        kgem_bo_set_binding(bo, format | is_dst << 31, offset);

        DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
             offset, bo->handle, ss[1],
             format, width, height, bo->pitch, bo->tiling,
             domains & 0xffff ? "render" : "sampler"));

        return offset * sizeof(uint32_t);
}
 
554
 
 
555
/* Emit 3DSTATE_VERTEX_BUFFERS for the op's vertex-element layout.  The
 * buffer address dwords are left zero and recorded in vertex_reloc for
 * patching once the final vbo is known at flush time. */
static void gen4_emit_vertex_buffer(struct sna *sna,
                                    const struct sna_composite_op *op)
{
        int id = op->u.gen4.ve_id;

        assert((sna->render.vb_id & (1 << id)) == 0);

        OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3);
        OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
                  (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
        /* Record where the buffer address must be patched in later. */
        assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
        sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);

        /* Mark this vertex layout as live in the current batch. */
        sna->render.vb_id |= 1 << id;
}
 
573
 
 
574
/* Flush the render (write) cache between operations; part of the gen4
 * per-rectangle flushing workaround (see FORCE_FLUSH above). */
inline static void
gen4_emit_pipe_flush(struct sna *sna)
{
#if 1
        /* PIPE_CONTROL with write-cache flush, 4 dwords total. */
        OUT_BATCH(GEN4_PIPE_CONTROL |
                  GEN4_PIPE_CONTROL_WC_FLUSH |
                  (4 - 2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
#else
        OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
#endif
}
 
588
 
 
589
/* Insert a pipeline break between rectangles.  With ALWAYS_FLUSH (the
 * current default) this is an MI_FLUSH that leaves the render cache
 * intact; otherwise a no-op PIPE_CONTROL suffices. */
inline static void
gen4_emit_pipe_break(struct sna *sna)
{
#if !ALWAYS_FLUSH
        OUT_BATCH(GEN4_PIPE_CONTROL | (4 - 2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
#else
        OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
#endif
}
 
601
 
 
602
/* Full flush/invalidate before state changes; currently a plain MI_FLUSH.
 * The disabled alternative is a PIPE_CONTROL with WC flush (and texture
 * cache flush on g4x, gen >= 045). */
inline static void
gen4_emit_pipe_invalidate(struct sna *sna)
{
#if 0
        OUT_BATCH(GEN4_PIPE_CONTROL |
                  GEN4_PIPE_CONTROL_WC_FLUSH |
                  (sna->kgem.gen >= 045 ? GEN4_PIPE_CONTROL_TC_FLUSH : 0) |
                  (4 - 2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
#else
        OUT_BATCH(MI_FLUSH);
#endif
}
 
617
 
 
618
/* Begin a 3DPRIMITIVE rectlist.  If the previous batch command was also
 * our primitive, reopen it in place (point vertex_offset back at its
 * vertex-count dword) instead of emitting a new one. */
static void gen4_emit_primitive(struct sna *sna)
{
        if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) {
                /* Reuse the still-open primitive: its count dword is 5
                 * dwords back from the end of the batch. */
                sna->render.vertex_offset = sna->kgem.nbatch - 5;
                return;
        }

        OUT_BATCH(GEN4_3DPRIMITIVE |
                  GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
                  (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
                  (0 << 9) |
                  4);
        sna->render.vertex_offset = sna->kgem.nbatch;
        OUT_BATCH(0);   /* vertex count, to be filled in later */
        OUT_BATCH(sna->render.vertex_index);
        OUT_BATCH(1);   /* single instance */
        OUT_BATCH(0);   /* start instance location */
        OUT_BATCH(0);   /* index buffer offset, ignored */
        sna->render.vertex_start = sna->render.vertex_index;

        sna->render_state.gen4.last_primitive = sna->kgem.nbatch;
}
 
640
 
 
641
static bool gen4_rectangle_begin(struct sna *sna,
 
642
                                 const struct sna_composite_op *op)
 
643
{
 
644
        unsigned int id = 1 << op->u.gen4.ve_id;
 
645
        int ndwords;
 
646
 
 
647
        if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
 
648
                return true;
 
649
 
 
650
        /* 7xpipelined pointers + 6xprimitive + 1xflush */
 
651
        ndwords = op->need_magic_ca_pass? 19 : 6;
 
652
        if ((sna->render.vb_id & id) == 0)
 
653
                ndwords += 5;
 
654
        ndwords += 8*FORCE_FLUSH;
 
655
 
 
656
        if (!kgem_check_batch(&sna->kgem, ndwords))
 
657
                return false;
 
658
 
 
659
        if ((sna->render.vb_id & id) == 0)
 
660
                gen4_emit_vertex_buffer(sna, op);
 
661
        if (sna->render.vertex_offset == 0)
 
662
                gen4_emit_primitive(sna);
 
663
 
 
664
        return true;
 
665
}
 
666
 
 
667
/* Slow path when the vertex buffer is full: finish the current vbo and
 * start a new one.  Returns the vertex space now available, or 0 if the
 * batch itself must be submitted first. */
static int gen4_get_rectangles__flush(struct sna *sna,
                                      const struct sna_composite_op *op)
{
        /* Preventing discarding new vbo after lock contention */
        if (sna_vertex_wait__locked(&sna->render)) {
                int rem = vertex_space(sna);
                if (rem > op->floats_per_rect)
                        return rem;
        }

        /* Make sure the batch can hold the close-out commands (including
         * a possible CA pass) before touching the vbo. */
        if (!kgem_check_batch(&sna->kgem,
                              8*FORCE_FLUSH + (op->need_magic_ca_pass ? 2*19+6 : 6)))
                return 0;
        if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
                return 0;

        if (sna->render.vertex_offset) {
                gen4_vertex_flush(sna);
                /* The CA pass switches pipeline state; restore it. */
                if (gen4_magic_ca_pass(sna, op))
                        gen4_emit_pipelined_pointers(sna, op, op->op,
                                                     op->u.gen4.wm_kernel);
        }

        return gen4_vertex_finish(sna);
}
 
692
 
 
693
/* Reserve vertex space for up to `want` rectangles (3 vertices each) and
 * return how many were granted.  Handles vbo exhaustion and batch
 * submission via the flush/start retry loop; emit_state replays the op's
 * full pipeline state after a batch submit.  Under FORCE_FLUSH the grant
 * is additionally capped so a pipeline break is inserted after every
 * MAX_FLUSH_VERTICES rectangles (gen4 shader flush bug, fdo #55500). */
inline static int gen4_get_rectangles(struct sna *sna,
                                      const struct sna_composite_op *op,
                                      int want,
                                      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
        int rem;

        assert(want);
#if FORCE_FLUSH
        /* Detect an open primitive (either tracked, or reopenable at the
         * end of the batch). */
        rem = sna->render.vertex_offset;
        if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive)
                rem = sna->kgem.nbatch - 5;
        if (rem) {
                /* Rectangles remaining before a flush is mandated. */
                rem = MAX_FLUSH_VERTICES - (sna->render.vertex_index - sna->render.vertex_start) / 3;
                if (rem <= 0) {
                        if (sna->render.vertex_offset) {
                                gen4_vertex_flush(sna);
                                if (gen4_magic_ca_pass(sna, op)) {
                                        if (kgem_check_batch(&sna->kgem, 19+6))
                                                gen4_emit_pipelined_pointers(sna, op, op->op,
                                                                             op->u.gen4.wm_kernel);
                                }
                        }
                        gen4_emit_pipe_break(sna);
                        rem = MAX_FLUSH_VERTICES;
                }
        } else
                rem = MAX_FLUSH_VERTICES;
        if (want > rem)
                want = rem;
#endif

start:
        rem = vertex_space(sna);
        if (unlikely(rem < op->floats_per_rect)) {
                DBG(("flushing vbo for %s: %d < %d\n",
                     __FUNCTION__, rem, op->floats_per_rect));
                rem = gen4_get_rectangles__flush(sna, op);
                if (unlikely(rem == 0))
                        goto flush;
        }

        if (unlikely(sna->render.vertex_offset == 0)) {
                if (!gen4_rectangle_begin(sna, op))
                        goto flush;
                else
                        goto start;
        }

        assert(rem <= vertex_space(sna));
        assert(op->floats_per_rect <= rem);
        /* Trim the grant to what actually fits in the vbo. */
        if (want > 1 && want * op->floats_per_rect > rem)
                want = rem / op->floats_per_rect;

        sna->render.vertex_index += 3*want;
        return want;

flush:
        /* Batch is full: close the primitive, submit, re-emit state and
         * retry from the top. */
        if (sna->render.vertex_offset) {
                gen4_vertex_flush(sna);
                gen4_magic_ca_pass(sna, op);
        }
        sna_vertex_wait__locked(&sna->render);
        _kgem_submit(&sna->kgem);
        emit_state(sna, op);
        goto start;
}
 
760
 
 
761
static uint32_t *
 
762
gen4_composite_get_binding_table(struct sna *sna, uint16_t *offset)
 
763
{
 
764
        sna->kgem.surface -=
 
765
                sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
 
766
 
 
767
        DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
 
768
 
 
769
        /* Clear all surplus entries to zero in case of prefetch */
 
770
        *offset = sna->kgem.surface;
 
771
        return memset(sna->kgem.batch + sna->kgem.surface,
 
772
                      0, sizeof(struct gen4_surface_state_padded));
 
773
}
 
774
 
 
775
/* Partition the URB (unified return buffer) between the fixed-function
 * pipeline stages.  Skipped entirely unless the layout has been marked
 * stale via render_state.gen4.needs_urb. */
static void
gen4_emit_urb(struct sna *sna)
{
        int urb_vs_end;
        int urb_gs_end;
        int urb_cl_end;
        int urb_sf_end;
        int urb_cs_end;

        if (!sna->render_state.gen4.needs_urb)
                return;

        /* Stack each stage's allocation directly on top of the previous */
        urb_vs_end =              URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
        urb_gs_end = urb_vs_end + URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
        urb_cl_end = urb_gs_end + URB_CL_ENTRIES * URB_CL_ENTRY_SIZE;
        urb_sf_end = urb_cl_end + URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
        urb_cs_end = urb_sf_end + URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
        assert(urb_cs_end <= 256);

        /* NOTE(review): pads with NOOPs so the 3-dword URB_FENCE does not
         * straddle a 64-byte boundary -- presumably a hardware restriction;
         * confirm against the gen4 PRM. */
        while ((sna->kgem.nbatch & 15) > 12)
                OUT_BATCH(MI_NOOP);

        OUT_BATCH(GEN4_URB_FENCE |
                  UF0_CS_REALLOC |
                  UF0_SF_REALLOC |
                  UF0_CLIP_REALLOC |
                  UF0_GS_REALLOC |
                  UF0_VS_REALLOC |
                  1);
        OUT_BATCH(urb_cl_end << UF1_CLIP_FENCE_SHIFT |
                  urb_gs_end << UF1_GS_FENCE_SHIFT |
                  urb_vs_end << UF1_VS_FENCE_SHIFT);
        OUT_BATCH(urb_cs_end << UF2_CS_FENCE_SHIFT |
                  urb_sf_end << UF2_SF_FENCE_SHIFT);

        /* Constant buffer state */
        OUT_BATCH(GEN4_CS_URB_STATE | 0);
        OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);

        sna->render_state.gen4.needs_urb = false;
}
 
816
 
 
817
/* Program the base addresses from which indirect state is resolved.
 * General state lives in its own bo; surface state is relative to the
 * batch itself (NULL reloc target); media is unused. */
static void
gen4_emit_state_base_address(struct sna *sna)
{
        assert(sna->render_state.gen4.general_bo->proxy == NULL);
        OUT_BATCH(GEN4_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
                                 sna->kgem.nbatch,
                                 sna->render_state.gen4.general_bo,
                                 I915_GEM_DOMAIN_INSTRUCTION << 16,
                                 BASE_ADDRESS_MODIFY));
        OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
                                 sna->kgem.nbatch,
                                 NULL,
                                 I915_GEM_DOMAIN_INSTRUCTION << 16,
                                 BASE_ADDRESS_MODIFY));
        OUT_BATCH(0); /* media */

        /* upper bounds, all disabled */
        OUT_BATCH(BASE_ADDRESS_MODIFY);
        OUT_BATCH(0);
}
 
838
 
 
839
static void
 
840
gen4_emit_invariant(struct sna *sna)
 
841
{
 
842
        assert(sna->kgem.surface == sna->kgem.batch_size);
 
843
 
 
844
        if (sna->kgem.gen >= 045)
 
845
                OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
 
846
        else
 
847
                OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D);
 
848
 
 
849
        gen4_emit_state_base_address(sna);
 
850
 
 
851
        sna->render_state.gen4.needs_invariant = false;
 
852
}
 
853
 
 
854
/* Prepare the batch for a render operation: switch to the render ring,
 * ensure there is headroom for a worst-case state emission plus binding
 * table, and (re)emit the per-batch invariant state if required. */
static void
gen4_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
        kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

        /* Reserve 150 dwords for state (plus extra slack in FORCE_FLUSH
         * debug builds) and 4 surface-state slots. */
        if (!kgem_check_batch_with_surfaces(&sna->kgem, 150 + 50*FORCE_FLUSH, 4)) {
                DBG(("%s: flushing batch: %d < %d+%d\n",
                     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
                     150, 4*8));
                kgem_submit(&sna->kgem);
                _kgem_set_mode(&sna->kgem, KGEM_RENDER);
        }

        if (sna->render_state.gen4.needs_invariant)
                gen4_emit_invariant(sna);
}
 
870
 
 
871
static void
 
872
gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op)
 
873
{
 
874
        assert(op->floats_per_rect == 3*op->floats_per_vertex);
 
875
        if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) {
 
876
                DBG(("aligning vertex: was %d, now %d floats per vertex\n",
 
877
                     sna->render_state.gen4.floats_per_vertex,
 
878
                     op->floats_per_vertex));
 
879
                gen4_vertex_align(sna, op);
 
880
                sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex;
 
881
        }
 
882
}
 
883
 
 
884
/* Point the hardware at a new binding table, skipping the emit when the
 * given table is already current. */
static void
gen4_emit_binding_table(struct sna *sna, uint16_t offset)
{
        if (sna->render_state.gen4.surface_table == offset)
                return;

        sna->render_state.gen4.surface_table = offset;

        /* Binding table pointers */
        OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | 4);
        OUT_BATCH(0);           /* vs */
        OUT_BATCH(0);           /* gs */
        OUT_BATCH(0);           /* clip */
        OUT_BATCH(0);           /* sf */
        /* Only the PS uses the binding table */
        OUT_BATCH(offset*4);    /* dword index -> byte offset */
}
 
901
 
 
902
/* Select the unit state (VS/GS/CLIP/SF/WM/CC) for this operation.  The
 * sampler-state offset and blend-state offset are folded into one key so
 * a redundant re-emit can be skipped cheaply. */
static void
gen4_emit_pipelined_pointers(struct sna *sna,
                             const struct sna_composite_op *op,
                             int blend, int kernel)
{
        uint16_t sp, bp;
        uint32_t key;

        DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d),kernel=%d, blend=%d, ca=%d, format=%x\n",
             __FUNCTION__, op->u.gen4.ve_id & 2,
             op->src.filter, op->src.repeat,
             op->mask.filter, op->mask.repeat,
             kernel, blend, op->has_component_alpha, (int)op->dst.format));

        /* sampler-state offset keyed on filter/repeat modes + wm kernel */
        sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
                            op->mask.filter, op->mask.repeat,
                            kernel);
        bp = gen4_get_blend(blend, op->has_component_alpha, op->dst.format);

        DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
        key = sp | (uint32_t)bp << 16;
        if (key == sna->render_state.gen4.last_pipelined_pointers)
                return;

        OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | 5);
        OUT_BATCH(sna->render_state.gen4.vs);
        OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */
        OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */
        OUT_BATCH(sna->render_state.gen4.sf);
        OUT_BATCH(sna->render_state.gen4.wm + sp);
        OUT_BATCH(sna->render_state.gen4.cc + bp);

        sna->render_state.gen4.last_pipelined_pointers = key;
        /* (Re)emit the URB partitioning if one is still pending */
        gen4_emit_urb(sna);
}
 
937
 
 
938
static bool
 
939
gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
 
940
{
 
941
        uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
 
942
        uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
 
943
 
 
944
        assert(!too_large(abs(op->dst.x), abs(op->dst.y)));
 
945
        assert(!too_large(op->dst.width, op->dst.height));
 
946
 
 
947
        if (sna->render_state.gen4.drawrect_limit == limit &&
 
948
            sna->render_state.gen4.drawrect_offset == offset)
 
949
                return true;
 
950
 
 
951
        sna->render_state.gen4.drawrect_offset = offset;
 
952
        sna->render_state.gen4.drawrect_limit = limit;
 
953
 
 
954
        OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
 
955
        OUT_BATCH(0);
 
956
        OUT_BATCH(limit);
 
957
        OUT_BATCH(offset);
 
958
        return false;
 
959
}
 
960
 
 
961
/* Program the vertex fetcher layout for this operation.
 *
 * vertex data in vertex buffer
 *    position: (x, y)
 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
 *    texture coordinate 1 if (has_mask is true): same as above
 *
 * The layout is keyed by op->u.gen4.ve_id: the low 2 bits encode the
 * number of floats in the source channel, the upper bits the number in
 * the (optional) mask channel.
 */
static void
gen4_emit_vertex_elements(struct sna *sna,
                          const struct sna_composite_op *op)
{
        struct gen4_render_state *render = &sna->render_state.gen4;
        uint32_t src_format, dw;
        int id = op->u.gen4.ve_id;

        if (render->ve_id == id)
                return;
        render->ve_id = id;

        /* The VUE layout
         *    dword 0-3: position (x, y, 1.0, 1.0),
         *    dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0)
         *    [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0)
         */
        OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + 2) - 1));

        /* x,y: 16-bit signed-scaled integers, z/w forced to 1.0 */
        OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
                  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
                  0 << VE0_OFFSET_SHIFT);
        OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
                  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
                  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
                  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
                  (1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

        /* u0, v0, w0 */
        DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
        dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
        switch (id & 3) {
        default:
                assert(0);
                /* fallthrough */
        case 0:
                /* No coordinates in the buffer; reuse the x,y shorts */
                src_format = GEN4_SURFACEFORMAT_R16G16_SSCALED;
                dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
                dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
                dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
                break;
        case 1:
                src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
                dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
                dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
                dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
                break;
        case 2:
                src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
                dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
                dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
                dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
                break;
        case 3:
                /* projective: full u, v, w */
                src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
                dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
                dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
                dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
                break;
        }
        OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
                  src_format << VE0_FORMAT_SHIFT |
                  4 << VE0_OFFSET_SHIFT);
        OUT_BATCH(dw | 8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

        /* u1, v1, w1 */
        if (id >> 2) {
                /* mask channel follows the source channel in the vertex;
                 * a 0-float source channel still occupies one float (x,y) */
                unsigned src_offset = 4 + ((id & 3) ?: 1) * sizeof(float);
                DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__,
                     id >> 2, src_offset));
                dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
                switch (id >> 2) {
                case 1:
                        src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
                        dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
                        dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
                        dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
                        break;
                default:
                        assert(0);
                        /* fallthrough */
                case 2:
                        src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
                        dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
                        dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
                        dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
                        break;
                case 3:
                        src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
                        dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
                        dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
                        dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
                        break;
                }
                OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
                          src_format << VE0_FORMAT_SHIFT |
                          src_offset << VE0_OFFSET_SHIFT);
                OUT_BATCH(dw | 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
        } else {
                /* No mask: pad the VUE slot with (0, 0, 0, 1) */
                OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
                          GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
                          0 << VE0_OFFSET_SHIFT);
                OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
                          VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
                          VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
                          VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
                          12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
        }
}
 
1076
 
 
1077
/* Emit all hardware state required by 'op'.  The low bit of
 * wm_binding_table smuggles a "destination may be dirty" flag from
 * gen4_bind_surfaces(); the remaining bits are the table offset. */
static void
gen4_emit_state(struct sna *sna,
                const struct sna_composite_op *op,
                uint16_t wm_binding_table)
{
        bool flush;

        assert(op->dst.bo->exec);

        /* Unpack the dirty flag carried in bit 0 of the offset */
        flush = wm_binding_table & 1;
        wm_binding_table &= ~1;

        if (ALWAYS_FLUSH || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
                DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__,
                     kgem_bo_is_dirty(op->src.bo),
                     kgem_bo_is_dirty(op->mask.bo),
                     flush));
                gen4_emit_pipe_invalidate(sna);
                kgem_clear_dirty(&sna->kgem);
                kgem_bo_mark_dirty(op->dst.bo);
                /* the invalidate above subsumes any pending flush */
                flush = false;
        }
        /* Emitting a new drawing rectangle also clears the flush need */
        flush &= gen4_emit_drawing_rectangle(sna, op);
        if (flush && op->op > PictOpSrc)
                gen4_emit_pipe_flush(sna);

        gen4_emit_binding_table(sna, wm_binding_table);
        gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel);
        gen4_emit_vertex_elements(sna, op);
}
 
1107
 
 
1108
/* Build the binding table (dst, src, optional mask) for a composite
 * operation and emit the dependent state.  If the freshly written table
 * is identical to the previous one, the duplicate is released and the
 * old table reused. */
static void
gen4_bind_surfaces(struct sna *sna,
                   const struct sna_composite_op *op)
{
        uint32_t *binding_table;
        uint16_t offset, dirty;

        gen4_get_batch(sna, op);
        /* remember whether dst needs a flush before being sampled */
        dirty = kgem_bo_is_dirty(op->dst.bo);

        binding_table = gen4_composite_get_binding_table(sna, &offset);

        binding_table[0] =
                gen4_bind_bo(sna,
                            op->dst.bo, op->dst.width, op->dst.height,
                            gen4_get_dest_format(op->dst.format),
                            true);
        binding_table[1] =
                gen4_bind_bo(sna,
                             op->src.bo, op->src.width, op->src.height,
                             op->src.card_format,
                             false);
        if (op->mask.bo) {
                assert(op->u.gen4.ve_id >> 2);
                binding_table[2] =
                        gen4_bind_bo(sna,
                                     op->mask.bo,
                                     op->mask.width,
                                     op->mask.height,
                                     op->mask.card_format,
                                     false);
        }

        /* Identical to the currently bound table?  Reclaim the space and
         * keep using the old one.  (Compares entries 0+1 as one uint64
         * and entry 2 separately when a mask is present.) */
        if (sna->kgem.surface == offset &&
            *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table &&
            (op->mask.bo == NULL ||
             sna->kgem.batch[sna->render_state.gen4.surface_table+2] == binding_table[2])) {
                sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
                offset = sna->render_state.gen4.surface_table;
        }

        /* Destination unchanged -> no flush needed before sampling it */
        if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0])
                dirty = 0;

        /* dirty (0 or 1) rides in bit 0 of the table offset */
        gen4_emit_state(sna, op, offset | dirty);
}
 
1154
 
 
1155
/* Emit a single composite rectangle, reserving vertex space (and
 * rebinding surfaces if the batch wraps) before delegating the vertex
 * emission to op->prim_emit. */
fastcall static void
gen4_render_composite_blt(struct sna *sna,
                          const struct sna_composite_op *op,
                          const struct sna_composite_rectangles *r)
{
        DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
             __FUNCTION__,
             r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
             r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
             r->dst.x, r->dst.y, op->dst.x, op->dst.y,
             r->width, r->height));

        gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
        op->prim_emit(sna, op, r);
}
 
1170
 
 
1171
fastcall static void
 
1172
gen4_render_composite_box(struct sna *sna,
 
1173
                          const struct sna_composite_op *op,
 
1174
                          const BoxRec *box)
 
1175
{
 
1176
        struct sna_composite_rectangles r;
 
1177
 
 
1178
        DBG(("  %s: (%d, %d), (%d, %d)\n",
 
1179
             __FUNCTION__,
 
1180
             box->x1, box->y1, box->x2, box->y2));
 
1181
 
 
1182
        gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
 
1183
 
 
1184
        r.dst.x = box->x1;
 
1185
        r.dst.y = box->y1;
 
1186
        r.width  = box->x2 - box->x1;
 
1187
        r.height = box->y2 - box->y1;
 
1188
        r.mask = r.src = r.dst;
 
1189
 
 
1190
        op->prim_emit(sna, op, &r);
 
1191
}
 
1192
 
 
1193
/* Slow-path boxes emitter: reserves vertex space in chunks and converts
 * each box into a rectangle for the per-rectangle op->prim_emit. */
static void
gen4_render_composite_boxes__blt(struct sna *sna,
                                 const struct sna_composite_op *op,
                                 const BoxRec *box, int nbox)
{
        DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
             __FUNCTION__, nbox, op->dst.x, op->dst.y,
             op->src.offset[0], op->src.offset[1],
             op->src.width, op->src.height,
             op->mask.offset[0], op->mask.offset[1],
             op->mask.width, op->mask.height));

        do {
                int nbox_this_time;

                /* may return fewer than nbox if the vertex buffer fills */
                nbox_this_time = gen4_get_rectangles(sna, op, nbox,
                                                     gen4_bind_surfaces);
                nbox -= nbox_this_time;

                do {
                        struct sna_composite_rectangles r;

                        DBG(("  %s: (%d, %d), (%d, %d)\n",
                             __FUNCTION__,
                             box->x1, box->y1, box->x2, box->y2));

                        r.dst.x = box->x1;
                        r.dst.y = box->y1;
                        r.width  = box->x2 - box->x1;
                        r.height = box->y2 - box->y1;
                        r.mask = r.src = r.dst;
                        op->prim_emit(sna, op, &r);
                        box++;
                } while (--nbox_this_time);
        } while (nbox);
}
 
1229
 
 
1230
/* Fast boxes emitter: reserves vertex space for a whole chunk at once
 * and lets op->emit_boxes() write the vertices directly. */
static void
gen4_render_composite_boxes(struct sna *sna,
                            const struct sna_composite_op *op,
                            const BoxRec *box, int nbox)
{
        DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

        do {
                int nbox_this_time;
                float *v;

                nbox_this_time = gen4_get_rectangles(sna, op, nbox,
                                                     gen4_bind_surfaces);
                assert(nbox_this_time);
                nbox -= nbox_this_time;

                /* claim the vertex range for this chunk up front */
                v = sna->render.vertices + sna->render.vertex_used;
                sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

                op->emit_boxes(op, box, nbox_this_time, v);
                box += nbox_this_time;
        } while (nbox);
}
 
1253
 
 
1254
#if !FORCE_FLUSH
/* Threaded boxes emitter: the vertex range is reserved while holding
 * the render lock, but the vertices are written outside it (bracketed
 * by vertex acquire/release) so other threads can reserve concurrently. */
static void
gen4_render_composite_boxes__thread(struct sna *sna,
                                    const struct sna_composite_op *op,
                                    const BoxRec *box, int nbox)
{
        DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

        sna_vertex_lock(&sna->render);
        do {
                int nbox_this_time;
                float *v;

                nbox_this_time = gen4_get_rectangles(sna, op, nbox,
                                                     gen4_bind_surfaces);
                assert(nbox_this_time);
                nbox -= nbox_this_time;

                v = sna->render.vertices + sna->render.vertex_used;
                sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

                /* pin the reservation, then drop the lock while filling */
                sna_vertex_acquire__locked(&sna->render);
                sna_vertex_unlock(&sna->render);

                op->emit_boxes(op, box, nbox_this_time, v);
                box += nbox_this_time;

                sna_vertex_lock(&sna->render);
                sna_vertex_release__locked(&sna->render);
        } while (nbox);
        sna_vertex_unlock(&sna->render);
}
#endif
 
1287
 
 
1288
#ifndef MAX
/* Larger of two values.  NOTE: evaluates each argument more than once. */
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
 
1291
 
 
1292
/* Emit a SURFACE_STATE describing one plane of a video source buffer
 * and return its byte offset within the batch, suitable for storing in
 * a binding-table entry.  src_offset locates the plane inside src_bo.
 */
static uint32_t gen4_bind_video_source(struct sna *sna,
				       struct kgem_bo *src_bo,
				       uint32_t src_offset,
				       int src_width,
				       int src_height,
				       int src_pitch,
				       uint32_t src_surf_format)
{
	struct gen4_surface_state *ss;

	/* Surface state is carved from the top of the batch, growing down. */
	sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);

	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
	ss->ss0.surface_type = GEN4_SURFACE_2D;
	ss->ss0.surface_format = src_surf_format;
	ss->ss0.color_blend = 1;

	/* Relocation target: the sampler reads this plane. */
	ss->ss1.base_addr =
		kgem_add_reloc(&sna->kgem,
			       sna->kgem.surface + 1,
			       src_bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       src_offset);

	/* Hardware encodes dimensions and pitch as value minus one. */
	ss->ss2.width  = src_width - 1;
	ss->ss2.height = src_height - 1;
	ss->ss3.pitch  = src_pitch - 1;

	return sna->kgem.surface * sizeof(uint32_t);
}
 
1322
 
 
1323
/* Rebuild the binding table for a video blit: entry 0 is the render
 * target, followed by one sampler surface per source plane, then
 * re-emit the pipeline state.  op->priv carries the sna_video_frame.
 */
static void gen4_video_bind_surfaces(struct sna *sna,
				     const struct sna_composite_op *op)
{
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format;
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset, dirty;
	int n_src, n;

	/* Plane offsets within frame->bo: luma at 0, then V, then U.
	 * Each plane is listed twice — presumably the planar WM kernel
	 * samples each plane through two binding slots (verify against
	 * the shader). */
	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		/* Planar YUV: each plane is sampled as 8bpp luminance. */
		src_surf_format = GEN4_SURFACEFORMAT_R8_UNORM;
		src_width[1]  = src_width[0]  = frame->width;
		src_height[1] = src_height[0] = frame->height;
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
		/* Chroma planes are subsampled by two in both axes. */
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
			frame->width / 2;
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
			frame->height / 2;
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
			frame->pitch[0];
		n_src = 6;
	} else {
		/* Packed YUV: a single interleaved surface. */
		if (frame->id == FOURCC_UYVY)
			src_surf_format = GEN4_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format = GEN4_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0]  = frame->width;
		src_height[0] = frame->height;
		src_pitch[0]  = frame->pitch[0];
		n_src = 1;
	}

	gen4_get_batch(sna, op);
	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table = gen4_composite_get_binding_table(sna, &offset);
	binding_table[0] =
		gen4_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen4_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen4_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format);
	}

	/* Unchanged render target => no need to flag it dirty for a flush. */
	if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0])
		dirty = 0;

	gen4_emit_state(sna, op, offset | dirty);
}
 
1392
 
 
1393
/* Render a video frame into the destination pixmap with the 3D
 * pipeline, one textured rectangle per clip box.  Returns false if the
 * required buffers cannot fit in a batch even after a submit.
 */
static bool
gen4_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	const BoxRec *box;
	int nbox;

	DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
	     src_width, src_height, dst_width, dst_height));

	assert(priv->gpu_bo);
	memset(&tmp, 0, sizeof(tmp));

	tmp.op = PictOpSrc;
	tmp.dst.pixmap = pixmap;
	tmp.dst.width  = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	/* 1:1 scaling can use nearest sampling; anything else bilinear. */
	if (src_width == dst_width && src_height == dst_height)
		tmp.src.filter = SAMPLER_FILTER_NEAREST;
	else
		tmp.src.filter = SAMPLER_FILTER_BILINEAR;
	tmp.src.repeat = SAMPLER_EXTEND_PAD;
	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;
	tmp.u.gen4.wm_kernel =
		is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
	tmp.u.gen4.ve_id = 2;
	tmp.is_affine = true;
	/* 3 floats per vertex (x, y, plus texcoord pair packed), 3
	 * vertices per rectangle => 9 floats per rect. */
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;
	tmp.priv = frame;

	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
			return false;
	}

	gen4_align_vertex(sna, &tmp);
	gen4_video_bind_surfaces(sna, &tmp);

	/* Map destination pixels to normalised [0,1] source texture
	 * coordinates: scale by src/dst ratio over the texture size,
	 * offset by the source origin. */
	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

	box = region_rects(dstRegion);
	nbox = region_num_rects(dstRegion);
	do {
		int n;

		n = gen4_get_rectangles(sna, &tmp, nbox,
					gen4_video_bind_surfaces);
		assert(n);
		nbox -= n;

		do {
			/* RECTLIST: bottom-right, bottom-left, top-left. */
			OUT_VERTEX(box->x2, box->y2);
			OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

			OUT_VERTEX(box->x1, box->y2);
			OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

			OUT_VERTEX(box->x1, box->y1);
			OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

			box++;
		} while (--n);
	} while (nbox);
	gen4_vertex_flush(sna);

	if (!DAMAGE_IS_ALL(priv->gpu_damage))
		sna_damage_add(&priv->gpu_damage, dstRegion);

	return true;
}
 
1487
 
 
1488
/* Prepare a composite channel from a Render picture, reducing solids,
 * gradients and unsupported repeat/filter/format combinations to
 * something the Gen4 sampler can consume.  Returns -1 on failure;
 * non-negative results follow the channel-init convention used by the
 * callers (0 => caller substitutes a solid, 1 => channel ready).
 */
static int
gen4_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	/* Drawable-less pictures are gradients (linear handled natively,
	 * the rest rasterised via a fixup surface). */
	if (picture->pDrawable == NULL) {
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen4_check_repeat(picture)) {
		DBG(("%s: unknown repeat mode fixup\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen4_check_filter(picture)) {
		DBG(("%s: unhandled filter fixup\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	/* A pure integer translation can be folded into the sample
	 * coordinates, dropping the transform and the filter. */
	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;

		/* Fully-contained sample from a cleared pixmap reduces
		 * to a solid colour. */
		if (channel->repeat &&
		    (x >= 0 &&
		     y >= 0 &&
		     x + w <= pixmap->drawable.width &&
		     y + h <= pixmap->drawable.height)) {
			struct sna_pixmap *priv = sna_pixmap(pixmap);
			if (priv && priv->clear) {
				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
				return gen4_channel_init_solid(sna, channel,
							       solid_color(picture->format,
									   priv->clear_color));
			}
		}
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen4_get_card_format(picture->format);
	if (channel->card_format == -1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	/* Oversized sources are sampled via an extracted sub-surface. */
	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}
 
1596
 
 
1597
static void gen4_composite_channel_convert(struct sna_composite_channel *channel)
 
1598
{
 
1599
        DBG(("%s: repeat %d -> %d, filter %d -> %d\n",
 
1600
             __FUNCTION__,
 
1601
             channel->repeat, gen4_repeat(channel->repeat),
 
1602
             channel->filter, gen4_repeat(channel->filter)));
 
1603
        channel->repeat = gen4_repeat(channel->repeat);
 
1604
        channel->filter = gen4_filter(channel->filter);
 
1605
        if (channel->card_format == (unsigned)-1)
 
1606
                channel->card_format = gen4_get_card_format(channel->pict_format);
 
1607
}
 
1608
 
 
1609
/* Finish a composite operation: flush any pending vertices (running
 * the second component-alpha pass if needed), drop the channel bo
 * references and undo any render-target redirection.
 */
static void
gen4_render_composite_done(struct sna *sna,
			   const struct sna_composite_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	/* Non-zero vertex_offset => vertices were emitted but not flushed. */
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen4_magic_ca_pass(sna, op);
	}

	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
	if (op->src.bo)
		kgem_bo_destroy(&sna->kgem, op->src.bo);

	sna_render_composite_redirect_done(sna, op);
}
 
1627
 
 
1628
/* Select and validate the destination bo for a composite: pick a GPU
 * bo for the target drawable, undo pending work if we fully replace
 * it, and redirect through a proxy if the target exceeds the 3D
 * pipeline limits.  Returns false to force a fallback.
 */
static bool
gen4_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	BoxRec box;
	unsigned hint;

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.width  = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;
	op->dst.format = dst->format;
	/* w==0 or h==0 means "unknown extents": use the whole picture. */
	if (w && h) {
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	hint = PREFER_GPU | FORCE_GPU | RENDER_GPU;
	if (!partial) {
		hint |= IGNORE_DAMAGE;
		/* Writing every pixel: previous contents are irrelevant. */
		if (w == op->dst.width && h == op->dst.height)
			hint |= REPLACES;
	}

	op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	if (hint & REPLACES) {
		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
		/* Cancel queued work on buffers we are about to overwrite. */
		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
	}

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);

	if (too_large(op->dst.width, op->dst.height) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
		return false;

	return true;
}
 
1685
 
 
1686
static bool
 
1687
check_gradient(PicturePtr picture, bool precise)
 
1688
{
 
1689
        switch (picture->pSourcePict->type) {
 
1690
        case SourcePictTypeSolidFill:
 
1691
        case SourcePictTypeLinear:
 
1692
                return false;
 
1693
        default:
 
1694
                return precise;
 
1695
        }
 
1696
}
 
1697
 
 
1698
static bool
 
1699
has_alphamap(PicturePtr p)
 
1700
{
 
1701
        return p->alphaMap != NULL;
 
1702
}
 
1703
 
 
1704
static bool
 
1705
need_upload(struct sna *sna, PicturePtr p)
 
1706
{
 
1707
        return p->pDrawable && untransformed(p) &&
 
1708
                !is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER);
 
1709
}
 
1710
 
 
1711
static bool
 
1712
source_is_busy(PixmapPtr pixmap)
 
1713
{
 
1714
        struct sna_pixmap *priv = sna_pixmap(pixmap);
 
1715
        if (priv == NULL)
 
1716
                return false;
 
1717
 
 
1718
        if (priv->clear)
 
1719
                return false;
 
1720
 
 
1721
        if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
 
1722
                return true;
 
1723
 
 
1724
        if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
 
1725
                return true;
 
1726
 
 
1727
        return priv->gpu_damage && !priv->cpu_damage;
 
1728
}
 
1729
 
 
1730
/* Decide whether this source picture would force the composite onto
 * the CPU path.  Solids never fall back; gradients defer to
 * check_gradient(); unsupported repeat/format always fall back.
 */
static bool
source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise)
{
	if (sna_picture_is_solid(p, NULL))
		return false;

	if (p->pSourcePict)
		return check_gradient(p, precise);

	if (!gen4_check_repeat(p) || !gen4_check_format(p->format))
		return true;

	/* soft errors: prefer to upload/compute rather than readback */
	if (pixmap && source_is_busy(pixmap))
		return false;

	return has_alphamap(p) || !gen4_check_filter(p) || need_upload(sna, p);
}
 
1748
 
 
1749
/* Whole-operation fallback heuristic: weigh where the destination,
 * source and mask currently live and whether they can be handled by
 * the GPU, and return true to run the composite on the CPU instead.
 */
static bool
gen4_composite_fallback(struct sna *sna,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen4_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(sna, src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(sna, mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the cpu.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
	 */
	if (src_fallback) {
		DBG(("%s: dst is on the CPU and src will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (mask_fallback) {
		DBG(("%s: dst is on the CPU and mask will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (too_large(dst_pixmap->drawable.width,
		      dst_pixmap->drawable.height) &&
	    dst_is_cpu(dst_pixmap)) {
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
		return true;
	}

	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
	     __FUNCTION__));
	return dst_use_cpu(dst_pixmap);
}
 
1839
 
 
1840
/* Try to initialise the mask channel (mc) by reusing the already
 * prepared source channel (sc): identical pictures share the bo
 * outright; otherwise a same-drawable mask with matching transform and
 * supported repeat/filter/format reuses the bo with adjusted sampler
 * state.  Returns true (non-zero) when mc was initialised.
 */
static int
reuse_source(struct sna *sna,
	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
	uint32_t color;

	/* Sharing only works if both channels sample the same origin. */
	if (src_x != msk_x || src_y != msk_y)
		return false;

	if (src == mask) {
		DBG(("%s: mask is source\n", __FUNCTION__));
		*mc = *sc;
		mc->bo = kgem_bo_reference(mc->bo);
		return true;
	}

	if (sna_picture_is_solid(mask, &color))
		return gen4_channel_init_solid(sna, mc, color);

	if (sc->is_solid)
		return false;

	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
		return false;

	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

	if (!sna_transform_equal(src->transform, mask->transform))
		return false;

	if (!sna_picture_alphamap_equal(src, mask))
		return false;

	if (!gen4_check_repeat(mask))
		return false;

	if (!gen4_check_filter(mask))
		return false;

	if (!gen4_check_format(mask->format))
		return false;

	DBG(("%s: reusing source channel for mask with a twist\n",
	     __FUNCTION__));

	/* Copy the channel, then override the sampler state that may
	 * legitimately differ between the two pictures. */
	*mc = *sc;
	mc->repeat = gen4_repeat(mask->repeat ? mask->repeatType : RepeatNone);
	mc->filter = gen4_filter(mask->filter);
	mc->pict_format = mask->format;
	mc->card_format = gen4_get_card_format(mask->format);
	mc->bo = kgem_bo_reference(mc->bo);
	return true;
}
 
1894
 
 
1895
/* Top-level entry point for a Render composite on Gen4: try the BLT
 * engine, fall back to the CPU when the heuristics say so, otherwise
 * prepare destination, source and (optional) mask channels, select the
 * WM kernel and emitters, and bind the state.  On any failure the
 * acquired resources are released via the goto-cleanup chain.
 */
static bool
gen4_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t msk_x, int16_t msk_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      unsigned flags,
		      struct sna_composite_op *tmp)
{
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.mode));

	if (op >= ARRAY_SIZE(gen4_blend_op))
		return false;

	/* Unmasked operations may be serviced entirely by the blitter. */
	if (mask == NULL &&
	    sna_blt_composite(sna, op,
			      src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      flags, tmp))
		return true;

	if (gen4_composite_fallback(sna, src, mask, dst))
		goto fallback;

	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x, src_y,
					    msk_x, msk_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	/* op > PictOpSrc reads the destination, so treat it as partial. */
	if (!gen4_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height,
				       flags & COMPOSITE_PARTIAL || op > PictOpSrc)) {
		DBG(("%s: failed to set composite target\n", __FUNCTION__));
		goto fallback;
	}

	tmp->op = op;
	switch (gen4_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		DBG(("%s: failed to prepare source\n", __FUNCTION__));
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		/* A converted source may still be blittable. */
		if (mask == NULL &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		gen4_composite_channel_convert(&tmp->src);
		break;
	}

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	if (mask) {
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			tmp->has_component_alpha = true;

			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value.  We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (gen4_blend_op[op].src_alpha &&
			    (gen4_blend_op[op].src_blend != GEN4_BLENDFACTOR_ZERO)) {
				if (op != PictOpOver) {
					DBG(("%s -- fallback: unhandled component alpha blend\n",
					     __FUNCTION__));

					goto cleanup_src;
				}

				/* PictOpOver is split into two passes:
				 * OutReverse now, the "magic" CA pass later. */
				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, msk_x, msk_y)) {
			switch (gen4_composite_picture(sna, mask, &tmp->mask,
						       msk_x, msk_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				DBG(("%s: failed to prepare mask\n", __FUNCTION__));
				goto cleanup_src;
			case 0:
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
					goto cleanup_src;
				/* fall through to fixup */
			case 1:
				gen4_composite_channel_convert(&tmp->mask);
				break;
			}
		}

		tmp->is_affine &= tmp->mask.is_affine;
	}

	tmp->u.gen4.wm_kernel =
		gen4_choose_composite_kernel(tmp->op,
					     tmp->mask.bo != NULL,
					     tmp->has_component_alpha,
					     tmp->is_affine);
	tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp);

	tmp->blt   = gen4_render_composite_blt;
	tmp->box   = gen4_render_composite_box;
	tmp->boxes = gen4_render_composite_boxes__blt;
	if (tmp->emit_boxes) {
		tmp->boxes = gen4_render_composite_boxes;
#if !FORCE_FLUSH
		tmp->thread_boxes = gen4_render_composite_boxes__thread;
#endif
	}
	tmp->done  = gen4_render_composite_done;

	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				     tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				     NULL))
			goto cleanup_mask;
	}

	gen4_align_vertex(sna, tmp);
	gen4_bind_surfaces(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
		tmp->mask.bo = NULL;
	}
cleanup_src:
	if (tmp->src.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
		tmp->src.bo = NULL;
	}
cleanup_dst:
	if (tmp->redirect.real_bo) {
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
		tmp->redirect.real_bo = NULL;
	}
fallback:
	return (mask == NULL &&
		sna_blt_composite(sna, op,
				  src, dst,
				  src_x, src_y,
				  dst_x, dst_y,
				  width, height,
				  flags | COMPOSITE_FALLBACK, tmp));
}
 
2071
 
 
2072
#if !NO_COMPOSITE_SPANS
 
2073
fastcall static void
gen4_render_composite_spans_box(struct sna *sna,
				const struct sna_composite_spans_op *op,
				const BoxRec *box, float opacity)
{
	/* Emit a single opacity-weighted span rectangle: reserve room for
	 * one rectangle in the vertex buffer (rebinding surfaces if a new
	 * batch was started) and let the op's chosen emitter write the
	 * per-vertex data.
	 */
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	     __FUNCTION__,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	gen4_get_rectangles(sna, &op->base, 1, gen4_bind_surfaces);
	op->prim_emit(sna, op, box, opacity);
}
 
2090
 
 
2091
static void
 
2092
gen4_render_composite_spans_boxes(struct sna *sna,
 
2093
                                  const struct sna_composite_spans_op *op,
 
2094
                                  const BoxRec *box, int nbox,
 
2095
                                  float opacity)
 
2096
{
 
2097
        DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
 
2098
             __FUNCTION__, nbox,
 
2099
             op->base.src.offset[0], op->base.src.offset[1],
 
2100
             opacity,
 
2101
             op->base.dst.x, op->base.dst.y));
 
2102
 
 
2103
        do {
 
2104
                int nbox_this_time;
 
2105
 
 
2106
                nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox,
 
2107
                                                     gen4_bind_surfaces);
 
2108
                nbox -= nbox_this_time;
 
2109
 
 
2110
                do {
 
2111
                        DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
 
2112
                             box->x1, box->y1,
 
2113
                             box->x2 - box->x1,
 
2114
                             box->y2 - box->y1));
 
2115
 
 
2116
                        op->prim_emit(sna, op, box++, opacity);
 
2117
                } while (--nbox_this_time);
 
2118
        } while (nbox);
 
2119
}
 
2120
 
 
2121
fastcall static void
gen4_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
	/* Threaded span emission: vertex-buffer space is reserved under the
	 * render vertex lock, then the vertices themselves are written with
	 * the lock dropped (bracketed by acquire/release) so multiple
	 * threads can fill their reserved slices concurrently.
	 */
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));
	assert(nbox);

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox,
						     gen4_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Claim our slice of the vertex buffer while still locked. */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* Fill the reserved slice outside the lock. */
		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
 
2157
 
 
2158
fastcall static void
gen4_render_composite_spans_done(struct sna *sna,
				 const struct sna_composite_spans_op *op)
{
	/* Finish a spans operation: flush any pending vertices, drop the
	 * reference taken on the source bo during setup, and resolve any
	 * destination redirection (copy-back of the proxy target).
	 */
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);

	DBG(("%s()\n", __FUNCTION__));

	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
	sna_render_composite_redirect_done(sna, &op->base);
}
 
2170
 
 
2171
static bool
 
2172
gen4_check_composite_spans(struct sna *sna,
 
2173
                           uint8_t op, PicturePtr src, PicturePtr dst,
 
2174
                           int16_t width, int16_t height,
 
2175
                           unsigned flags)
 
2176
{
 
2177
        DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
 
2178
             __FUNCTION__, op, width, height, flags));
 
2179
 
 
2180
        if (op >= ARRAY_SIZE(gen4_blend_op))
 
2181
                return false;
 
2182
 
 
2183
        if (gen4_composite_fallback(sna, src, NULL, dst)) {
 
2184
                DBG(("%s: operation would fallback\n", __FUNCTION__));
 
2185
                return false;
 
2186
        }
 
2187
 
 
2188
        if (need_tiling(sna, width, height) &&
 
2189
            !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
 
2190
                DBG(("%s: fallback, tiled operation not on GPU\n",
 
2191
                     __FUNCTION__));
 
2192
                return false;
 
2193
        }
 
2194
 
 
2195
        if (FORCE_SPANS)
 
2196
                return FORCE_SPANS > 0;
 
2197
 
 
2198
        if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
 
2199
                struct sna_pixmap *priv;
 
2200
 
 
2201
                if (FORCE_NONRECTILINEAR_SPANS)
 
2202
                        return FORCE_NONRECTILINEAR_SPANS > 0;
 
2203
 
 
2204
                if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0)
 
2205
                        return false;
 
2206
 
 
2207
                priv = sna_pixmap_from_drawable(dst->pDrawable);
 
2208
                assert(priv);
 
2209
 
 
2210
                if (priv->cpu_bo &&
 
2211
                    __kgem_bo_is_busy(&sna->kgem, priv->cpu_bo))
 
2212
                        return true;
 
2213
 
 
2214
                if (flags & COMPOSITE_SPANS_INPLACE_HINT)
 
2215
                        return false;
 
2216
 
 
2217
                return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
 
2218
        }
 
2219
 
 
2220
        return true;
 
2221
}
 
2222
 
 
2223
/* Prepare a spans composite operation on gen4 hardware.
 * Sets up the destination target, resolves the source picture into a
 * render channel, selects the span emitter and WM kernel, and binds the
 * surface state.  Returns true with *tmp populated on success; on failure
 * resources are unwound via the goto-chain and false is returned.
 */
static bool
gen4_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x,  int16_t src_y,
			    int16_t dst_x,  int16_t dst_y,
			    int16_t width,  int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
	     width, height, flags, sna->kgem.ring));

	assert(gen4_check_composite_spans(sna, op, src, dst, width, height, flags));

	/* Too wide for the 3D pipeline: split into tiles instead. */
	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	tmp->base.op = op;
	if (!gen4_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height, true))
		return false;

	/* -1: hard failure; 0: reduced to a solid; 1: channel ready. */
	switch (gen4_composite_picture(sna, src, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		gen4_composite_channel_convert(&tmp->base.src);
		break;
	}

	/* No mask channel for spans; opacity arrives via the emitter. */
	tmp->base.mask.bo = NULL;
	tmp->base.mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->base.mask.repeat = SAMPLER_EXTEND_NONE;

	tmp->base.is_affine = tmp->base.src.is_affine;
	tmp->base.has_component_alpha = false;
	tmp->base.need_magic_ca_pass = false;

	tmp->base.u.gen4.ve_id = gen4_choose_spans_emitter(sna, tmp);
	/* Non-affine sources select the projective variant of the kernel. */
	tmp->base.u.gen4.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine;

	tmp->box   = gen4_render_composite_spans_box;
	tmp->boxes = gen4_render_composite_spans_boxes;
	if (tmp->emit_boxes)
		tmp->thread_boxes = gen4_render_composite_spans_boxes__thread;
	tmp->done  = gen4_render_composite_spans_done;

	/* Ensure both bos fit into the batch, submitting once if needed. */
	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL))  {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
	}

	gen4_align_vertex(sna, &tmp->base);
	gen4_bind_surfaces(sna, &tmp->base);
	return true;

cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	/* Only a redirected destination owns a reference on dst.bo. */
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
 
2307
#endif
 
2308
 
 
2309
static void
gen4_copy_bind_surfaces(struct sna *sna, const struct sna_composite_op *op)
{
	/* Bind the destination and source surfaces for a copy operation
	 * and (re)emit pipeline state.  Reuses the previous surface table
	 * when the two new entries match it exactly, and suppresses the
	 * dirty flush when the destination surface is unchanged.
	 */
	uint32_t *binding_table;
	uint16_t offset, dirty;

	gen4_get_batch(sna, op);
	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table = gen4_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen4_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen4_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen4_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);

	/* If this table is identical to the last one emitted, rewind the
	 * surface allocator and reuse the cached table offset.
	 */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen4.surface_table;
	}

	/* Same destination surface as before: no flush needed for it. */
	if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0])
		dirty = 0;

	gen4_emit_state(sna, op, offset | dirty);
}
 
2342
 
 
2343
static void
gen4_render_copy_one(struct sna *sna,
		     const struct sna_composite_op *op,
		     int sx, int sy,
		     int w, int h,
		     int dx, int dy)
{
	/* Emit one textured rectangle copying a w x h region from (sx,sy)
	 * in the source to (dx,dy) in the destination.  Three vertices per
	 * rectangle, emitted bottom-right, bottom-left, top-left, each with
	 * normalised (scale[]) texture coordinates.
	 */
	gen4_get_rectangles(sna, op, 1, gen4_copy_bind_surfaces);

	OUT_VERTEX(dx+w, dy+h);
	OUT_VERTEX_F((sx+w)*op->src.scale[0]);
	OUT_VERTEX_F((sy+h)*op->src.scale[1]);

	OUT_VERTEX(dx, dy+h);
	OUT_VERTEX_F(sx*op->src.scale[0]);
	OUT_VERTEX_F((sy+h)*op->src.scale[1]);

	OUT_VERTEX(dx, dy);
	OUT_VERTEX_F(sx*op->src.scale[0]);
	OUT_VERTEX_F(sy*op->src.scale[1]);
}
 
2364
 
 
2365
/* Copy a set of boxes between two drawables using the render engine,
 * preferring the BLT where possible.  Oversized destinations are handled
 * via a redirected proxy target; oversized sources via a partial pixmap
 * view.  On unrecoverable setup failure, falls back to BLT or tiled paths.
 */
static bool
gen4_render_copy_boxes(struct sna *sna, uint8_t alu,
		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
		       const BoxRec *box, int n, unsigned flags)
{
	struct sna_composite_op tmp;

	DBG(("%s x %d\n", __FUNCTION__, n));

	/* Fast path: plain BLT copy when depths match. */
	if (sna_blt_compare_depth(src, dst) &&
	    sna_blt_copy_boxes(sna, alu,
			       src_bo, src_dx, src_dy,
			       dst_bo, dst_dx, dst_dy,
			       dst->bitsPerPixel,
			       box, n))
		return true;

	/* Render copy only handles GXcopy/GXclear and distinct bos. */
	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) {
fallback_blt:
		if (!sna_blt_compare_depth(src, dst))
			return false;

		return sna_blt_copy_boxes_fallback(sna, alu,
						   src, src_bo, src_dx, src_dy,
						   dst, dst_bo, dst_dx, dst_dy,
						   box, n);
	}

	memset(&tmp, 0, sizeof(tmp));

	DBG(("%s (%d, %d)->(%d, %d) x %d\n",
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));

	/* Pick compatible render formats for the two depths. */
	if (dst->depth == src->depth) {
		tmp.dst.format = sna_render_format_for_depth(dst->depth);
		tmp.src.pict_format = tmp.dst.format;
	} else {
		tmp.dst.format = sna_format_for_depth(dst->depth);
		tmp.src.pict_format = sna_format_for_depth(src->depth);
	}
	if (!gen4_check_format(tmp.src.pict_format))
		goto fallback_blt;

	tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;

	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width  = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.dst.bo = dst_bo;
	tmp.damage = NULL;

	sna_render_composite_redirect_init(&tmp);
	/* Destination exceeds hardware limits: redirect rendering to a
	 * proxy covering only the extents of the boxes.
	 */
	if (too_large(tmp.dst.width, tmp.dst.height)) {
		BoxRec extents = box[0];
		int i;

		for (i = 1; i < n; i++) {
			if (box[i].x1 < extents.x1)
				extents.x1 = box[i].x1;
			if (box[i].y1 < extents.y1)
				extents.y1 = box[i].y1;

			if (box[i].x2 > extents.x2)
				extents.x2 = box[i].x2;
			if (box[i].y2 > extents.y2)
				extents.y2 = box[i].y2;
		}
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1 + dst_dx,
						   extents.y1 + dst_dy,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			goto fallback_tiled;
	}

	tmp.src.filter = SAMPLER_FILTER_NEAREST;
	tmp.src.repeat = SAMPLER_EXTEND_NONE;
	tmp.src.card_format = gen4_get_card_format(tmp.src.pict_format);
	/* Likewise for an oversized source: sample a partial view. */
	if (too_large(src->width, src->height)) {
		BoxRec extents = box[0];
		int i;

		for (i = 1; i < n; i++) {
			if (box[i].x1 < extents.x1)
				extents.x1 = box[i].x1;
			if (box[i].y1 < extents.y1)
				extents.y1 = box[i].y1;

			if (box[i].x2 > extents.x2)
				extents.x2 = box[i].x2;
			if (box[i].y2 > extents.y2)
				extents.y2 = box[i].y2;
		}

		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
					       extents.x1 + src_dx,
					       extents.y1 + src_dy,
					       extents.x2 - extents.x1,
					       extents.y2 - extents.y1))
			goto fallback_tiled_dst;
	} else {
		tmp.src.bo = kgem_bo_reference(src_bo);
		tmp.src.width  = src->width;
		tmp.src.height = src->height;
		tmp.src.offset[0] = tmp.src.offset[1] = 0;
		tmp.src.scale[0] = 1.f/src->width;
		tmp.src.scale[1] = 1.f/src->height;
	}

	tmp.is_affine = true;
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;
	tmp.u.gen4.wm_kernel = WM_KERNEL;
	tmp.u.gen4.ve_id = 2;

	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
			/* Release the references taken above before the
			 * BLT fallback takes over.
			 */
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			if (tmp.redirect.real_bo)
				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);

			goto fallback_blt;
		}
	}

	/* Fold the proxy/partial offsets into the per-box deltas. */
	dst_dx += tmp.dst.x;
	dst_dy += tmp.dst.y;
	tmp.dst.x = tmp.dst.y = 0;

	src_dx += tmp.src.offset[0];
	src_dy += tmp.src.offset[1];

	gen4_align_vertex(sna, &tmp);
	gen4_copy_bind_surfaces(sna, &tmp);

	do {
		gen4_render_copy_one(sna, &tmp,
				     box->x1 + src_dx, box->y1 + src_dy,
				     box->x2 - box->x1, box->y2 - box->y1,
				     box->x1 + dst_dx, box->y1 + dst_dy);
		box++;
	} while (--n);

	gen4_vertex_flush(sna);
	sna_render_composite_redirect_done(sna, &tmp);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	return true;

fallback_tiled_dst:
	if (tmp.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
	if (sna_blt_compare_depth(src, dst) &&
	    sna_blt_copy_boxes(sna, alu,
			       src_bo, src_dx, src_dy,
			       dst_bo, dst_dx, dst_dy,
			       dst->bitsPerPixel,
			       box, n))
		return true;

	return sna_tiling_copy_boxes(sna, alu,
				     src, src_bo, src_dx, src_dy,
				     dst, dst_bo, dst_dx, dst_dy,
				     box, n);
}
 
2534
 
 
2535
static void
gen4_render_copy_blt(struct sna *sna,
		     const struct sna_copy_op *op,
		     int16_t sx, int16_t sy,
		     int16_t w,  int16_t h,
		     int16_t dx, int16_t dy)
{
	/* sna_copy_op callback: forward one rectangle to the shared
	 * render-copy emitter.
	 */
	gen4_render_copy_one(sna, &op->base, sx, sy, w, h, dx, dy);
}
 
2544
 
 
2545
static void
gen4_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
	/* Flush any vertices still pending from this copy operation. */
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);
}
 
2551
 
 
2552
/* Prepare a streaming copy operation (repeated blt callbacks) between two
 * pixmaps.  Tries the BLT first; otherwise configures *op for render-engine
 * copies and installs the blt/done callbacks.  Returns false only when
 * neither the render path nor the BLT fallback can handle the request.
 */
static bool
gen4_render_copy(struct sna *sna, uint8_t alu,
		 PixmapPtr src, struct kgem_bo *src_bo,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 struct sna_copy_op *op)
{
	DBG(("%s: src=%ld, dst=%ld, alu=%d\n",
	     __FUNCTION__,
	     src->drawable.serialNumber,
	     dst->drawable.serialNumber,
	     alu));

	/* Fast path: plain BLT copy when depths match. */
	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
	    sna_blt_copy(sna, alu,
			 src_bo, dst_bo,
			 dst->drawable.bitsPerPixel,
			 op))
		return true;

	/* Render copy requires GXcopy/GXclear, distinct bos, and both
	 * surfaces within the 3D pipeline size limits.
	 */
	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
	    too_large(src->drawable.width, src->drawable.height) ||
	    too_large(dst->drawable.width, dst->drawable.height)) {
fallback:
		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
			return false;

		return sna_blt_copy(sna, alu, src_bo, dst_bo,
				    dst->drawable.bitsPerPixel,
				    op);
	}

	/* Pick compatible render formats for the two depths. */
	if (dst->drawable.depth == src->drawable.depth) {
		op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = op->base.dst.format;
	} else {
		op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
	}
	if (!gen4_check_format(op->base.src.pict_format))
		goto fallback;

	op->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.bo = dst_bo;

	op->base.src.bo = src_bo;
	op->base.src.card_format =
		gen4_get_card_format(op->base.src.pict_format);
	op->base.src.width  = src->drawable.width;
	op->base.src.height = src->drawable.height;
	op->base.src.scale[0] = 1.f/src->drawable.width;
	op->base.src.scale[1] = 1.f/src->drawable.height;
	op->base.src.filter = SAMPLER_FILTER_NEAREST;
	op->base.src.repeat = SAMPLER_EXTEND_NONE;

	op->base.is_affine = true;
	op->base.floats_per_vertex = 3;
	op->base.floats_per_rect = 9;
	op->base.u.gen4.wm_kernel = WM_KERNEL;
	op->base.u.gen4.ve_id = 2;

	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
			goto fallback;
	}

	/* A dirty source would force a flush before sampling; prefer the
	 * BLT in that case if it can take the copy.
	 */
	if (kgem_bo_is_dirty(src_bo)) {
		if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
		    sna_blt_copy(sna, alu,
				 src_bo, dst_bo,
				 dst->drawable.bitsPerPixel,
				 op))
			return true;
	}

	gen4_align_vertex(sna, &op->base);
	gen4_copy_bind_surfaces(sna, &op->base);

	op->blt  = gen4_render_copy_blt;
	op->done = gen4_render_copy_done;
	return true;
}
 
2638
 
 
2639
static void
gen4_render_fill_rectangle(struct sna *sna,
			   const struct sna_composite_op *op,
			   int x, int y, int w, int h)
{
	/* Emit one solid-fill rectangle.  The colour comes from the bound
	 * solid source channel, so each vertex only carries a constant .5
	 * texture coordinate (sampling the centre of the solid texture).
	 * Vertex order: bottom-right, bottom-left, top-left.
	 */
	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);

	OUT_VERTEX(x+w, y+h);
	OUT_VERTEX_F(.5);

	OUT_VERTEX(x, y+h);
	OUT_VERTEX_F(.5);

	OUT_VERTEX(x, y);
	OUT_VERTEX_F(.5);
}
 
2655
 
 
2656
/* Fill a set of boxes with a constant colour using the given Render op.
 * Simple ops (<= PictOpSrc) are first attempted on the BLT; otherwise the
 * colour is converted to a8r8g8b8 and rendered via a solid source channel.
 * Oversized destinations go through redirection or the tiled fallback.
 */
static bool
gen4_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       const DrawableRec *dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
	struct sna_composite_op tmp;
	uint32_t pixel;

	if (op >= ARRAY_SIZE(gen4_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	/* Clear/Src can be expressed as a BLT fill if the colour converts
	 * losslessly to the destination format.
	 */
	if (op <= PictOpSrc) {
		uint8_t alu = GXinvalid;

		pixel = 0;
		if (op == PictOpClear)
			alu = GXclear;
		else if (sna_get_pixel_from_rgba(&pixel,
						 color->red,
						 color->green,
						 color->blue,
						 color->alpha,
						 format))
			alu = GXcopy;

		if (alu != GXinvalid &&
		    sna_blt_fill_boxes(sna, alu,
				       dst_bo, dst->bitsPerPixel,
				       pixel, box, n))
			return true;

		if (!gen4_check_dst_format(format))
			return false;

		if (too_large(dst->width, dst->height))
			return sna_tiling_fill_boxes(sna, op, format, color,
						     dst, dst_bo, box, n);
	}

	/* Render path: normalise the colour to a8r8g8b8. */
	if (op == PictOpClear) {
		pixel = 0;
		op = PictOpSrc;
	} else if (!sna_get_pixel_from_rgba(&pixel,
					    color->red,
					    color->green,
					    color->blue,
					    color->alpha,
					    PICT_a8r8g8b8))
		return false;

	DBG(("%s(%08x x %d)\n", __FUNCTION__, pixel, n));

	memset(&tmp, 0, sizeof(tmp));

	tmp.op = op;

	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width  = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;

	sna_render_composite_redirect_init(&tmp);
	if (too_large(dst->width, dst->height)) {
		BoxRec extents;

		boxes_extents(box, n, &extents);
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1, extents.y1,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			return sna_tiling_fill_boxes(sna, op, format, color,
						     dst, dst_bo, box, n);
	}

	/* NOTE(review): return value ignored, unlike other call sites that
	 * check it and unwind — presumably solid init cannot fail here;
	 * confirm against gen4_channel_init_solid.
	 */
	gen4_channel_init_solid(sna, &tmp.src, pixel);

	tmp.is_affine = true;
	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.u.gen4.wm_kernel = WM_KERNEL;
	tmp.u.gen4.ve_id = 1;

	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
	}

	gen4_align_vertex(sna, &tmp);
	gen4_bind_surfaces(sna, &tmp);

	do {
		gen4_render_fill_rectangle(sna, &tmp,
					   box->x1, box->y1,
					   box->x2 - box->x1,
					   box->y2 - box->y1);
		box++;
	} while (--n);

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	sna_render_composite_redirect_done(sna, &tmp);
	return true;
}
 
2770
 
 
2771
/* Fill-op callback: fill one rectangle given as origin plus extent. */
static void
gen4_render_fill_op_blt(struct sna *sna, const struct sna_fill_op *op,
			int16_t x, int16_t y, int16_t w, int16_t h)
{
	gen4_render_fill_rectangle(sna, &op->base, x, y, w, h);
}
 
2777
 
 
2778
fastcall static void
 
2779
gen4_render_fill_op_box(struct sna *sna,
 
2780
                        const struct sna_fill_op *op,
 
2781
                        const BoxRec *box)
 
2782
{
 
2783
        gen4_render_fill_rectangle(sna, &op->base,
 
2784
                                   box->x1, box->y1,
 
2785
                                   box->x2-box->x1, box->y2-box->y1);
 
2786
}
 
2787
 
 
2788
fastcall static void
 
2789
gen4_render_fill_op_boxes(struct sna *sna,
 
2790
                          const struct sna_fill_op *op,
 
2791
                          const BoxRec *box,
 
2792
                          int nbox)
 
2793
{
 
2794
        do {
 
2795
                gen4_render_fill_rectangle(sna, &op->base,
 
2796
                                           box->x1, box->y1,
 
2797
                                           box->x2-box->x1, box->y2-box->y1);
 
2798
                box++;
 
2799
        } while (--nbox);
 
2800
}
 
2801
 
 
2802
/* Fill-op callback: finish a fill, emitting any queued vertices before
 * releasing the solid-colour source bo created at fill setup.
 */
static void
gen4_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
{
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
}
 
2809
 
 
2810
/* Set up a reusable solid-fill operation on @dst, preferring the 2D
 * blitter and falling back to the 3D RENDER pipeline for GXcopy/GXclear
 * on surfaces the 3D pipe can address.  On success the blt/box/boxes/done
 * callbacks in @op are populated for the caller to stream rectangles.
 */
static bool
gen4_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color, unsigned flags,
		 struct sna_fill_op *op)
{
	/* First choice: the cheap 2D blitter. */
	if (sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 op))
		return true;

	/* RENDER path handles only copy/clear and limited surface sizes;
	 * anything else must go through the blitter unconditionally.
	 */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    op);

	if (alu == GXclear)
		color = 0;

	op->base.op = color == 0 ? PictOpClear : PictOpSrc;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	op->base.dst.bo = dst_bo;
	op->base.dst.x = op->base.dst.y = 0;

	op->base.need_magic_ca_pass = 0;
	op->base.has_component_alpha = 0;

	/* Solid source channel carrying the fill colour; no mask. */
	gen4_channel_init_solid(sna, &op->base.src,
				sna_rgba_for_color(color,
						   dst->drawable.depth));
	op->base.mask.bo = NULL;

	op->base.is_affine = true;
	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;
	op->base.u.gen4.wm_kernel = WM_KERNEL;
	op->base.u.gen4.ve_id = 1;

	/* Ensure the batch has room for dst_bo, flushing once if needed;
	 * on repeated failure release the solid source and bail.
	 */
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
	}

	gen4_align_vertex(sna, &op->base);
	gen4_bind_surfaces(sna, &op->base);

	op->blt   = gen4_render_fill_op_blt;
	op->box   = gen4_render_fill_op_box;
	op->boxes = gen4_render_fill_op_boxes;
	op->points = NULL;
	op->done  = gen4_render_fill_op_done;
	return true;
}
 
2873
 
 
2874
static bool
 
2875
gen4_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 
2876
                             uint32_t color,
 
2877
                             int16_t x1, int16_t y1, int16_t x2, int16_t y2,
 
2878
                             uint8_t alu)
 
2879
{
 
2880
        BoxRec box;
 
2881
 
 
2882
        box.x1 = x1;
 
2883
        box.y1 = y1;
 
2884
        box.x2 = x2;
 
2885
        box.y2 = y2;
 
2886
 
 
2887
        return sna_blt_fill_boxes(sna, alu,
 
2888
                                  bo, dst->drawable.bitsPerPixel,
 
2889
                                  color, &box, 1);
 
2890
}
 
2891
 
 
2892
static bool
 
2893
gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 
2894
                     uint32_t color,
 
2895
                     int16_t x1, int16_t y1,
 
2896
                     int16_t x2, int16_t y2,
 
2897
                     uint8_t alu)
 
2898
{
 
2899
        struct sna_composite_op tmp;
 
2900
 
 
2901
        DBG(("%s: color=%08x\n", __FUNCTION__, color));
 
2902
 
 
2903
        if (gen4_render_fill_one_try_blt(sna, dst, bo, color,
 
2904
                                         x1, y1, x2, y2, alu))
 
2905
                return true;
 
2906
 
 
2907
        /* Must use the BLT if we can't RENDER... */
 
2908
        if (!(alu == GXcopy || alu == GXclear) ||
 
2909
            too_large(dst->drawable.width, dst->drawable.height))
 
2910
                return false;
 
2911
 
 
2912
        if (alu == GXclear)
 
2913
                color = 0;
 
2914
 
 
2915
        tmp.op = color == 0 ? PictOpClear : PictOpSrc;
 
2916
 
 
2917
        tmp.dst.pixmap = dst;
 
2918
        tmp.dst.width  = dst->drawable.width;
 
2919
        tmp.dst.height = dst->drawable.height;
 
2920
        tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
 
2921
        tmp.dst.bo = bo;
 
2922
        tmp.dst.x = tmp.dst.y = 0;
 
2923
 
 
2924
        gen4_channel_init_solid(sna, &tmp.src,
 
2925
                                sna_rgba_for_color(color,
 
2926
                                                   dst->drawable.depth));
 
2927
        tmp.mask.bo = NULL;
 
2928
        tmp.mask.filter = SAMPLER_FILTER_NEAREST;
 
2929
        tmp.mask.repeat = SAMPLER_EXTEND_NONE;
 
2930
 
 
2931
        tmp.is_affine = true;
 
2932
        tmp.floats_per_vertex = 2;
 
2933
        tmp.floats_per_rect = 6;
 
2934
        tmp.has_component_alpha = false;
 
2935
        tmp.need_magic_ca_pass = false;
 
2936
 
 
2937
        tmp.u.gen4.wm_kernel = WM_KERNEL;
 
2938
        tmp.u.gen4.ve_id = 1;
 
2939
 
 
2940
        if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
 
2941
                kgem_submit(&sna->kgem);
 
2942
                if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
 
2943
                        kgem_bo_destroy(&sna->kgem, tmp.src.bo);
 
2944
                        return false;
 
2945
                }
 
2946
        }
 
2947
 
 
2948
        gen4_align_vertex(sna, &tmp);
 
2949
        gen4_bind_surfaces(sna, &tmp);
 
2950
 
 
2951
        gen4_render_fill_rectangle(sna, &tmp, x1, y1, x2 - x1, y2 - y1);
 
2952
 
 
2953
        gen4_vertex_flush(sna);
 
2954
        kgem_bo_destroy(&sna->kgem, tmp.src.bo);
 
2955
 
 
2956
        return true;
 
2957
}
 
2958
 
 
2959
static void gen4_render_reset(struct sna *sna)
 
2960
{
 
2961
        sna->render_state.gen4.needs_invariant = true;
 
2962
        sna->render_state.gen4.needs_urb = true;
 
2963
        sna->render_state.gen4.ve_id = -1;
 
2964
        sna->render_state.gen4.last_primitive = -1;
 
2965
        sna->render_state.gen4.last_pipelined_pointers = -1;
 
2966
 
 
2967
        sna->render_state.gen4.drawrect_offset = -1;
 
2968
        sna->render_state.gen4.drawrect_limit = -1;
 
2969
        sna->render_state.gen4.surface_table = 0;
 
2970
 
 
2971
        if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
 
2972
                DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
 
2973
                discard_vbo(sna);
 
2974
        }
 
2975
 
 
2976
        sna->render.vertex_offset = 0;
 
2977
        sna->render.nvertex_reloc = 0;
 
2978
        sna->render.vb_id = 0;
 
2979
}
 
2980
 
 
2981
/* Tear down the gen4 render backend: release the static-state buffer
 * built by gen4_render_setup().
 */
static void gen4_render_fini(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen4.general_bo);
}
 
2985
 
 
2986
static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream)
 
2987
{
 
2988
        struct gen4_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32);
 
2989
 
 
2990
        /* Set up the vertex shader to be disabled (passthrough) */
 
2991
        vs->thread4.nr_urb_entries = URB_VS_ENTRIES;
 
2992
        vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
 
2993
        vs->vs6.vs_enable = 0;
 
2994
        vs->vs6.vert_cache_disable = 1;
 
2995
 
 
2996
        return sna_static_stream_offsetof(stream, vs);
 
2997
}
 
2998
 
 
2999
static uint32_t gen4_create_sf_state(struct sna_static_stream *stream,
 
3000
                                     uint32_t kernel)
 
3001
{
 
3002
        struct gen4_sf_unit_state *sf;
 
3003
 
 
3004
        sf = sna_static_stream_map(stream, sizeof(*sf), 32);
 
3005
 
 
3006
        sf->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
 
3007
        sf->thread0.kernel_start_pointer = kernel >> 6;
 
3008
        sf->thread3.const_urb_entry_read_length = 0;    /* no const URBs */
 
3009
        sf->thread3.const_urb_entry_read_offset = 0;    /* no const URBs */
 
3010
        sf->thread3.urb_entry_read_length = 1;  /* 1 URB per vertex */
 
3011
        /* don't smash vertex header, read start from dw8 */
 
3012
        sf->thread3.urb_entry_read_offset = 1;
 
3013
        sf->thread3.dispatch_grf_start_reg = 3;
 
3014
        sf->thread4.max_threads = GEN4_MAX_SF_THREADS - 1;
 
3015
        sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
 
3016
        sf->thread4.nr_urb_entries = URB_SF_ENTRIES;
 
3017
        sf->sf5.viewport_transform = false;     /* skip viewport */
 
3018
        sf->sf6.cull_mode = GEN4_CULLMODE_NONE;
 
3019
        sf->sf6.scissor = 0;
 
3020
        sf->sf7.trifan_pv = 2;
 
3021
        sf->sf6.dest_org_vbias = 0x8;
 
3022
        sf->sf6.dest_org_hbias = 0x8;
 
3023
 
 
3024
        return sna_static_stream_offsetof(stream, sf);
 
3025
}
 
3026
 
 
3027
static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream,
 
3028
                                          sampler_filter_t src_filter,
 
3029
                                          sampler_extend_t src_extend,
 
3030
                                          sampler_filter_t mask_filter,
 
3031
                                          sampler_extend_t mask_extend)
 
3032
{
 
3033
        struct gen4_sampler_state *sampler_state;
 
3034
 
 
3035
        sampler_state = sna_static_stream_map(stream,
 
3036
                                              sizeof(struct gen4_sampler_state) * 2,
 
3037
                                              32);
 
3038
        sampler_state_init(&sampler_state[0], src_filter, src_extend);
 
3039
        sampler_state_init(&sampler_state[1], mask_filter, mask_extend);
 
3040
 
 
3041
        return sna_static_stream_offsetof(stream, sampler_state);
 
3042
}
 
3043
 
 
3044
static void gen4_init_wm_state(struct gen4_wm_unit_state *wm,
 
3045
                               int gen,
 
3046
                               bool has_mask,
 
3047
                               uint32_t kernel,
 
3048
                               uint32_t sampler)
 
3049
{
 
3050
        assert((kernel & 63) == 0);
 
3051
        wm->thread0.kernel_start_pointer = kernel >> 6;
 
3052
        wm->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
 
3053
 
 
3054
        wm->thread1.single_program_flow = 0;
 
3055
 
 
3056
        wm->thread3.const_urb_entry_read_length = 0;
 
3057
        wm->thread3.const_urb_entry_read_offset = 0;
 
3058
 
 
3059
        wm->thread3.urb_entry_read_offset = 0;
 
3060
        wm->thread3.dispatch_grf_start_reg = 3;
 
3061
 
 
3062
        assert((sampler & 31) == 0);
 
3063
        wm->wm4.sampler_state_pointer = sampler >> 5;
 
3064
        wm->wm4.sampler_count = 1;
 
3065
 
 
3066
        wm->wm5.max_threads = gen >= 045 ? G4X_MAX_WM_THREADS - 1 : GEN4_MAX_WM_THREADS - 1;
 
3067
        wm->wm5.transposed_urb_read = 0;
 
3068
        wm->wm5.thread_dispatch_enable = 1;
 
3069
        /* just use 16-pixel dispatch (4 subspans), don't need to change kernel
 
3070
         * start point
 
3071
         */
 
3072
        wm->wm5.enable_16_pix = 1;
 
3073
        wm->wm5.enable_8_pix = 0;
 
3074
        wm->wm5.early_depth_test = 1;
 
3075
 
 
3076
        /* Each pair of attributes (src/mask coords) is two URB entries */
 
3077
        if (has_mask) {
 
3078
                wm->thread1.binding_table_entry_count = 3;
 
3079
                wm->thread3.urb_entry_read_length = 4;
 
3080
        } else {
 
3081
                wm->thread1.binding_table_entry_count = 2;
 
3082
                wm->thread3.urb_entry_read_length = 2;
 
3083
        }
 
3084
}
 
3085
 
 
3086
static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream)
 
3087
{
 
3088
        uint8_t *ptr, *base;
 
3089
        int i, j;
 
3090
 
 
3091
        base = ptr =
 
3092
                sna_static_stream_map(stream,
 
3093
                                      GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64,
 
3094
                                      64);
 
3095
 
 
3096
        for (i = 0; i < GEN4_BLENDFACTOR_COUNT; i++) {
 
3097
                for (j = 0; j < GEN4_BLENDFACTOR_COUNT; j++) {
 
3098
                        struct gen4_cc_unit_state *state =
 
3099
                                (struct gen4_cc_unit_state *)ptr;
 
3100
 
 
3101
                        state->cc3.blend_enable =
 
3102
                                !(j == GEN4_BLENDFACTOR_ZERO && i == GEN4_BLENDFACTOR_ONE);
 
3103
 
 
3104
                        state->cc5.logicop_func = 0xc;  /* COPY */
 
3105
                        state->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD;
 
3106
 
 
3107
                        /* Fill in alpha blend factors same as color, for the future. */
 
3108
                        state->cc5.ia_src_blend_factor = i;
 
3109
                        state->cc5.ia_dest_blend_factor = j;
 
3110
 
 
3111
                        state->cc6.blend_function = GEN4_BLENDFUNCTION_ADD;
 
3112
                        state->cc6.clamp_post_alpha_blend = 1;
 
3113
                        state->cc6.clamp_pre_alpha_blend = 1;
 
3114
                        state->cc6.src_blend_factor = i;
 
3115
                        state->cc6.dest_blend_factor = j;
 
3116
 
 
3117
                        ptr += 64;
 
3118
                }
 
3119
        }
 
3120
 
 
3121
        return sna_static_stream_offsetof(stream, base);
 
3122
}
 
3123
 
 
3124
static bool gen4_render_setup(struct sna *sna)
 
3125
{
 
3126
        struct gen4_render_state *state = &sna->render_state.gen4;
 
3127
        struct sna_static_stream general;
 
3128
        struct gen4_wm_unit_state_padded *wm_state;
 
3129
        uint32_t sf, wm[KERNEL_COUNT];
 
3130
        int i, j, k, l, m;
 
3131
 
 
3132
        sna_static_stream_init(&general);
 
3133
 
 
3134
        /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
 
3135
         * dumps, you know it points to zero.
 
3136
         */
 
3137
        null_create(&general);
 
3138
 
 
3139
        sf = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);
 
3140
        for (m = 0; m < KERNEL_COUNT; m++) {
 
3141
                if (wm_kernels[m].size) {
 
3142
                        wm[m] = sna_static_stream_add(&general,
 
3143
                                                      wm_kernels[m].data,
 
3144
                                                      wm_kernels[m].size,
 
3145
                                                      64);
 
3146
                } else {
 
3147
                        wm[m] = sna_static_stream_compile_wm(sna, &general,
 
3148
                                                             wm_kernels[m].data,
 
3149
                                                             16);
 
3150
                }
 
3151
        }
 
3152
 
 
3153
        state->vs = gen4_create_vs_unit_state(&general);
 
3154
        state->sf = gen4_create_sf_state(&general, sf);
 
3155
 
 
3156
        wm_state = sna_static_stream_map(&general,
 
3157
                                          sizeof(*wm_state) * KERNEL_COUNT *
 
3158
                                          FILTER_COUNT * EXTEND_COUNT *
 
3159
                                          FILTER_COUNT * EXTEND_COUNT,
 
3160
                                          64);
 
3161
        state->wm = sna_static_stream_offsetof(&general, wm_state);
 
3162
        for (i = 0; i < FILTER_COUNT; i++) {
 
3163
                for (j = 0; j < EXTEND_COUNT; j++) {
 
3164
                        for (k = 0; k < FILTER_COUNT; k++) {
 
3165
                                for (l = 0; l < EXTEND_COUNT; l++) {
 
3166
                                        uint32_t sampler_state;
 
3167
 
 
3168
                                        sampler_state =
 
3169
                                                gen4_create_sampler_state(&general,
 
3170
                                                                          i, j,
 
3171
                                                                          k, l);
 
3172
 
 
3173
                                        for (m = 0; m < KERNEL_COUNT; m++) {
 
3174
                                                gen4_init_wm_state(&wm_state->state,
 
3175
                                                                   sna->kgem.gen,
 
3176
                                                                   wm_kernels[m].has_mask,
 
3177
                                                                   wm[m], sampler_state);
 
3178
                                                wm_state++;
 
3179
                                        }
 
3180
                                }
 
3181
                        }
 
3182
                }
 
3183
        }
 
3184
 
 
3185
        state->cc = gen4_create_cc_unit_state(&general);
 
3186
 
 
3187
        state->general_bo = sna_static_stream_fini(sna, &general);
 
3188
        return state->general_bo != NULL;
 
3189
}
 
3190
 
 
3191
/* Entry point: initialize the gen4 render backend.  On success, installs
 * the gen4 render/fill/copy/video hooks and returns the backend name;
 * on setup failure the previous @backend name is returned unchanged.
 */
const char *gen4_render_init(struct sna *sna, const char *backend)
{
	if (!gen4_render_setup(sna))
		return backend;

	sna->kgem.retire = gen4_render_retire;
	sna->kgem.expire = gen4_render_expire;

#if !NO_COMPOSITE
	sna->render.composite = gen4_render_composite;
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	sna->render.check_composite_spans = gen4_check_composite_spans;
	sna->render.composite_spans = gen4_render_composite_spans;
	/* NOTE: span preference deliberately left disabled (if (0)). */
	if (0)
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif

#if !NO_VIDEO
	sna->render.video = gen4_render_video;
#endif

#if !NO_COPY_BOXES
	sna->render.copy_boxes = gen4_render_copy_boxes;
#endif
#if !NO_COPY
	sna->render.copy = gen4_render_copy;
#endif

#if !NO_FILL_BOXES
	sna->render.fill_boxes = gen4_render_fill_boxes;
#endif
#if !NO_FILL
	sna->render.fill = gen4_render_fill;
#endif
#if !NO_FILL_ONE
	sna->render.fill_one = gen4_render_fill_one;
#endif

	sna->render.flush = gen4_render_flush;
	sna->render.reset = gen4_render_reset;
	sna->render.fini = gen4_render_fini;

	sna->render.max_3d_size = GEN4_MAX_3D_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	return sna->kgem.gen >= 045 ? "Eaglelake (gen4.5)" : "Broadwater (gen4)";
}