~mmach/netext73/xorg-intel

Viewing changes to src/sna/gen2_render.c

  • Committer: mmach
  • Date: 2021-03-17 18:24:35 UTC
  • Revision ID: netbit73@gmail.com-20210317182435-7okt04d35etrqgqp
July

/*
 * Copyright © 2006,2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Wang Zhenyu <zhenyu.z.wang@intel.com>
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
#include "sna_video.h"

#include "gen2_render.h"

#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0

#define MAX_3D_SIZE 2048
#define MAX_3D_PITCH 8192
#define MAX_INLINE (1 << 18)

#define BATCH(v) batch_emit(sna, v)
#define BATCH_ALIGNED(v, a) batch_emit_aligned(sna, v, a)
#define BATCH_F(v) batch_emit_float(sna, v)
#define VERTEX(v) batch_emit_float(sna, v)

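/*
 * Mapping of Render PictOps to gen2 blend factors.  dst_alpha/src_alpha
 * mark the ops whose factors reference destination or source alpha and so
 * may need fixing up in gen2_get_blend_cntl(); e.g. Over resolves to
 * dst = src * ONE + dst * (1 - src.A).
 */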
static const struct blendinfo {
        bool dst_alpha;
        bool src_alpha;
        uint32_t src_blend;
        uint32_t dst_blend;
} gen2_blend_op[] = {
        /* Clear */
        {0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO},
        /* Src */
        {0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO},
        /* Dst */
        {0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE},
        /* Over */
        {0, 1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA},
        /* OverReverse */
        {1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE},
        /* In */
        {1, 0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO},
        /* InReverse */
        {0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA},
        /* Out */
        {1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO},
        /* OutReverse */
        {0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA},
        /* Atop */
        {1, 1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
        /* AtopReverse */
        {1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA},
        /* Xor */
        {1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
        /* Add */
        {0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ONE},
};

static const struct formatinfo {
        unsigned int fmt;
        uint32_t card_fmt;
} i8xx_tex_formats[] = {
        {PICT_a8, MAPSURF_8BIT | MT_8BIT_A8},
        {PICT_a8r8g8b8, MAPSURF_32BIT | MT_32BIT_ARGB8888},
        {PICT_a8b8g8r8, MAPSURF_32BIT | MT_32BIT_ABGR8888},
        {PICT_r5g6b5, MAPSURF_16BIT | MT_16BIT_RGB565},
        {PICT_a1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555},
        {PICT_a4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444},
}, i85x_tex_formats[] = {
        {PICT_x8r8g8b8, MAPSURF_32BIT | MT_32BIT_XRGB8888},
        {PICT_x8b8g8r8, MAPSURF_32BIT | MT_32BIT_XBGR8888},
};

static inline bool
too_large(int width, int height)
{
        return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
}

static inline uint32_t
gen2_buf_tiling(uint32_t tiling)
{
        uint32_t v = 0;
        switch (tiling) {
        default:
                assert(0);
                /* fall through */
        case I915_TILING_Y:
                v |= BUF_3D_TILE_WALK_Y;
                /* fall through */
        case I915_TILING_X:
                v |= BUF_3D_TILED_SURFACE;
                /* fall through */
        case I915_TILING_NONE:
                break;
        }
        return v;
}

static uint32_t
gen2_get_dst_format(uint32_t format)
{
#define BIAS DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8)
        switch (format) {
        default:
                assert(0);
                /* fall through */
        case PICT_a8r8g8b8:
        case PICT_x8r8g8b8:
                return COLR_BUF_ARGB8888 | BIAS;
        case PICT_r5g6b5:
                return COLR_BUF_RGB565 | BIAS;
        case PICT_a1r5g5b5:
        case PICT_x1r5g5b5:
                return COLR_BUF_ARGB1555 | BIAS;
        case PICT_a8:
                return COLR_BUF_8BIT | BIAS;
        case PICT_a4r4g4b4:
        case PICT_x4r4g4b4:
                return COLR_BUF_ARGB4444 | BIAS;
        }
#undef BIAS
}

static bool
gen2_check_dst_format(uint32_t format)
{
        switch (format) {
        case PICT_a8r8g8b8:
        case PICT_x8r8g8b8:
        case PICT_r5g6b5:
        case PICT_a1r5g5b5:
        case PICT_x1r5g5b5:
        case PICT_a8:
        case PICT_a4r4g4b4:
        case PICT_x4r4g4b4:
                return true;
        default:
                return false;
        }
}

static uint32_t
gen2_get_card_format(struct sna *sna, uint32_t format)
{
        unsigned int i;

        for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++)
                if (i8xx_tex_formats[i].fmt == format)
                        return i8xx_tex_formats[i].card_fmt;

        if (sna->kgem.gen < 021) {
                /* Whilst these are not directly supported on 830/845,
                 * we only enable them when we can implicitly convert
                 * them to a supported variant through the texture
                 * combiners.
                 */
                for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++)
                        if (i85x_tex_formats[i].fmt == format)
                                return i8xx_tex_formats[1+i].card_fmt;
        } else {
                for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++)
                        if (i85x_tex_formats[i].fmt == format)
                                return i85x_tex_formats[i].card_fmt;
        }

        assert(0);
        return 0;
}

static uint32_t
gen2_check_format(struct sna *sna, PicturePtr p)
{
        unsigned int i;

        for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++)
                if (i8xx_tex_formats[i].fmt == p->format)
                        return true;

        if (sna->kgem.gen > 021) {
                for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++)
                        if (i85x_tex_formats[i].fmt == p->format)
                                return true;
        }

        return false;
}

static uint32_t
gen2_sampler_tiling_bits(uint32_t tiling)
{
        uint32_t bits = 0;
        switch (tiling) {
        default:
                assert(0);
                /* fall through */
        case I915_TILING_Y:
                bits |= TM0S1_TILE_WALK;
                /* fall through */
        case I915_TILING_X:
                bits |= TM0S1_TILED_SURFACE;
                /* fall through */
        case I915_TILING_NONE:
                break;
        }
        return bits;
}

static bool
gen2_check_filter(PicturePtr picture)
{
        switch (picture->filter) {
        case PictFilterNearest:
        case PictFilterBilinear:
                return true;
        default:
                return false;
        }
}

static bool
gen2_check_repeat(PicturePtr picture)
{
        if (!picture->repeat)
                return true;

        switch (picture->repeatType) {
        case RepeatNone:
        case RepeatNormal:
        case RepeatPad:
        case RepeatReflect:
                return true;
        default:
                return false;
        }
}

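/*
 * Emit the sampler state for one texture unit: the map itself (relocated
 * bo, size, format, tiling and pitch via LOAD_STATE_IMMEDIATE_2), the
 * min/mag filters, and the U/V wrap modes derived from the picture's
 * repeat mode.
 */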
static void
gen2_emit_texture(struct sna *sna,
                  const struct sna_composite_channel *channel,
                  int unit)
{
        uint32_t wrap_mode_u, wrap_mode_v;
        uint32_t texcoordtype;
        uint32_t filter;

        assert(channel->bo);

        if (channel->is_affine)
                texcoordtype = TEXCOORDTYPE_CARTESIAN;
        else
                texcoordtype = TEXCOORDTYPE_HOMOGENEOUS;

        switch (channel->repeat) {
        default:
                assert(0);
                /* fall through */
        case RepeatNone:
                wrap_mode_u = TEXCOORDMODE_CLAMP_BORDER;
                break;
        case RepeatNormal:
                wrap_mode_u = TEXCOORDMODE_WRAP;
                break;
        case RepeatPad:
                wrap_mode_u = TEXCOORDMODE_CLAMP;
                break;
        case RepeatReflect:
                wrap_mode_u = TEXCOORDMODE_MIRROR;
                break;
        }
        if (channel->is_linear)
                wrap_mode_v = TEXCOORDMODE_WRAP;
        else
                wrap_mode_v = wrap_mode_u;

        switch (channel->filter) {
        default:
                assert(0);
                /* fall through */
        case PictFilterNearest:
                filter = (FILTER_NEAREST << TM0S3_MAG_FILTER_SHIFT |
                          FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT |
                          MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT);
                break;
        case PictFilterBilinear:
                filter = (FILTER_LINEAR << TM0S3_MAG_FILTER_SHIFT |
                          FILTER_LINEAR << TM0S3_MIN_FILTER_SHIFT |
                          MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT);
                break;
        }

        BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | LOAD_TEXTURE_MAP(unit) | 4);
        BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
                             channel->bo,
                             I915_GEM_DOMAIN_SAMPLER << 16,
                             0));
        BATCH(((channel->height - 1) << TM0S1_HEIGHT_SHIFT) |
              ((channel->width - 1)  << TM0S1_WIDTH_SHIFT) |
              gen2_get_card_format(sna, channel->pict_format) |
              gen2_sampler_tiling_bits(channel->bo->tiling));
        BATCH((channel->bo->pitch / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D);
        BATCH(filter);
        BATCH(0);       /* default color */

        BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(unit) |
              ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | texcoordtype |
              ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(wrap_mode_v) |
              ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(wrap_mode_u));
}

static void
gen2_get_blend_factors(const struct sna_composite_op *op,
                       int blend,
                       uint32_t *c_out,
                       uint32_t *a_out)
{
        uint32_t cblend, ablend;

        /* If component alpha is active in the mask and the blend operation
         * uses the source alpha, then we know we don't need the source
         * value (otherwise we would have hit a fallback earlier), so we
         * provide the source alpha (src.A * mask.X) as output color.
         * Conversely, if CA is set and we don't need the source alpha, then
         * we produce the source value (src.X * mask.X) and the source alpha
         * is unused..  Otherwise, we provide the non-CA source value
         * (src.X * mask.A).
         *
         * The PICT_FORMAT_RGB(pict) == 0 fixups are not needed on 855+'s a8
         * pictures, but we need to implement it for 830/845 and there's no
         * harm done in leaving it in.
         */
        cblend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OUTPUT_WRITE_CURRENT;
        ablend = TB0A_RESULT_SCALE_1X | TB0A_OUTPUT_WRITE_CURRENT;

        /* Get the source picture's channels into TBx_ARG1 */
        if ((op->has_component_alpha && gen2_blend_op[blend].src_alpha) ||
            op->dst.format == PICT_a8) {
                /* Producing source alpha value, so the first set of channels
                 * is src.A instead of src.X.  We also do this if the destination
                 * is a8, in which case src.G is what's written, and the other
                 * channels are ignored.
                 */
                if (op->src.is_opaque) {
                        ablend |= TB0C_ARG1_SEL_ONE;
                        cblend |= TB0C_ARG1_SEL_ONE;
                } else if (op->src.is_solid) {
                        ablend |= TB0C_ARG1_SEL_DIFFUSE;
                        cblend |= TB0C_ARG1_SEL_DIFFUSE | TB0C_ARG1_REPLICATE_ALPHA;
                } else {
                        ablend |= TB0C_ARG1_SEL_TEXEL0;
                        cblend |= TB0C_ARG1_SEL_TEXEL0 | TB0C_ARG1_REPLICATE_ALPHA;
                }
        } else {
                if (op->src.is_solid)
                        cblend |= TB0C_ARG1_SEL_DIFFUSE;
                else if (PICT_FORMAT_RGB(op->src.pict_format) != 0)
                        cblend |= TB0C_ARG1_SEL_TEXEL0;
                else
                        cblend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT; /* 0.0 */

                if (op->src.is_opaque)
                        ablend |= TB0A_ARG1_SEL_ONE;
                else if (op->src.is_solid)
                        ablend |= TB0A_ARG1_SEL_DIFFUSE;
                else
                        ablend |= TB0A_ARG1_SEL_TEXEL0;
        }

        if (op->mask.bo) {
                if (op->src.is_solid) {
                        cblend |= TB0C_ARG2_SEL_TEXEL0;
                        ablend |= TB0A_ARG2_SEL_TEXEL0;
                } else {
                        cblend |= TB0C_ARG2_SEL_TEXEL1;
                        ablend |= TB0A_ARG2_SEL_TEXEL1;
                }

                if (op->dst.format == PICT_a8 || !op->has_component_alpha)
                        cblend |= TB0C_ARG2_REPLICATE_ALPHA;

                cblend |= TB0C_OP_MODULATE;
                ablend |= TB0A_OP_MODULATE;
        } else if (op->mask.is_solid) {
                cblend |= TB0C_ARG2_SEL_DIFFUSE;
                ablend |= TB0A_ARG2_SEL_DIFFUSE;

                if (op->dst.format == PICT_a8 || !op->has_component_alpha)
                        cblend |= TB0C_ARG2_REPLICATE_ALPHA;

                cblend |= TB0C_OP_MODULATE;
                ablend |= TB0A_OP_MODULATE;
        } else {
                cblend |= TB0C_OP_ARG1;
                ablend |= TB0A_OP_ARG1;
        }

        *c_out = cblend;
        *a_out = ablend;
}

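/*
 * Translate the blendinfo entry for this op into the S8 dword.  Factors
 * that reference destination alpha are rewritten when the target has no
 * alpha channel, and alpha factors become colour factors both for a8
 * targets (the 8bit buffer travels in the green channel) and for
 * component alpha, where the "alpha" arrives per channel.
 */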
static uint32_t gen2_get_blend_cntl(int op,
                                    bool has_component_alpha,
                                    uint32_t dst_format)
{
        uint32_t sblend, dblend;

        if (op <= PictOpSrc)
                return S8_ENABLE_COLOR_BUFFER_WRITE;

        sblend = gen2_blend_op[op].src_blend;
        dblend = gen2_blend_op[op].dst_blend;

        if (gen2_blend_op[op].dst_alpha) {
                /* If there's no dst alpha channel, adjust the blend op so that
                 * we'll treat it as always 1.
                 */
                if (PICT_FORMAT_A(dst_format) == 0) {
                        if (sblend == BLENDFACTOR_DST_ALPHA)
                                sblend = BLENDFACTOR_ONE;
                        else if (sblend == BLENDFACTOR_INV_DST_ALPHA)
                                sblend = BLENDFACTOR_ZERO;
                }

                /* gen2 engine reads 8bit color buffer into green channel
                 * in cases like color buffer blending etc., and also writes
                 * back green channel.  So with dst_alpha blend we should use
                 * color factor.
                 */
                if (dst_format == PICT_a8) {
                        if (sblend == BLENDFACTOR_DST_ALPHA)
                                sblend = BLENDFACTOR_DST_COLR;
                        else if (sblend == BLENDFACTOR_INV_DST_ALPHA)
                                sblend = BLENDFACTOR_INV_DST_COLR;
                }
        }

        /* If the source alpha is being used, then we should only be in a case
         * where the source blend factor is 0, and the source blend value is
         * the mask channels multiplied by the source picture's alpha.
         */
        if (has_component_alpha && gen2_blend_op[op].src_alpha) {
                if (dblend == BLENDFACTOR_SRC_ALPHA)
                        dblend = BLENDFACTOR_SRC_COLR;
                else if (dblend == BLENDFACTOR_INV_SRC_ALPHA)
                        dblend = BLENDFACTOR_INV_SRC_COLR;
        }

        return (sblend << S8_SRC_BLEND_FACTOR_SHIFT |
                dblend << S8_DST_BLEND_FACTOR_SHIFT |
                S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
                S8_ENABLE_COLOR_BUFFER_WRITE);
}

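/*
 * State that does not change for the lifetime of a batch: texture stream
 * bindings for all four units, disabled scissor/depth/stencil/alpha tests,
 * default blend modes and pass-through texture blend stages.  Re-emitted
 * whenever render_state.gen2.need_invariant is set, i.e. on a fresh batch.
 */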
static void gen2_emit_invariant(struct sna *sna)
{
        int i;

        for (i = 0; i < 4; i++) {
                BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(i));
                BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | MAP_UNIT(i) |
                      DISABLE_TEX_STREAM_BUMP |
                      ENABLE_TEX_STREAM_COORD_SET | TEX_STREAM_COORD_SET(i) |
                      ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(i));
                BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
                BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(i));
        }

        BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD);
        BATCH(TEXBIND_SET3(TEXCOORDSRC_VTXSET_3) |
              TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) |
              TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) |
              TEXBIND_SET0(TEXCOORDSRC_VTXSET_0));

        BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);

        BATCH(_3DSTATE_VERTEX_TRANSFORM);
        BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE);

        BATCH(_3DSTATE_W_STATE_CMD);
        BATCH(MAGIC_W_STATE_DWORD1);
        BATCH_F(1.0);

        BATCH(_3DSTATE_INDPT_ALPHA_BLEND_CMD |
              DISABLE_INDPT_ALPHA_BLEND |
              ENABLE_ALPHA_BLENDFUNC | ABLENDFUNC_ADD);

        BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD);
        BATCH(0);

        BATCH(_3DSTATE_MODES_1_CMD |
              ENABLE_COLR_BLND_FUNC | BLENDFUNC_ADD |
              ENABLE_SRC_BLND_FACTOR | SRC_BLND_FACT(BLENDFACTOR_ONE) |
              ENABLE_DST_BLND_FACTOR | DST_BLND_FACT(BLENDFACTOR_ZERO));

        BATCH(_3DSTATE_ENABLES_1_CMD |
              DISABLE_LOGIC_OP |
              DISABLE_STENCIL_TEST |
              DISABLE_DEPTH_BIAS |
              DISABLE_SPEC_ADD |
              DISABLE_FOG |
              DISABLE_ALPHA_TEST |
              DISABLE_DEPTH_TEST |
              ENABLE_COLOR_BLEND);

        BATCH(_3DSTATE_ENABLES_2_CMD |
              DISABLE_STENCIL_WRITE |
              DISABLE_DITHER |
              DISABLE_DEPTH_WRITE |
              ENABLE_COLOR_MASK |
              ENABLE_COLOR_WRITE |
              ENABLE_TEX_CACHE);

        BATCH(_3DSTATE_STIPPLE);
        BATCH(0);

        BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) |
              TEXPIPE_COLOR |
              ENABLE_TEXOUTPUT_WRT_SEL |
              TEXOP_OUTPUT_CURRENT |
              DISABLE_TEX_CNTRL_STAGE |
              TEXOP_SCALE_1X |
              TEXOP_MODIFY_PARMS | TEXOP_LAST_STAGE |
              TEXBLENDOP_ARG1);
        BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) |
              TEXPIPE_ALPHA |
              ENABLE_TEXOUTPUT_WRT_SEL |
              TEXOP_OUTPUT_CURRENT |
              TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS |
              TEXBLENDOP_ARG1);
        BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) |
              TEXPIPE_COLOR |
              TEXBLEND_ARG1 |
              TEXBLENDARG_MODIFY_PARMS |
              TEXBLENDARG_DIFFUSE);
        BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) |
              TEXPIPE_ALPHA |
              TEXBLEND_ARG1 |
              TEXBLENDARG_MODIFY_PARMS |
              TEXBLENDARG_DIFFUSE);

#define INVARIANT_SIZE 35

        sna->render_state.gen2.need_invariant = false;
}

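/*
 * Make sure the batch has room (dwords, relocations and exec slots) for a
 * full state emission, including the i830 3DSTATE_BUFFER_INFO alignment
 * workaround; if not, submit and start a fresh batch, into which the
 * invariant state is then re-emitted.
 */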
static void
gen2_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
        kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

        /* +7 for i830 3DSTATE_BUFFER_INFO w/a */
        if (!kgem_check_batch(&sna->kgem, INVARIANT_SIZE+40+7)) {
                DBG(("%s: flushing batch: size %d > %d\n",
                     __FUNCTION__, INVARIANT_SIZE+40,
                     sna->kgem.surface-sna->kgem.nbatch));
                kgem_submit(&sna->kgem);
                _kgem_set_mode(&sna->kgem, KGEM_RENDER);
        }

        if (!kgem_check_reloc(&sna->kgem, 3)) {
                DBG(("%s: flushing batch: reloc %d >= %d\n",
                     __FUNCTION__,
                     sna->kgem.nreloc + 3,
                     (int)KGEM_RELOC_SIZE(&sna->kgem)));
                kgem_submit(&sna->kgem);
                _kgem_set_mode(&sna->kgem, KGEM_RENDER);
        }

        if (!kgem_check_exec(&sna->kgem, 3)) {
                DBG(("%s: flushing batch: exec %d >= %d\n",
                     __FUNCTION__,
                     sna->kgem.nexec + 1,
                     (int)KGEM_EXEC_SIZE(&sna->kgem)));
                kgem_submit(&sna->kgem);
                _kgem_set_mode(&sna->kgem, KGEM_RENDER);
        }

        if (sna->render_state.gen2.need_invariant)
                gen2_emit_invariant(sna);
}

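/*
 * Destination state is keyed on bo->unique_id: while successive operations
 * keep rendering to the same buffer the BUF_INFO/DST_BUF_VARS/DRAW_RECT
 * setup is skipped and the bo is only marked dirty again.
 */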
static void gen2_emit_target(struct sna *sna,
                             struct kgem_bo *bo,
                             int width,
                             int height,
                             int format)
{
        assert(!too_large(width, height));
        assert(bo->pitch >= 8 && bo->pitch <= MAX_3D_PITCH);
        assert(sna->render.vertex_offset == 0);

        assert(bo->unique_id);
        if (sna->render_state.gen2.target == bo->unique_id) {
                kgem_bo_mark_dirty(bo);
                return;
        }

        /*
         * i830 w/a: 3DSTATE_BUFFER_INFO
         * must not straddle two cachelines.
         */
        if (intel_get_device_id(sna->dev) == 0x3577)
                BATCH_ALIGNED(_3DSTATE_BUF_INFO_CMD, 8);
        else
                BATCH(_3DSTATE_BUF_INFO_CMD);
        BATCH(BUF_3D_ID_COLOR_BACK |
              gen2_buf_tiling(bo->tiling) |
              BUF_3D_PITCH(bo->pitch));
        BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
                             bo,
                             I915_GEM_DOMAIN_RENDER << 16 |
                             I915_GEM_DOMAIN_RENDER,
                             0));

        BATCH(_3DSTATE_DST_BUF_VARS_CMD);
        BATCH(gen2_get_dst_format(format));

        BATCH(_3DSTATE_DRAW_RECT_CMD);
        BATCH(0);
        BATCH(0);       /* ymin, xmin */
        BATCH(DRAW_YMAX(height - 1) |
              DRAW_XMAX(width - 1));
        BATCH(0);       /* yorig, xorig */

        sna->render_state.gen2.target = bo->unique_id;
}

static void gen2_disable_logic_op(struct sna *sna)
{
        if (!sna->render_state.gen2.logic_op_enabled)
                return;

        DBG(("%s\n", __FUNCTION__));

        BATCH(_3DSTATE_ENABLES_1_CMD |
              DISABLE_LOGIC_OP | ENABLE_COLOR_BLEND);

        sna->render_state.gen2.logic_op_enabled = 0;
}

static void gen2_enable_logic_op(struct sna *sna, int op)
{
        static const uint8_t logic_op[] = {
                LOGICOP_CLEAR,          /* GXclear */
                LOGICOP_AND,            /* GXand */
                LOGICOP_AND_RVRSE,      /* GXandReverse */
                LOGICOP_COPY,           /* GXcopy */
                LOGICOP_AND_INV,        /* GXandInverted */
                LOGICOP_NOOP,           /* GXnoop */
                LOGICOP_XOR,            /* GXxor */
                LOGICOP_OR,             /* GXor */
                LOGICOP_NOR,            /* GXnor */
                LOGICOP_EQUIV,          /* GXequiv */
                LOGICOP_INV,            /* GXinvert */
                LOGICOP_OR_RVRSE,       /* GXorReverse */
                LOGICOP_COPY_INV,       /* GXcopyInverted */
                LOGICOP_OR_INV,         /* GXorInverted */
                LOGICOP_NAND,           /* GXnand */
                LOGICOP_SET             /* GXset */
        };

        if (sna->render_state.gen2.logic_op_enabled != op+1) {
                if (!sna->render_state.gen2.logic_op_enabled) {
                        if (op == GXclear || op == GXcopy)
                                return;

                        DBG(("%s\n", __FUNCTION__));

                        BATCH(_3DSTATE_ENABLES_1_CMD |
                              ENABLE_LOGIC_OP | DISABLE_COLOR_BLEND);
                }

                BATCH(_3DSTATE_MODES_4_CMD |
                      ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(logic_op[op]));
                sna->render_state.gen2.logic_op_enabled = op+1;
        }
}

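/*
 * The S2/S3/S8 words and the stage-0 texture blend words are written
 * speculatively at the current batch pointer; if they match what was last
 * emitted (remembered via ls1/ls2) nbatch is simply rewound, so redundant
 * state changes cost no batch space.
 */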
static void gen2_emit_composite_state(struct sna *sna,
                                      const struct sna_composite_op *op)
{
        uint32_t texcoordfmt, v, unwind;
        uint32_t cblend, ablend;
        int tex;

        gen2_get_batch(sna, op);

        if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
                if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
                        BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
                else
                        BATCH(_3DSTATE_MODES_5_CMD |
                              PIPELINE_FLUSH_RENDER_CACHE |
                              PIPELINE_FLUSH_TEXTURE_CACHE);
                kgem_clear_dirty(&sna->kgem);
        }

        gen2_emit_target(sna,
                         op->dst.bo,
                         op->dst.width,
                         op->dst.height,
                         op->dst.format);

        unwind = sna->kgem.nbatch;
        BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
              I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
        BATCH((!op->src.is_solid + (op->mask.bo != NULL)) << 12);
        BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
        BATCH(gen2_get_blend_cntl(op->op,
                                  op->has_component_alpha,
                                  op->dst.format));
        if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
                   sna->kgem.batch + unwind + 1,
                   3 * sizeof(uint32_t)) == 0)
                sna->kgem.nbatch = unwind;
        else
                sna->render_state.gen2.ls1 = unwind;

        gen2_disable_logic_op(sna);

        gen2_get_blend_factors(op, op->op, &cblend, &ablend);
        unwind = sna->kgem.nbatch;
        BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
              LOAD_TEXTURE_BLEND_STAGE(0) | 1);
        BATCH(cblend);
        BATCH(ablend);
        if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
                   sna->kgem.batch + unwind + 1,
                   2 * sizeof(uint32_t)) == 0)
                sna->kgem.nbatch = unwind;
        else
                sna->render_state.gen2.ls2 = unwind;

        tex = texcoordfmt = 0;
        if (!op->src.is_solid) {
                if (op->src.is_affine)
                        texcoordfmt |= TEXCOORDFMT_2D << (2*tex);
                else
                        texcoordfmt |= TEXCOORDFMT_3D << (2*tex);
                gen2_emit_texture(sna, &op->src, tex++);
        } else {
                if (op->src.u.gen2.pixel != sna->render_state.gen2.diffuse) {
                        BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
                        BATCH(op->src.u.gen2.pixel);
                        sna->render_state.gen2.diffuse = op->src.u.gen2.pixel;
                }
        }
        if (op->mask.bo) {
                if (op->mask.is_affine)
                        texcoordfmt |= TEXCOORDFMT_2D << (2*tex);
                else
                        texcoordfmt |= TEXCOORDFMT_3D << (2*tex);
                gen2_emit_texture(sna, &op->mask, tex++);
        } else if (op->mask.is_solid) {
                if (op->mask.u.gen2.pixel != sna->render_state.gen2.diffuse) {
                        BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
                        BATCH(op->mask.u.gen2.pixel);
                        sna->render_state.gen2.diffuse = op->mask.u.gen2.pixel;
                }
        }

        v = _3DSTATE_VERTEX_FORMAT_2_CMD | texcoordfmt;
        if (sna->render_state.gen2.vft != v) {
                BATCH(v);
                sna->render_state.gen2.vft = v;
        }
}

static inline void
gen2_emit_composite_dstcoord(struct sna *sna, int dstX, int dstY)
{
        VERTEX(dstX);
        VERTEX(dstY);
}

inline static void
gen2_emit_composite_linear(struct sna *sna,
                           const struct sna_composite_channel *channel,
                           int16_t x, int16_t y)
{
        float v;

        v = (x * channel->u.linear.dx +
             y * channel->u.linear.dy +
             channel->u.linear.offset);
        DBG(("%s: (%d, %d) -> %f\n", __FUNCTION__, x, y, v));
        VERTEX(v);
        VERTEX(v);
}

static void
gen2_emit_composite_texcoord(struct sna *sna,
                             const struct sna_composite_channel *channel,
                             int16_t x, int16_t y)
{
        float s = 0, t = 0, w = 1;

        x += channel->offset[0];
        y += channel->offset[1];

        if (channel->is_affine) {
                sna_get_transformed_coordinates(x, y,
                                                channel->transform,
                                                &s, &t);
                VERTEX(s * channel->scale[0]);
                VERTEX(t * channel->scale[1]);
        } else {
                sna_get_transformed_coordinates_3d(x, y,
                                                   channel->transform,
                                                   &s, &t, &w);
                VERTEX(s * channel->scale[0]);
                VERTEX(t * channel->scale[1]);
                VERTEX(w);
        }
}

static void
gen2_emit_composite_vertex(struct sna *sna,
                           const struct sna_composite_op *op,
                           int16_t srcX, int16_t srcY,
                           int16_t mskX, int16_t mskY,
                           int16_t dstX, int16_t dstY)
{
        gen2_emit_composite_dstcoord(sna, dstX, dstY);
        if (op->src.is_linear)
                gen2_emit_composite_linear(sna, &op->src, srcX, srcY);
        else if (!op->src.is_solid)
                gen2_emit_composite_texcoord(sna, &op->src, srcX, srcY);

        if (op->mask.is_linear)
                gen2_emit_composite_linear(sna, &op->mask, mskX, mskY);
        else if (op->mask.bo)
                gen2_emit_composite_texcoord(sna, &op->mask, mskX, mskY);
}

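/*
 * PRIM3D_RECTLIST takes three vertices per rectangle (bottom-right,
 * bottom-left, top-left); the hardware infers the fourth corner.  The
 * emitters below are specialised fast paths for the common source/mask
 * configurations and all produce this same ordering.
 */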
fastcall static void
gen2_emit_composite_primitive(struct sna *sna,
                              const struct sna_composite_op *op,
                              const struct sna_composite_rectangles *r)
{
        gen2_emit_composite_vertex(sna, op,
                                   r->src.x + r->width,
                                   r->src.y + r->height,
                                   r->mask.x + r->width,
                                   r->mask.y + r->height,
                                   op->dst.x + r->dst.x + r->width,
                                   op->dst.y + r->dst.y + r->height);
        gen2_emit_composite_vertex(sna, op,
                                   r->src.x,
                                   r->src.y + r->height,
                                   r->mask.x,
                                   r->mask.y + r->height,
                                   op->dst.x + r->dst.x,
                                   op->dst.y + r->dst.y + r->height);
        gen2_emit_composite_vertex(sna, op,
                                   r->src.x,
                                   r->src.y,
                                   r->mask.x,
                                   r->mask.y,
                                   op->dst.x + r->dst.x,
                                   op->dst.y + r->dst.y);
}

fastcall static void
gen2_emit_composite_primitive_constant(struct sna *sna,
                                       const struct sna_composite_op *op,
                                       const struct sna_composite_rectangles *r)
{
        int16_t dst_x = r->dst.x + op->dst.x;
        int16_t dst_y = r->dst.y + op->dst.y;

        gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
        gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
        gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
}

fastcall static void
gen2_emit_composite_primitive_linear(struct sna *sna,
                                       const struct sna_composite_op *op,
                                       const struct sna_composite_rectangles *r)
{
        int16_t dst_x = r->dst.x + op->dst.x;
        int16_t dst_y = r->dst.y + op->dst.y;

        gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
        gen2_emit_composite_linear(sna, &op->src,
                                   r->src.x + r->width, r->src.y + r->height);

        gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
        gen2_emit_composite_linear(sna, &op->src,
                                   r->src.x, r->src.y + r->height);

        gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
        gen2_emit_composite_linear(sna, &op->src,
                                   r->src.x, r->src.y);
}

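/*
 * The identity/affine emitters below bypass the VERTEX() helper and write
 * the twelve floats of a rectangle straight into the batch:
 *   v[0..3]  bottom-right (x, y, u, v)
 *   v[4..7]  bottom-left
 *   v[8..11] top-left
 */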
fastcall static void
gen2_emit_composite_primitive_identity(struct sna *sna,
                                       const struct sna_composite_op *op,
                                       const struct sna_composite_rectangles *r)
{
        float w = r->width;
        float h = r->height;
        float *v;

        v = (float *)sna->kgem.batch + sna->kgem.nbatch;
        sna->kgem.nbatch += 12;

        v[8] = v[4] = r->dst.x + op->dst.x;
        v[0] = v[4] + w;

        v[9] = r->dst.y + op->dst.y;
        v[5] = v[1] = v[9] + h;

        v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
        v[2] = v[6] + w * op->src.scale[0];

        v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
        v[7] = v[3] = v[11] + h * op->src.scale[1];
}

fastcall static void
gen2_emit_composite_primitive_affine(struct sna *sna,
                                     const struct sna_composite_op *op,
                                     const struct sna_composite_rectangles *r)
{
        PictTransform *transform = op->src.transform;
        int src_x = r->src.x + (int)op->src.offset[0];
        int src_y = r->src.y + (int)op->src.offset[1];
        float *v;

        v = (float *)sna->kgem.batch + sna->kgem.nbatch;
        sna->kgem.nbatch += 12;

        v[8] = v[4] = r->dst.x + op->dst.x;
        v[0] = v[4] + r->width;

        v[9] = r->dst.y + op->dst.y;
        v[5] = v[1] = v[9] + r->height;

        _sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
                                    transform, op->src.scale,
                                    &v[2], &v[3]);

        _sna_get_transformed_scaled(src_x, src_y + r->height,
                                    transform, op->src.scale,
                                    &v[6], &v[7]);

        _sna_get_transformed_scaled(src_x, src_y,
                                    transform, op->src.scale,
                                    &v[10], &v[11]);
}

fastcall static void
gen2_emit_composite_primitive_constant_identity_mask(struct sna *sna,
                                                     const struct sna_composite_op *op,
                                                     const struct sna_composite_rectangles *r)
{
        float w = r->width;
        float h = r->height;
        float *v;

        v = (float *)sna->kgem.batch + sna->kgem.nbatch;
        sna->kgem.nbatch += 12;

        v[8] = v[4] = r->dst.x + op->dst.x;
        v[0] = v[4] + w;

        v[9] = r->dst.y + op->dst.y;
        v[5] = v[1] = v[9] + h;

        v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
        v[2] = v[6] + w * op->mask.scale[0];

        v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
        v[7] = v[3] = v[11] + h * op->mask.scale[1];
}

#if defined(sse2) && !defined(__x86_64__)
sse2 fastcall static void
gen2_emit_composite_primitive_constant__sse2(struct sna *sna,
                                             const struct sna_composite_op *op,
                                             const struct sna_composite_rectangles *r)
{
        int16_t dst_x = r->dst.x + op->dst.x;
        int16_t dst_y = r->dst.y + op->dst.y;

        gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
        gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
        gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
}

sse2 fastcall static void
gen2_emit_composite_primitive_linear__sse2(struct sna *sna,
                                           const struct sna_composite_op *op,
                                           const struct sna_composite_rectangles *r)
{
        int16_t dst_x = r->dst.x + op->dst.x;
        int16_t dst_y = r->dst.y + op->dst.y;

        gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
        gen2_emit_composite_linear(sna, &op->src,
                                   r->src.x + r->width, r->src.y + r->height);

        gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
        gen2_emit_composite_linear(sna, &op->src,
                                   r->src.x, r->src.y + r->height);

        gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
        gen2_emit_composite_linear(sna, &op->src,
                                   r->src.x, r->src.y);
}

sse2 fastcall static void
gen2_emit_composite_primitive_identity__sse2(struct sna *sna,
                                             const struct sna_composite_op *op,
                                             const struct sna_composite_rectangles *r)
{
        float w = r->width;
        float h = r->height;
        float *v;

        v = (float *)sna->kgem.batch + sna->kgem.nbatch;
        sna->kgem.nbatch += 12;

        v[8] = v[4] = r->dst.x + op->dst.x;
        v[0] = v[4] + w;

        v[9] = r->dst.y + op->dst.y;
        v[5] = v[1] = v[9] + h;

        v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
        v[2] = v[6] + w * op->src.scale[0];

        v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
        v[7] = v[3] = v[11] + h * op->src.scale[1];
}

sse2 fastcall static void
gen2_emit_composite_primitive_affine__sse2(struct sna *sna,
                                           const struct sna_composite_op *op,
                                           const struct sna_composite_rectangles *r)
{
        PictTransform *transform = op->src.transform;
        int src_x = r->src.x + (int)op->src.offset[0];
        int src_y = r->src.y + (int)op->src.offset[1];
        float *v;

        v = (float *)sna->kgem.batch + sna->kgem.nbatch;
        sna->kgem.nbatch += 12;

        v[8] = v[4] = r->dst.x + op->dst.x;
        v[0] = v[4] + r->width;

        v[9] = r->dst.y + op->dst.y;
        v[5] = v[1] = v[9] + r->height;

        _sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
                                    transform, op->src.scale,
                                    &v[2], &v[3]);

        _sna_get_transformed_scaled(src_x, src_y + r->height,
                                    transform, op->src.scale,
                                    &v[6], &v[7]);

        _sna_get_transformed_scaled(src_x, src_y,
                                    transform, op->src.scale,
                                    &v[10], &v[11]);
}

sse2 fastcall static void
gen2_emit_composite_primitive_constant_identity_mask__sse2(struct sna *sna,
                                                           const struct sna_composite_op *op,
                                                           const struct sna_composite_rectangles *r)
{
        float w = r->width;
        float h = r->height;
        float *v;

        v = (float *)sna->kgem.batch + sna->kgem.nbatch;
        sna->kgem.nbatch += 12;

        v[8] = v[4] = r->dst.x + op->dst.x;
        v[0] = v[4] + w;

        v[9] = r->dst.y + op->dst.y;
        v[5] = v[1] = v[9] + h;

        v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
        v[2] = v[6] + w * op->mask.scale[0];

        v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
        v[7] = v[3] = v[11] + h * op->mask.scale[1];
}
#endif

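/*
 * Second half of the two-pass component-alpha composite: the rectangles
 * just emitted are replayed with an additive (ONE, ONE) blend and
 * PictOpAdd combiners, so that src*mask is added on top of the first
 * pass's result.  The vertex data is copied verbatim from earlier in the
 * batch.
 */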
static void gen2_magic_ca_pass(struct sna *sna,
                               const struct sna_composite_op *op)
{
        uint32_t ablend, cblend, *src, *dst;
        int n;

        if (!op->need_magic_ca_pass)
                return;

        DBG(("%s: batch=%x, vertex=%x\n", __FUNCTION__,
             sna->kgem.nbatch, sna->render.vertex_offset));

        assert(op->mask.bo);
        assert(op->has_component_alpha);

        BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(8) | 0);
        BATCH(BLENDFACTOR_ONE << S8_SRC_BLEND_FACTOR_SHIFT |
              BLENDFACTOR_ONE << S8_DST_BLEND_FACTOR_SHIFT |
              S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
              S8_ENABLE_COLOR_BUFFER_WRITE);
        sna->render_state.gen2.ls1 = 0;

        gen2_get_blend_factors(op, PictOpAdd, &cblend, &ablend);
        BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
              LOAD_TEXTURE_BLEND_STAGE(0) | 1);
        BATCH(cblend);
        BATCH(ablend);
        sna->render_state.gen2.ls2 = 0;

        src = sna->kgem.batch + sna->render.vertex_offset;
        dst = sna->kgem.batch + sna->kgem.nbatch;
        n = 1 + sna->render.vertex_index;
        sna->kgem.nbatch += n;
        assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
        while (n--)
                *dst++ = *src++;
}

static void gen2_vertex_flush(struct sna *sna,
                              const struct sna_composite_op *op)
{
        if (sna->render.vertex_index == 0)
                return;

        sna->kgem.batch[sna->render.vertex_offset] |=
                sna->render.vertex_index - 1;

        gen2_magic_ca_pass(sna, op);

        sna->render.vertex_offset = 0;
        sna->render.vertex_index = 0;
}

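/*
 * Reserve room in the batch for `want` rectangles of inline vertex data,
 * continuing the currently open PRIM3D_INLINE | PRIM3D_RECTLIST primitive
 * where possible.  Returns how many rectangles actually fit (capped by
 * MAX_INLINE); 0 means the batch was flushed and the caller must re-emit
 * state and retry.  gen2_vertex_flush() later patches the final vertex
 * count into the primitive dword.
 */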
inline static int gen2_get_rectangles(struct sna *sna,
                                      const struct sna_composite_op *op,
                                      int want)
{
        int rem = batch_space(sna), size, need;

        DBG(("%s: want=%d, floats_per_vertex=%d, rem=%d\n",
             __FUNCTION__, want, op->floats_per_vertex, rem));

        assert(op->floats_per_vertex);
        assert(op->floats_per_rect == 3 * op->floats_per_vertex);

        need = 1;
        size = op->floats_per_rect;
        if (op->need_magic_ca_pass)
                need += 6 + size*sna->render.vertex_index, size *= 2;

        DBG(("%s: want=%d, need=%d,size=%d, rem=%d\n",
             __FUNCTION__, want, need, size, rem));
        if (rem < need + size) {
                gen2_vertex_flush(sna, op);
                kgem_submit(&sna->kgem);
                _kgem_set_mode(&sna->kgem, KGEM_RENDER);
                return 0;
        }

        rem -= need;
        if (sna->render.vertex_offset == 0) {
                if ((sna->kgem.batch[sna->kgem.nbatch-1] & ~0xffff) ==
                    (PRIM3D_INLINE | PRIM3D_RECTLIST)) {
                        uint32_t *b = &sna->kgem.batch[sna->kgem.nbatch-1];
                        assert(*b & 0xffff);
                        sna->render.vertex_index = 1 + (*b & 0xffff);
                        *b = PRIM3D_INLINE | PRIM3D_RECTLIST;
                        sna->render.vertex_offset = sna->kgem.nbatch - 1;
                        assert(!op->need_magic_ca_pass);
                } else {
                        sna->render.vertex_offset = sna->kgem.nbatch;
                        BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST);
                }

                need = 0;
        } else
                need = sna->kgem.nbatch - sna->render.vertex_offset;

        if (rem > MAX_INLINE - need)
                rem = MAX_INLINE -need;

        if (want > 1 && want * size > rem)
                want = rem / size;

        assert(want);
        sna->render.vertex_index += want*op->floats_per_rect;
        return want;
}

fastcall static void
gen2_render_composite_blt(struct sna *sna,
                          const struct sna_composite_op *op,
                          const struct sna_composite_rectangles *r)
{
        if (!gen2_get_rectangles(sna, op, 1)) {
                gen2_emit_composite_state(sna, op);
                gen2_get_rectangles(sna, op, 1);
        }

        op->prim_emit(sna, op, r);
}

fastcall static void
gen2_render_composite_box(struct sna *sna,
                          const struct sna_composite_op *op,
                          const BoxRec *box)
{
        struct sna_composite_rectangles r;

        if (!gen2_get_rectangles(sna, op, 1)) {
                gen2_emit_composite_state(sna, op);
                gen2_get_rectangles(sna, op, 1);
        }

        DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
             box->x1, box->y1,
             box->x2 - box->x1,
             box->y2 - box->y1));

        r.dst.x  = box->x1; r.dst.y  = box->y1;
        r.width = box->x2 - box->x1;
        r.height = box->y2 - box->y1;
        r.src = r.mask = r.dst;

        op->prim_emit(sna, op, &r);
}

static void
gen2_render_composite_boxes(struct sna *sna,
                            const struct sna_composite_op *op,
                            const BoxRec *box, int nbox)
{
        do {
                int nbox_this_time;

                nbox_this_time = gen2_get_rectangles(sna, op, nbox);
                if (nbox_this_time == 0) {
                        gen2_emit_composite_state(sna, op);
                        nbox_this_time = gen2_get_rectangles(sna, op, nbox);
                }
                nbox -= nbox_this_time;

                do {
                        struct sna_composite_rectangles r;

                        DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
                             box->x1, box->y1,
                             box->x2 - box->x1,
                             box->y2 - box->y1));

                        r.dst.x  = box->x1; r.dst.y  = box->y1;
                        r.width = box->x2 - box->x1;
                        r.height = box->y2 - box->y1;
                        r.src = r.mask = r.dst;

                        op->prim_emit(sna, op, &r);
                        box++;
                } while (--nbox_this_time);
        } while (nbox);
}

static void gen2_render_composite_done(struct sna *sna,
                                       const struct sna_composite_op *op)
{
        gen2_vertex_flush(sna, op);

        if (op->mask.bo)
                kgem_bo_destroy(&sna->kgem, op->mask.bo);
        if (op->src.bo)
                kgem_bo_destroy(&sna->kgem, op->src.bo);
        sna_render_composite_redirect_done(sna, op);
}

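/*
 * Solid sources never occupy a texture unit: the colour is loaded as the
 * default diffuse colour (_3DSTATE_DFLT_DIFFUSE_CMD in
 * gen2_emit_composite_state) and selected in the combiners through the
 * TB0C/TB0A_ARG*_SEL_DIFFUSE arguments.
 */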
static bool
gen2_composite_solid_init(struct sna *sna,
                          struct sna_composite_channel *channel,
                          uint32_t color)
{
        channel->filter = PictFilterNearest;
        channel->repeat = RepeatNormal;
        channel->is_solid  = true;
        channel->is_affine = true;
        channel->width  = 1;
        channel->height = 1;
        channel->pict_format = PICT_a8r8g8b8;

        channel->bo = NULL;
        channel->u.gen2.pixel = color;

        channel->scale[0]  = channel->scale[1]  = 1;
        channel->offset[0] = channel->offset[1] = 0;
        return true;
}

#define xFixedToDouble(f) pixman_fixed_to_double(f)

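/*
 * Linear gradients sample a 1D gradient texture obtained from
 * sna_render_get_gradient(); the per-vertex texcoord is the projection of
 * (x, y) onto the gradient vector, evaluated on the CPU by
 * gen2_emit_composite_linear() from the dx/dy/offset terms computed here.
 */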
static bool
gen2_composite_linear_init(struct sna *sna,
                           PicturePtr picture,
                           struct sna_composite_channel *channel,
                           int x, int y,
                           int w, int h,
                           int dst_x, int dst_y)
{
        PictLinearGradient *linear =
                (PictLinearGradient *)picture->pSourcePict;
        pixman_fixed_t tx, ty;
        float x0, y0, sf;
        float dx, dy;

        DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n",
             __FUNCTION__,
             xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y),
             xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y)));

        if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
                return 0;

        if (!sna_transform_is_affine(picture->transform)) {
                DBG(("%s: fallback due to projective transform\n",
                     __FUNCTION__));
                return sna_render_picture_fixup(sna, picture, channel,
                                                x, y, w, h, dst_x, dst_y);
        }

        channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear);
        if (!channel->bo)
                return 0;

        channel->filter = PictFilterNearest;
        channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
        channel->is_linear = true;
        channel->width  = channel->bo->pitch / 4;
        channel->height = 1;
        channel->pict_format = PICT_a8r8g8b8;

        channel->scale[0]  = channel->scale[1]  = 1;
        channel->offset[0] = channel->offset[1] = 0;

        if (sna_transform_is_translation(picture->transform, &tx, &ty)) {
                dx = xFixedToDouble(linear->p2.x - linear->p1.x);
                dy = xFixedToDouble(linear->p2.y - linear->p1.y);

                x0 = xFixedToDouble(linear->p1.x);
                y0 = xFixedToDouble(linear->p1.y);

                if (tx | ty) {
                        x0 -= pixman_fixed_to_double(tx);
                        y0 -= pixman_fixed_to_double(ty);
                }
        } else {
                struct pixman_f_vector p1, p2;
                struct pixman_f_transform m, inv;

                pixman_f_transform_from_pixman_transform(&m, picture->transform);
                DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n",
                     __FUNCTION__,
                     m.m[0][0], m.m[0][1], m.m[0][2],
                     m.m[1][0], m.m[1][1], m.m[1][2],
                     m.m[2][0], m.m[2][1], m.m[2][2]));
                if (!pixman_f_transform_invert(&inv, &m))
                        return 0;

                p1.v[0] = pixman_fixed_to_double(linear->p1.x);
                p1.v[1] = pixman_fixed_to_double(linear->p1.y);
                p1.v[2] = 1.;
                pixman_f_transform_point(&inv, &p1);

                p2.v[0] = pixman_fixed_to_double(linear->p2.x);
                p2.v[1] = pixman_fixed_to_double(linear->p2.y);
                p2.v[2] = 1.;
                pixman_f_transform_point(&inv, &p2);

                DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n",
                     __FUNCTION__,
                     p1.v[0], p1.v[1], p1.v[2],
                     p2.v[0], p2.v[1], p2.v[2]));
 
1430
                dx = p2.v[0] - p1.v[0];
 
1431
                dy = p2.v[1] - p1.v[1];
 
1432
 
 
1433
                x0 = p1.v[0];
 
1434
                y0 = p1.v[1];
 
1435
        }
 
1436
 
 
1437
        sf = dx*dx + dy*dy;
 
1438
        dx /= sf;
 
1439
        dy /= sf;
 
1440
 
 
1441
        channel->u.linear.dx = dx;
 
1442
        channel->u.linear.dy = dy;
 
1443
        channel->u.linear.offset = -dx*(x0+dst_x-x) + -dy*(y0+dst_y-y);
 
1444
 
 
1445
        DBG(("%s: dx=%f, dy=%f, offset=%f\n",
 
1446
             __FUNCTION__, dx, dy, channel->u.linear.offset));
 
1447
 
 
1448
        return channel->bo != NULL;
 
1449
}
 
1450
 
 
1451
static bool source_is_covered(PicturePtr picture,
			      int x, int y,
			      int width, int height)
{
	int x1, y1, x2, y2;

	if (picture->repeat && picture->repeatType != RepeatNone)
		return true;

	if (picture->pDrawable == NULL)
		return false;

	if (picture->transform) {
		pixman_box16_t sample;

		sample.x1 = x;
		sample.y1 = y;
		sample.x2 = x + width;
		sample.y2 = y + height;

		pixman_transform_bounds(picture->transform, &sample);

		x1 = sample.x1;
		x2 = sample.x2;
		y1 = sample.y1;
		y2 = sample.y2;
	} else {
		x1 = x;
		y1 = y;
		x2 = x + width;
		y2 = y + height;
	}

	return
		x1 >= 0 && y1 >= 0 &&
		x2 <= picture->pDrawable->width &&
		y2 <= picture->pDrawable->height;
}

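/*
 * Note (added comment): the i85x-only texture formats can be used directly
 * from gen 2.1 onwards.  On the older parts they are only taken when the
 * sample area never extends beyond the drawable, in which case the channel
 * is forced opaque; otherwise the caller converts the picture and fixes up
 * the alpha instead.
 */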
static bool
gen2_check_card_format(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y, int w, int h,
		       bool *fixup_alpha)
{
	uint32_t format = picture->format;
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) {
		if (i8xx_tex_formats[i].fmt == format)
			return true;
	}

	for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) {
		if (i85x_tex_formats[i].fmt == format) {
			if (sna->kgem.gen >= 021)
				return true;

			if (source_is_covered(picture, x, y, w, h)) {
				channel->is_opaque = true;
				return true;
			}

			*fixup_alpha = true;
			return false;
		}
	}

	*fixup_alpha = false;
	return false;
}

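/*
 * Note (added comment): gen2_composite_picture() and the render helpers it
 * calls return -1 on failure (the caller falls back), 0 when the channel
 * should be replaced by a solid colour (no bo assigned), and 1 when the
 * channel has been prepared with a texture bo.
 */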
static int
gen2_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;
	bool fixup_alpha;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->is_linear = false;
	channel->is_opaque = false;
	channel->is_affine = true;
	channel->transform = NULL;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen2_composite_solid_init(sna, channel, color);

	if (!gen2_check_repeat(picture)) {
		DBG(("%s -- fallback, unhandled repeat %d\n",
		     __FUNCTION__, picture->repeat));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen2_check_filter(picture)) {
		DBG(("%s -- fallback, unhandled filter %d\n",
		     __FUNCTION__, picture->filter));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (picture->pDrawable == NULL) {
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen2_composite_linear_init(sna, picture, channel,
							  x, y,
							  w, h,
							  dst_x, dst_y);

		DBG(("%s -- fallback, unhandled source %d\n",
		     __FUNCTION__, picture->pSourcePict->type));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;

		if (channel->repeat &&
		    (x >= 0 &&
		     y >= 0 &&
		     x + w <= pixmap->drawable.width &&
		     y + h <= pixmap->drawable.height)) {
			struct sna_pixmap *priv = sna_pixmap(pixmap);
			if (priv && priv->clear) {
				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
				return gen2_composite_solid_init(sna, channel, solid_color(picture->format, priv->clear_color));
			}
		}
	} else
		channel->transform = picture->transform;

	if (!gen2_check_card_format(sna, picture, channel, x, y, w, h, &fixup_alpha))
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y, fixup_alpha);

	channel->pict_format = picture->format;
	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}

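/*
 * Note (added comment): the hint flags below bias sna_drawable_use_bo()
 * towards rendering on the GPU.  FORCE_GPU is only added when the target
 * fits the 3D pipeline without tiling; IGNORE_DAMAGE is set for non-partial
 * writes, and REPLACES additionally when the whole pixmap is overwritten so
 * any existing damage can be discarded.
 */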
static bool
gen2_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	BoxRec box;
	unsigned hint;

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.format = dst->format;
	op->dst.width = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;

	if (w && h) {
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	hint = PREFER_GPU | RENDER_GPU;
	if (!need_tiling(sna, op->dst.width, op->dst.height))
		hint |= FORCE_GPU;
	if (!partial) {
		hint |= IGNORE_DAMAGE;
		if (w == op->dst.width && h == op->dst.height)
			hint |= REPLACES;
	}

	op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	if (hint & REPLACES) {
		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
	}

	assert((op->dst.bo->pitch & 7) == 0);

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);

	if (((too_large(op->dst.width, op->dst.height) ||
	      op->dst.bo->pitch > MAX_3D_PITCH)) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
		return false;

	return true;
}

static bool
is_unhandled_gradient(PicturePtr picture, bool precise)
{
	if (picture->pDrawable)
		return false;

	switch (picture->pSourcePict->type) {
	case SourcePictTypeSolidFill:
	case SourcePictTypeLinear:
		return false;
	default:
		return precise;
	}
}

static bool
has_alphamap(PicturePtr p)
{
	return p->alphaMap != NULL;
}

static bool
need_upload(PicturePtr p)
{
	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
}

static bool
source_is_busy(PixmapPtr pixmap)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	if (priv == NULL)
		return false;

	if (priv->clear)
		return false;

	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
		return true;

	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
		return true;

	return priv->gpu_damage && !priv->cpu_damage;
}

static bool
source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
{
	if (sna_picture_is_solid(p, NULL))
		return false;

	if (is_unhandled_gradient(p, precise) || !gen2_check_repeat(p))
		return true;

	if (pixmap && source_is_busy(pixmap))
		return false;

	return has_alphamap(p) || !gen2_check_filter(p) || need_upload(p);
}

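/*
 * Note (added comment): the goal below is to keep the whole operation on one
 * side.  If the destination (or a busy source) is already on the GPU, try to
 * render there; if the destination lives on the CPU and a source would need
 * fixing up anyway, fall back and do everything on the CPU rather than
 * bouncing buffers back and forth.
 */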
static bool
gen2_composite_fallback(struct sna *sna,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen2_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the cpu.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
	 */
	if (src_fallback) {
		DBG(("%s: dst is on the CPU and src will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (mask && mask_fallback) {
		DBG(("%s: dst is on the CPU and mask will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (too_large(dst_pixmap->drawable.width,
		      dst_pixmap->drawable.height) &&
	    dst_is_cpu(dst_pixmap)) {
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
		return true;
	}

	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
	     __FUNCTION__));
	return dst_use_cpu(dst_pixmap);
}

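/*
 * Note (added comment): when source and mask refer to the same picture, or
 * to the same drawable with identical transform, repeat, filter and
 * alphamap, the already-prepared source channel is duplicated for the mask
 * so the drawable only has to be bound once.
 */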
static int
reuse_source(struct sna *sna,
	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
	uint32_t color;

	if (src_x != msk_x || src_y != msk_y)
		return false;

	if (sna_picture_is_solid(mask, &color))
		return gen2_composite_solid_init(sna, mc, color);

	if (sc->is_solid)
		return false;

	if (src == mask) {
		DBG(("%s: mask is source\n", __FUNCTION__));
		*mc = *sc;
		mc->bo = kgem_bo_reference(mc->bo);
		return true;
	}

	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
		return false;

	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

	if (!sna_transform_equal(src->transform, mask->transform))
		return false;

	if (!sna_picture_alphamap_equal(src, mask))
		return false;

	if (!gen2_check_repeat(mask))
		return false;

	if (!gen2_check_filter(mask))
		return false;

	if (!gen2_check_format(sna, mask))
		return false;

	DBG(("%s: reusing source channel for mask with a twist\n",
	     __FUNCTION__));

	*mc = *sc;
	mc->repeat = mask->repeat ? mask->repeatType : RepeatNone;
	mc->filter = mask->filter;
	mc->pict_format = mask->format;
	mc->bo = kgem_bo_reference(mc->bo);
	return true;
}

static bool
gen2_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x,  int16_t src_y,
		      int16_t mask_x, int16_t mask_y,
		      int16_t dst_x,  int16_t dst_y,
		      int16_t width,  int16_t height,
		      unsigned flags,
		      struct sna_composite_op *tmp)
{
	DBG(("%s()\n", __FUNCTION__));

	if (op >= ARRAY_SIZE(gen2_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	if (mask == NULL &&
	    sna_blt_composite(sna, op, src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      flags, tmp))
		return true;

	if (gen2_composite_fallback(sna, src, mask, dst))
		goto fallback;

	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x,  src_y,
					    mask_x, mask_y,
					    dst_x,  dst_y,
					    width,  height,
					    tmp);

	tmp->op = op;
	sna_render_composite_redirect_init(tmp);

	if (!gen2_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height,
				       flags & COMPOSITE_PARTIAL || op > PictOpSrc)) {
		DBG(("%s: unable to set render target\n",
		     __FUNCTION__));
		goto fallback;
	}

	switch (gen2_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		DBG(("%s: fallback -- unable to prepare source\n",
		     __FUNCTION__));
		goto cleanup_dst;
	case 0:
		gen2_composite_solid_init(sna, &tmp->src, 0);
		break;
	case 1:
		if (mask == NULL && tmp->src.bo &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;
		break;
	}

	if (mask) {
		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, mask_x, mask_y)) {
			switch (gen2_composite_picture(sna, mask, &tmp->mask,
						       mask_x, mask_y,
						       width,  height,
						       dst_x,  dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				DBG(("%s: fallback -- unable to prepare mask\n",
				     __FUNCTION__));
				goto cleanup_src;
			case 0:
				gen2_composite_solid_init(sna, &tmp->mask, 0);
			case 1:
				break;
			}
		}

		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			/* Check if it's component alpha that relies on a source alpha
			 * and on the source value.  We can only get one of those
			 * into the single source value that we get to blend with.
			 */
			tmp->has_component_alpha = true;
			if (gen2_blend_op[op].src_alpha &&
			    (gen2_blend_op[op].src_blend != BLENDFACTOR_ZERO)) {
				if (op != PictOpOver) {
					DBG(("%s: fallback -- unsupported CA blend (src_blend=%d)\n",
					     __FUNCTION__,
					     gen2_blend_op[op].src_blend));
					goto cleanup_src;
				}

				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		/* convert solid to a texture (pure convenience) */
		if (tmp->mask.is_solid && tmp->src.is_solid) {
			assert(tmp->mask.is_affine);
			tmp->mask.bo = sna_render_get_solid(sna, tmp->mask.u.gen2.pixel);
			if (!tmp->mask.bo)
				goto cleanup_src;
		}
	}

	tmp->floats_per_vertex = 2;
	if (!tmp->src.is_solid)
		tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 3;
	if (tmp->mask.bo)
		tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 3;
	tmp->floats_per_rect = 3*tmp->floats_per_vertex;

	tmp->prim_emit = gen2_emit_composite_primitive;
	if (tmp->mask.bo) {
		if (tmp->mask.transform == NULL) {
			if (tmp->src.is_solid) {
				assert(tmp->floats_per_rect == 12);
#if defined(sse2) && !defined(__x86_64__)
				if (sna->cpu_features & SSE2) {
					tmp->prim_emit = gen2_emit_composite_primitive_constant_identity_mask__sse2;
				} else
#endif
				{
					tmp->prim_emit = gen2_emit_composite_primitive_constant_identity_mask;
				}
			}
		}
	} else {
		if (tmp->src.is_solid) {
			assert(tmp->floats_per_rect == 6);
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_primitive_constant__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_primitive_constant;
			}
		} else if (tmp->src.is_linear) {
			assert(tmp->floats_per_rect == 12);
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_primitive_linear__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_primitive_linear;
			}
		} else if (tmp->src.transform == NULL) {
			assert(tmp->floats_per_rect == 12);
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_primitive_identity__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_primitive_identity;
			}
		} else if (tmp->src.is_affine) {
			assert(tmp->floats_per_rect == 12);
			tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
			tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_primitive_affine__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_primitive_affine;
			}
		}
	}

	tmp->blt   = gen2_render_composite_blt;
	tmp->box   = gen2_render_composite_box;
	tmp->boxes = gen2_render_composite_boxes;
	tmp->done  = gen2_render_composite_done;

	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL)) {
			DBG(("%s: fallback, operation does not fit into GTT\n",
			     __FUNCTION__));
			goto cleanup_mask;
		}
	}

	gen2_emit_composite_state(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
		tmp->mask.bo = NULL;
	}
cleanup_src:
	if (tmp->src.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
		tmp->src.bo = NULL;
	}
cleanup_dst:
	if (tmp->redirect.real_bo) {
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
		tmp->redirect.real_bo = NULL;
	}
fallback:
	return (mask == NULL &&
		sna_blt_composite(sna, op, src, dst,
				  src_x, src_y,
				  dst_x, dst_y,
				  width, height,
				  flags | COMPOSITE_FALLBACK, tmp));
}

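/*
 * Note (added comment): the span emitters below write one triangle (three
 * vertices) per box straight into the batch.  Each vertex is x, y, the span
 * opacity packed into the alpha byte of the diffuse colour dword (stored
 * through the float pointer), optionally followed by the source texture
 * coordinates.
 */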
fastcall static void
gen2_emit_composite_spans_primitive_constant(struct sna *sna,
					     const struct sna_composite_spans_op *op,
					     const BoxRec *box,
					     float opacity)
{
	float *v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	sna->kgem.nbatch += 9;

	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	*((uint32_t *)v + 2) = alpha;

	v[3] = op->base.dst.x + box->x1;
	v[4] = v[1];
	*((uint32_t *)v + 5) = alpha;

	v[6] = v[3];
	v[7] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 8) = alpha;
}

fastcall static void
gen2_emit_composite_spans_primitive_linear(struct sna *sna,
					   const struct sna_composite_spans_op *op,
					   const BoxRec *box,
					   float opacity)
{
	union {
		float f;
		uint32_t u;
	} alpha;

	alpha.u = (uint8_t)(255 * opacity) << 24;

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x2,
				     op->base.dst.y + box->y2);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x2, box->y2);

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x1,
				     op->base.dst.y + box->y2);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y2);

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x1,
				     op->base.dst.y + box->y1);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y1);
}

fastcall static void
gen2_emit_composite_spans_primitive_identity_source(struct sna *sna,
						    const struct sna_composite_spans_op *op,
						    const BoxRec *box,
						    float opacity)
{
	float *v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	sna->kgem.nbatch += 15;

	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	*((uint32_t *)v + 2) = alpha;
	v[3] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
	v[4] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];

	v[5] = op->base.dst.x + box->x1;
	v[6] = v[1];
	*((uint32_t *)v + 7) = alpha;
	v[8] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
	v[9] = v[4];

	v[10] = v[5];
	v[11] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 12) = alpha;
	v[13] = v[8];
	v[14] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
}

fastcall static void
gen2_emit_composite_spans_primitive_affine_source(struct sna *sna,
						  const struct sna_composite_spans_op *op,
						  const BoxRec *box,
						  float opacity)
{
	PictTransform *transform = op->base.src.transform;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 15;

	v[0]  = op->base.dst.x + box->x2;
	v[6]  = v[1] = op->base.dst.y + box->y2;
	v[10] = v[5] = op->base.dst.x + box->x1;
	v[11] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 2) = alpha;
	*((uint32_t *)v + 7) = alpha;
	*((uint32_t *)v + 12) = alpha;

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
				    (int)op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[3], &v[4]);

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
				    (int)op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[8], &v[9]);

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
				    (int)op->base.src.offset[1] + box->y1,
				    transform, op->base.src.scale,
				    &v[13], &v[14]);
}

#if defined(sse2) && !defined(__x86_64__)
sse2 fastcall static void
gen2_emit_composite_spans_primitive_constant__sse2(struct sna *sna,
						   const struct sna_composite_spans_op *op,
						   const BoxRec *box,
						   float opacity)
{
	float *v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	sna->kgem.nbatch += 9;

	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	*((uint32_t *)v + 2) = alpha;

	v[3] = op->base.dst.x + box->x1;
	v[4] = v[1];
	*((uint32_t *)v + 5) = alpha;

	v[6] = v[3];
	v[7] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 8) = alpha;
}

sse2 fastcall static void
gen2_emit_composite_spans_primitive_linear__sse2(struct sna *sna,
						 const struct sna_composite_spans_op *op,
						 const BoxRec *box,
						 float opacity)
{
	union {
		float f;
		uint32_t u;
	} alpha;

	alpha.u = (uint8_t)(255 * opacity) << 24;

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x2,
				     op->base.dst.y + box->y2);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x2, box->y2);

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x1,
				     op->base.dst.y + box->y2);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y2);

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x1,
				     op->base.dst.y + box->y1);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y1);
}

sse2 fastcall static void
gen2_emit_composite_spans_primitive_identity_source__sse2(struct sna *sna,
							  const struct sna_composite_spans_op *op,
							  const BoxRec *box,
							  float opacity)
{
	float *v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	sna->kgem.nbatch += 15;

	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	*((uint32_t *)v + 2) = alpha;
	v[3] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
	v[4] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];

	v[5] = op->base.dst.x + box->x1;
	v[6] = v[1];
	*((uint32_t *)v + 7) = alpha;
	v[8] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
	v[9] = v[4];

	v[10] = v[5];
	v[11] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 12) = alpha;
	v[13] = v[8];
	v[14] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
}

sse2 fastcall static void
gen2_emit_composite_spans_primitive_affine_source__sse2(struct sna *sna,
							const struct sna_composite_spans_op *op,
							const BoxRec *box,
							float opacity)
{
	PictTransform *transform = op->base.src.transform;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 15;

	v[0]  = op->base.dst.x + box->x2;
	v[6]  = v[1] = op->base.dst.y + box->y2;
	v[10] = v[5] = op->base.dst.x + box->x1;
	v[11] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 2) = alpha;
	*((uint32_t *)v + 7) = alpha;
	*((uint32_t *)v + 12) = alpha;

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
				    (int)op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[3], &v[4]);

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
				    (int)op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[8], &v[9]);

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
				    (int)op->base.src.offset[1] + box->y1,
				    transform, op->base.src.scale,
				    &v[13], &v[14]);
}
#endif

static void
gen2_emit_composite_spans_vertex(struct sna *sna,
				 const struct sna_composite_spans_op *op,
				 int16_t x, int16_t y,
				 float opacity)
{
	gen2_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y);
	BATCH((uint8_t)(opacity * 255) << 24);
	assert(!op->base.src.is_solid);
	if (op->base.src.is_linear)
		gen2_emit_composite_linear(sna, &op->base.src, x, y);
	else
		gen2_emit_composite_texcoord(sna, &op->base.src, x, y);
}

fastcall static void
gen2_emit_composite_spans_primitive(struct sna *sna,
				    const struct sna_composite_spans_op *op,
				    const BoxRec *box,
				    float opacity)
{
	gen2_emit_composite_spans_vertex(sna, op, box->x2, box->y2, opacity);
	gen2_emit_composite_spans_vertex(sna, op, box->x1, box->y2, opacity);
	gen2_emit_composite_spans_vertex(sna, op, box->x1, box->y1, opacity);
}

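/*
 * Note (added comment): the single texture-blend stage set up below
 * multiplies the source (texel, or the specular register for a solid
 * colour) by the per-span opacity carried in the diffuse alpha, so spans do
 * not need a separate mask texture.  The emitted LOAD_STATE_IMMEDIATE_2 is
 * unwound again if it matches the state already in the batch.
 */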
static void
gen2_emit_spans_pipeline(struct sna *sna,
			 const struct sna_composite_spans_op *op)
{
	uint32_t cblend, ablend;
	uint32_t unwind;

	cblend =
		TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_MODULATE |
		TB0C_ARG1_SEL_DIFFUSE | TB0C_ARG1_REPLICATE_ALPHA |
		TB0C_OUTPUT_WRITE_CURRENT;
	ablend =
		TB0A_RESULT_SCALE_1X | TB0A_OP_MODULATE |
		TB0A_ARG1_SEL_DIFFUSE |
		TB0A_OUTPUT_WRITE_CURRENT;

	if (op->base.src.is_solid) {
		ablend |= TB0A_ARG2_SEL_SPECULAR;
		cblend |= TB0C_ARG2_SEL_SPECULAR;
		if (op->base.dst.format == PICT_a8)
			cblend |= TB0C_ARG2_REPLICATE_ALPHA;
	} else if (op->base.dst.format == PICT_a8) {
		ablend |= TB0A_ARG2_SEL_TEXEL0;
		cblend |= TB0C_ARG2_SEL_TEXEL0 | TB0C_ARG2_REPLICATE_ALPHA;
	} else {
		if (PICT_FORMAT_RGB(op->base.src.pict_format) != 0)
			cblend |= TB0C_ARG2_SEL_TEXEL0;
		else
			cblend |= TB0C_ARG2_SEL_ONE | TB0C_ARG2_INVERT;

		if (op->base.src.is_opaque)
			ablend |= TB0A_ARG2_SEL_ONE;
		else
			ablend |= TB0A_ARG2_SEL_TEXEL0;
	}

	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
	BATCH(cblend);
	BATCH(ablend);
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
		   sna->kgem.batch + unwind + 1,
		   2 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls2 = unwind;
}

static void gen2_emit_composite_spans_state(struct sna *sna,
					    const struct sna_composite_spans_op *op)
{
	uint32_t unwind;

	gen2_get_batch(sna, &op->base);
	gen2_emit_target(sna,
			 op->base.dst.bo,
			 op->base.dst.width,
			 op->base.dst.height,
			 op->base.dst.format);

	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
	BATCH(!op->base.src.is_solid << 12);
	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY | S3_DIFFUSE_PRESENT);
	BATCH(gen2_get_blend_cntl(op->base.op, false, op->base.dst.format));
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
		   sna->kgem.batch + unwind + 1,
		   3 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls1 = unwind;

	gen2_disable_logic_op(sna);
	gen2_emit_spans_pipeline(sna, op);

	if (op->base.src.is_solid) {
		if (op->base.src.u.gen2.pixel != sna->render_state.gen2.specular) {
			BATCH(_3DSTATE_DFLT_SPECULAR_CMD);
			BATCH(op->base.src.u.gen2.pixel);
			sna->render_state.gen2.specular = op->base.src.u.gen2.pixel;
		}
	} else {
		uint32_t v = _3DSTATE_VERTEX_FORMAT_2_CMD |
			(op->base.src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_3D);
		if (sna->render_state.gen2.vft != v) {
			BATCH(v);
			sna->render_state.gen2.vft = v;
		}
	}
}

fastcall static void
gen2_render_composite_spans_box(struct sna *sna,
				const struct sna_composite_spans_op *op,
				const BoxRec *box, float opacity)
{
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	     __FUNCTION__,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	if (gen2_get_rectangles(sna, &op->base, 1) == 0) {
		gen2_emit_composite_spans_state(sna, op);
		gen2_get_rectangles(sna, &op->base, 1);
	}

	op->prim_emit(sna, op, box, opacity);
}

static void
gen2_render_composite_spans_boxes(struct sna *sna,
				  const struct sna_composite_spans_op *op,
				  const BoxRec *box, int nbox,
				  float opacity)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y));

	do {
		int nbox_this_time;

		nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox);
		if (nbox_this_time == 0) {
			gen2_emit_composite_spans_state(sna, op);
			nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox);
		}
		nbox -= nbox_this_time;

		do {
			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
			     box->x1, box->y1,
			     box->x2 - box->x1,
			     box->y2 - box->y1));

			op->prim_emit(sna, op, box++, opacity);
		} while (--nbox_this_time);
	} while (nbox);
}

fastcall static void
gen2_render_composite_spans_done(struct sna *sna,
				 const struct sna_composite_spans_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	gen2_vertex_flush(sna, &op->base);

	if (op->base.src.bo)
		kgem_bo_destroy(&sna->kgem, op->base.src.bo);

	sna_render_composite_redirect_done(sna, &op->base);
}

static bool
gen2_check_composite_spans(struct sna *sna,
			   uint8_t op, PicturePtr src, PicturePtr dst,
			   int16_t width, int16_t height, unsigned flags)
{
	if (op >= ARRAY_SIZE(gen2_blend_op))
		return false;

	if (gen2_composite_fallback(sna, src, NULL, dst))
		return false;

	if (need_tiling(sna, width, height)) {
		if (!is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
			DBG(("%s: fallback, tiled operation not on GPU\n",
			     __FUNCTION__));
			return false;
		}
	}

	return true;
}

static bool
gen2_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x,  int16_t src_y,
			    int16_t dst_x,  int16_t dst_y,
			    int16_t width,  int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__,
	     src_x, src_y, dst_x, dst_y, width, height));

	assert(gen2_check_composite_spans(sna, op, src, dst, width, height, flags));
	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	tmp->base.op = op;
	sna_render_composite_redirect_init(&tmp->base);
	if (!gen2_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height,
				       true)) {
		DBG(("%s: unable to set render target\n",
		     __FUNCTION__));
		return false;
	}

	switch (gen2_composite_picture(sna, src, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		gen2_composite_solid_init(sna, &tmp->base.src, 0);
	case 1:
		break;
	}
	assert(tmp->base.src.bo || tmp->base.src.is_solid);

        tmp->prim_emit = gen2_emit_composite_spans_primitive;
 
2644
        tmp->base.floats_per_vertex = 3;
 
2645
        if (tmp->base.src.is_solid) {
 
2646
#if defined(sse2) && !defined(__x86_64__)
 
2647
                if (sna->cpu_features & SSE2) {
 
2648
                        tmp->prim_emit = gen2_emit_composite_spans_primitive_constant__sse2;
 
2649
                } else
 
2650
#endif
 
2651
                {
 
2652
                        tmp->prim_emit = gen2_emit_composite_spans_primitive_constant;
 
2653
                }
 
2654
        } else if (tmp->base.src.is_linear) {
 
2655
                tmp->base.floats_per_vertex += 2;
 
2656
#if defined(sse2) && !defined(__x86_64__)
 
2657
                if (sna->cpu_features & SSE2) {
 
2658
                        tmp->prim_emit = gen2_emit_composite_spans_primitive_linear__sse2;
 
2659
                } else
 
2660
#endif
 
2661
                {
 
2662
                        tmp->prim_emit = gen2_emit_composite_spans_primitive_linear;
 
2663
                }
 
2664
        } else {
 
2665
                assert(tmp->base.src.bo);
 
2666
                tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
 
2667
                if (tmp->base.src.transform == NULL) {
 
2668
#if defined(sse2) && !defined(__x86_64__)
 
2669
                        if (sna->cpu_features & SSE2) {
 
2670
                                tmp->prim_emit = gen2_emit_composite_spans_primitive_identity_source__sse2;
 
2671
                        } else
 
2672
#endif
 
2673
                        {
 
2674
                                tmp->prim_emit = gen2_emit_composite_spans_primitive_identity_source;
 
2675
                        }
 
2676
                } else if (tmp->base.src.is_affine) {
 
2677
                        tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
 
2678
                        tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
 
2679
#if defined(sse2) && !defined(__x86_64__)
 
2680
                        if (sna->cpu_features & SSE2) {
 
2681
                                tmp->prim_emit = gen2_emit_composite_spans_primitive_affine_source__sse2;
 
2682
                        } else
 
2683
#endif
 
2684
                        {
 
2685
                                tmp->prim_emit = gen2_emit_composite_spans_primitive_affine_source;
 
2686
                        }
 
2687
                }
 
2688
        }
 
2689
        tmp->base.mask.bo = NULL;
 
2690
        tmp->base.floats_per_rect = 3*tmp->base.floats_per_vertex;
 
2691
 
 
2692
        tmp->box   = gen2_render_composite_spans_box;
 
2693
        tmp->boxes = gen2_render_composite_spans_boxes;
 
2694
        tmp->done  = gen2_render_composite_spans_done;
 
2695
 
 
2696
        if (!kgem_check_bo(&sna->kgem,
 
2697
                           tmp->base.dst.bo, tmp->base.src.bo,
 
2698
                           NULL)) {
 
2699
                kgem_submit(&sna->kgem);
 
2700
                if (!kgem_check_bo(&sna->kgem,
 
2701
                                   tmp->base.dst.bo, tmp->base.src.bo,
 
2702
                                   NULL))
 
2703
                        goto cleanup_src;
 
2704
        }
 
2705
 
 
2706
        gen2_emit_composite_spans_state(sna, tmp);
 
2707
        return true;
 
2708
 
 
2709
cleanup_src:
 
2710
        if (tmp->base.src.bo)
 
2711
                kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
 
2712
cleanup_dst:
 
2713
        if (tmp->base.redirect.real_bo)
 
2714
                kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
 
2715
        return false;
 
2716
}
 
2717
 
 
2718
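/* Program texture blend stage 0 to pass the diffuse (solid fill) colour
 * straight through to the current colour and alpha; for an a8 destination
 * the alpha channel is replicated into the colour result.  The state is
 * unwound again if it matches what was last emitted.
 */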
static void
gen2_emit_fill_pipeline(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t blend, unwind;

	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);

	blend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_ARG1 |
		TB0C_ARG1_SEL_DIFFUSE |
		TB0C_OUTPUT_WRITE_CURRENT;
	if (op->dst.format == PICT_a8)
		blend |= TB0C_ARG1_REPLICATE_ALPHA;
	BATCH(blend);

	BATCH(TB0A_RESULT_SCALE_1X | TB0A_OP_ARG1 |
	      TB0A_ARG1_SEL_DIFFUSE |
	      TB0A_OUTPUT_WRITE_CURRENT);

	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
		   sna->kgem.batch + unwind + 1,
		   2 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls2 = unwind;
}

static void gen2_emit_fill_composite_state(struct sna *sna,
					   const struct sna_composite_op *op,
					   uint32_t pixel)
{
	uint32_t ls1;

	gen2_get_batch(sna, op);
	gen2_emit_target(sna,
			 op->dst.bo,
			 op->dst.width,
			 op->dst.height,
			 op->dst.format);

	ls1 = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
	BATCH(0);
	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
	BATCH(gen2_get_blend_cntl(op->op, false, op->dst.format));
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
		   sna->kgem.batch + ls1 + 1,
		   3 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = ls1;
	else
		sna->render_state.gen2.ls1 = ls1;

	gen2_emit_fill_pipeline(sna, op);

	if (pixel != sna->render_state.gen2.diffuse) {
		BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
		BATCH(pixel);
		sna->render_state.gen2.diffuse = pixel;
	}
}

static bool
gen2_render_fill_boxes_try_blt(struct sna *sna,
			       CARD8 op, PictFormat format,
			       const xRenderColor *color,
			       const DrawableRec *dst, struct kgem_bo *dst_bo,
			       const BoxRec *box, int n)
{
	uint8_t alu;
	uint32_t pixel;

	if (op > PictOpSrc)
		return false;

	if (op == PictOpClear) {
		alu = GXclear;
		pixel = 0;
	} else if (!sna_get_pixel_from_rgba(&pixel,
					    color->red,
					    color->green,
					    color->blue,
					    color->alpha,
					    format))
		return false;
	else
		alu = GXcopy;

	return sna_blt_fill_boxes(sna, alu,
				  dst_bo, dst->bitsPerPixel,
				  pixel, box, n);
}

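/* Fill a list of boxes with a solid colour.  The BLT engine is tried first;
 * destinations that are too large or have an unsupported pitch/format are
 * split via sna_tiling_fill_boxes(), otherwise the boxes are drawn as
 * RECTLIST primitives with the colour loaded as the default diffuse value.
 */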
static bool
gen2_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       const DrawableRec *dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
	struct sna_composite_op tmp;
	uint32_t pixel;

	if (op >= ARRAY_SIZE(gen2_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

#if NO_FILL_BOXES
	return gen2_render_fill_boxes_try_blt(sna, op, format, color,
					      dst, dst_bo,
					      box, n);
#endif
	if (gen2_render_fill_boxes_try_blt(sna, op, format, color,
					   dst, dst_bo,
					   box, n))
		return true;

	DBG(("%s (op=%d, format=%x, color=(%04x,%04x,%04x, %04x))\n",
	     __FUNCTION__, op, (int)format,
	     color->red, color->green, color->blue, color->alpha));

	if (too_large(dst->width, dst->height) ||
	    dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH ||
	    !gen2_check_dst_format(format)) {
		DBG(("%s: try blt, too large or incompatible destination\n",
		     __FUNCTION__));
		if (!gen2_check_dst_format(format))
			return false;

		assert(dst_bo->pitch >= 8);
		return sna_tiling_fill_boxes(sna, op, format, color,
					     dst, dst_bo, box, n);
	}

	if (op == PictOpClear)
		pixel = 0;
	else if (!sna_get_pixel_from_rgba(&pixel,
					  color->red,
					  color->green,
					  color->blue,
					  color->alpha,
					  PICT_a8r8g8b8))
		return false;

	DBG(("%s: using shader for op=%d, format=%x, pixel=%x\n",
	     __FUNCTION__, op, (int)format, pixel));

	memset(&tmp, 0, sizeof(tmp));
	tmp.op = op;
	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;
	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;

	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL))
			return false;
	}

	gen2_emit_fill_composite_state(sna, &tmp, pixel);

	do {
		int n_this_time = gen2_get_rectangles(sna, &tmp, n);
		if (n_this_time == 0) {
			gen2_emit_fill_composite_state(sna, &tmp, pixel);
			n_this_time = gen2_get_rectangles(sna, &tmp, n);
		}
		n -= n_this_time;

		do {
			DBG(("  (%d, %d), (%d, %d): %x\n",
			     box->x1, box->y1, box->x2, box->y2, pixel));
			VERTEX(box->x2);
			VERTEX(box->y2);
			VERTEX(box->x1);
			VERTEX(box->y2);
			VERTEX(box->x1);
			VERTEX(box->y1);
			box++;
		} while (--n_this_time);
	} while (n);

	gen2_vertex_flush(sna, &tmp);
	return true;
}

static void gen2_emit_fill_state(struct sna *sna,
				 const struct sna_composite_op *op)
{
	uint32_t ls1;

	gen2_get_batch(sna, op);
	gen2_emit_target(sna,
			 op->dst.bo,
			 op->dst.width,
			 op->dst.height,
			 op->dst.format);

	ls1 = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
	BATCH(0);
	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
	BATCH(S8_ENABLE_COLOR_BUFFER_WRITE);
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
		   sna->kgem.batch + ls1 + 1,
		   3 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = ls1;
	else
		sna->render_state.gen2.ls1 = ls1;

	gen2_enable_logic_op(sna, op->op);
	gen2_emit_fill_pipeline(sna, op);

	if (op->src.u.gen2.pixel != sna->render_state.gen2.diffuse) {
		BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
		BATCH(op->src.u.gen2.pixel);
		sna->render_state.gen2.diffuse = op->src.u.gen2.pixel;
	}
}

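/* The sna_fill_op callbacks below emit one RECTLIST rectangle per request.
 * Each rectangle is three xy vertices in the order bottom-right, bottom-left,
 * top-left; the hardware derives the fourth corner itself.
 */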
static void
gen2_render_fill_op_blt(struct sna *sna,
			const struct sna_fill_op *op,
			int16_t x, int16_t y, int16_t w, int16_t h)
{
	if (!gen2_get_rectangles(sna, &op->base, 1)) {
		gen2_emit_fill_state(sna, &op->base);
		gen2_get_rectangles(sna, &op->base, 1);
	}

	VERTEX(x+w);
	VERTEX(y+h);
	VERTEX(x);
	VERTEX(y+h);
	VERTEX(x);
	VERTEX(y);
}

fastcall static void
gen2_render_fill_op_box(struct sna *sna,
			const struct sna_fill_op *op,
			const BoxRec *box)
{
	if (!gen2_get_rectangles(sna, &op->base, 1)) {
		gen2_emit_fill_state(sna, &op->base);
		gen2_get_rectangles(sna, &op->base, 1);
	}

	VERTEX(box->x2);
	VERTEX(box->y2);
	VERTEX(box->x1);
	VERTEX(box->y2);
	VERTEX(box->x1);
	VERTEX(box->y1);
}

fastcall static void
gen2_render_fill_op_boxes(struct sna *sna,
			  const struct sna_fill_op *op,
			  const BoxRec *box,
			  int nbox)
{
	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2, nbox));

	do {
		int nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox);
		if (nbox_this_time == 0) {
			gen2_emit_fill_state(sna, &op->base);
			nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox);
		}
		nbox -= nbox_this_time;

		do {
			VERTEX(box->x2);
			VERTEX(box->y2);
			VERTEX(box->x1);
			VERTEX(box->y2);
			VERTEX(box->x1);
			VERTEX(box->y1);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

static void
gen2_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
{
	gen2_vertex_flush(sna, &op->base);
}

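/* Set up a reusable solid-fill operation.  The BLT engine is preferred, and
 * it is also the fallback whenever the target exceeds the 3D pipeline limits
 * or the batch cannot accommodate the destination bo.
 */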
static bool
gen2_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color, unsigned flags,
		 struct sna_fill_op *tmp)
{
#if NO_FILL
	return sna_blt_fill(sna, alu,
			    dst_bo, dst->drawable.bitsPerPixel,
			    color,
			    tmp);
#endif

	/* Prefer to use the BLT if already engaged */
	if (sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 tmp))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(dst->drawable.width, dst->drawable.height) ||
	    dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH)
		return false;

	tmp->base.op = alu;
	tmp->base.dst.pixmap = dst;
	tmp->base.dst.width = dst->drawable.width;
	tmp->base.dst.height = dst->drawable.height;
	tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp->base.dst.bo = dst_bo;
	tmp->base.dst.x = tmp->base.dst.y = 0;
	tmp->base.floats_per_vertex = 2;
	tmp->base.floats_per_rect = 6;

	tmp->base.src.u.gen2.pixel =
		sna_rgba_for_color(color, dst->drawable.depth);

	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    tmp);
	}

	tmp->blt   = gen2_render_fill_op_blt;
	tmp->box   = gen2_render_fill_op_box;
	tmp->boxes = gen2_render_fill_op_boxes;
	tmp->points = NULL;
	tmp->done  = gen2_render_fill_op_done;

	gen2_emit_fill_state(sna, &tmp->base);
	return true;
}

static bool
gen2_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
			     uint32_t color,
			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
			     uint8_t alu)
{
	BoxRec box;

	box.x1 = x1;
	box.y1 = y1;
	box.x2 = x2;
	box.y2 = y2;

	return sna_blt_fill_boxes(sna, alu,
				  bo, dst->drawable.bitsPerPixel,
				  color, &box, 1);
}

static bool
gen2_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
		     uint32_t color,
		     int16_t x1, int16_t y1,
		     int16_t x2, int16_t y2,
		     uint8_t alu)
{
	struct sna_composite_op tmp;

#if NO_FILL_ONE
	return gen2_render_fill_one_try_blt(sna, dst, bo, color,
					    x1, y1, x2, y2, alu);
#endif

	/* Prefer to use the BLT if already engaged */
	if (gen2_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(dst->drawable.width, dst->drawable.height) ||
	    bo->pitch < 8 || bo->pitch > MAX_3D_PITCH)
		return false;

	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);

		if (gen2_render_fill_one_try_blt(sna, dst, bo, color,
						 x1, y1, x2, y2, alu))
			return true;

		if (!kgem_check_bo(&sna->kgem, bo, NULL))
			return false;
	}

	tmp.op = alu;
	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.src.u.gen2.pixel =
		sna_rgba_for_color(color, dst->drawable.depth);

	gen2_emit_fill_state(sna, &tmp);
	gen2_get_rectangles(sna, &tmp, 1);
	DBG(("%s: (%d, %d), (%d, %d): %x\n", __FUNCTION__,
	     x1, y1, x2, y2, tmp.src.u.gen2.pixel));
	VERTEX(x2);
	VERTEX(y2);
	VERTEX(x1);
	VERTEX(y2);
	VERTEX(x1);
	VERTEX(y1);
	gen2_vertex_flush(sna, &tmp);

	return true;
}

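/* Emit the 3D state for textured video: a single YUY2/UYVY sampler with
 * colourspace conversion, clamped texture coordinates, and linear or nearest
 * filtering depending on whether the frame is being scaled.
 */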
static void
gen2_emit_video_state(struct sna *sna,
		      struct sna_video *video,
		      struct sna_video_frame *frame,
		      PixmapPtr pixmap,
		      struct kgem_bo *dst_bo,
		      int width, int height,
		      bool bilinear)
{
	uint32_t ms1, v, unwind;

	gen2_emit_target(sna, dst_bo, width, height,
			 sna_format_for_depth(pixmap->drawable.depth));

	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
	BATCH(1 << 12);
	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
	BATCH(S8_ENABLE_COLOR_BUFFER_WRITE);
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
		   sna->kgem.batch + unwind + 1,
		   3 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls1 = unwind;

	gen2_disable_logic_op(sna);

	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
	BATCH(TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OUTPUT_WRITE_CURRENT |
	      TB0C_OP_ARG1 | TB0C_ARG1_SEL_TEXEL0);
	BATCH(TB0A_RESULT_SCALE_1X | TB0A_OUTPUT_WRITE_CURRENT |
	      TB0A_OP_ARG1 | TB0A_ARG1_SEL_ONE);
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
		   sna->kgem.batch + unwind + 1,
		   2 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls2 = unwind;

	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | LOAD_TEXTURE_MAP(0) | 4);
	BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
			     frame->bo,
			     I915_GEM_DOMAIN_SAMPLER << 16,
			     0));
	ms1 = MAPSURF_422 | TM0S1_COLORSPACE_CONVERSION;
	switch (frame->id) {
	case FOURCC_YUY2:
		ms1 |= MT_422_YCRCB_NORMAL;
		break;
	case FOURCC_UYVY:
		ms1 |= MT_422_YCRCB_SWAPY;
		break;
	}
	BATCH(((frame->height - 1) << TM0S1_HEIGHT_SHIFT) |
	      ((frame->width - 1)  << TM0S1_WIDTH_SHIFT) |
	      ms1 |
	      gen2_sampler_tiling_bits(frame->bo->tiling));
	BATCH((frame->pitch[0] / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D);
	if (bilinear)
		BATCH(FILTER_LINEAR << TM0S3_MAG_FILTER_SHIFT |
		      FILTER_LINEAR << TM0S3_MIN_FILTER_SHIFT |
		      MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT);
	else
		BATCH(FILTER_NEAREST << TM0S3_MAG_FILTER_SHIFT |
		      FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT |
		      MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT);
	BATCH(0);	/* default color */

	BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(0) |
	      ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | TEXCOORDTYPE_CARTESIAN |
	      ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_CLAMP) |
	      ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_CLAMP));

	v = _3DSTATE_VERTEX_FORMAT_2_CMD | TEXCOORDFMT_2D;
	if (sna->render_state.gen2.vft != v) {
		BATCH(v);
		sna->render_state.gen2.vft = v;
	}
}

static void
gen2_video_get_batch(struct sna *sna, struct kgem_bo *bo)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);

	if (!kgem_check_batch(&sna->kgem, 120) ||
	    !kgem_check_reloc(&sna->kgem, 4) ||
	    !kgem_check_exec(&sna->kgem, 2)) {
		_kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen2.need_invariant)
		gen2_emit_invariant(sna);
}

static int
gen2_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex)
{
	int size = floats_per_vertex * 3;
	int rem = batch_space(sna) - 1;

	if (rem > MAX_INLINE)
		rem = MAX_INLINE;

	if (size * want > rem)
		want = rem / size;

	return want;
}

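/* Draw a video frame.  If the destination pixmap is too large for the 3D
 * pipeline, render into a temporary bo covering the region extents and blit
 * the result back; otherwise draw directly, then record the GPU damage.
 */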
static bool
gen2_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	const BoxRec *pbox = region_rects(dstRegion);
	int nbox = region_num_rects(dstRegion);
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	int pix_xoff, pix_yoff;
	struct kgem_bo *dst_bo;
	bool bilinear;
	int copy = 0;

	DBG(("%s: src:%dx%d (frame:%dx%d) -> dst:%dx%d\n", __FUNCTION__,
	     src_width, src_height, frame->width, frame->height, dst_width, dst_height));

	assert(priv->gpu_bo);
	dst_bo = priv->gpu_bo;

	bilinear = src_width != dst_width || src_height != dst_height;

	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
	DBG(("%s: src offset (%f, %f), scale (%f, %f)\n",
	     __FUNCTION__, src_offset_x, src_offset_y, src_scale_x, src_scale_y));

	if (too_large(pixmap->drawable.width, pixmap->drawable.height) ||
	    dst_bo->pitch > MAX_3D_PITCH) {
		int bpp = pixmap->drawable.bitsPerPixel;

		if (too_large(dst_width, dst_height))
			return false;

		dst_bo = kgem_create_2d(&sna->kgem,
					dst_width, dst_height, bpp,
					kgem_choose_tiling(&sna->kgem,
							   I915_TILING_X,
							   dst_width, dst_height, bpp),
					0);
		if (!dst_bo)
			return false;

		pix_xoff = -dstRegion->extents.x1;
		pix_yoff = -dstRegion->extents.y1;
		copy = 1;
	} else {
		/* Set up the offset for translating from the given region
		 * (in screen coordinates) to the backing pixmap.
		 */
#ifdef COMPOSITE
		pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
		pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
		pix_xoff = 0;
		pix_yoff = 0;
#endif

		dst_width  = pixmap->drawable.width;
		dst_height = pixmap->drawable.height;
	}

	gen2_video_get_batch(sna, dst_bo);
	gen2_emit_video_state(sna, video, frame, pixmap,
			      dst_bo, dst_width, dst_height, bilinear);
	do {
		int nbox_this_time = gen2_get_inline_rectangles(sna, nbox, 4);
		if (nbox_this_time == 0) {
			gen2_video_get_batch(sna, dst_bo);
			gen2_emit_video_state(sna, video, frame, pixmap,
					      dst_bo, dst_width, dst_height, bilinear);
			nbox_this_time = gen2_get_inline_rectangles(sna, nbox, 4);
			assert(nbox_this_time);
		}
		nbox -= nbox_this_time;

		BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST |
		      ((12 * nbox_this_time) - 1));
		do {
			int box_x1 = pbox->x1;
			int box_y1 = pbox->y1;
			int box_x2 = pbox->x2;
			int box_y2 = pbox->y2;

			pbox++;

			DBG(("%s: dst (%d, %d), (%d, %d) + (%d, %d); src (%f, %f), (%f, %f)\n",
			     __FUNCTION__, box_x1, box_y1, box_x2, box_y2, pix_xoff, pix_yoff,
			     box_x1 * src_scale_x + src_offset_x,
			     box_y1 * src_scale_y + src_offset_y,
			     box_x2 * src_scale_x + src_offset_x,
			     box_y2 * src_scale_y + src_offset_y));

			/* bottom right */
			BATCH_F(box_x2 + pix_xoff);
			BATCH_F(box_y2 + pix_yoff);
			BATCH_F(box_x2 * src_scale_x + src_offset_x);
			BATCH_F(box_y2 * src_scale_y + src_offset_y);

			/* bottom left */
			BATCH_F(box_x1 + pix_xoff);
			BATCH_F(box_y2 + pix_yoff);
			BATCH_F(box_x1 * src_scale_x + src_offset_x);
			BATCH_F(box_y2 * src_scale_y + src_offset_y);

			/* top left */
			BATCH_F(box_x1 + pix_xoff);
			BATCH_F(box_y1 + pix_yoff);
			BATCH_F(box_x1 * src_scale_x + src_offset_x);
			BATCH_F(box_y1 * src_scale_y + src_offset_y);
		} while (--nbox_this_time);
	} while (nbox);

	if (copy) {
#ifdef COMPOSITE
		pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
		pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
		pix_xoff = 0;
		pix_yoff = 0;
#endif
		sna_blt_copy_boxes(sna, GXcopy,
				   dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1,
				   priv->gpu_bo, pix_xoff, pix_yoff,
				   pixmap->drawable.bitsPerPixel,
				   region_rects(dstRegion),
				   region_num_rects(dstRegion));

		kgem_bo_destroy(&sna->kgem, dst_bo);
	}

	if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
		if ((pix_xoff | pix_yoff) == 0) {
			sna_damage_add(&priv->gpu_damage, dstRegion);
		} else {
			sna_damage_add_boxes(&priv->gpu_damage,
					     region_rects(dstRegion),
					     region_num_rects(dstRegion),
					     pix_xoff, pix_yoff);
		}
	}

	return true;
}

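/* Helpers for the copy paths: describe the source drawable as an
 * untransformed, nearest-filtered texture, and program texture blend stage 0
 * to forward the texel (forcing alpha to one when the source format carries
 * no alpha bits).
 */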
static void
gen2_render_copy_setup_source(struct sna_composite_channel *channel,
			      const DrawableRec *draw,
			      struct kgem_bo *bo)
{
	assert(draw->width && draw->height);

	channel->filter = PictFilterNearest;
	channel->repeat = RepeatNone;
	channel->width  = draw->width;
	channel->height = draw->height;
	channel->scale[0] = 1.f/draw->width;
	channel->scale[1] = 1.f/draw->height;
	channel->offset[0] = 0;
	channel->offset[1] = 0;
	channel->pict_format = sna_format_for_depth(draw->depth);
	channel->bo = bo;
	channel->is_affine = 1;

	DBG(("%s: source=%d, (%dx%d), format=%08x\n",
	     __FUNCTION__, bo->handle,
	     channel->width, channel->height,
	     channel->pict_format));
}

static void
gen2_emit_copy_pipeline(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t blend, unwind;

	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);

	blend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_ARG1 |
		TB0C_OUTPUT_WRITE_CURRENT;
	if (op->dst.format == PICT_a8)
		blend |= TB0C_ARG1_REPLICATE_ALPHA | TB0C_ARG1_SEL_TEXEL0;
	else if (PICT_FORMAT_RGB(op->src.pict_format) != 0)
		blend |= TB0C_ARG1_SEL_TEXEL0;
	else
		blend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT;	/* 0.0 */
	BATCH(blend);

	blend = TB0A_RESULT_SCALE_1X | TB0A_OP_ARG1 |
		TB0A_OUTPUT_WRITE_CURRENT;
	if (PICT_FORMAT_A(op->src.pict_format) == 0)
		blend |= TB0A_ARG1_SEL_ONE;
	else
		blend |= TB0A_ARG1_SEL_TEXEL0;
	BATCH(blend);

	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
		   sna->kgem.batch + unwind + 1,
		   2 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls2 = unwind;
}

static void gen2_emit_copy_state(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t ls1, v;

	gen2_get_batch(sna, op);

	if (kgem_bo_is_dirty(op->src.bo)) {
		if (op->src.bo == op->dst.bo)
			BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
		else
			BATCH(_3DSTATE_MODES_5_CMD |
			      PIPELINE_FLUSH_RENDER_CACHE |
			      PIPELINE_FLUSH_TEXTURE_CACHE);
		kgem_clear_dirty(&sna->kgem);
	}
	gen2_emit_target(sna,
			 op->dst.bo,
			 op->dst.width,
			 op->dst.height,
			 op->dst.format);

	ls1 = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
	BATCH(1<<12);
	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
	BATCH(S8_ENABLE_COLOR_BUFFER_WRITE);
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
		   sna->kgem.batch + ls1 + 1,
		   3 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = ls1;
	else
		sna->render_state.gen2.ls1 = ls1;

	gen2_enable_logic_op(sna, op->op);
	gen2_emit_copy_pipeline(sna, op);

	v = _3DSTATE_VERTEX_FORMAT_2_CMD | TEXCOORDFMT_2D;
	if (sna->render_state.gen2.vft != v) {
		BATCH(v);
		sna->render_state.gen2.vft = v;
	}

	gen2_emit_texture(sna, &op->src, 0);
}

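/* Copy a list of boxes through the 3D pipeline, sampling the source as a
 * texture.  The BLT engine is tried first; oversized destinations are
 * redirected to a proxy target covering the extents, and if that fails the
 * copy is split via sna_tiling_copy_boxes().
 */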
static bool
gen2_render_copy_boxes(struct sna *sna, uint8_t alu,
		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
		       const BoxRec *box, int n, unsigned flags)
{
	struct sna_composite_op tmp;

#if NO_COPY_BOXES
	if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
		return false;

	return sna_blt_copy_boxes(sna, alu,
				  src_bo, src_dx, src_dy,
				  dst_bo, dst_dx, dst_dy,
				  dst->drawable.bitsPerPixel,
				  box, n);
#endif

	DBG(("%s (%d, %d)->(%d, %d) x %d\n",
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));

	if (sna_blt_compare_depth(src, dst) &&
	    sna_blt_copy_boxes(sna, alu,
			       src_bo, src_dx, src_dy,
			       dst_bo, dst_dx, dst_dy,
			       dst->bitsPerPixel,
			       box, n))
		return true;

	if (src_bo == dst_bo || /* XXX handle overlap using 3D ? */
	    too_large(src->width, src->height) ||
	    src_bo->pitch > MAX_3D_PITCH || dst_bo->pitch < 8) {
fallback:
		return sna_blt_copy_boxes_fallback(sna, alu,
						   src, src_bo, src_dx, src_dy,
						   dst, dst_bo, dst_dx, dst_dy,
						   box, n);
	}

	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
			goto fallback;
	}

	assert(dst_bo->pitch >= 8);

	memset(&tmp, 0, sizeof(tmp));
	tmp.op = alu;

	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.format = sna_format_for_depth(dst->depth);
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	DBG(("%s: target=%d, format=%08x, size=%dx%d\n",
	     __FUNCTION__, dst_bo->handle,
	     (unsigned)tmp.dst.format,
	     tmp.dst.width,
	     tmp.dst.height));

	sna_render_composite_redirect_init(&tmp);
	if (too_large(tmp.dst.width, tmp.dst.height) ||
	    dst_bo->pitch > MAX_3D_PITCH) {
		BoxRec extents = box[0];
		int i;

		for (i = 1; i < n; i++) {
			if (box[i].x1 < extents.x1)
				extents.x1 = box[i].x1;
			if (box[i].y1 < extents.y1)
				extents.y1 = box[i].y1;

			if (box[i].x2 > extents.x2)
				extents.x2 = box[i].x2;
			if (box[i].y2 > extents.y2)
				extents.y2 = box[i].y2;
		}
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1 + dst_dx,
						   extents.y1 + dst_dy,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   alu != GXcopy || n > 1))
			goto fallback_tiled;
	}

	tmp.floats_per_vertex = 4;
	tmp.floats_per_rect = 12;

	dst_dx += tmp.dst.x;
	dst_dy += tmp.dst.y;
	tmp.dst.x = tmp.dst.y = 0;

	gen2_render_copy_setup_source(&tmp.src, src, src_bo);
	gen2_emit_copy_state(sna, &tmp);
	do {
		int n_this_time;

		n_this_time = gen2_get_rectangles(sna, &tmp, n);
		if (n_this_time == 0) {
			gen2_emit_copy_state(sna, &tmp);
			n_this_time = gen2_get_rectangles(sna, &tmp, n);
		}
		n -= n_this_time;

		do {
			DBG(("  (%d, %d) -> (%d, %d) + (%d, %d)\n",
			     box->x1 + src_dx, box->y1 + src_dy,
			     box->x1 + dst_dx, box->y1 + dst_dy,
			     box->x2 - box->x1, box->y2 - box->y1));
			VERTEX(box->x2 + dst_dx);
			VERTEX(box->y2 + dst_dy);
			VERTEX((box->x2 + src_dx) * tmp.src.scale[0]);
			VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);

			VERTEX(box->x1 + dst_dx);
			VERTEX(box->y2 + dst_dy);
			VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
			VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);

			VERTEX(box->x1 + dst_dx);
			VERTEX(box->y1 + dst_dy);
			VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
			VERTEX((box->y1 + src_dy) * tmp.src.scale[1]);

			box++;
		} while (--n_this_time);
	} while (n);

	gen2_vertex_flush(sna, &tmp);
	sna_render_composite_redirect_done(sna, &tmp);
	return true;

fallback_tiled:
	return sna_tiling_copy_boxes(sna, alu,
				     src, src_bo, src_dx, src_dy,
				     dst, dst_bo, dst_dx, dst_dy,
				     box, n);
}

static void
gen2_render_copy_blt(struct sna *sna,
		     const struct sna_copy_op *op,
		     int16_t sx, int16_t sy,
		     int16_t w, int16_t h,
		     int16_t dx, int16_t dy)
{
	if (!gen2_get_rectangles(sna, &op->base, 1)) {
		gen2_emit_copy_state(sna, &op->base);
		gen2_get_rectangles(sna, &op->base, 1);
	}

	VERTEX(dx+w);
	VERTEX(dy+h);
	VERTEX((sx+w)*op->base.src.scale[0]);
	VERTEX((sy+h)*op->base.src.scale[1]);

	VERTEX(dx);
	VERTEX(dy+h);
	VERTEX(sx*op->base.src.scale[0]);
	VERTEX((sy+h)*op->base.src.scale[1]);

	VERTEX(dx);
	VERTEX(dy);
	VERTEX(sx*op->base.src.scale[0]);
	VERTEX(sy*op->base.src.scale[1]);
}

static void
gen2_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
	gen2_vertex_flush(sna, &op->base);
}

static bool
gen2_render_copy(struct sna *sna, uint8_t alu,
		 PixmapPtr src, struct kgem_bo *src_bo,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 struct sna_copy_op *tmp)
{
#if NO_COPY
	if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
		return false;

	return sna_blt_copy(sna, alu,
			    src_bo, dst_bo,
			    dst->drawable.bitsPerPixel,
			    tmp);
#endif

	/* Prefer to use the BLT */
	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
	    sna_blt_copy(sna, alu,
			 src_bo, dst_bo,
			 dst->drawable.bitsPerPixel,
			 tmp))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(src->drawable.width, src->drawable.height) ||
	    too_large(dst->drawable.width, dst->drawable.height) ||
	    src_bo->pitch > MAX_3D_PITCH ||
	    dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH) {
fallback:
		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
			return false;

		return sna_blt_copy(sna, alu, src_bo, dst_bo,
				    dst->drawable.bitsPerPixel,
				    tmp);
	}

	tmp->base.op = alu;

	tmp->base.dst.pixmap = dst;
	tmp->base.dst.width = dst->drawable.width;
	tmp->base.dst.height = dst->drawable.height;
	tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp->base.dst.bo = dst_bo;

	gen2_render_copy_setup_source(&tmp->base.src, &src->drawable, src_bo);
	tmp->base.mask.bo = NULL;

	tmp->base.floats_per_vertex = 4;
	tmp->base.floats_per_rect = 12;

	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
			goto fallback;
	}

	tmp->blt  = gen2_render_copy_blt;
	tmp->done = gen2_render_copy_done;

	gen2_emit_composite_state(sna, &tmp->base);
	return true;
}

static void
gen2_render_reset(struct sna *sna)
{
	sna->render_state.gen2.need_invariant = true;
	sna->render_state.gen2.logic_op_enabled = 0;
	sna->render_state.gen2.target = 0;

	sna->render_state.gen2.ls1 = 0;
	sna->render_state.gen2.ls2 = 0;
	sna->render_state.gen2.vft = 0;

	sna->render_state.gen2.diffuse = 0x0c0ffee0;
	sna->render_state.gen2.specular = 0x0c0ffee0;
}

static void
gen2_render_flush(struct sna *sna)
{
	assert(sna->render.vertex_index == 0);
	assert(sna->render.vertex_offset == 0);
}

static void
gen2_render_context_switch(struct kgem *kgem,
			   int new_mode)
{
	struct sna *sna = container_of(kgem, struct sna, kgem);

	if (!kgem->nbatch)
		return;

	/* Reload BLT registers following a lost context */
	sna->blt_state.fill_bo = 0;

	if (kgem_ring_is_idle(kgem, kgem->ring)) {
		DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
		_kgem_submit(kgem);
	}
}

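/* Entry point: wire the gen2 paths into struct sna_render and report the
 * backend name.  MAX_3D_SIZE/MAX_3D_PITCH advertise the coordinate and pitch
 * limits of the gen2 3D pipeline to the higher layers.
 */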
const char *gen2_render_init(struct sna *sna, const char *backend)
{
	struct sna_render *render = &sna->render;

	sna->kgem.context_switch = gen2_render_context_switch;

	/* Use the BLT (and overlay) for everything except when forced to
	 * use the texture combiners.
	 */
#if !NO_COMPOSITE
	render->composite = gen2_render_composite;
	render->prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	render->check_composite_spans = gen2_check_composite_spans;
	render->composite_spans = gen2_render_composite_spans;
	render->prefer_gpu |= PREFER_GPU_SPANS;
#endif
	render->fill_boxes = gen2_render_fill_boxes;
	render->fill = gen2_render_fill;
	render->fill_one = gen2_render_fill_one;
	render->copy = gen2_render_copy;
	render->copy_boxes = gen2_render_copy_boxes;

	render->video = gen2_render_video;

	render->reset = gen2_render_reset;
	render->flush = gen2_render_flush;

	render->max_3d_size = MAX_3D_SIZE;
	render->max_3d_pitch = MAX_3D_PITCH;
	return "Almador (gen2)";
}