~ubuntu-branches/ubuntu/natty/xserver-xorg-video-radeonhd/natty

« back to all changes in this revision

Viewing changes to src/r600_exa.c

  • Committer: Bazaar Package Importer
  • Author(s): Brice Goglin
  • Date: 2009-04-10 08:09:08 UTC
  • mfrom: (6.2.1 sid)
  • Revision ID: james.westby@ubuntu.com-20090410080908-zoz25ktobo22dn4a
Tags: 1.2.5-1
New upstream release.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Author: Alex Deucher <alexander.deucher@amd.com>
 *
 */
 
26
 
 
27
#ifdef HAVE_CONFIG_H
 
28
#include "config.h"
 
29
#endif
 
30
 
 
31
#include "xf86.h"
 
32
 
 
33
#include "exa.h"
 
34
 
 
35
#include "rhd.h"
 
36
#include "rhd_cs.h"
 
37
#include "r6xx_accel.h"
 
38
#include "r600_shader.h"
 
39
#include "r600_reg.h"
 
40
#include "r600_state.h"
 
41
 
 
42
/* #define SHOW_VERTEXES */
 
43
 
 
44
/*
 * ROP3 codes, pre-shifted into bits 16..23 so they can be written as-is
 * to CB_COLOR_CONTROL.  Naming: S = source, D = destination,
 * a = and, o = or, x = xor, n = not.
 */
#define RADEON_ROP3_ZERO             0x00000000
#define RADEON_ROP3_DSa              0x00880000
#define RADEON_ROP3_SDna             0x00440000
#define RADEON_ROP3_S                0x00cc0000
#define RADEON_ROP3_DSna             0x00220000
#define RADEON_ROP3_D                0x00aa0000
#define RADEON_ROP3_DSx              0x00660000
#define RADEON_ROP3_DSo              0x00ee0000
#define RADEON_ROP3_DSon             0x00110000
#define RADEON_ROP3_DSxn             0x00990000
#define RADEON_ROP3_Dn               0x00550000
#define RADEON_ROP3_SDno             0x00dd0000
#define RADEON_ROP3_Sn               0x00330000
#define RADEON_ROP3_DSno             0x00bb0000
#define RADEON_ROP3_DSan             0x00770000
#define RADEON_ROP3_ONE              0x00ff0000

/*
 * Raster-op lookup table, indexed by the X11 GC function code
 * (GXclear = 0 ... GXset = 15).  Callers index it with the alu/rop value
 * they were handed, so that value must stay within 0..15.
 */
uint32_t RADEON_ROP[16] = {
    RADEON_ROP3_ZERO, /* GXclear        */
    RADEON_ROP3_DSa,  /* GXand          */
    RADEON_ROP3_SDna, /* GXandReverse   */
    RADEON_ROP3_S,    /* GXcopy         */
    RADEON_ROP3_DSna, /* GXandInverted  */
    RADEON_ROP3_D,    /* GXnoop         */
    RADEON_ROP3_DSx,  /* GXxor          */
    RADEON_ROP3_DSo,  /* GXor           */
    RADEON_ROP3_DSon, /* GXnor          */
    RADEON_ROP3_DSxn, /* GXequiv        */
    RADEON_ROP3_Dn,   /* GXinvert       */
    RADEON_ROP3_SDno, /* GXorReverse    */
    RADEON_ROP3_Sn,   /* GXcopyInverted */
    RADEON_ROP3_DSno, /* GXorInverted   */
    RADEON_ROP3_DSan, /* GXnand         */
    RADEON_ROP3_ONE,  /* GXset          */
};
 
79
 
 
80
static void
 
81
R600DoneSolid(PixmapPtr pPix);
 
82
 
 
83
static void
 
84
R600DoneComposite(PixmapPtr pDst);
 
85
 
 
86
static Bool
 
87
R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
 
88
{
 
89
    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
 
90
    RHDPtr rhdPtr = RHDPTR(pScrn);
 
91
    struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
 
92
    cb_config_t     cb_conf;
 
93
    shader_config_t vs_conf, ps_conf;
 
94
    int pmask = 0;
 
95
    uint32_t a, r, g, b;
 
96
    float ps_alu_consts[4];
 
97
 
 
98
    accel_state->dst_mc_addr = exaGetPixmapOffset(pPix) + rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
 
99
    accel_state->dst_size = exaGetPixmapPitch(pPix) * pPix->drawable.height;
 
100
    accel_state->dst_pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
 
101
 
 
102
    /* bad pitch */
 
103
    if (accel_state->dst_pitch & 7)
 
104
        return FALSE;
 
105
 
 
106
    /* bad offset */
 
107
    if (accel_state->dst_mc_addr & 0xff)
 
108
        return FALSE;
 
109
 
 
110
    if (pPix->drawable.bitsPerPixel == 24)
 
111
        return FALSE;
 
112
 
 
113
    CLEAR (cb_conf);
 
114
    CLEAR (vs_conf);
 
115
    CLEAR (ps_conf);
 
116
 
 
117
    /* return FALSE; */
 
118
 
 
119
#ifdef SHOW_VERTEXES
 
120
    ErrorF("%dx%d @ %dbpp, 0x%08x\n", pPix->drawable.width, pPix->drawable.height,
 
121
           pPix->drawable.bitsPerPixel, exaGetPixmapPitch(pPix));
 
122
#endif
 
123
 
 
124
    accel_state->ib = RHDDRMCPBuffer(pScrn->scrnIndex);
 
125
 
 
126
    /* Init */
 
127
    start_3d(pScrn, accel_state->ib);
 
128
 
 
129
    /* cp_set_surface_sync(pScrn, accel_state->ib); */
 
130
 
 
131
    set_default_state(pScrn, accel_state->ib);
 
132
 
 
133
    /* Scissor / viewport */
 
134
    EREG(accel_state->ib, PA_CL_VTE_CNTL,                      VTX_XY_FMT_bit);
 
135
    EREG(accel_state->ib, PA_CL_CLIP_CNTL,                     CLIP_DISABLE_bit);
 
136
 
 
137
    accel_state->vs_mc_addr = rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart + accel_state->shaders->offset +
 
138
        accel_state->solid_vs_offset;
 
139
    accel_state->ps_mc_addr = rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart + accel_state->shaders->offset +
 
140
        accel_state->solid_ps_offset;
 
141
    accel_state->vs_size = 512;
 
142
    accel_state->ps_size = 512;
 
143
 
 
144
    /* Shader */
 
145
 
 
146
    /* flush SQ cache */
 
147
    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
 
148
                        accel_state->vs_size, accel_state->vs_mc_addr);
 
149
 
 
150
    vs_conf.shader_addr         = accel_state->vs_mc_addr;
 
151
    vs_conf.num_gprs            = 2;
 
152
    vs_conf.stack_size          = 0;
 
153
    vs_setup                    (pScrn, accel_state->ib, &vs_conf);
 
154
 
 
155
    /* flush SQ cache */
 
156
    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
 
157
                        accel_state->ps_size, accel_state->ps_mc_addr);
 
158
 
 
159
    ps_conf.shader_addr         = accel_state->ps_mc_addr;
 
160
    ps_conf.num_gprs            = 1;
 
161
    ps_conf.stack_size          = 0;
 
162
    ps_conf.uncached_first_inst = 1;
 
163
    ps_conf.clamp_consts        = 0;
 
164
    ps_conf.export_mode         = 2;
 
165
    ps_setup                    (pScrn, accel_state->ib, &ps_conf);
 
166
 
 
167
    /* Render setup */
 
168
    if (pm & 0x000000ff)
 
169
        pmask |= 4; /* B */
 
170
    if (pm & 0x0000ff00)
 
171
        pmask |= 2; /* G */
 
172
    if (pm & 0x00ff0000)
 
173
        pmask |= 1; /* R */
 
174
    if (pm & 0xff000000)
 
175
        pmask |= 8; /* A */
 
176
    EREG(accel_state->ib, CB_SHADER_MASK,                      (pmask << OUTPUT0_ENABLE_shift));
 
177
    EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL,              (RT0_ENABLE_bit));
 
178
    EREG(accel_state->ib, CB_COLOR_CONTROL,                    RADEON_ROP[alu]);
 
179
 
 
180
 
 
181
    cb_conf.id = 0;
 
182
    cb_conf.w = accel_state->dst_pitch;
 
183
    cb_conf.h = pPix->drawable.height;
 
184
    cb_conf.base = accel_state->dst_mc_addr;
 
185
 
 
186
    if (pPix->drawable.bitsPerPixel == 8) {
 
187
        cb_conf.format = COLOR_8;
 
188
        cb_conf.comp_swap = 3; /* A */
 
189
    } else if (pPix->drawable.bitsPerPixel == 16) {
 
190
        cb_conf.format = COLOR_5_6_5;
 
191
        cb_conf.comp_swap = 2; /* RGB */
 
192
    } else {
 
193
        cb_conf.format = COLOR_8_8_8_8;
 
194
        cb_conf.comp_swap = 1; /* ARGB */
 
195
    }
 
196
    cb_conf.source_format = 1;
 
197
    cb_conf.blend_clamp = 1;
 
198
    set_render_target(pScrn, accel_state->ib, &cb_conf);
 
199
 
 
200
    EREG(accel_state->ib, PA_SU_SC_MODE_CNTL,                  (FACE_bit                        |
 
201
                                                                (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift)       |
 
202
                                                                (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
 
203
    EREG(accel_state->ib, DB_SHADER_CONTROL,                   ((1 << Z_ORDER_shift)            | /* EARLY_Z_THEN_LATE_Z */
 
204
                                                                DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
 
205
 
 
206
    /* Interpolator setup */
 
207
    /* one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) */
 
208
    EREG(accel_state->ib, SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift));
 
209
    EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
 
210
 
 
211
    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
 
212
     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
 
213
    /* no VS exports as PS input (NUM_INTERP is not zero based, no minus one) */
 
214
    EREG(accel_state->ib, SPI_PS_IN_CONTROL_0,                 (0 << NUM_INTERP_shift));
 
215
    EREG(accel_state->ib, SPI_PS_IN_CONTROL_1,                 0);
 
216
    /* color semantic id 0 -> GPR[0] */
 
217
    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2),       ((0    << SEMANTIC_shift)        |
 
218
                                                                (0x03 << DEFAULT_VAL_shift)     |
 
219
                                                                FLAT_SHADE_bit          |
 
220
                                                                SEL_CENTROID_bit));
 
221
    EREG(accel_state->ib, SPI_INTERP_CONTROL_0,                FLAT_SHADE_ENA_bit | 0);
 
222
 
 
223
    /* PS alu constants */
 
224
    if (pPix->drawable.bitsPerPixel == 16) {
 
225
        r = (fg >> 11) & 0x1f;
 
226
        g = (fg >> 5) & 0x3f;
 
227
        b = (fg >> 0) & 0x1f;
 
228
        ps_alu_consts[0] = (float)r / 31; /* R */
 
229
        ps_alu_consts[1] = (float)g / 63; /* G */
 
230
        ps_alu_consts[2] = (float)b / 31; /* B */
 
231
        ps_alu_consts[3] = 1.0; /* A */
 
232
    } else if (pPix->drawable.bitsPerPixel == 8) {
 
233
        a = (fg >> 0) & 0xff;
 
234
        ps_alu_consts[0] = 0.0; /* R */
 
235
        ps_alu_consts[1] = 0.0; /* G */
 
236
        ps_alu_consts[2] = 0.0; /* B */
 
237
        ps_alu_consts[3] = (float)a / 255; /* A */
 
238
    } else {
 
239
        a = (fg >> 24) & 0xff;
 
240
        r = (fg >> 16) & 0xff;
 
241
        g = (fg >> 8) & 0xff;
 
242
        b = (fg >> 0) & 0xff;
 
243
        ps_alu_consts[0] = (float)r / 255; /* R */
 
244
        ps_alu_consts[1] = (float)g / 255; /* G */
 
245
        ps_alu_consts[2] = (float)b / 255; /* B */
 
246
        ps_alu_consts[3] = (float)a / 255; /* A */
 
247
    }
 
248
    set_alu_consts(pScrn, accel_state->ib, 0, sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
 
249
 
 
250
    accel_state->vb_index = 0;
 
251
 
 
252
#ifdef SHOW_VERTEXES
 
253
    ErrorF("PM: 0x%08x\n", pm);
 
254
#endif
 
255
 
 
256
    return TRUE;
 
257
}
 
258
 
 
259
 
 
260
static void
 
261
R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
 
262
{
 
263
    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
 
264
    RHDPtr rhdPtr = RHDPTR(pScrn);
 
265
    struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
 
266
    float *vb;
 
267
 
 
268
    if (((accel_state->vb_index + 3) * 8) > (accel_state->ib->total / 2)) {
 
269
        R600DoneSolid(pPix);
 
270
        accel_state->vb_index = 0;
 
271
        accel_state->ib = RHDDRMCPBuffer(pScrn->scrnIndex);
 
272
    }
 
273
 
 
274
    vb = (pointer)((char*)accel_state->ib->address +
 
275
                   (accel_state->ib->total / 2) +
 
276
                   accel_state->vb_index * 8);
 
277
 
 
278
    vb[0] = (float)x1;
 
279
    vb[1] = (float)y1;
 
280
 
 
281
    vb[2] = (float)x1;
 
282
    vb[3] = (float)y2;
 
283
 
 
284
    vb[4] = (float)x2;
 
285
    vb[5] = (float)y2;
 
286
 
 
287
    accel_state->vb_index += 3;
 
288
 
 
289
}
 
290
 
 
291
static void
 
292
R600DoneSolid(PixmapPtr pPix)
 
293
{
 
294
    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
 
295
    RHDPtr rhdPtr = RHDPTR(pScrn);
 
296
    struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
 
297
    draw_config_t   draw_conf;
 
298
    vtx_resource_t  vtx_res;
 
299
 
 
300
    CLEAR (draw_conf);
 
301
    CLEAR (vtx_res);
 
302
 
 
303
    if (accel_state->vb_index == 0) {
 
304
        R600IBDiscard(pScrn, accel_state->ib);
 
305
        return;
 
306
    }
 
307
 
 
308
    accel_state->vb_mc_addr = RHDDRIGetIntGARTLocation(pScrn) +
 
309
        (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2);
 
310
    accel_state->vb_size = accel_state->vb_index * 8;
 
311
 
 
312
    /* flush vertex cache */
 
313
    if ((rhdPtr->ChipSet == RHD_RV610) ||
 
314
        (rhdPtr->ChipSet == RHD_RV620) ||
 
315
        (rhdPtr->ChipSet == RHD_M72) ||
 
316
        (rhdPtr->ChipSet == RHD_M74) ||
 
317
        (rhdPtr->ChipSet == RHD_M82) ||
 
318
        (rhdPtr->ChipSet == RHD_RS780) ||
 
319
        (rhdPtr->ChipSet == RHD_RV710))
 
320
        cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
 
321
                            accel_state->vb_size, accel_state->vb_mc_addr);
 
322
    else
 
323
        cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit,
 
324
                            accel_state->vb_size, accel_state->vb_mc_addr);
 
325
 
 
326
    /* Vertex buffer setup */
 
327
    vtx_res.id              = SQ_VTX_RESOURCE_vs;
 
328
    vtx_res.vtx_size_dw     = 8 / 4;
 
329
    vtx_res.vtx_num_entries = accel_state->vb_size / 4;
 
330
    vtx_res.mem_req_size    = 1;
 
331
    vtx_res.vb_addr         = accel_state->vb_mc_addr;
 
332
    set_vtx_resource        (pScrn, accel_state->ib, &vtx_res);
 
333
 
 
334
    /* Draw */
 
335
    draw_conf.prim_type          = DI_PT_RECTLIST;
 
336
    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
 
337
    draw_conf.num_instances      = 1;
 
338
    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
 
339
    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
 
340
 
 
341
    draw_auto(pScrn, accel_state->ib, &draw_conf);
 
342
 
 
343
    wait_3d_idle_clean(pScrn, accel_state->ib);
 
344
 
 
345
    /* sync dst surface */
 
346
    cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
 
347
                        accel_state->dst_size, accel_state->dst_mc_addr);
 
348
 
 
349
    R600CPFlushIndirect(pScrn, accel_state->ib);
 
350
}
 
351
 
 
352
static void
 
353
R600DoPrepareCopy(ScrnInfoPtr pScrn,
 
354
                  int src_pitch, int src_width, int src_height, uint32_t src_offset, int src_bpp,
 
355
                  int dst_pitch, int dst_height, uint32_t dst_offset, int dst_bpp,
 
356
                  int rop, Pixel planemask)
 
357
{
 
358
    RHDPtr rhdPtr = RHDPTR(pScrn);
 
359
    struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
 
360
    int pmask = 0;
 
361
    cb_config_t     cb_conf;
 
362
    tex_resource_t  tex_res;
 
363
    tex_sampler_t   tex_samp;
 
364
    shader_config_t vs_conf, ps_conf;
 
365
 
 
366
    CLEAR (cb_conf);
 
367
    CLEAR (tex_res);
 
368
    CLEAR (tex_samp);
 
369
    CLEAR (vs_conf);
 
370
    CLEAR (ps_conf);
 
371
 
 
372
    accel_state->ib = RHDDRMCPBuffer(pScrn->scrnIndex);
 
373
 
 
374
    /* Init */
 
375
    start_3d(pScrn, accel_state->ib);
 
376
 
 
377
    /* cp_set_surface_sync(pScrn, accel_state->ib); */
 
378
 
 
379
    set_default_state(pScrn, accel_state->ib);
 
380
 
 
381
    /* Scissor / viewport */
 
382
    EREG(accel_state->ib, PA_CL_VTE_CNTL,                      VTX_XY_FMT_bit);
 
383
    EREG(accel_state->ib, PA_CL_CLIP_CNTL,                     CLIP_DISABLE_bit);
 
384
 
 
385
    accel_state->vs_mc_addr = rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart + accel_state->shaders->offset +
 
386
        accel_state->copy_vs_offset;
 
387
    accel_state->ps_mc_addr = rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart + accel_state->shaders->offset +
 
388
        accel_state->copy_ps_offset;
 
389
    accel_state->vs_size = 512;
 
390
    accel_state->ps_size = 512;
 
391
 
 
392
    /* Shader */
 
393
 
 
394
    /* flush SQ cache */
 
395
    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
 
396
                        accel_state->vs_size, accel_state->vs_mc_addr);
 
397
 
 
398
    vs_conf.shader_addr         = accel_state->vs_mc_addr;
 
399
    vs_conf.num_gprs            = 2;
 
400
    vs_conf.stack_size          = 0;
 
401
    vs_setup                    (pScrn, accel_state->ib, &vs_conf);
 
402
 
 
403
    /* flush SQ cache */
 
404
    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
 
405
                        accel_state->ps_size, accel_state->ps_mc_addr);
 
406
 
 
407
    ps_conf.shader_addr         = accel_state->ps_mc_addr;
 
408
    ps_conf.num_gprs            = 1;
 
409
    ps_conf.stack_size          = 0;
 
410
    ps_conf.uncached_first_inst = 1;
 
411
    ps_conf.clamp_consts        = 0;
 
412
    ps_conf.export_mode         = 2;
 
413
    ps_setup                    (pScrn, accel_state->ib, &ps_conf);
 
414
 
 
415
    accel_state->src_size[0] = src_pitch * src_height * (src_bpp/8);
 
416
    accel_state->src_mc_addr[0] = src_offset;
 
417
    accel_state->src_pitch[0] = src_pitch;
 
418
    accel_state->src_width[0] = src_width;
 
419
    accel_state->src_height[0] = src_height;
 
420
    accel_state->src_bpp[0] = src_bpp;
 
421
 
 
422
    /* flush texture cache */
 
423
    cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
 
424
                        accel_state->src_size[0], accel_state->src_mc_addr[0]);
 
425
 
 
426
    /* Texture */
 
427
    tex_res.id                  = 0;
 
428
    tex_res.w                   = src_width;
 
429
    tex_res.h                   = src_height;
 
430
    tex_res.pitch               = accel_state->src_pitch[0];
 
431
    tex_res.depth               = 0;
 
432
    tex_res.dim                 = SQ_TEX_DIM_2D;
 
433
    tex_res.base                = accel_state->src_mc_addr[0];
 
434
    tex_res.mip_base            = accel_state->src_mc_addr[0];
 
435
    if (src_bpp == 8) {
 
436
        tex_res.format              = FMT_8;
 
437
        tex_res.dst_sel_x           = SQ_SEL_1; /* R */
 
438
        tex_res.dst_sel_y           = SQ_SEL_1; /* G */
 
439
        tex_res.dst_sel_z           = SQ_SEL_1; /* B */
 
440
        tex_res.dst_sel_w           = SQ_SEL_X; /* A */
 
441
    } else if (src_bpp == 16) {
 
442
        tex_res.format              = FMT_5_6_5;
 
443
        tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
 
444
        tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
 
445
        tex_res.dst_sel_z           = SQ_SEL_X; /* B */
 
446
        tex_res.dst_sel_w           = SQ_SEL_1; /* A */
 
447
    } else {
 
448
        tex_res.format              = FMT_8_8_8_8;
 
449
        tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
 
450
        tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
 
451
        tex_res.dst_sel_z           = SQ_SEL_X; /* B */
 
452
        tex_res.dst_sel_w           = SQ_SEL_W; /* A */
 
453
    }
 
454
 
 
455
    tex_res.request_size        = 1;
 
456
    tex_res.base_level          = 0;
 
457
    tex_res.last_level          = 0;
 
458
    tex_res.perf_modulation     = 0;
 
459
    set_tex_resource            (pScrn, accel_state->ib, &tex_res);
 
460
 
 
461
    tex_samp.id                 = 0;
 
462
    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
 
463
    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
 
464
    tex_samp.clamp_z            = SQ_TEX_WRAP;
 
465
    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
 
466
    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
 
467
    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
 
468
    tex_samp.mip_filter         = 0;                    /* no mipmap */
 
469
    set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
 
470
 
 
471
 
 
472
    /* Render setup */
 
473
    if (planemask & 0x000000ff)
 
474
        pmask |= 4; /* B */
 
475
    if (planemask & 0x0000ff00)
 
476
        pmask |= 2; /* G */
 
477
    if (planemask & 0x00ff0000)
 
478
        pmask |= 1; /* R */
 
479
    if (planemask & 0xff000000)
 
480
        pmask |= 8; /* A */
 
481
    EREG  (accel_state->ib, CB_SHADER_MASK,                      (pmask << OUTPUT0_ENABLE_shift));
 
482
    EREG  (accel_state->ib, R7xx_CB_SHADER_CONTROL,              (RT0_ENABLE_bit));
 
483
    EREG  (accel_state->ib, CB_COLOR_CONTROL,                    RADEON_ROP[rop]);
 
484
 
 
485
    accel_state->dst_size = dst_pitch * dst_height * (dst_bpp/8);
 
486
    accel_state->dst_mc_addr = dst_offset;
 
487
    accel_state->dst_pitch = dst_pitch;
 
488
    accel_state->dst_height = dst_height;
 
489
    accel_state->dst_bpp = dst_bpp;
 
490
 
 
491
    cb_conf.id = 0;
 
492
    cb_conf.w = accel_state->dst_pitch;
 
493
    cb_conf.h = dst_height;
 
494
    cb_conf.base = accel_state->dst_mc_addr;
 
495
    if (dst_bpp == 8) {
 
496
        cb_conf.format = COLOR_8;
 
497
        cb_conf.comp_swap = 3; /* A */
 
498
    } else if (dst_bpp == 16) {
 
499
        cb_conf.format = COLOR_5_6_5;
 
500
        cb_conf.comp_swap = 2; /* RGB */
 
501
    } else {
 
502
        cb_conf.format = COLOR_8_8_8_8;
 
503
        cb_conf.comp_swap = 1; /* ARGB */
 
504
    }
 
505
    cb_conf.source_format = 1;
 
506
    cb_conf.blend_clamp = 1;
 
507
    set_render_target(pScrn, accel_state->ib, &cb_conf);
 
508
 
 
509
    EREG(accel_state->ib, PA_SU_SC_MODE_CNTL,                  (FACE_bit                        |
 
510
                                                                (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift)       |
 
511
                                                                (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
 
512
    EREG(accel_state->ib, DB_SHADER_CONTROL,                   ((1 << Z_ORDER_shift)            | /* EARLY_Z_THEN_LATE_Z */
 
513
                                                                DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
 
514
 
 
515
    /* Interpolator setup */
 
516
    /* export tex coord from VS */
 
517
    EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
 
518
    EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
 
519
 
 
520
    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
 
521
     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
 
522
    /* input tex coord from VS */
 
523
    EREG(accel_state->ib, SPI_PS_IN_CONTROL_0,                 ((1 << NUM_INTERP_shift)));
 
524
    EREG(accel_state->ib, SPI_PS_IN_CONTROL_1,                 0);
 
525
    /* color semantic id 0 -> GPR[0] */
 
526
    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2),       ((0    << SEMANTIC_shift)        |
 
527
                                                                (0x01 << DEFAULT_VAL_shift)     |
 
528
                                                                SEL_CENTROID_bit));
 
529
    EREG(accel_state->ib, SPI_INTERP_CONTROL_0,                0);
 
530
 
 
531
    accel_state->vb_index = 0;
 
532
 
 
533
}
 
534
 
 
535
static void
 
536
R600DoCopy(ScrnInfoPtr pScrn)
 
537
{
 
538
    RHDPtr rhdPtr = RHDPTR(pScrn);
 
539
    struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
 
540
    draw_config_t   draw_conf;
 
541
    vtx_resource_t  vtx_res;
 
542
 
 
543
    CLEAR (draw_conf);
 
544
    CLEAR (vtx_res);
 
545
 
 
546
    if (accel_state->vb_index == 0) {
 
547
        R600IBDiscard(pScrn, accel_state->ib);
 
548
        return;
 
549
    }
 
550
 
 
551
    accel_state->vb_mc_addr = RHDDRIGetIntGARTLocation(pScrn) +
 
552
        (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2);
 
553
    accel_state->vb_size = accel_state->vb_index * 16;
 
554
 
 
555
    /* flush vertex cache */
 
556
    if ((rhdPtr->ChipSet == RHD_RV610) ||
 
557
        (rhdPtr->ChipSet == RHD_RV620) ||
 
558
        (rhdPtr->ChipSet == RHD_M72) ||
 
559
        (rhdPtr->ChipSet == RHD_M74) ||
 
560
        (rhdPtr->ChipSet == RHD_M82) ||
 
561
        (rhdPtr->ChipSet == RHD_RS780) ||
 
562
        (rhdPtr->ChipSet == RHD_RV710))
 
563
        cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
 
564
                            accel_state->vb_size, accel_state->vb_mc_addr);
 
565
    else
 
566
        cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit,
 
567
                            accel_state->vb_size, accel_state->vb_mc_addr);
 
568
 
 
569
    /* Vertex buffer setup */
 
570
    vtx_res.id              = SQ_VTX_RESOURCE_vs;
 
571
    vtx_res.vtx_size_dw     = 16 / 4;
 
572
    vtx_res.vtx_num_entries = accel_state->vb_size / 4;
 
573
    vtx_res.mem_req_size    = 1;
 
574
    vtx_res.vb_addr         = accel_state->vb_mc_addr;
 
575
    set_vtx_resource        (pScrn, accel_state->ib, &vtx_res);
 
576
 
 
577
    draw_conf.prim_type          = DI_PT_RECTLIST;
 
578
    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
 
579
    draw_conf.num_instances      = 1;
 
580
    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
 
581
    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
 
582
 
 
583
    draw_auto(pScrn, accel_state->ib, &draw_conf);
 
584
 
 
585
    wait_3d_idle_clean(pScrn, accel_state->ib);
 
586
 
 
587
    /* sync dst surface */
 
588
    cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
 
589
                        accel_state->dst_size, accel_state->dst_mc_addr);
 
590
 
 
591
    R600CPFlushIndirect(pScrn, accel_state->ib);
 
592
}
 
593
 
 
594
static void
 
595
R600AppendCopyVertex(ScrnInfoPtr pScrn,
 
596
                     int srcX, int srcY,
 
597
                     int dstX, int dstY,
 
598
                     int w, int h)
 
599
{
 
600
    RHDPtr rhdPtr = RHDPTR(pScrn);
 
601
    struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
 
602
    float *vb;
 
603
 
 
604
    if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) {
 
605
        R600DoCopy(pScrn);
 
606
        accel_state->vb_index = 0;
 
607
        accel_state->ib = RHDDRMCPBuffer(pScrn->scrnIndex);
 
608
    }
 
609
 
 
610
    vb = (pointer)((char*)accel_state->ib->address +
 
611
                   (accel_state->ib->total / 2) +
 
612
                   accel_state->vb_index * 16);
 
613
 
 
614
    vb[0] = (float)dstX;
 
615
    vb[1] = (float)dstY;
 
616
    vb[2] = (float)srcX;
 
617
    vb[3] = (float)srcY;
 
618
 
 
619
    vb[4] = (float)dstX;
 
620
    vb[5] = (float)(dstY + h);
 
621
    vb[6] = (float)srcX;
 
622
    vb[7] = (float)(srcY + h);
 
623
 
 
624
    vb[8] = (float)(dstX + w);
 
625
    vb[9] = (float)(dstY + h);
 
626
    vb[10] = (float)(srcX + w);
 
627
    vb[11] = (float)(srcY + h);
 
628
 
 
629
    accel_state->vb_index += 3;
 
630
}
 
631
 
 
632
static Bool
 
633
R600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
 
634
                int xdir, int ydir,
 
635
                int rop,
 
636
                Pixel planemask)
 
637
{
 
638
    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
 
639
    RHDPtr rhdPtr = RHDPTR(pScrn);
 
640
    struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
 
641
 
 
642
    accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
 
643
    accel_state->src_pitch[0] = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
 
644
 
 
645
    accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
 
646
    accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
 
647
 
 
648
    accel_state->src_width[0] = pSrc->drawable.width;
 
649
    accel_state->src_height[0] = pSrc->drawable.height;
 
650
    accel_state->src_bpp[0] = pSrc->drawable.bitsPerPixel;
 
651
    accel_state->dst_height = pDst->drawable.height;
 
652
    accel_state->dst_bpp = pDst->drawable.bitsPerPixel;
 
653
 
 
654
    /* bad pitch */
 
655
    if (accel_state->src_pitch[0] & 7)
 
656
        return FALSE;
 
657
    if (accel_state->dst_pitch & 7)
 
658
        return FALSE;
 
659
 
 
660
    /* bad offset */
 
661
    if (accel_state->src_mc_addr[0] & 0xff)
 
662
        return FALSE;
 
663
    if (accel_state->dst_mc_addr & 0xff)
 
664
        return FALSE;
 
665
 
 
666
    if (pSrc->drawable.bitsPerPixel == 24)
 
667
        return FALSE;
 
668
    if (pDst->drawable.bitsPerPixel == 24)
 
669
        return FALSE;
 
670
 
 
671
    /* return FALSE; */
 
672
 
 
673
#ifdef SHOW_VERTEXES
 
674
    ErrorF("src: %dx%d @ %dbpp, 0x%08x\n", pSrc->drawable.width, pSrc->drawable.height,
 
675
           pSrc->drawable.bitsPerPixel, exaGetPixmapPitch(pSrc));
 
676
    ErrorF("dst: %dx%d @ %dbpp, 0x%08x\n", pDst->drawable.width, pDst->drawable.height,
 
677
           pDst->drawable.bitsPerPixel, exaGetPixmapPitch(pDst));
 
678
#endif
 
679
 
 
680
    accel_state->rop = rop;
 
681
    accel_state->planemask = planemask;
 
682
 
 
683
    if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst)) {
 
684
        unsigned long size = pDst->drawable.height * accel_state->dst_pitch * pDst->drawable.bitsPerPixel/8;
 
685
        accel_state->same_surface = TRUE;
 
686
 
 
687
        if (accel_state->copy_area) {
 
688
            exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
 
689
            accel_state->copy_area = NULL;
 
690
        }
 
691
        accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL);
 
692
    } else {
 
693
 
 
694
        accel_state->same_surface = FALSE;
 
695
 
 
696
        R600DoPrepareCopy(pScrn,
 
697
                          accel_state->src_pitch[0], pSrc->drawable.width, pSrc->drawable.height,
 
698
                          accel_state->src_mc_addr[0], pSrc->drawable.bitsPerPixel,
 
699
                          accel_state->dst_pitch, pDst->drawable.height,
 
700
                          accel_state->dst_mc_addr, pDst->drawable.bitsPerPixel,
 
701
                          rop, planemask);
 
702
 
 
703
    }
 
704
 
 
705
    return TRUE;
 
706
}
 
707
 
 
708
static Bool
 
709
is_overlap(int sx1, int sx2, int sy1, int sy2, int dx1, int dx2, int dy1, int dy2)
 
710
{
 
711
    if (((sx1 >= dx1) && (sx1 <= dx2) && (sy1 >= dy1) && (sy1 <= dy2)) || /* TL x1, y1 */
 
712
        ((sx2 >= dx1) && (sx2 <= dx2) && (sy1 >= dy1) && (sy1 <= dy2)) || /* TR x2, y1 */
 
713
        ((sx1 >= dx1) && (sx1 <= dx2) && (sy2 >= dy1) && (sy2 <= dy2)) || /* BL x1, y2 */
 
714
        ((sx2 >= dx1) && (sx2 <= dx2) && (sy2 >= dy1) && (sy2 <= dy2)))   /* BR x2, y2 */
 
715
        return TRUE;
 
716
    else
 
717
        return FALSE;
 
718
}
 
719
 
 
720
static void
 
721
R600OverlapCopy(PixmapPtr pDst,
 
722
                int srcX, int srcY,
 
723
                int dstX, int dstY,
 
724
                int w, int h)
 
725
{
 
726
    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
 
727
    RHDPtr rhdPtr = RHDPTR(pScrn);
 
728
    struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
 
729
    uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
 
730
    uint32_t dst_offset = exaGetPixmapOffset(pDst) + rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
 
731
    int i, hchunk, vchunk;
 
732
 
 
733
    if (is_overlap(srcX, srcX + w, srcY, srcY + h,
 
734
                   dstX, dstX + w, dstY, dstY + h)) {
 
735
        /* Calculate height/width of non-overlapping area */
 
736
        hchunk = (srcX < dstX) ? (dstX - srcX) : (srcX - dstX);
 
737
        vchunk = (srcY < dstY) ? (dstY - srcY) : (srcY - dstY);
 
738
 
 
739
        /* Diagonally offset overlap is reduced to either horizontal or vertical offset-only
 
740
         * by copying a part of the  non-overlapping portion, then adjusting coordinates
 
741
         * Choose horizontal vs vertical to minimize the total number of copy operations
 
742
         */
 
743
        if (vchunk != 0 && hchunk != 0) { /* diagonal */
 
744
            if ((w / hchunk) <= (h / vchunk)) { /* reduce to horizontal */
 
745
                if (srcY > dstY ) { /* diagonal up */
 
746
                    R600DoPrepareCopy(pScrn,
 
747
                                      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 
748
                                      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 
749
                                      accel_state->rop, accel_state->planemask);
 
750
                    R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, vchunk);
 
751
                    R600DoCopy(pScrn);
 
752
 
 
753
                    srcY = srcY + vchunk;
 
754
                    dstY = dstY + vchunk;
 
755
                } else { /* diagonal down */
 
756
                    R600DoPrepareCopy(pScrn,
 
757
                                      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 
758
                                      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 
759
                                      accel_state->rop, accel_state->planemask);
 
760
                    R600AppendCopyVertex(pScrn, srcX, srcY + h - vchunk, dstX, dstY + h - vchunk, w, vchunk);
 
761
                    R600DoCopy(pScrn);
 
762
                }
 
763
                h = h - vchunk;
 
764
                vchunk = 0;
 
765
            } else { /* reduce to vertical */
 
766
                if (srcX > dstX ) { /* diagonal left */
 
767
                    R600DoPrepareCopy(pScrn,
 
768
                                      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 
769
                                      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 
770
                                      accel_state->rop, accel_state->planemask);
 
771
                    R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, hchunk, h);
 
772
                    R600DoCopy(pScrn);
 
773
 
 
774
                    srcX = srcX + hchunk;
 
775
                    dstX = dstX + hchunk;
 
776
                } else { /* diagonal right */
 
777
                    R600DoPrepareCopy(pScrn,
 
778
                                      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 
779
                                      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 
780
                                      accel_state->rop, accel_state->planemask);
 
781
                    R600AppendCopyVertex(pScrn, srcX + w - hchunk, srcY, dstX + w - hchunk, dstY, hchunk, h);
 
782
                    R600DoCopy(pScrn);
 
783
                }
 
784
                w = w - hchunk;
 
785
                hchunk = 0;
 
786
            }
 
787
        }
 
788
 
 
789
        if (vchunk == 0) { /* left/right */
 
790
            if (srcX < dstX) { /* right */
 
791
                /* copy right to left */
 
792
                for (i = w; i > 0; i -= hchunk) {
 
793
                    R600DoPrepareCopy(pScrn,
 
794
                                      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 
795
                                      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 
796
                                      accel_state->rop, accel_state->planemask);
 
797
                    R600AppendCopyVertex(pScrn, srcX + i - hchunk, srcY, dstX + i - hchunk, dstY, hchunk, h);
 
798
                    R600DoCopy(pScrn);
 
799
                }
 
800
            } else { /* left */
 
801
                /* copy left to right */
 
802
                for (i = 0; i < w; i += hchunk) {
 
803
                    R600DoPrepareCopy(pScrn,
 
804
                                      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 
805
                                      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 
806
                                      accel_state->rop, accel_state->planemask);
 
807
 
 
808
                    R600AppendCopyVertex(pScrn, srcX + i, srcY, dstX + i, dstY, hchunk, h);
 
809
                    R600DoCopy(pScrn);
 
810
                }
 
811
            }
 
812
        } else { /* up/down */
 
813
            if (srcY > dstY) { /* up */
 
814
                /* copy top to bottom */
 
815
                for (i = 0; i < h; i += vchunk) {
 
816
                    R600DoPrepareCopy(pScrn,
 
817
                                      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 
818
                                      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 
819
                                      accel_state->rop, accel_state->planemask);
 
820
 
 
821
                    if (vchunk > h - i) vchunk = h - i;
 
822
                    R600AppendCopyVertex(pScrn, srcX, srcY + i, dstX, dstY + i, w, vchunk);
 
823
                    R600DoCopy(pScrn);
 
824
                }
 
825
            } else { /* down */
 
826
                /* copy bottom to top */
 
827
                for (i = h; i > 0; i -= vchunk) {
 
828
                    R600DoPrepareCopy(pScrn,
 
829
                                      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 
830
                                      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 
831
                                      accel_state->rop, accel_state->planemask);
 
832
 
 
833
                    if (vchunk > i) vchunk = i;
 
834
                    R600AppendCopyVertex(pScrn, srcX, srcY + i - vchunk, dstX, dstY + i - vchunk, w, vchunk);
 
835
                    R600DoCopy(pScrn);
 
836
                }
 
837
            }
 
838
        }
 
839
    } else {
 
840
        R600DoPrepareCopy(pScrn,
 
841
                          dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 
842
                          dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
 
843
                          accel_state->rop, accel_state->planemask);
 
844
 
 
845
        R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
 
846
        R600DoCopy(pScrn);
 
847
    }
 
848
}
 
849
 
 
850
static void
 
851
R600Copy(PixmapPtr pDst,
 
852
         int srcX, int srcY,
 
853
         int dstX, int dstY,
 
854
         int w, int h)
 
855
{
 
856
    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
 
857
    RHDPtr rhdPtr = RHDPTR(pScrn);
 
858
    struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
 
859
 
 
860
    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
 
861
        return;
 
862
 
 
863
    if (accel_state->same_surface && is_overlap(srcX, srcX + w, srcY, srcY + h, dstX, dstX + w, dstY, dstY + h)) {
 
864
        if (accel_state->copy_area) {
 
865
            uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
 
866
            uint32_t orig_offset, tmp_offset;
 
867
 
 
868
            tmp_offset = accel_state->copy_area->offset + rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
 
869
            orig_offset = exaGetPixmapOffset(pDst) + rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
 
870
 
 
871
            R600DoPrepareCopy(pScrn,
 
872
                              pitch, pDst->drawable.width, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel,
 
873
                              pitch,                       pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel,
 
874
                              accel_state->rop, accel_state->planemask);
 
875
            R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
 
876
            R600DoCopy(pScrn);
 
877
            R600DoPrepareCopy(pScrn,
 
878
                              pitch, pDst->drawable.width, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel,
 
879
                              pitch,                       pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel,
 
880
                              accel_state->rop, accel_state->planemask);
 
881
            R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
 
882
            R600DoCopy(pScrn);
 
883
        } else
 
884
            R600OverlapCopy(pDst, srcX, srcY, dstX, dstY, w, h);
 
885
    } else if (accel_state->same_surface) {
 
886
        uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
 
887
        uint32_t offset = exaGetPixmapOffset(pDst) + rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
 
888
 
 
889
        R600DoPrepareCopy(pScrn,
 
890
                          pitch, pDst->drawable.width, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel,
 
891
                          pitch,                       pDst->drawable.height, offset, pDst->drawable.bitsPerPixel,
 
892
                          accel_state->rop, accel_state->planemask);
 
893
        R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
 
894
        R600DoCopy(pScrn);
 
895
    } else {
 
896
        R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
 
897
    }
 
898
 
 
899
}
 
900
 
 
901
static void
 
902
R600DoneCopy(PixmapPtr pDst)
 
903
{
 
904
    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
 
905
    RHDPtr rhdPtr = RHDPTR(pScrn);
 
906
    struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
 
907
 
 
908
    if (!accel_state->same_surface)
 
909
        R600DoCopy(pScrn);
 
910
 
 
911
    if (accel_state->copy_area) {
 
912
        exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
 
913
        accel_state->copy_area = NULL;
 
914
    }
 
915
}
 
916
 
 
917
/* Compile-time trace switches; set RADEON_TRACE_FALL to 1 to log fallbacks. */
#define RADEON_TRACE_FALL 0
#define RADEON_TRACE_DRAW 0

/*
 * RADEON_FALLBACK((fmt, args...)) makes the enclosing function return FALSE.
 * The argument is a parenthesised ErrorF argument list; it is only printed
 * (prefixed with the function name) when RADEON_TRACE_FALL is non-zero.
 */
#if !RADEON_TRACE_FALL
#define RADEON_FALLBACK(x) return FALSE
#else
#define RADEON_FALLBACK(x)              \
do {                                    \
        ErrorF("%s: ", __FUNCTION__);   \
        ErrorF x;                       \
        return FALSE;                   \
} while (0)
#endif
 
930
 
 
931
/* Convert f to float and divide by 65536 (2^16), i.e. drop a 16-bit fractional scale. */
#define xFixedToFloat(f) (((float) (f)) / 65536)
 
932
 
 
933
/*
 * Run a fixed-point point through a picture transform, in place.
 *
 * The point is extended to the vector (x, y, xFixed1), handed to
 * PictureTransformPoint, and the first two components of the result are
 * stored back into *point.  The return value of PictureTransformPoint is
 * not examined.
 */
static inline void transformPoint(PictTransform *transform, xPointFixed *point)
{
    PictVector vec;

    vec.vector[0] = point->x;
    vec.vector[1] = point->y;
    vec.vector[2] = xFixed1;

    PictureTransformPoint(transform, &vec);

    point->x = vec.vector[0];
    point->y = vec.vector[1];
}
 
943
 
 
944
struct blendinfo {
 
945
    Bool dst_alpha;
 
946
    Bool src_alpha;
 
947
    uint32_t blend_cntl;
 
948
};
 
949
 
 
950
static struct blendinfo R600BlendOp[] = {
 
951
    /* Clear */
 
952
    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
 
953
    /* Src */
 
954
    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
 
955
    /* Dst */
 
956
    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
 
957
    /* Over */
 
958
    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
 
959
    /* OverReverse */
 
960
    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
 
961
    /* In */
 
962
    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
 
963
    /* InReverse */
 
964
    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
 
965
    /* Out */
 
966
    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
 
967
    /* OutReverse */
 
968
    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
 
969
    /* Atop */
 
970
    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
 
971
    /* AtopReverse */
 
972
    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
 
973
    /* Xor */
 
974
    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
 
975
    /* Add */
 
976
    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
 
977
};
 
978
 
 
979
struct formatinfo {
 
980
    unsigned int fmt;
 
981
    uint32_t card_fmt;
 
982
};
 
983
 
 
984
static struct formatinfo R600TexFormats[] = {
 
985
    {PICT_a8r8g8b8,     FMT_8_8_8_8},
 
986
    {PICT_x8r8g8b8,     FMT_8_8_8_8},
 
987
    {PICT_a8b8g8r8,     FMT_8_8_8_8},
 
988
    {PICT_x8b8g8r8,     FMT_8_8_8_8},
 
989
    {PICT_r5g6b5,       FMT_5_6_5},
 
990
    {PICT_a1r5g5b5,     FMT_1_5_5_5},
 
991
    {PICT_x1r5g5b5,     FMT_1_5_5_5},
 
992
    {PICT_a8,           FMT_8},
 
993
};
 
994
 
 
995
/*
 * Build the colour blend factors for composite operator op.
 *
 * Starts from the R600BlendOp entry for op and adjusts it in two cases:
 *  - dst_format has no alpha bits but the operator reads destination
 *    alpha: the source factor is rewritten as if that alpha were 1;
 *  - pMask is a component-alpha mask and the operator reads source alpha:
 *    the destination factor uses the source colour instead.
 *
 * op is used as a table index without a range check here.
 * Returns the source and destination factor bits OR'ed together.
 */
static uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
{
    const struct blendinfo *info = &R600BlendOp[op];
    uint32_t src_factor = info->blend_cntl & COLOR_SRCBLEND_mask;
    uint32_t dst_factor = info->blend_cntl & COLOR_DESTBLEND_mask;

    /* No destination alpha channel: treat it as always 1. */
    if (info->dst_alpha && PICT_FORMAT_A(dst_format) == 0) {
        if (src_factor == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift)) {
            src_factor = (BLEND_ONE << COLOR_SRCBLEND_shift);
        } else if (src_factor == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift)) {
            src_factor = (BLEND_ZERO << COLOR_SRCBLEND_shift);
        }
    }

    /* If the source alpha is being used, then we should only be in a case where
     * the source blend factor is 0, and the source blend value is the mask
     * channels multiplied by the source picture's alpha.
     */
    if (pMask && pMask->componentAlpha && info->src_alpha) {
        if (dst_factor == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift))
            dst_factor = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
        else if (dst_factor == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift))
            dst_factor = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
    }

    return src_factor | dst_factor;
}
 
1026
 
 
1027
/*
 * Map the destination picture's format to a COLOR_* render target format.
 *
 * On success the format is stored in *dst_format and TRUE is returned.
 * For any other picture format the function falls back (returns FALSE)
 * and leaves *dst_format untouched.
 */
static Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
{
    uint32_t card_format;

    switch (pDstPicture->format) {
    case PICT_a8r8g8b8:
    case PICT_x8r8g8b8:
        card_format = COLOR_8_8_8_8;
        break;
    case PICT_r5g6b5:
        card_format = COLOR_5_6_5;
        break;
    case PICT_a1r5g5b5:
    case PICT_x1r5g5b5:
        card_format = COLOR_1_5_5_5;
        break;
    case PICT_a8:
        card_format = COLOR_8;
        break;
    default:
        RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
               (int)pDstPicture->format));
    }

    *dst_format = card_format;
    return TRUE;
}
 
1050
 
 
1051
/*
 * Decide whether pPict can be used as a texture for composite operator op
 * onto pDstPict.
 *
 * Falls back (returns FALSE) when:
 *  - the picture has no backing drawable;
 *  - the drawable is larger than 8192 in either dimension;
 *  - the picture format is not listed in R600TexFormats;
 *  - the filter is neither nearest nor bilinear;
 *  - the picture is transformed, not repeating and has no alpha channel,
 *    unless op is Src or Clear and the destination has no alpha either.
 *
 * unit is accepted for symmetry with R600TextureSetup but is not used.
 * Returns TRUE when the picture is usable.
 */
static Bool R600CheckCompositeTexture(PicturePtr pPict,
                                      PicturePtr pDstPict,
                                      int op,
                                      int unit)
{
    int w, h;
    unsigned int i;
    int max_tex_w, max_tex_h;

    /* Source-only pictures carry no drawable; reject them before the
     * drawable is dereferenced below.
     */
    if (!pPict->pDrawable)
        RADEON_FALLBACK(("Pictures without a drawable are not supported\n"));

    w = pPict->pDrawable->width;
    h = pPict->pDrawable->height;

    max_tex_w = 8192;
    max_tex_h = 8192;

    if ((w > max_tex_w) || (h > max_tex_h))
        RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));

    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
        if (R600TexFormats[i].fmt == pPict->format)
            break;
    }
    if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0]))
        RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
                         (int)pPict->format));

    if (pPict->filter != PictFilterNearest &&
        pPict->filter != PictFilterBilinear)
        RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));

    /* for REPEAT_NONE, Render semantics are that sampling outside the source
     * picture results in alpha=0 pixels. We can implement this with a border color
     * *if* our source texture has an alpha channel, otherwise we need to fall
     * back. If we're not transformed then we hope that upper layers have clipped
     * rendering to the bounds of the source drawable, in which case it doesn't
     * matter. I have not, however, verified that the X server always does such
     * clipping.
     */
    /* FIXME R6xx */
    if (pPict->transform != 0 && !pPict->repeat && PICT_FORMAT_A(pPict->format) == 0) {
        if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
            RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
    }

    return TRUE;
}
 
1095
 
 
1096
/*
 * Program texture unit `unit` (0 = source, otherwise mask) to sample pPix
 * as described by pPict.
 *
 * Records pitch, size and address of the pixmap in the accel state, then
 * emits a texture-cache flush, the texture resource and the sampler state
 * into the accel state's indirect buffer, and finally remembers the
 * picture's transform (if any) for this unit.
 *
 * Falls back (returns FALSE) when the pitch is not a multiple of 8 pixels,
 * the address is not 256-byte aligned, or the picture format, repeat type
 * or filter is not handled.  The format, pitch and address checks run
 * before anything is emitted; the repeat and filter checks run after the
 * texture resource has already been emitted.
 *
 * Returns TRUE on success.
 */
static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
                                        int unit)
{
    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
    RHDPtr rhdPtr = RHDPTR(pScrn);
    struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
    int w = pPict->pDrawable->width;
    int h = pPict->pDrawable->height;
    unsigned int i;
    tex_resource_t  tex_res;
    tex_sampler_t   tex_samp;
    int pix_r, pix_g, pix_b, pix_a;

    CLEAR (tex_res);
    CLEAR (tex_samp);

    accel_state->src_pitch[unit] = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
    accel_state->src_size[unit] = exaGetPixmapPitch(pPix) * h;
    accel_state->src_mc_addr[unit] = exaGetPixmapOffset(pPix) + rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;

    /* Validate the unit being set up, not a fixed unit. */
    if (accel_state->src_pitch[unit] & 7)
        RADEON_FALLBACK(("Bad pitch %d 0x%x\n", (int)accel_state->src_pitch[unit], unit));

    if (accel_state->src_mc_addr[unit] & 0xff)
        RADEON_FALLBACK(("Bad offset %d 0x%x\n", (int)accel_state->src_mc_addr[unit], unit));

    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
        if (R600TexFormats[i].fmt == pPict->format)
            break;
    }
    /* Not found: bail out before R600TexFormats[i] is read past the end. */
    if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0]))
        RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
                         (int)pPict->format));

    accel_state->texW[unit] = w;
    accel_state->texH[unit] = h;

    /* ErrorF("Tex %d setup %dx%d\n", unit, w, h); */

    /* flush texture cache */
    cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
                        accel_state->src_size[unit], accel_state->src_mc_addr[unit]);

    /* Texture */
    tex_res.id                  = unit;
    tex_res.w                   = w;
    tex_res.h                   = h;
    tex_res.pitch               = accel_state->src_pitch[unit];
    tex_res.depth               = 0;
    tex_res.dim                 = SQ_TEX_DIM_2D;
    tex_res.base                = accel_state->src_mc_addr[unit];
    tex_res.mip_base            = accel_state->src_mc_addr[unit];
    tex_res.format              = R600TexFormats[i].card_fmt;
    tex_res.request_size        = 1;

    /* component swizzles */
    switch (pPict->format) {
    case PICT_a1r5g5b5:
    case PICT_a8r8g8b8:
        pix_r = SQ_SEL_Z; /* R */
        pix_g = SQ_SEL_Y; /* G */
        pix_b = SQ_SEL_X; /* B */
        pix_a = SQ_SEL_W; /* A */
        break;
    case PICT_a8b8g8r8:
        pix_r = SQ_SEL_X; /* R */
        pix_g = SQ_SEL_Y; /* G */
        pix_b = SQ_SEL_Z; /* B */
        pix_a = SQ_SEL_W; /* A */
        break;
    case PICT_x8b8g8r8:
        pix_r = SQ_SEL_X; /* R */
        pix_g = SQ_SEL_Y; /* G */
        pix_b = SQ_SEL_Z; /* B */
        pix_a = SQ_SEL_1; /* A */
        break;
    case PICT_x1r5g5b5:
    case PICT_x8r8g8b8:
    case PICT_r5g6b5:
        pix_r = SQ_SEL_Z; /* R */
        pix_g = SQ_SEL_Y; /* G */
        pix_b = SQ_SEL_X; /* B */
        pix_a = SQ_SEL_1; /* A */
        break;
    case PICT_a8:
        pix_r = SQ_SEL_0; /* R */
        pix_g = SQ_SEL_0; /* G */
        pix_b = SQ_SEL_0; /* B */
        pix_a = SQ_SEL_X; /* A */
        break;
    default:
        RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
    }

    /* Adjust the swizzles for the role this unit plays in the composite. */
    if (unit == 0) {
        if (accel_state->has_mask && accel_state->component_alpha) {
            if (accel_state->src_alpha) {
                /* Source contributes only its alpha, replicated to RGB. */
                if (PICT_FORMAT_A(pPict->format) == 0) {
                    pix_r = SQ_SEL_1;
                    pix_g = SQ_SEL_1;
                    pix_b = SQ_SEL_1;
                    pix_a = SQ_SEL_1;
                } else {
                    pix_r = pix_a;
                    pix_g = pix_a;
                    pix_b = pix_a;
                }
            } else {
                if (PICT_FORMAT_A(pPict->format) == 0)
                    pix_a = SQ_SEL_1;
            }
        } else {
            /* No mask, or a mask without component alpha. */
            if (PICT_FORMAT_RGB(pPict->format) == 0) {
                pix_r = SQ_SEL_0;
                pix_g = SQ_SEL_0;
                pix_b = SQ_SEL_0;
            }

            if (PICT_FORMAT_A(pPict->format) == 0)
                pix_a = SQ_SEL_1;
        }
    } else {
        if (accel_state->component_alpha) {
            if (PICT_FORMAT_A(pPict->format) == 0)
                pix_a = SQ_SEL_1;
        } else {
            /* Plain mask: only its alpha matters, replicated to RGB. */
            if (PICT_FORMAT_A(pPict->format) == 0) {
                pix_r = SQ_SEL_1;
                pix_g = SQ_SEL_1;
                pix_b = SQ_SEL_1;
                pix_a = SQ_SEL_1;
            } else {
                pix_r = pix_a;
                pix_g = pix_a;
                pix_b = pix_a;
            }
        }
    }

    tex_res.dst_sel_x           = pix_r; /* R */
    tex_res.dst_sel_y           = pix_g; /* G */
    tex_res.dst_sel_z           = pix_b; /* B */
    tex_res.dst_sel_w           = pix_a; /* A */

    tex_res.base_level          = 0;
    tex_res.last_level          = 0;
    tex_res.perf_modulation     = 0;
    set_tex_resource            (pScrn, accel_state->ib, &tex_res);

    tex_samp.id                 = unit;
    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;

    if (pPict->repeat) {
        switch (pPict->repeatType) {
        case RepeatNormal:
            tex_samp.clamp_x            = SQ_TEX_WRAP;
            tex_samp.clamp_y            = SQ_TEX_WRAP;
            break;
        case RepeatPad:
            tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
            tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
            break;
        case RepeatReflect:
            tex_samp.clamp_x            = SQ_TEX_MIRROR;
            tex_samp.clamp_y            = SQ_TEX_MIRROR;
            break;
        case RepeatNone:
            tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
            tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
            break;
        default:
            RADEON_FALLBACK(("Bad repeat 0x%x\n", pPict->repeatType));
        }
    } else {
        tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER;
        tex_samp.clamp_y = SQ_TEX_CLAMP_BORDER;
    }

    switch (pPict->filter) {
    case PictFilterNearest:
        tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
        tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
        break;
    case PictFilterBilinear:
        tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
        tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
        break;
    default:
        RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
    }

    tex_samp.clamp_z            = SQ_TEX_WRAP;
    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
    tex_samp.mip_filter         = 0;                    /* no mipmap */
    set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);

    if (pPict->transform != 0) {
        accel_state->is_transform[unit] = TRUE;
        accel_state->transform[unit] = pPict->transform;
    } else
        accel_state->is_transform[unit] = FALSE;

    return TRUE;
}
 
1308
 
 
1309
/*
 * EXA CheckComposite hook: decide whether the composite described by op,
 * source, optional mask and destination can be accelerated.
 *
 * Falls back (returns FALSE) when:
 *  - op is outside the R600BlendOp table;
 *  - the source or mask picture has no backing drawable;
 *  - the source, mask or destination pixmap is 8192 or more pixels wide
 *    or high;
 *  - the mask uses component alpha and the operator needs both the source
 *    alpha and a non-zero source blend factor;
 *  - R600CheckCompositeTexture rejects the mask or source picture;
 *  - R600GetDestFormat rejects the destination format.
 *
 * pMaskPicture may be NULL.  Returns TRUE when the operation is supported.
 */
static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
                               PicturePtr pDstPicture)
{
    uint32_t tmp1;
    PixmapPtr pSrcPixmap, pDstPixmap;
    int max_tex_w, max_tex_h, max_dst_w, max_dst_h;

    /* Check for unsupported compositing operations; op indexes R600BlendOp. */
    if (op < 0 || op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
        RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));

    /* Source-only pictures have no drawable to look a pixmap up from. */
    if (!pSrcPicture->pDrawable)
        RADEON_FALLBACK(("Source picture without a drawable is not supported\n"));

    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);

    max_tex_w = 8192;
    max_tex_h = 8192;
    max_dst_w = 8192;
    max_dst_h = 8192;

    if (pSrcPixmap->drawable.width >= max_tex_w ||
        pSrcPixmap->drawable.height >= max_tex_h) {
        RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
                         pSrcPixmap->drawable.width,
                         pSrcPixmap->drawable.height));
    }

    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);

    if (pDstPixmap->drawable.width >= max_dst_w ||
        pDstPixmap->drawable.height >= max_dst_h) {
        RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
                         pDstPixmap->drawable.width,
                         pDstPixmap->drawable.height));
    }

    if (pMaskPicture) {
        PixmapPtr pMaskPixmap;

        if (!pMaskPicture->pDrawable)
            RADEON_FALLBACK(("Mask picture without a drawable is not supported\n"));

        pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);

        if (pMaskPixmap->drawable.width >= max_tex_w ||
            pMaskPixmap->drawable.height >= max_tex_h) {
            RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
                             pMaskPixmap->drawable.width,
                             pMaskPixmap->drawable.height));
        }

        if (pMaskPicture->componentAlpha) {
            /* Check if it's component alpha that relies on a source alpha and
             * on the source value.  We can only get one of those into the
             * single source value that we get to blend with.
             */
            if (R600BlendOp[op].src_alpha &&
                (R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
                (BLEND_ZERO << COLOR_SRCBLEND_shift)) {
                RADEON_FALLBACK(("Component alpha not supported with source "
                                 "alpha and source value blending.\n"));
            }
        }

        if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
            return FALSE;
    }

    if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
        return FALSE;

    /* Only the verdict matters here; the format itself is discarded. */
    if (!R600GetDestFormat(pDstPicture, &tmp1))
        return FALSE;

    return TRUE;
}
 
1382
 
 
1383
static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 
1384
                                 PicturePtr pMaskPicture, PicturePtr pDstPicture,
 
1385
                                 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
 
1386
{
 
1387
    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
 
1388
    RHDPtr rhdPtr = RHDPTR(pScrn);
 
1389
    struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
 
1390
    uint32_t blendcntl, dst_format;
 
1391
    cb_config_t cb_conf;
 
1392
    shader_config_t vs_conf, ps_conf;
 
1393
 
 
1394
    /* return FALSE; */
 
1395
 
 
1396
    if (pMask) {
 
1397
        accel_state->has_mask = TRUE;
 
1398
        if (pMaskPicture->componentAlpha) {
 
1399
            accel_state->component_alpha = TRUE;
 
1400
            if (R600BlendOp[op].src_alpha)
 
1401
                accel_state->src_alpha = TRUE;
 
1402
            else
 
1403
                accel_state->src_alpha = FALSE;
 
1404
        } else {
 
1405
            accel_state->component_alpha = FALSE;
 
1406
            accel_state->src_alpha = FALSE;
 
1407
        }
 
1408
    } else {
 
1409
        accel_state->has_mask = FALSE;
 
1410
        accel_state->component_alpha = FALSE;
 
1411
        accel_state->src_alpha = FALSE;
 
1412
    }
 
1413
 
 
1414
    accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
 
1415
    accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
 
1416
    accel_state->dst_size = exaGetPixmapPitch(pDst) * pDst->drawable.height;
 
1417
 
 
1418
    if (accel_state->dst_pitch & 7)
 
1419
        RADEON_FALLBACK(("Bad dst pitch 0x%x\n", (int)accel_state->dst_pitch));
 
1420
 
 
1421
    if (accel_state->dst_mc_addr & 0xff)
 
1422
        RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)accel_state->dst_mc_addr));
 
1423
 
 
1424
    if (!R600GetDestFormat(pDstPicture, &dst_format))
 
1425
        return FALSE;
 
1426
 
 
1427
    CLEAR (cb_conf);
 
1428
    CLEAR (vs_conf);
 
1429
    CLEAR (ps_conf);
 
1430
 
 
1431
    accel_state->ib = RHDDRMCPBuffer(pScrn->scrnIndex);
 
1432
 
 
1433
    /* Init */
 
1434
    start_3d(pScrn, accel_state->ib);
 
1435
 
 
1436
    /* cp_set_surface_sync(pScrn, accel_state->ib); */
 
1437
 
 
1438
    set_default_state(pScrn, accel_state->ib);
 
1439
 
 
1440
    /* Scissor / viewport */
 
1441
    EREG  (accel_state->ib, PA_CL_VTE_CNTL,                      VTX_XY_FMT_bit);
 
1442
    EREG  (accel_state->ib, PA_CL_CLIP_CNTL,                     CLIP_DISABLE_bit);
 
1443
 
 
1444
    if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
 
1445
        R600IBDiscard(pScrn, accel_state->ib);
 
1446
        return FALSE;
 
1447
    }
 
1448
 
 
1449
    if (pMask != NULL) {
 
1450
        if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
 
1451
            R600IBDiscard(pScrn, accel_state->ib);
 
1452
            return FALSE;
 
1453
        }
 
1454
    } else
 
1455
        accel_state->is_transform[1] = FALSE;
 
1456
 
 
1457
    if (pMask) {
 
1458
        set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
 
1459
        accel_state->ps_mc_addr = rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart + accel_state->shaders->offset +
 
1460
            accel_state->comp_mask_ps_offset;
 
1461
    } else {
 
1462
        set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
 
1463
        accel_state->ps_mc_addr = rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart + accel_state->shaders->offset +
 
1464
            accel_state->comp_ps_offset;
 
1465
    }
 
1466
 
 
1467
    accel_state->vs_mc_addr = rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart + accel_state->shaders->offset +
 
1468
        accel_state->comp_vs_offset;
 
1469
 
 
1470
    accel_state->vs_size = 512;
 
1471
    accel_state->ps_size = 512;
 
1472
 
 
1473
    /* Shader */
 
1474
 
 
1475
    /* flush SQ cache */
 
1476
    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
 
1477
                        accel_state->vs_size, accel_state->vs_mc_addr);
 
1478
 
 
1479
    vs_conf.shader_addr         = accel_state->vs_mc_addr;
 
1480
    vs_conf.num_gprs            = 3;
 
1481
    vs_conf.stack_size          = 1;
 
1482
    vs_setup                    (pScrn, accel_state->ib, &vs_conf);
 
1483
 
 
1484
    /* flush SQ cache */
 
1485
    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
 
1486
                        accel_state->ps_size, accel_state->ps_mc_addr);
 
1487
 
 
1488
    ps_conf.shader_addr         = accel_state->ps_mc_addr;
 
1489
    ps_conf.num_gprs            = 3;
 
1490
    ps_conf.stack_size          = 0;
 
1491
    ps_conf.uncached_first_inst = 1;
 
1492
    ps_conf.clamp_consts        = 0;
 
1493
    ps_conf.export_mode         = 2;
 
1494
    ps_setup                    (pScrn, accel_state->ib, &ps_conf);
 
1495
 
 
1496
    EREG  (accel_state->ib, CB_SHADER_MASK,                      (0xf << OUTPUT0_ENABLE_shift));
 
1497
    EREG  (accel_state->ib, R7xx_CB_SHADER_CONTROL,              (RT0_ENABLE_bit));
 
1498
 
 
1499
    blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
 
1500
 
 
1501
    if (rhdPtr->ChipSet == RHD_R600) {
 
1502
        /* no per-MRT blend on R600 */
 
1503
        EREG  (accel_state->ib, CB_COLOR_CONTROL,                    RADEON_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift));
 
1504
        EREG  (accel_state->ib, CB_BLEND_CONTROL,                    blendcntl);
 
1505
    } else {
 
1506
        EREG  (accel_state->ib, CB_COLOR_CONTROL,                    (RADEON_ROP[3] |
 
1507
                                                                      (1 << TARGET_BLEND_ENABLE_shift) |
 
1508
                                                                      PER_MRT_BLEND_bit));
 
1509
        EREG  (accel_state->ib, CB_BLEND0_CONTROL,                   blendcntl);
 
1510
    }
 
1511
 
 
1512
    cb_conf.id = 0;
 
1513
    cb_conf.w = accel_state->dst_pitch;
 
1514
    cb_conf.h = pDst->drawable.height;
 
1515
    cb_conf.base = accel_state->dst_mc_addr;
 
1516
    cb_conf.format = dst_format;
 
1517
 
 
1518
    switch (pDstPicture->format) {
 
1519
    case PICT_a8r8g8b8:
 
1520
    case PICT_x8r8g8b8:
 
1521
    case PICT_a1r5g5b5:
 
1522
    case PICT_x1r5g5b5:
 
1523
    default:
 
1524
        cb_conf.comp_swap = 1; /* ARGB */
 
1525
        break;
 
1526
    case PICT_r5g6b5:
 
1527
        cb_conf.comp_swap = 2; /* RGB */
 
1528
        break;
 
1529
    case PICT_a8:
 
1530
        cb_conf.comp_swap = 3; /* A */
 
1531
        break;
 
1532
    }
 
1533
    cb_conf.source_format = 1;
 
1534
    cb_conf.blend_clamp = 1;
 
1535
    set_render_target(pScrn, accel_state->ib, &cb_conf);
 
1536
 
 
1537
    EREG(accel_state->ib, PA_SU_SC_MODE_CNTL,                  (FACE_bit                        |
 
1538
                                                                (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift)       |
 
1539
                                                                (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
 
1540
    EREG(accel_state->ib, DB_SHADER_CONTROL,                   ((1 << Z_ORDER_shift)            | /* EARLY_Z_THEN_LATE_Z */
 
1541
                                                                DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
 
1542
 
 
1543
    /* Interpolator setup */
 
1544
    if (pMask) {
 
1545
        /* export 2 tex coords from VS */
 
1546
        EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((2 - 1) << VS_EXPORT_COUNT_shift));
 
1547
        /* src = semantic id 0; mask = semantic id 1 */
 
1548
        EREG(accel_state->ib, SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
 
1549
                                                (1 << SEMANTIC_1_shift)));
 
1550
        /* input 2 tex coords from VS */
 
1551
        EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (2 << NUM_INTERP_shift));
 
1552
    } else {
 
1553
        /* export 1 tex coords from VS */
 
1554
        EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
 
1555
        /* src = semantic id 0 */
 
1556
        EREG(accel_state->ib, SPI_VS_OUT_ID_0,   (0 << SEMANTIC_0_shift));
 
1557
        /* input 1 tex coords from VS */
 
1558
        EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (1 << NUM_INTERP_shift));
 
1559
    }
 
1560
    EREG(accel_state->ib, SPI_PS_IN_CONTROL_1,                 0);
 
1561
    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
 
1562
    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2),       ((0    << SEMANTIC_shift)        |
 
1563
                                                                (0x01 << DEFAULT_VAL_shift)     |
 
1564
                                                                SEL_CENTROID_bit));
 
1565
    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
 
1566
    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (1 <<2),       ((1    << SEMANTIC_shift)        |
 
1567
                                                                (0x01 << DEFAULT_VAL_shift)     |
 
1568
                                                                SEL_CENTROID_bit));
 
1569
    EREG(accel_state->ib, SPI_INTERP_CONTROL_0,                0);
 
1570
 
 
1571
    accel_state->vb_index = 0;
 
1572
 
 
1573
    return TRUE;
 
1574
}
 
1575
 
 
1576
/*
 * EXA Composite hook: queue one rectangle for the composite operation set
 * up by R600PrepareComposite().
 *
 * Three vertices (top-left, bottom-left, bottom-right) are appended to the
 * vertex area located in the second half of the current indirect buffer.
 * Each vertex carries the destination position and normalized source
 * texture coordinates, plus normalized mask coordinates when a mask is in
 * use (24 bytes per vertex with a mask, 16 bytes without).
 *
 * If the three new vertices would not fit in the vertex area, the pending
 * vertices are submitted via R600DoneComposite() and a fresh indirect
 * buffer is obtained first.  This check is applied to both the masked and
 * the unmasked path.
 *
 * pDst          - destination pixmap.
 * srcX, srcY    - top-left corner in the source.
 * maskX, maskY  - top-left corner in the mask (used only with a mask).
 * dstX, dstY    - top-left corner in the destination.
 * w, h          - rectangle size in pixels.
 */
static void R600Composite(PixmapPtr pDst,
                          int srcX, int srcY,
                          int maskX, int maskY,
                          int dstX, int dstY,
                          int w, int h)
{
    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
    RHDPtr rhdPtr = RHDPTR(pScrn);
    struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
    float *vb;
    int vtx_bytes;
    xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight;

    /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */

    srcTopLeft.x     = IntToxFixed(srcX);
    srcTopLeft.y     = IntToxFixed(srcY);
    srcTopRight.x    = IntToxFixed(srcX + w);
    srcTopRight.y    = IntToxFixed(srcY);
    srcBottomLeft.x  = IntToxFixed(srcX);
    srcBottomLeft.y  = IntToxFixed(srcY + h);
    srcBottomRight.x = IntToxFixed(srcX + w);
    srcBottomRight.y = IntToxFixed(srcY + h);

    /* XXX do transform in vertex shader */
    if (accel_state->is_transform[0]) {
        transformPoint(accel_state->transform[0], &srcTopLeft);
        transformPoint(accel_state->transform[0], &srcTopRight);
        transformPoint(accel_state->transform[0], &srcBottomLeft);
        transformPoint(accel_state->transform[0], &srcBottomRight);
    }

    /* Bytes per vertex: x,y + src s,t (+ mask s,t), all 32-bit floats. */
    vtx_bytes = accel_state->has_mask ? 24 : 16;

    /* Flush and start a new buffer if these 3 vertices would overflow the
     * vertex half of the indirect buffer. */
    if (((accel_state->vb_index + 3) * vtx_bytes) > (accel_state->ib->total / 2)) {
        R600DoneComposite(pDst);
        accel_state->vb_index = 0;
        accel_state->ib = RHDDRMCPBuffer(pScrn->scrnIndex);
    }

    vb = (pointer)((char*)accel_state->ib->address +
                   (accel_state->ib->total / 2) +
                   accel_state->vb_index * vtx_bytes);

    if (accel_state->has_mask) {
        xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight;

        maskTopLeft.x     = IntToxFixed(maskX);
        maskTopLeft.y     = IntToxFixed(maskY);
        maskTopRight.x    = IntToxFixed(maskX + w);
        maskTopRight.y    = IntToxFixed(maskY);
        maskBottomLeft.x  = IntToxFixed(maskX);
        maskBottomLeft.y  = IntToxFixed(maskY + h);
        maskBottomRight.x = IntToxFixed(maskX + w);
        maskBottomRight.y = IntToxFixed(maskY + h);

        if (accel_state->is_transform[1]) {
            transformPoint(accel_state->transform[1], &maskTopLeft);
            transformPoint(accel_state->transform[1], &maskTopRight);
            transformPoint(accel_state->transform[1], &maskBottomLeft);
            transformPoint(accel_state->transform[1], &maskBottomRight);
        }

        /* top-left */
        vb[0] = (float)dstX;
        vb[1] = (float)dstY;
        vb[2] = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0];
        vb[3] = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0];
        vb[4] = xFixedToFloat(maskTopLeft.x) / accel_state->texW[1];
        vb[5] = xFixedToFloat(maskTopLeft.y) / accel_state->texH[1];

        /* bottom-left */
        vb[6] = (float)dstX;
        vb[7] = (float)(dstY + h);
        vb[8] = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0];
        vb[9] = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0];
        vb[10] = xFixedToFloat(maskBottomLeft.x) / accel_state->texW[1];
        vb[11] = xFixedToFloat(maskBottomLeft.y) / accel_state->texH[1];

        /* bottom-right */
        vb[12] = (float)(dstX + w);
        vb[13] = (float)(dstY + h);
        vb[14] = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0];
        vb[15] = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0];
        vb[16] = xFixedToFloat(maskBottomRight.x) / accel_state->texW[1];
        vb[17] = xFixedToFloat(maskBottomRight.y) / accel_state->texH[1];

    } else {
        /* top-left */
        vb[0] = (float)dstX;
        vb[1] = (float)dstY;
        vb[2] = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0];
        vb[3] = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0];

        /* bottom-left */
        vb[4] = (float)dstX;
        vb[5] = (float)(dstY + h);
        vb[6] = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0];
        vb[7] = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0];

        /* bottom-right */
        vb[8] = (float)(dstX + w);
        vb[9] = (float)(dstY + h);
        vb[10] = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0];
        vb[11] = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0];
    }

    accel_state->vb_index += 3;

}
 
1682
 
 
1683
/*
 * EXA DoneComposite hook: submit the vertices queued by R600Composite().
 *
 * With no queued vertices the indirect buffer is simply discarded.
 * Otherwise the vertex resource is described, the vertex/texture cache is
 * flushed for it, the rectangle list is drawn, the destination surface is
 * synced and the indirect buffer is flushed to the command processor.
 */
static void R600DoneComposite(PixmapPtr pDst)
{
    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
    RHDPtr rhdPtr = RHDPTR(pScrn);
    struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
    draw_config_t   draw_conf;
    vtx_resource_t  vtx_res;
    /* bytes per vertex: 24 with mask coordinates, 16 without */
    int vtx_bytes = accel_state->has_mask ? 24 : 16;

    CLEAR (draw_conf);
    CLEAR (vtx_res);

    /* Nothing was queued: give the buffer back untouched. */
    if (accel_state->vb_index == 0) {
        R600IBDiscard(pScrn, accel_state->ib);
        return;
    }

    /* The vertex data lives in the second half of the indirect buffer. */
    accel_state->vb_mc_addr = RHDDRIGetIntGARTLocation(pScrn) +
        (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2);

    /* Vertex buffer setup */
    accel_state->vb_size    = accel_state->vb_index * vtx_bytes;
    vtx_res.id              = SQ_VTX_RESOURCE_vs;
    vtx_res.vtx_size_dw     = vtx_bytes / 4;
    vtx_res.vtx_num_entries = accel_state->vb_size / 4;
    vtx_res.mem_req_size    = 1;
    vtx_res.vb_addr         = accel_state->vb_mc_addr;

    /* flush vertex cache: the chips listed here are synced with the TC
     * action bit, all others with the VC action bit */
    switch (rhdPtr->ChipSet) {
    case RHD_RV610:
    case RHD_RV620:
    case RHD_M72:
    case RHD_M74:
    case RHD_M82:
    case RHD_RS780:
    case RHD_RV710:
        cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
                            accel_state->vb_size, accel_state->vb_mc_addr);
        break;
    default:
        cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit,
                            accel_state->vb_size, accel_state->vb_mc_addr);
        break;
    }

    set_vtx_resource        (pScrn, accel_state->ib, &vtx_res);

    draw_conf.prim_type          = DI_PT_RECTLIST;
    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
    draw_conf.num_instances      = 1;
    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;

    draw_auto(pScrn, accel_state->ib, &draw_conf);

    wait_3d_idle_clean(pScrn, accel_state->ib);

    /* sync the destination surface after drawing */
    cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
                        accel_state->dst_size, accel_state->dst_mc_addr);

    R600CPFlushIndirect(pScrn, accel_state->ib);
}
 
1750
 
 
1751
Bool
 
1752
R600CopyToVRAM(ScrnInfoPtr pScrn,
 
1753
               char *src, int src_pitch,
 
1754
               uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_height, int bpp,
 
1755
               int x, int y, int w, int h)
 
1756
{
 
1757
    struct RhdCS *CS = RHDPTR(pScrn)->CS;
 
1758
    uint32_t scratch_mc_addr;
 
1759
    int wpass = w * (bpp/8);
 
1760
    int scratch_pitch_bytes = (wpass + 255) & ~255;
 
1761
    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
 
1762
    int scratch_offset = 0, hpass, temph;
 
1763
    char *dst;
 
1764
    drmBufPtr scratch;
 
1765
 
 
1766
    if (dst_pitch & 7)
 
1767
        return FALSE;
 
1768
 
 
1769
    if (dst_mc_addr & 0xff)
 
1770
        return FALSE;
 
1771
 
 
1772
    scratch = RHDDRMCPBuffer(pScrn->scrnIndex);
 
1773
    if (scratch == NULL)
 
1774
        return FALSE;
 
1775
 
 
1776
    scratch_mc_addr = RHDDRIGetIntGARTLocation(pScrn) + (scratch->idx * scratch->total);
 
1777
    temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
 
1778
    dst = (char *)scratch->address;
 
1779
 
 
1780
    /* memcopy from sys to scratch */
 
1781
    while (temph--) {
 
1782
        memcpy (dst, src, wpass);
 
1783
        src += src_pitch;
 
1784
        dst += scratch_pitch_bytes;
 
1785
    }
 
1786
 
 
1787
    while (h) {
 
1788
        uint32_t offset = scratch_mc_addr + scratch_offset;
 
1789
        int oldhpass = hpass;
 
1790
        h -= oldhpass;
 
1791
        temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
 
1792
 
 
1793
        if (hpass) {
 
1794
            scratch_offset = scratch->total/2 - scratch_offset;
 
1795
            dst = (char *)scratch->address + scratch_offset;
 
1796
            /* wait for the engine to be idle */
 
1797
            RHDCSIdle(CS);
 
1798
            /* memcopy from sys to scratch */
 
1799
            while (temph--) {
 
1800
                memcpy (dst, src, wpass);
 
1801
                src += src_pitch;
 
1802
                dst += scratch_pitch_bytes;
 
1803
            }
 
1804
        }
 
1805
        /* blit from scratch to vram */
 
1806
        R600DoPrepareCopy(pScrn,
 
1807
                          scratch_pitch, w, oldhpass, offset, bpp,
 
1808
                          dst_pitch, dst_height, dst_mc_addr, bpp,
 
1809
                          3, 0xffffffff);
 
1810
        R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass);
 
1811
        R600DoCopy(pScrn);
 
1812
        y += oldhpass;
 
1813
    }
 
1814
 
 
1815
    R600IBDiscard(pScrn, scratch);
 
1816
 
 
1817
    return TRUE;
 
1818
}
 
1819
 
 
1820
static Bool
 
1821
R600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
 
1822
                   char *src, int src_pitch)
 
1823
{
 
1824
    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
 
1825
    RHDPtr rhdPtr = RHDPTR(pScrn);
 
1826
    uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
 
1827
    uint32_t dst_mc_addr = rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart + exaGetPixmapOffset(pDst);
 
1828
    uint32_t dst_height = pDst->drawable.height;
 
1829
    int bpp = pDst->drawable.bitsPerPixel;
 
1830
 
 
1831
    return R600CopyToVRAM(pScrn,
 
1832
                          src, src_pitch,
 
1833
                          dst_pitch, dst_mc_addr, dst_height, bpp,
 
1834
                          x, y, w, h);
 
1835
}
 
1836
 
 
1837
static Bool
 
1838
R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
 
1839
                       char *dst, int dst_pitch)
 
1840
{
 
1841
    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
 
1842
    RHDPtr rhdPtr = RHDPTR(pScrn);
 
1843
    struct RhdCS *CS = rhdPtr->CS;
 
1844
    uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
 
1845
    uint32_t src_mc_addr = rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart + exaGetPixmapOffset(pSrc);
 
1846
    uint32_t src_width = pSrc->drawable.width;
 
1847
    uint32_t src_height = pSrc->drawable.height;
 
1848
    int bpp = pSrc->drawable.bitsPerPixel;
 
1849
    uint32_t scratch_mc_addr;
 
1850
    int scratch_pitch_bytes = (dst_pitch + 255) & ~255;
 
1851
    int scratch_offset = 0, hpass;
 
1852
    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
 
1853
    int wpass = w * (bpp/8);
 
1854
    drmBufPtr scratch;
 
1855
 
 
1856
    if (src_pitch & 7)
 
1857
        return FALSE;
 
1858
 
 
1859
    scratch = RHDDRMCPBuffer(pScrn->scrnIndex);
 
1860
    if (scratch == NULL)
 
1861
        return FALSE;
 
1862
 
 
1863
    scratch_mc_addr = RHDDRIGetIntGARTLocation(pScrn) + (scratch->idx * scratch->total);
 
1864
    hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
 
1865
 
 
1866
    /* blit from vram to scratch */
 
1867
    R600DoPrepareCopy(pScrn,
 
1868
                      src_pitch, src_width, src_height, src_mc_addr, bpp,
 
1869
                      scratch_pitch, hpass, scratch_mc_addr, bpp,
 
1870
                      3, 0xffffffff);
 
1871
    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
 
1872
    R600DoCopy(pScrn);
 
1873
 
 
1874
    while (h) {
 
1875
        char *src = (char *)scratch->address + scratch_offset;
 
1876
        int oldhpass = hpass;
 
1877
        h -= oldhpass;
 
1878
        y += oldhpass;
 
1879
        hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
 
1880
 
 
1881
        if (hpass) {
 
1882
            scratch_offset = scratch->total/2 - scratch_offset;
 
1883
            /* blit from vram to scratch */
 
1884
            R600DoPrepareCopy(pScrn,
 
1885
                              src_pitch, src_width, src_height, src_mc_addr, bpp,
 
1886
                              scratch_pitch, hpass, scratch_mc_addr + scratch_offset, bpp,
 
1887
                              3, 0xffffffff);
 
1888
            R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
 
1889
            R600DoCopy(pScrn);
 
1890
        }
 
1891
 
 
1892
        /* wait for the engine to be idle */
 
1893
        RHDCSIdle(CS);
 
1894
        /* memcopy from scratch to sys */
 
1895
        while (oldhpass--) {
 
1896
            memcpy (dst, src, wpass);
 
1897
            dst += dst_pitch;
 
1898
            src += scratch_pitch_bytes;
 
1899
        }
 
1900
    }
 
1901
 
 
1902
    R600IBDiscard(pScrn, scratch);
 
1903
 
 
1904
    return TRUE;
 
1905
 
 
1906
}
 
1907
 
 
1908
void
 
1909
R6xxEXACloseScreen(ScreenPtr pScreen)
 
1910
{
 
1911
    exaDriverFini(pScreen);
 
1912
}
 
1913
 
 
1914
void
 
1915
R6xxEXADestroy(ScrnInfoPtr pScrn)
 
1916
{
 
1917
    RHDPtr rhdPtr = RHDPTR(pScrn);
 
1918
 
 
1919
    if (rhdPtr->EXAInfo) {
 
1920
        xfree(rhdPtr->EXAInfo);
 
1921
        rhdPtr->EXAInfo = NULL;
 
1922
    }
 
1923
 
 
1924
    if (rhdPtr->TwoDPrivate) {
 
1925
        xfree(rhdPtr->TwoDPrivate);
 
1926
        rhdPtr->TwoDPrivate = NULL;
 
1927
    }
 
1928
}
 
1929
 
 
1930
/*
 * Intentionally a no-op.
 *
 * There is no need to flush the caches or wait for idle here: the drawing
 * code already does this (and mesa code should be designed to do so as
 * well).  Excessive idling/flushing seems to cause stability problems on
 * r7xx and drawing glitches on r6xx.
 */
void
R6xxCacheFlush(struct RhdCS *CS)
{
    (void) CS; /* unused - avoid compiler warning */
}
 
1940
 
 
1941
/*
 * Intentionally a no-op: the function body does nothing with CS and does
 * not wait for the engine.
 */
void
R6xxEngineWaitIdleFull(struct RhdCS *CS)
{
    (void) CS; /* unused - avoid compiler warning */
}
 
1946
 
 
1947
static int
 
1948
R600EXAMarkSync(ScreenPtr pScreen)
 
1949
{
 
1950
    struct r6xx_accel_state *accel_state = RHDPTR(xf86Screens[pScreen->myNum])->TwoDPrivate;
 
1951
 
 
1952
    return ++accel_state->exaSyncMarker;
 
1953
 
 
1954
}
 
1955
 
 
1956
static void
 
1957
R600EXASync(ScreenPtr pScreen, int marker)
 
1958
{
 
1959
    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
 
1960
    struct r6xx_accel_state *accel_state = RHDPTR(pScrn)->TwoDPrivate;
 
1961
 
 
1962
    if (accel_state->exaMarkerSynced != marker) {
 
1963
        struct RhdCS *CS = RHDPTR(pScrn)->CS;
 
1964
 
 
1965
        RHDCSIdle(CS);
 
1966
 
 
1967
        accel_state->exaMarkerSynced = marker;
 
1968
    }
 
1969
}
 
1970
 
 
1971
static Bool
 
1972
R600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 
1973
{
 
1974
    RHDPtr rhdPtr = RHDPTR(pScrn);
 
1975
    struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
 
1976
    /* 512 bytes per shader for now */
 
1977
    int size = 512 * 9;
 
1978
 
 
1979
    accel_state->shaders = NULL;
 
1980
 
 
1981
    accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256,
 
1982
                                             TRUE, NULL, NULL);
 
1983
 
 
1984
    if (accel_state->shaders == NULL)
 
1985
        return FALSE;
 
1986
    return TRUE;
 
1987
}
 
1988
 
 
1989
Bool
 
1990
R600LoadShaders(ScrnInfoPtr pScrn)
 
1991
{
 
1992
    RHDPtr rhdPtr = RHDPTR(pScrn);
 
1993
    struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
 
1994
    enum RHD_CHIPSETS ChipSet = rhdPtr->ChipSet;
 
1995
    uint32_t *shader;
 
1996
 
 
1997
    shader = (pointer)((char *)rhdPtr->FbBase + rhdPtr->FbScanoutStart + accel_state->shaders->offset);
 
1998
 
 
1999
    /*  solid vs --------------------------------------- */
 
2000
    accel_state->solid_vs_offset = 0;
 
2001
    R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
 
2002
 
 
2003
    /*  solid ps --------------------------------------- */
 
2004
    accel_state->solid_ps_offset = 512;
 
2005
    R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
 
2006
 
 
2007
    /*  copy vs --------------------------------------- */
 
2008
    accel_state->copy_vs_offset = 1024;
 
2009
    R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
 
2010
 
 
2011
    /*  copy ps --------------------------------------- */
 
2012
    accel_state->copy_ps_offset = 1536;
 
2013
    R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
 
2014
 
 
2015
    /*  comp vs --------------------------------------- */
 
2016
    accel_state->comp_vs_offset = 2048;
 
2017
    R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
 
2018
 
 
2019
    /*  comp ps --------------------------------------- */
 
2020
    accel_state->comp_ps_offset = 2560;
 
2021
    R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
 
2022
 
 
2023
    /*  comp mask ps --------------------------------------- */
 
2024
    accel_state->comp_mask_ps_offset = 3072;
 
2025
    R600_comp_mask_ps(ChipSet, shader + accel_state->comp_mask_ps_offset / 4);
 
2026
 
 
2027
    /*  xv vs --------------------------------------- */
 
2028
    accel_state->xv_vs_offset = 3584;
 
2029
    R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
 
2030
 
 
2031
    /*  xv ps --------------------------------------- */
 
2032
    accel_state->xv_ps_offset = 4096;
 
2033
    R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
 
2034
 
 
2035
    return TRUE;
 
2036
}
 
2037
 
 
2038
static Bool
 
2039
R600PrepareAccess(PixmapPtr pPix, int index)
 
2040
{
 
2041
    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
 
2042
    RHDPtr rhdPtr = RHDPTR(pScrn);
 
2043
 
 
2044
    /* flush HDP read/write caches */
 
2045
    RHDRegWrite(rhdPtr, HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
 
2046
 
 
2047
    return TRUE;
 
2048
}
 
2049
 
 
2050
static void
 
2051
R600FinishAccess(PixmapPtr pPix, int index)
 
2052
{
 
2053
    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
 
2054
    RHDPtr rhdPtr = RHDPTR(pScrn);
 
2055
 
 
2056
    /* flush HDP read/write caches */
 
2057
    RHDRegWrite(rhdPtr, HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
 
2058
 
 
2059
}
 
2060
 
 
2061
 
 
2062
Bool
 
2063
R6xxEXAInit(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 
2064
{
 
2065
    RHDPtr rhdPtr = RHDPTR(pScrn);
 
2066
    struct RhdCS *CS = rhdPtr->CS;
 
2067
    ExaDriverRec *EXAInfo;
 
2068
    struct r6xx_accel_state *accel_state;
 
2069
 
 
2070
    RHDFUNC(pScrn);
 
2071
 
 
2072
    EXAInfo = exaDriverAlloc();
 
2073
    if (EXAInfo == NULL || !CS)
 
2074
        return FALSE;
 
2075
 
 
2076
    accel_state = xnfcalloc(1, sizeof(struct r6xx_accel_state));
 
2077
 
 
2078
    EXAInfo->exa_major = EXA_VERSION_MAJOR;
 
2079
    EXAInfo->exa_minor = EXA_VERSION_MINOR;
 
2080
 
 
2081
    EXAInfo->flags = EXA_OFFSCREEN_PIXMAPS;
 
2082
    EXAInfo->pixmapOffsetAlign = 256;
 
2083
    EXAInfo->pixmapPitchAlign = 256;
 
2084
 
 
2085
#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
 
2086
    EXAInfo->maxPitchBytes = 32768;
 
2087
    EXAInfo->maxX = 8192;
 
2088
#else
 
2089
    EXAInfo->maxX = 8192;
 
2090
#endif
 
2091
    EXAInfo->maxY = 8192;
 
2092
 
 
2093
    EXAInfo->memoryBase = (CARD8 *) rhdPtr->FbBase + rhdPtr->FbScanoutStart;
 
2094
    EXAInfo->offScreenBase = rhdPtr->FbOffscreenStart - rhdPtr->FbScanoutStart;
 
2095
    EXAInfo->memorySize = rhdPtr->FbScanoutSize + rhdPtr->FbOffscreenSize;
 
2096
 
 
2097
    EXAInfo->PrepareSolid = R600PrepareSolid;
 
2098
    EXAInfo->Solid = R600Solid;
 
2099
    EXAInfo->DoneSolid = R600DoneSolid;
 
2100
 
 
2101
    EXAInfo->PrepareCopy = R600PrepareCopy;
 
2102
    EXAInfo->Copy = R600Copy;
 
2103
    EXAInfo->DoneCopy = R600DoneCopy;
 
2104
 
 
2105
    EXAInfo->CheckComposite = R600CheckComposite;
 
2106
    EXAInfo->PrepareComposite = R600PrepareComposite;
 
2107
    EXAInfo->Composite = R600Composite;
 
2108
    EXAInfo->DoneComposite = R600DoneComposite;
 
2109
 
 
2110
    if (rhdPtr->cardType != RHD_CARD_AGP) {
 
2111
        EXAInfo->UploadToScreen = R600UploadToScreen;
 
2112
        EXAInfo->DownloadFromScreen = R600DownloadFromScreen;
 
2113
    }
 
2114
 
 
2115
    EXAInfo->PrepareAccess = R600PrepareAccess;
 
2116
    EXAInfo->FinishAccess = R600FinishAccess;
 
2117
 
 
2118
    EXAInfo->MarkSync = R600EXAMarkSync;
 
2119
    EXAInfo->WaitMarker = R600EXASync;
 
2120
 
 
2121
    if (!exaDriverInit(pScreen, EXAInfo)) {
 
2122
        xfree(accel_state);
 
2123
        xfree(EXAInfo);
 
2124
        return FALSE;
 
2125
    }
 
2126
 
 
2127
    RHDPTR(pScrn)->EXAInfo = EXAInfo;
 
2128
 
 
2129
    accel_state->XHas3DEngineState = FALSE;
 
2130
    accel_state->copy_area = NULL;
 
2131
 
 
2132
    rhdPtr->TwoDPrivate = accel_state;
 
2133
 
 
2134
    if (!R600AllocShaders(pScrn, pScreen)) {
 
2135
        xfree(accel_state);
 
2136
        xfree(EXAInfo);
 
2137
        return FALSE;
 
2138
    }
 
2139
 
 
2140
    if (!R600LoadShaders(pScrn)) {
 
2141
        xfree(accel_state);
 
2142
        xfree(EXAInfo);
 
2143
        return FALSE;
 
2144
    }
 
2145
 
 
2146
    exaMarkSync(pScreen);
 
2147
 
 
2148
    return TRUE;
 
2149
}
 
2150