2
* Copyright 2008 Advanced Micro Devices, Inc.
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
* Author: Alex Deucher <alexander.deucher@amd.com>
37
#include "r6xx_accel.h"
38
#include "r600_shader.h"
40
#include "r600_state.h"
42
/* #define SHOW_VERTEXES */
44
/*
 * ROP3 codes, one per X11 GC raster function.  Every value has the form
 * 0x00NN0000, i.e. the 8-bit code sits in bits 16-23.  The codes are
 * collected in RADEON_ROP[] and written to CB_COLOR_CONTROL from there.
 * The suffixes appear to be reverse-polish ROP notation (D = destination,
 * S = source, a/o/x/n = and/or/xor/not), which is consistent with the GX*
 * function each code is paired with in RADEON_ROP[].
 */
# define RADEON_ROP3_ZERO 0x00000000
45
# define RADEON_ROP3_DSa 0x00880000
46
# define RADEON_ROP3_SDna 0x00440000
47
# define RADEON_ROP3_S 0x00cc0000
48
# define RADEON_ROP3_DSna 0x00220000
49
# define RADEON_ROP3_D 0x00aa0000
50
# define RADEON_ROP3_DSx 0x00660000
51
# define RADEON_ROP3_DSo 0x00ee0000
52
# define RADEON_ROP3_DSon 0x00110000
53
# define RADEON_ROP3_DSxn 0x00990000
54
# define RADEON_ROP3_Dn 0x00550000
55
# define RADEON_ROP3_SDno 0x00dd0000
56
# define RADEON_ROP3_Sn 0x00330000
57
# define RADEON_ROP3_DSno 0x00bb0000
58
# define RADEON_ROP3_DSan 0x00770000
59
# define RADEON_ROP3_ONE 0x00ff0000
61
/*
 * Raster-op lookup table: the entry selected by the alu/rop argument of the
 * prepare functions is written to CB_COLOR_CONTROL.  The sixteen entries are
 * listed in the order given by the GX* name noted next to each one.
 */
uint32_t RADEON_ROP[16] = {
62
RADEON_ROP3_ZERO, /* GXclear */
63
RADEON_ROP3_DSa, /* GXand */
64
RADEON_ROP3_SDna, /* GXandReverse */
65
RADEON_ROP3_S, /* GXcopy */
66
RADEON_ROP3_DSna, /* GXandInverted */
67
RADEON_ROP3_D, /* GXnoop */
68
RADEON_ROP3_DSx, /* GXxor */
69
RADEON_ROP3_DSo, /* GXor */
70
RADEON_ROP3_DSon, /* GXnor */
71
RADEON_ROP3_DSxn, /* GXequiv */
72
RADEON_ROP3_Dn, /* GXinvert */
73
RADEON_ROP3_SDno, /* GXorReverse */
74
RADEON_ROP3_Sn, /* GXcopyInverted */
75
RADEON_ROP3_DSno, /* GXorInverted */
76
RADEON_ROP3_DSan, /* GXnand */
77
RADEON_ROP3_ONE, /* GXset */
81
/* Forward declarations of the "done" hooks for solid fills and compositing. */
R600DoneSolid(PixmapPtr pPix);
84
R600DoneComposite(PixmapPtr pDst);
87
/*
 * R600PrepareSolid - set up GPU state for a run of solid-colour fills.
 *
 * pPix: destination pixmap
 * alu:  raster operation, used as an index into RADEON_ROP[]
 * pm:   planemask
 * fg:   fill colour, decoded below according to the pixmap's bits per pixel
 *
 * Records the destination address, size and pitch in accel_state, fetches an
 * indirect buffer and emits into it: default 3D state, the solid-fill vertex
 * and pixel shaders, the colour write mask, the raster op, the render target
 * and the interpolator setup.  Finally fg is converted to normalised float
 * RGBA and uploaded as pixel shader ALU constants, and the vertex count is
 * reset.
 */
R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
89
ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
90
RHDPtr rhdPtr = RHDPTR(pScrn);
91
struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
93
shader_config_t vs_conf, ps_conf;
96
float ps_alu_consts[4];
98
/* Destination surface: GPU address, size in bytes, pitch in pixels (byte pitch / bytes per pixel) */
accel_state->dst_mc_addr = exaGetPixmapOffset(pPix) + rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
99
accel_state->dst_size = exaGetPixmapPitch(pPix) * pPix->drawable.height;
100
accel_state->dst_pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
103
/* Destination constraints: pitch not a multiple of 8 pixels, base address not 256-byte aligned, and 24bpp are each singled out */
if (accel_state->dst_pitch & 7)
107
if (accel_state->dst_mc_addr & 0xff)
110
if (pPix->drawable.bitsPerPixel == 24)
120
ErrorF("%dx%d @ %dbpp, 0x%08x\n", pPix->drawable.width, pPix->drawable.height,
121
pPix->drawable.bitsPerPixel, exaGetPixmapPitch(pPix));
124
/* Fetch an indirect buffer; all state below is emitted into it */
accel_state->ib = RHDDRMCPBuffer(pScrn->scrnIndex);
127
start_3d(pScrn, accel_state->ib);
129
/* cp_set_surface_sync(pScrn, accel_state->ib); */
131
set_default_state(pScrn, accel_state->ib);
133
/* Scissor / viewport */
134
EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit);
135
EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit);
137
/* GPU addresses of the solid-fill vertex and pixel shaders inside the shader area */
accel_state->vs_mc_addr = rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart + accel_state->shaders->offset +
138
accel_state->solid_vs_offset;
139
accel_state->ps_mc_addr = rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart + accel_state->shaders->offset +
140
accel_state->solid_ps_offset;
141
accel_state->vs_size = 512;
142
accel_state->ps_size = 512;
147
/* Sync the vertex shader's memory range, then configure the vertex shader */
cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
148
accel_state->vs_size, accel_state->vs_mc_addr);
150
vs_conf.shader_addr = accel_state->vs_mc_addr;
151
vs_conf.num_gprs = 2;
152
vs_conf.stack_size = 0;
153
vs_setup (pScrn, accel_state->ib, &vs_conf);
156
/* Same for the pixel shader */
cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
157
accel_state->ps_size, accel_state->ps_mc_addr);
159
ps_conf.shader_addr = accel_state->ps_mc_addr;
160
ps_conf.num_gprs = 1;
161
ps_conf.stack_size = 0;
162
ps_conf.uncached_first_inst = 1;
163
ps_conf.clamp_consts = 0;
164
ps_conf.export_mode = 2;
165
ps_setup (pScrn, accel_state->ib, &ps_conf);
176
/* Colour write mask, render target 0 enable, and the raster op selected by alu */
EREG(accel_state->ib, CB_SHADER_MASK, (pmask << OUTPUT0_ENABLE_shift));
177
EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit));
178
EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[alu]);
182
/* Render target: the width is the destination pitch in pixels */
cb_conf.w = accel_state->dst_pitch;
183
cb_conf.h = pPix->drawable.height;
184
cb_conf.base = accel_state->dst_mc_addr;
186
/* Colour buffer format and component swap depend on the pixmap's bits per pixel */
if (pPix->drawable.bitsPerPixel == 8) {
187
cb_conf.format = COLOR_8;
188
cb_conf.comp_swap = 3; /* A */
189
} else if (pPix->drawable.bitsPerPixel == 16) {
190
cb_conf.format = COLOR_5_6_5;
191
cb_conf.comp_swap = 2; /* RGB */
193
cb_conf.format = COLOR_8_8_8_8;
194
cb_conf.comp_swap = 1; /* ARGB */
196
cb_conf.source_format = 1;
197
cb_conf.blend_clamp = 1;
198
set_render_target(pScrn, accel_state->ib, &cb_conf);
200
EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit |
201
(POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) |
202
(POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
203
EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
204
DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
206
/* Interpolator setup */
207
/* one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) */
208
EREG(accel_state->ib, SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift));
209
EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
211
/* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
212
* *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
213
/* no VS exports as PS input (NUM_INTERP is not zero based, no minus one) */
214
EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (0 << NUM_INTERP_shift));
215
EREG(accel_state->ib, SPI_PS_IN_CONTROL_1, 0);
216
/* color semantic id 0 -> GPR[0] */
217
EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) |
218
(0x03 << DEFAULT_VAL_shift) |
221
EREG(accel_state->ib, SPI_INTERP_CONTROL_0, FLAT_SHADE_ENA_bit | 0);
223
/* PS alu constants */
224
/* fg is unpacked per depth (16bpp: 5-6-5, 8bpp: alpha only, otherwise 8-8-8-8) and each channel scaled to 0.0 - 1.0 */
if (pPix->drawable.bitsPerPixel == 16) {
225
r = (fg >> 11) & 0x1f;
226
g = (fg >> 5) & 0x3f;
227
b = (fg >> 0) & 0x1f;
228
ps_alu_consts[0] = (float)r / 31; /* R */
229
ps_alu_consts[1] = (float)g / 63; /* G */
230
ps_alu_consts[2] = (float)b / 31; /* B */
231
ps_alu_consts[3] = 1.0; /* A */
232
} else if (pPix->drawable.bitsPerPixel == 8) {
233
a = (fg >> 0) & 0xff;
234
ps_alu_consts[0] = 0.0; /* R */
235
ps_alu_consts[1] = 0.0; /* G */
236
ps_alu_consts[2] = 0.0; /* B */
237
ps_alu_consts[3] = (float)a / 255; /* A */
239
a = (fg >> 24) & 0xff;
240
r = (fg >> 16) & 0xff;
241
g = (fg >> 8) & 0xff;
242
b = (fg >> 0) & 0xff;
243
ps_alu_consts[0] = (float)r / 255; /* R */
244
ps_alu_consts[1] = (float)g / 255; /* G */
245
ps_alu_consts[2] = (float)b / 255; /* B */
246
ps_alu_consts[3] = (float)a / 255; /* A */
248
set_alu_consts(pScrn, accel_state->ib, 0, sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
250
/* No vertices queued yet for this operation */
accel_state->vb_index = 0;
253
ErrorF("PM: 0x%08x\n", pm);
261
/*
 * R600Solid - queue one rectangle, described by x1, y1, x2, y2, for filling.
 *
 * Vertex data lives in the second half of the current indirect buffer
 * (starting at ib->total / 2), 8 bytes per vertex and three vertices per
 * rectangle; accel_state->vb_index counts the vertices queued so far.
 */
R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
263
ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
264
RHDPtr rhdPtr = RHDPTR(pScrn);
265
struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
268
/* Three more 8-byte vertices would not fit in the vertex half of the buffer: restart the count in a freshly fetched indirect buffer */
if (((accel_state->vb_index + 3) * 8) > (accel_state->ib->total / 2)) {
270
accel_state->vb_index = 0;
271
accel_state->ib = RHDDRMCPBuffer(pScrn->scrnIndex);
274
/* Address of the next free vertex slot */
vb = (pointer)((char*)accel_state->ib->address +
275
(accel_state->ib->total / 2) +
276
accel_state->vb_index * 8);
287
accel_state->vb_index += 3;
292
/*
 * R600DoneSolid - emit the draw for the vertices queued by R600Solid().
 *
 * When no vertices are queued the indirect buffer is passed to
 * R600IBDiscard().  Otherwise the vertex data in the second half of the
 * buffer is bound as a vertex resource, a RECTLIST auto-index draw is emitted,
 * followed by a 3D idle wait and a sync of the destination surface, and the
 * indirect buffer is handed to R600CPFlushIndirect().
 */
R600DoneSolid(PixmapPtr pPix)
294
ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
295
RHDPtr rhdPtr = RHDPTR(pScrn);
296
struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
297
draw_config_t draw_conf;
298
vtx_resource_t vtx_res;
303
if (accel_state->vb_index == 0) {
304
R600IBDiscard(pScrn, accel_state->ib);
308
/* GPU address of the vertex data: GART base + this buffer's slot (idx * total) + half a buffer; size is 8 bytes per queued vertex */
accel_state->vb_mc_addr = RHDDRIGetIntGARTLocation(pScrn) +
309
(accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2);
310
accel_state->vb_size = accel_state->vb_index * 8;
312
/* flush vertex cache */
313
/* NOTE(review): the chipsets listed here sync with TC_ACTION_ENA_bit - presumably they fetch vertices through the texture cache; confirm */
if ((rhdPtr->ChipSet == RHD_RV610) ||
314
(rhdPtr->ChipSet == RHD_RV620) ||
315
(rhdPtr->ChipSet == RHD_M72) ||
316
(rhdPtr->ChipSet == RHD_M74) ||
317
(rhdPtr->ChipSet == RHD_M82) ||
318
(rhdPtr->ChipSet == RHD_RS780) ||
319
(rhdPtr->ChipSet == RHD_RV710))
320
cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
321
accel_state->vb_size, accel_state->vb_mc_addr);
323
cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit,
324
accel_state->vb_size, accel_state->vb_mc_addr);
326
/* Vertex buffer setup */
327
/* 8-byte vertices are 2 dwords each; vtx_num_entries is the buffer size in dwords */
vtx_res.id = SQ_VTX_RESOURCE_vs;
328
vtx_res.vtx_size_dw = 8 / 4;
329
vtx_res.vtx_num_entries = accel_state->vb_size / 4;
330
vtx_res.mem_req_size = 1;
331
vtx_res.vb_addr = accel_state->vb_mc_addr;
332
set_vtx_resource (pScrn, accel_state->ib, &vtx_res);
335
/* num_indices = dwords / dwords per vertex = number of queued vertices */
draw_conf.prim_type = DI_PT_RECTLIST;
336
draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
337
draw_conf.num_instances = 1;
338
draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
339
draw_conf.index_type = DI_INDEX_SIZE_16_BIT;
341
draw_auto(pScrn, accel_state->ib, &draw_conf);
343
wait_3d_idle_clean(pScrn, accel_state->ib);
345
/* sync dst surface */
346
cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
347
accel_state->dst_size, accel_state->dst_mc_addr);
349
R600CPFlushIndirect(pScrn, accel_state->ib);
353
/*
 * R600DoPrepareCopy - emit GPU state for one textured copy pass.
 *
 * src_pitch, src_width, src_height, src_offset, src_bpp describe the source,
 * which is bound as a point-sampled 2D texture.  dst_pitch, dst_height,
 * dst_offset, dst_bpp describe the destination, which becomes the render
 * target.  Pitches are used as texture pitch / render target width; the
 * callers in this file pass them in pixels and pass offsets that already
 * include the framebuffer base.  rop indexes RADEON_ROP[]; planemask is
 * tested one byte at a time for the colour write mask.
 *
 * A fresh indirect buffer is fetched on every call and the vertex count is
 * reset, so vertices must be appended after this call.
 */
R600DoPrepareCopy(ScrnInfoPtr pScrn,
354
int src_pitch, int src_width, int src_height, uint32_t src_offset, int src_bpp,
355
int dst_pitch, int dst_height, uint32_t dst_offset, int dst_bpp,
356
int rop, Pixel planemask)
358
RHDPtr rhdPtr = RHDPTR(pScrn);
359
struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
362
tex_resource_t tex_res;
363
tex_sampler_t tex_samp;
364
shader_config_t vs_conf, ps_conf;
372
accel_state->ib = RHDDRMCPBuffer(pScrn->scrnIndex);
375
start_3d(pScrn, accel_state->ib);
377
/* cp_set_surface_sync(pScrn, accel_state->ib); */
379
set_default_state(pScrn, accel_state->ib);
381
/* Scissor / viewport */
382
EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit);
383
EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit);
385
/* GPU addresses of the copy vertex and pixel shaders inside the shader area */
accel_state->vs_mc_addr = rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart + accel_state->shaders->offset +
386
accel_state->copy_vs_offset;
387
accel_state->ps_mc_addr = rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart + accel_state->shaders->offset +
388
accel_state->copy_ps_offset;
389
accel_state->vs_size = 512;
390
accel_state->ps_size = 512;
395
/* Sync the vertex shader's memory range, then configure the vertex shader */
cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
396
accel_state->vs_size, accel_state->vs_mc_addr);
398
vs_conf.shader_addr = accel_state->vs_mc_addr;
399
vs_conf.num_gprs = 2;
400
vs_conf.stack_size = 0;
401
vs_setup (pScrn, accel_state->ib, &vs_conf);
404
/* Same for the pixel shader */
cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
405
accel_state->ps_size, accel_state->ps_mc_addr);
407
ps_conf.shader_addr = accel_state->ps_mc_addr;
408
ps_conf.num_gprs = 1;
409
ps_conf.stack_size = 0;
410
ps_conf.uncached_first_inst = 1;
411
ps_conf.clamp_consts = 0;
412
ps_conf.export_mode = 2;
413
ps_setup (pScrn, accel_state->ib, &ps_conf);
415
/* Remember the source surface in slot 0 of the accel state */
accel_state->src_size[0] = src_pitch * src_height * (src_bpp/8);
416
accel_state->src_mc_addr[0] = src_offset;
417
accel_state->src_pitch[0] = src_pitch;
418
accel_state->src_width[0] = src_width;
419
accel_state->src_height[0] = src_height;
420
accel_state->src_bpp[0] = src_bpp;
422
/* flush texture cache */
423
cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
424
accel_state->src_size[0], accel_state->src_mc_addr[0]);
428
/* Texture resource describing the source surface */
tex_res.w = src_width;
429
tex_res.h = src_height;
430
tex_res.pitch = accel_state->src_pitch[0];
432
tex_res.dim = SQ_TEX_DIM_2D;
433
tex_res.base = accel_state->src_mc_addr[0];
434
tex_res.mip_base = accel_state->src_mc_addr[0];
436
/* Texture format and channel selects depend on the source depth */
tex_res.format = FMT_8;
437
tex_res.dst_sel_x = SQ_SEL_1; /* R */
438
tex_res.dst_sel_y = SQ_SEL_1; /* G */
439
tex_res.dst_sel_z = SQ_SEL_1; /* B */
440
tex_res.dst_sel_w = SQ_SEL_X; /* A */
441
} else if (src_bpp == 16) {
442
tex_res.format = FMT_5_6_5;
443
tex_res.dst_sel_x = SQ_SEL_Z; /* R */
444
tex_res.dst_sel_y = SQ_SEL_Y; /* G */
445
tex_res.dst_sel_z = SQ_SEL_X; /* B */
446
tex_res.dst_sel_w = SQ_SEL_1; /* A */
448
tex_res.format = FMT_8_8_8_8;
449
tex_res.dst_sel_x = SQ_SEL_Z; /* R */
450
tex_res.dst_sel_y = SQ_SEL_Y; /* G */
451
tex_res.dst_sel_z = SQ_SEL_X; /* B */
452
tex_res.dst_sel_w = SQ_SEL_W; /* A */
455
tex_res.request_size = 1;
456
tex_res.base_level = 0;
457
tex_res.last_level = 0;
458
tex_res.perf_modulation = 0;
459
set_tex_resource (pScrn, accel_state->ib, &tex_res);
462
/* Sampler: clamp to the last texel in x and y, point filtering, no mipmaps */
tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
463
tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
464
tex_samp.clamp_z = SQ_TEX_WRAP;
465
tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT;
466
tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT;
467
tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
468
tex_samp.mip_filter = 0; /* no mipmap */
469
set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
473
/* The planemask is examined one byte at a time; pmask is then written to CB_SHADER_MASK */
if (planemask & 0x000000ff)
475
if (planemask & 0x0000ff00)
477
if (planemask & 0x00ff0000)
479
if (planemask & 0xff000000)
481
EREG (accel_state->ib, CB_SHADER_MASK, (pmask << OUTPUT0_ENABLE_shift));
482
EREG (accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit));
483
EREG (accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[rop]);
485
/* Remember the destination surface */
accel_state->dst_size = dst_pitch * dst_height * (dst_bpp/8);
486
accel_state->dst_mc_addr = dst_offset;
487
accel_state->dst_pitch = dst_pitch;
488
accel_state->dst_height = dst_height;
489
accel_state->dst_bpp = dst_bpp;
492
/* Render target: the width is the destination pitch */
cb_conf.w = accel_state->dst_pitch;
493
cb_conf.h = dst_height;
494
cb_conf.base = accel_state->dst_mc_addr;
496
cb_conf.format = COLOR_8;
497
cb_conf.comp_swap = 3; /* A */
498
} else if (dst_bpp == 16) {
499
cb_conf.format = COLOR_5_6_5;
500
cb_conf.comp_swap = 2; /* RGB */
502
cb_conf.format = COLOR_8_8_8_8;
503
cb_conf.comp_swap = 1; /* ARGB */
505
cb_conf.source_format = 1;
506
cb_conf.blend_clamp = 1;
507
set_render_target(pScrn, accel_state->ib, &cb_conf);
509
EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit |
510
(POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) |
511
(POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
512
EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
513
DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
515
/* Interpolator setup */
516
/* export tex coord from VS */
517
EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
518
EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
520
/* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
521
* *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
522
/* input tex coord from VS */
523
EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, ((1 << NUM_INTERP_shift)));
524
EREG(accel_state->ib, SPI_PS_IN_CONTROL_1, 0);
525
/* color semantic id 0 -> GPR[0] */
526
EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) |
527
(0x01 << DEFAULT_VAL_shift) |
529
EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0);
531
/* No vertices queued yet for this pass */
accel_state->vb_index = 0;
536
/*
 * R600DoCopy - emit the draw for the vertices queued by
 * R600AppendCopyVertex().
 *
 * When no vertices are queued the indirect buffer is passed to
 * R600IBDiscard().  Otherwise the vertex data in the second half of the
 * buffer (16 bytes per vertex) is bound as a vertex resource, a RECTLIST
 * auto-index draw is emitted, followed by a 3D idle wait and a sync of the
 * destination surface, and the indirect buffer is handed to
 * R600CPFlushIndirect().
 */
R600DoCopy(ScrnInfoPtr pScrn)
538
RHDPtr rhdPtr = RHDPTR(pScrn);
539
struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
540
draw_config_t draw_conf;
541
vtx_resource_t vtx_res;
546
if (accel_state->vb_index == 0) {
547
R600IBDiscard(pScrn, accel_state->ib);
551
/* GPU address of the vertex data: GART base + this buffer's slot (idx * total) + half a buffer; size is 16 bytes per queued vertex */
accel_state->vb_mc_addr = RHDDRIGetIntGARTLocation(pScrn) +
552
(accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2);
553
accel_state->vb_size = accel_state->vb_index * 16;
555
/* flush vertex cache */
556
/* NOTE(review): the chipsets listed here sync with TC_ACTION_ENA_bit - presumably they fetch vertices through the texture cache; confirm */
if ((rhdPtr->ChipSet == RHD_RV610) ||
557
(rhdPtr->ChipSet == RHD_RV620) ||
558
(rhdPtr->ChipSet == RHD_M72) ||
559
(rhdPtr->ChipSet == RHD_M74) ||
560
(rhdPtr->ChipSet == RHD_M82) ||
561
(rhdPtr->ChipSet == RHD_RS780) ||
562
(rhdPtr->ChipSet == RHD_RV710))
563
cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
564
accel_state->vb_size, accel_state->vb_mc_addr);
566
cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit,
567
accel_state->vb_size, accel_state->vb_mc_addr);
569
/* Vertex buffer setup */
570
/* 16-byte vertices are 4 dwords each; vtx_num_entries is the buffer size in dwords */
vtx_res.id = SQ_VTX_RESOURCE_vs;
571
vtx_res.vtx_size_dw = 16 / 4;
572
vtx_res.vtx_num_entries = accel_state->vb_size / 4;
573
vtx_res.mem_req_size = 1;
574
vtx_res.vb_addr = accel_state->vb_mc_addr;
575
set_vtx_resource (pScrn, accel_state->ib, &vtx_res);
577
/* num_indices = dwords / dwords per vertex = number of queued vertices */
draw_conf.prim_type = DI_PT_RECTLIST;
578
draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
579
draw_conf.num_instances = 1;
580
draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
581
draw_conf.index_type = DI_INDEX_SIZE_16_BIT;
583
draw_auto(pScrn, accel_state->ib, &draw_conf);
585
wait_3d_idle_clean(pScrn, accel_state->ib);
587
/* sync dst surface */
588
cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
589
accel_state->dst_size, accel_state->dst_mc_addr);
591
R600CPFlushIndirect(pScrn, accel_state->ib);
595
/*
 * R600AppendCopyVertex - queue one copy rectangle as three vertices.
 *
 * Each vertex is four floats (16 bytes) holding a destination position
 * followed by the matching source position; the third vertex is the
 * bottom-right corner (dstX + w, dstY + h, srcX + w, srcY + h).  Vertex data
 * lives in the second half of the current indirect buffer and
 * accel_state->vb_index counts the vertices queued so far.
 */
R600AppendCopyVertex(ScrnInfoPtr pScrn,
600
RHDPtr rhdPtr = RHDPTR(pScrn);
601
struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
604
/* Three more 16-byte vertices would not fit in the vertex half of the buffer: restart the count in a freshly fetched indirect buffer */
if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) {
606
accel_state->vb_index = 0;
607
accel_state->ib = RHDDRMCPBuffer(pScrn->scrnIndex);
610
/* Address of the next free vertex slot */
vb = (pointer)((char*)accel_state->ib->address +
611
(accel_state->ib->total / 2) +
612
accel_state->vb_index * 16);
620
vb[5] = (float)(dstY + h);
622
vb[7] = (float)(srcY + h);
624
vb[8] = (float)(dstX + w);
625
vb[9] = (float)(dstY + h);
626
vb[10] = (float)(srcX + w);
627
vb[11] = (float)(srcY + h);
629
accel_state->vb_index += 3;
633
/*
 * R600PrepareCopy - prepare a pixmap-to-pixmap copy from pSrc to pDst.
 *
 * Records pitch (in pixels), GPU address, dimensions and depth of both
 * pixmaps, plus rop and planemask, in accel_state.  Source and destination
 * are treated as the same surface when their EXA pixmap offsets are equal;
 * in that case an offscreen scratch area the size of the destination is
 * requested for use by R600Copy().  R600DoPrepareCopy() programs the
 * hardware state.
 */
R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst,
638
ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
639
RHDPtr rhdPtr = RHDPTR(pScrn);
640
struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
642
/* Pitches in pixels: byte pitch / bytes per pixel */
accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
643
accel_state->src_pitch[0] = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
645
accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
646
accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
648
accel_state->src_width[0] = pSrc->drawable.width;
649
accel_state->src_height[0] = pSrc->drawable.height;
650
accel_state->src_bpp[0] = pSrc->drawable.bitsPerPixel;
651
accel_state->dst_height = pDst->drawable.height;
652
accel_state->dst_bpp = pDst->drawable.bitsPerPixel;
655
/* Surface constraints: pitch not a multiple of 8 pixels, base address not 256-byte aligned, and 24bpp are each singled out for both pixmaps */
if (accel_state->src_pitch[0] & 7)
657
if (accel_state->dst_pitch & 7)
661
if (accel_state->src_mc_addr[0] & 0xff)
663
if (accel_state->dst_mc_addr & 0xff)
666
if (pSrc->drawable.bitsPerPixel == 24)
668
if (pDst->drawable.bitsPerPixel == 24)
674
ErrorF("src: %dx%d @ %dbpp, 0x%08x\n", pSrc->drawable.width, pSrc->drawable.height,
675
pSrc->drawable.bitsPerPixel, exaGetPixmapPitch(pSrc));
676
ErrorF("dst: %dx%d @ %dbpp, 0x%08x\n", pDst->drawable.width, pDst->drawable.height,
677
pDst->drawable.bitsPerPixel, exaGetPixmapPitch(pDst));
680
/* Kept for the per-rectangle R600DoPrepareCopy() calls made later */
accel_state->rop = rop;
681
accel_state->planemask = planemask;
683
/* Same EXA offset means source and destination are the same surface */
if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst)) {
684
unsigned long size = pDst->drawable.height * accel_state->dst_pitch * pDst->drawable.bitsPerPixel/8;
685
accel_state->same_surface = TRUE;
687
/* Release any scratch area still held before requesting a new 256-byte aligned one */
if (accel_state->copy_area) {
688
exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
689
accel_state->copy_area = NULL;
691
accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL);
694
accel_state->same_surface = FALSE;
696
R600DoPrepareCopy(pScrn,
697
accel_state->src_pitch[0], pSrc->drawable.width, pSrc->drawable.height,
698
accel_state->src_mc_addr[0], pSrc->drawable.bitsPerPixel,
699
accel_state->dst_pitch, pDst->drawable.height,
700
accel_state->dst_mc_addr, pDst->drawable.bitsPerPixel,
709
/*
 * is_overlap - test whether a source rectangle touches a destination one.
 *
 * Note the argument order: x1, x2, y1, y2 for the source, then the same for
 * the destination.  The test checks whether any of the four source corners
 * lies inside the destination rectangle, bounds inclusive, so rectangles
 * that merely share an edge also match.
 * NOTE(review): a corner test alone does not detect every overlap between
 * rectangles of different sizes; the callers in this file pass two
 * rectangles of identical width and height.
 */
is_overlap(int sx1, int sx2, int sy1, int sy2, int dx1, int dx2, int dy1, int dy2)
711
if (((sx1 >= dx1) && (sx1 <= dx2) && (sy1 >= dy1) && (sy1 <= dy2)) || /* TL x1, y1 */
712
((sx2 >= dx1) && (sx2 <= dx2) && (sy1 >= dy1) && (sy1 <= dy2)) || /* TR x2, y1 */
713
((sx1 >= dx1) && (sx1 <= dx2) && (sy2 >= dy1) && (sy2 <= dy2)) || /* BL x1, y2 */
714
((sx2 >= dx1) && (sx2 <= dx2) && (sy2 >= dy1) && (sy2 <= dy2))) /* BR x2, y2 */
721
/*
 * R600OverlapCopy - copy a w x h rectangle within pDst from (srcX, srcY) to
 * (dstX, dstY) when source and destination may overlap.
 *
 * Overlapping copies are split into strips: hchunk / vchunk are the
 * horizontal / vertical distances between source and destination, and the
 * strips are issued in an order chosen from the direction of the move.
 * A diagonal move is first reduced to a purely horizontal or purely vertical
 * one.  Every strip gets its own R600DoPrepareCopy() call using the rop and
 * planemask saved in accel_state.
 *
 * NOTE(review): the up/down loops clamp vchunk for the final strip, but no
 * corresponding clamp of hchunk is visible in the left/right loops.  If w is
 * not a multiple of hchunk the last horizontal strip would extend past the
 * requested rectangle - confirm and clamp if so.
 */
R600OverlapCopy(PixmapPtr pDst,
726
ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
727
RHDPtr rhdPtr = RHDPTR(pScrn);
728
struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
729
uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
730
uint32_t dst_offset = exaGetPixmapOffset(pDst) + rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
731
int i, hchunk, vchunk;
733
if (is_overlap(srcX, srcX + w, srcY, srcY + h,
734
dstX, dstX + w, dstY, dstY + h)) {
735
/* Calculate height/width of non-overlapping area */
736
hchunk = (srcX < dstX) ? (dstX - srcX) : (srcX - dstX);
737
vchunk = (srcY < dstY) ? (dstY - srcY) : (srcY - dstY);
739
/* Diagonally offset overlap is reduced to either horizontal or vertical offset-only
740
* by copying a part of the non-overlapping portion, then adjusting coordinates
741
* Choose horizontal vs vertical to minimize the total number of copy operations
743
if (vchunk != 0 && hchunk != 0) { /* diagonal */
744
if ((w / hchunk) <= (h / vchunk)) { /* reduce to horizontal */
745
if (srcY > dstY ) { /* diagonal up */
746
R600DoPrepareCopy(pScrn,
747
dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
748
dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
749
accel_state->rop, accel_state->planemask);
750
R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, vchunk);
753
/* The top vchunk rows have been queued; continue below them */
srcY = srcY + vchunk;
754
dstY = dstY + vchunk;
755
} else { /* diagonal down */
756
R600DoPrepareCopy(pScrn,
757
dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
758
dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
759
accel_state->rop, accel_state->planemask);
760
R600AppendCopyVertex(pScrn, srcX, srcY + h - vchunk, dstX, dstY + h - vchunk, w, vchunk);
765
} else { /* reduce to vertical */
766
if (srcX > dstX ) { /* diagonal left */
767
R600DoPrepareCopy(pScrn,
768
dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
769
dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
770
accel_state->rop, accel_state->planemask);
771
R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, hchunk, h);
774
/* The left hchunk columns have been queued; continue to their right */
srcX = srcX + hchunk;
775
dstX = dstX + hchunk;
776
} else { /* diagonal right */
777
R600DoPrepareCopy(pScrn,
778
dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
779
dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
780
accel_state->rop, accel_state->planemask);
781
R600AppendCopyVertex(pScrn, srcX + w - hchunk, srcY, dstX + w - hchunk, dstY, hchunk, h);
789
if (vchunk == 0) { /* left/right */
790
if (srcX < dstX) { /* right */
791
/* copy right to left */
792
for (i = w; i > 0; i -= hchunk) {
793
R600DoPrepareCopy(pScrn,
794
dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
795
dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
796
accel_state->rop, accel_state->planemask);
797
R600AppendCopyVertex(pScrn, srcX + i - hchunk, srcY, dstX + i - hchunk, dstY, hchunk, h);
801
/* copy left to right */
802
for (i = 0; i < w; i += hchunk) {
803
R600DoPrepareCopy(pScrn,
804
dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
805
dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
806
accel_state->rop, accel_state->planemask);
808
R600AppendCopyVertex(pScrn, srcX + i, srcY, dstX + i, dstY, hchunk, h);
812
} else { /* up/down */
813
if (srcY > dstY) { /* up */
814
/* copy top to bottom */
815
for (i = 0; i < h; i += vchunk) {
816
R600DoPrepareCopy(pScrn,
817
dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
818
dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
819
accel_state->rop, accel_state->planemask);
821
/* Final strip: shrink to the rows that remain */
if (vchunk > h - i) vchunk = h - i;
822
R600AppendCopyVertex(pScrn, srcX, srcY + i, dstX, dstY + i, w, vchunk);
826
/* copy bottom to top */
827
for (i = h; i > 0; i -= vchunk) {
828
R600DoPrepareCopy(pScrn,
829
dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
830
dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
831
accel_state->rop, accel_state->planemask);
833
/* Final strip: shrink to the rows that remain */
if (vchunk > i) vchunk = i;
834
R600AppendCopyVertex(pScrn, srcX, srcY + i - vchunk, dstX, dstY + i - vchunk, w, vchunk);
840
/* Single-pass copy of the whole rectangle */
R600DoPrepareCopy(pScrn,
841
dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
842
dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
843
accel_state->rop, accel_state->planemask);
845
R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
851
/*
 * R600Copy - copy one w x h rectangle from (srcX, srcY) to (dstX, dstY).
 *
 * Uses the state recorded by R600PrepareCopy().  Cases handled:
 *  - same surface, identical source and destination position: special-cased;
 *  - same surface, overlapping rectangles: with a scratch area the rectangle
 *    is copied into the scratch surface and then back to the destination;
 *    R600OverlapCopy() provides a strip-based alternative;
 *  - same surface, no overlap: hardware state is programmed for this
 *    rectangle and its vertices are queued;
 *  - otherwise the vertices are queued directly.
 */
R600Copy(PixmapPtr pDst,
856
ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
857
RHDPtr rhdPtr = RHDPTR(pScrn);
858
struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
860
if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
863
if (accel_state->same_surface && is_overlap(srcX, srcX + w, srcY, srcY + h, dstX, dstX + w, dstY, dstY + h)) {
864
if (accel_state->copy_area) {
865
uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
866
uint32_t orig_offset, tmp_offset;
868
/* GPU addresses of the scratch surface and of the real destination */
tmp_offset = accel_state->copy_area->offset + rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
869
orig_offset = exaGetPixmapOffset(pDst) + rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
871
/* Pass 1: pixmap -> scratch, already placed at the destination coordinates */
R600DoPrepareCopy(pScrn,
872
pitch, pDst->drawable.width, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel,
873
pitch, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel,
874
accel_state->rop, accel_state->planemask);
875
R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
877
/* Pass 2: scratch -> pixmap at the same coordinates */
R600DoPrepareCopy(pScrn,
878
pitch, pDst->drawable.width, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel,
879
pitch, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel,
880
accel_state->rop, accel_state->planemask);
881
R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
884
/* Strip-based overlapping copy that needs no scratch surface */
R600OverlapCopy(pDst, srcX, srcY, dstX, dstY, w, h);
885
} else if (accel_state->same_surface) {
886
uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
887
uint32_t offset = exaGetPixmapOffset(pDst) + rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
889
R600DoPrepareCopy(pScrn,
890
pitch, pDst->drawable.width, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel,
891
pitch, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel,
892
accel_state->rop, accel_state->planemask);
893
R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
896
R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
902
/*
 * R600DoneCopy - finish a copy operation started by R600PrepareCopy().
 *
 * Distinguishes the case where source and destination are different
 * surfaces, and releases the offscreen scratch area if one is still held.
 */
R600DoneCopy(PixmapPtr pDst)
904
ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
905
RHDPtr rhdPtr = RHDPTR(pScrn);
906
struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
908
if (!accel_state->same_surface)
911
/* Give the scratch area back to EXA's offscreen allocator */
if (accel_state->copy_area) {
912
exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
913
accel_state->copy_area = NULL;
917
/*
 * Debug switches and the fallback helper.
 *
 * RADEON_FALLBACK(x) makes the calling function return FALSE.  When
 * RADEON_TRACE_FALL is non-zero the variant that also prints the calling
 * function's name through ErrorF is selected.  Callers pass x as a
 * parenthesised printf-style argument list, e.g.
 * RADEON_FALLBACK(("fmt %d\n", value)).
 */
#define RADEON_TRACE_FALL 0
918
#define RADEON_TRACE_DRAW 0
920
#if RADEON_TRACE_FALL
921
#define RADEON_FALLBACK(x) \
923
ErrorF("%s: ", __FUNCTION__); \
928
#define RADEON_FALLBACK(x) return FALSE
931
/* Convert a 16.16 fixed-point value to float by dividing by 2^16. */
#define xFixedToFloat(f) ((float)(f) / 65536.0f)
933
/*
 * transformPoint - apply a picture transform to a fixed-point point in place.
 *
 * The point is loaded into a vector as (x, y, xFixed1), passed through
 * PictureTransformPoint(), and the resulting first two components are stored
 * back into *point.
 */
static inline void transformPoint(PictTransform *transform, xPointFixed *point)
936
v.vector[0] = point->x;
937
v.vector[1] = point->y;
938
v.vector[2] = xFixed1;
939
PictureTransformPoint(transform, &v);
940
point->x = v.vector[0];
941
point->y = v.vector[1];
950
/*
 * Blend state per compositing operator, indexed by the op argument of
 * R600GetBlendCntl().  Each entry carries two flags, read elsewhere as
 * .dst_alpha and .src_alpha, and the source/destination blend factors packed
 * into .blend_cntl.
 * NOTE(review): the initialiser order looks like {dst_alpha, src_alpha,
 * blend_cntl}, and the entries presumably follow the Render PictOp numbering
 * starting at PictOpClear - confirm against struct blendinfo and render.h.
 */
static struct blendinfo R600BlendOp[] = {
952
{0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
954
{0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
956
{0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
958
{0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
960
{1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
962
{1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
964
{0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
966
{1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
968
{0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
970
{1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
972
{1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
974
{1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
976
{0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
984
/*
 * Picture formats accepted as composite textures, each paired with the
 * hardware texture format used for it.  R600CheckCompositeTexture() searches
 * this table by the first member (.fmt).
 */
static struct formatinfo R600TexFormats[] = {
985
{PICT_a8r8g8b8, FMT_8_8_8_8},
986
{PICT_x8r8g8b8, FMT_8_8_8_8},
987
{PICT_a8b8g8r8, FMT_8_8_8_8},
988
{PICT_x8b8g8r8, FMT_8_8_8_8},
989
{PICT_r5g6b5, FMT_5_6_5},
990
{PICT_a1r5g5b5, FMT_1_5_5_5},
991
{PICT_x1r5g5b5, FMT_1_5_5_5},
995
/*
 * R600GetBlendCntl - compute the source/destination blend factor fields for
 * a compositing operator.
 *
 * op:         index into R600BlendOp[]
 * pMask:      mask picture, may be NULL
 * dst_format: picture format of the destination
 *
 * Starts from R600BlendOp[op].blend_cntl and applies two adjustments:
 *  - if the destination format has no alpha bits and the operator reads
 *    destination alpha, a DST_ALPHA source factor becomes ONE and a
 *    ONE_MINUS_DST_ALPHA source factor becomes ZERO;
 *  - if a component-alpha mask is present and the operator reads source
 *    alpha, a SRC_ALPHA destination factor becomes SRC_COLOR and a
 *    ONE_MINUS_SRC_ALPHA destination factor becomes ONE_MINUS_SRC_COLOR.
 *
 * Returns the two fields OR-ed together.
 */
static uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
997
uint32_t sblend, dblend;
999
sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask;
1000
dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask;
1002
/* If there's no dst alpha channel, adjust the blend op so that we'll treat
1005
if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) {
1006
if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
1007
sblend = (BLEND_ONE << COLOR_SRCBLEND_shift);
1008
else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
1009
sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift);
1012
/* If the source alpha is being used, then we should only be in a case where
1013
* the source blend factor is 0, and the source blend value is the mask
1014
* channels multiplied by the source picture's alpha.
1016
if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) {
1017
if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
1018
dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
1019
} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
1020
dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
1024
return sblend | dblend;
1027
/*
 * R600GetDestFormat - map a destination picture's format to a colour buffer
 * format.
 *
 * On a supported format *dst_format is set to one of COLOR_8_8_8_8,
 * COLOR_5_6_5, COLOR_1_5_5_5 or COLOR_8.  Any other format goes through
 * RADEON_FALLBACK, which returns FALSE from this function.
 */
static Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
1029
switch (pDstPicture->format) {
1032
*dst_format = COLOR_8_8_8_8;
1035
*dst_format = COLOR_5_6_5;
1039
*dst_format = COLOR_1_5_5_5;
1042
*dst_format = COLOR_8;
1045
RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
1046
(int)pDstPicture->format));
1051
/* Decide whether pPict can be used as a texture for a composite operation.
 *
 * Falls back (via RADEON_FALLBACK) when the drawable is larger than the
 * texture limits, when the picture format is not listed in R600TexFormats,
 * when the filter is neither nearest nor bilinear, or for a transformed,
 * non-repeating picture without alpha bits unless the operator is
 * PictOpSrc/PictOpClear and the destination has no alpha bits either.
 *
 * NOTE(review): pPict->pDrawable is dereferenced in the initializers below
 * without a NULL check - confirm callers never pass a picture that has no
 * drawable. */
static Bool R600CheckCompositeTexture(PicturePtr pPict,
1052
PicturePtr pDstPict,
1056
int w = pPict->pDrawable->width;
1057
int h = pPict->pDrawable->height;
1059
int max_tex_w, max_tex_h;
1064
if ((w > max_tex_w) || (h > max_tex_h))
1065
RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
1067
/* linear search of the supported-format table */
for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
1068
if (R600TexFormats[i].fmt == pPict->format)
1071
/* i reached the element count: no table entry matched */
if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0]))
1072
RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
1073
(int)pPict->format));
1075
if (pPict->filter != PictFilterNearest &&
1076
pPict->filter != PictFilterBilinear)
1077
RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
1079
/* for REPEAT_NONE, Render semantics are that sampling outside the source
1080
* picture results in alpha=0 pixels. We can implement this with a border color
1081
* *if* our source texture has an alpha channel, otherwise we need to fall
1082
* back. If we're not transformed then we hope that upper layers have clipped
1083
* rendering to the bounds of the source drawable, in which case it doesn't
1084
* matter. I have not, however, verified that the X server always does such
1088
if (pPict->transform != 0 && !pPict->repeat && PICT_FORMAT_A(pPict->format) == 0) {
1089
if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
1090
RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
1096
/* Program texture unit `unit` to sample from pixmap pPix as described by
 * picture pPict.
 *
 * Records pitch, size and memory address for the unit in accel_state,
 * rejects (RADEON_FALLBACK) a pitch that is not a multiple of 8 or an
 * address that is not 256-byte aligned, looks up the texture format in
 * R600TexFormats, emits a texture-cache sync, fills in and submits the
 * texture resource (including per-format component swizzles) and the
 * sampler (clamp mode from the repeat type, filter from pPict->filter), and
 * finally records whether the picture carries a transform.
 *
 * The alignment checks test the entry for `unit`, i.e. the values computed
 * just above and printed in the fallback message, rather than always
 * testing the entry for unit 1. */
static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
1099
ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
1100
RHDPtr rhdPtr = RHDPTR(pScrn);
1101
struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
1102
int w = pPict->pDrawable->width;
1103
int h = pPict->pDrawable->height;
1105
tex_resource_t tex_res;
1106
tex_sampler_t tex_samp;
1107
int pix_r, pix_g, pix_b, pix_a;
1112
/* pitch is stored in pixels, size in bytes */
accel_state->src_pitch[unit] = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
1113
accel_state->src_size[unit] = exaGetPixmapPitch(pPix) * h;
1114
accel_state->src_mc_addr[unit] = exaGetPixmapOffset(pPix) + rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
1116
/* validate this unit's pitch (multiple of 8 pixels) */
if (accel_state->src_pitch[unit] & 7)
1117
RADEON_FALLBACK(("Bad pitch %d 0x%x\n", (int)accel_state->src_pitch[unit], unit));
1119
/* validate this unit's address (256-byte aligned) */
if (accel_state->src_mc_addr[unit] & 0xff)
1120
RADEON_FALLBACK(("Bad offset %d 0x%x\n", (int)accel_state->src_mc_addr[unit], unit));
1122
/* find the table entry for this picture format; i is used below */
for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
1123
if (R600TexFormats[i].fmt == pPict->format)
1127
accel_state->texW[unit] = w;
1128
accel_state->texH[unit] = h;
1130
/* ErrorF("Tex %d setup %dx%d\n", unit, w, h); */
1132
/* flush texture cache */
cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
1134
accel_state->src_size[unit], accel_state->src_mc_addr[unit]);
1140
tex_res.pitch = accel_state->src_pitch[unit];
1142
tex_res.dim = SQ_TEX_DIM_2D;
1143
tex_res.base = accel_state->src_mc_addr[unit];
1144
tex_res.mip_base = accel_state->src_mc_addr[unit];
1145
tex_res.format = R600TexFormats[i].card_fmt;
1146
tex_res.request_size = 1;
1148
/* component swizzles */
switch (pPict->format) {
1152
pix_r = SQ_SEL_Z; /* R */
1153
pix_g = SQ_SEL_Y; /* G */
1154
pix_b = SQ_SEL_X; /* B */
1155
pix_a = SQ_SEL_W; /* A */
1158
pix_r = SQ_SEL_X; /* R */
1159
pix_g = SQ_SEL_Y; /* G */
1160
pix_b = SQ_SEL_Z; /* B */
1161
pix_a = SQ_SEL_W; /* A */
1164
pix_r = SQ_SEL_X; /* R */
1165
pix_g = SQ_SEL_Y; /* G */
1166
pix_b = SQ_SEL_Z; /* B */
1167
pix_a = SQ_SEL_1; /* A */
1172
pix_r = SQ_SEL_Z; /* R */
1173
pix_g = SQ_SEL_Y; /* G */
1174
pix_b = SQ_SEL_X; /* B */
1175
pix_a = SQ_SEL_1; /* A */
1178
pix_r = SQ_SEL_0; /* R */
1179
pix_g = SQ_SEL_0; /* G */
1180
pix_b = SQ_SEL_0; /* B */
1181
pix_a = SQ_SEL_X; /* A */
1184
RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
1188
/* further swizzle adjustment, keyed on mask / component-alpha / source-alpha
 * state and on which channels (RGB, A) the picture format provides */
if (!accel_state->has_mask) {
1189
if (PICT_FORMAT_RGB(pPict->format) == 0) {
1195
if (PICT_FORMAT_A(pPict->format) == 0)
1198
if (accel_state->component_alpha) {
1199
if (accel_state->src_alpha) {
1200
if (PICT_FORMAT_A(pPict->format) == 0) {
1211
if (PICT_FORMAT_A(pPict->format) == 0)
1215
if (PICT_FORMAT_RGB(pPict->format) == 0) {
1221
if (PICT_FORMAT_A(pPict->format) == 0)
1226
if (accel_state->component_alpha) {
1227
if (PICT_FORMAT_A(pPict->format) == 0)
1230
if (PICT_FORMAT_A(pPict->format) == 0) {
1243
tex_res.dst_sel_x = pix_r; /* R */
1244
tex_res.dst_sel_y = pix_g; /* G */
1245
tex_res.dst_sel_z = pix_b; /* B */
1246
tex_res.dst_sel_w = pix_a; /* A */
1248
tex_res.base_level = 0;
1249
tex_res.last_level = 0;
1250
tex_res.perf_modulation = 0;
1251
set_tex_resource (pScrn, accel_state->ib, &tex_res);
1254
tex_samp.border_color = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1256
/* clamp mode follows the picture's repeat type; border clamp otherwise */
if (pPict->repeat) {
1257
switch (pPict->repeatType) {
1259
tex_samp.clamp_x = SQ_TEX_WRAP;
1260
tex_samp.clamp_y = SQ_TEX_WRAP;
1263
tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
1264
tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
1267
tex_samp.clamp_x = SQ_TEX_MIRROR;
1268
tex_samp.clamp_y = SQ_TEX_MIRROR;
1271
tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER;
1272
tex_samp.clamp_y = SQ_TEX_CLAMP_BORDER;
1275
RADEON_FALLBACK(("Bad repeat 0x%x\n", pPict->repeatType));
1278
tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER;
1279
tex_samp.clamp_y = SQ_TEX_CLAMP_BORDER;
1282
switch (pPict->filter) {
1283
case PictFilterNearest:
1284
tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT;
1285
tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT;
1287
case PictFilterBilinear:
1288
tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR;
1289
tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR;
1292
RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1295
tex_samp.clamp_z = SQ_TEX_WRAP;
1296
tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
1297
tex_samp.mip_filter = 0; /* no mipmap */
1298
set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
1300
/* remember the transform so R600Composite can apply it per vertex */
if (pPict->transform != 0) {
1301
accel_state->is_transform[unit] = TRUE;
1302
accel_state->transform[unit] = pPict->transform;
1304
accel_state->is_transform[unit] = FALSE;
1309
/* Check whether a composite request can be handled here.
 *
 * Falls back (RADEON_FALLBACK) when `op` is outside the R600BlendOp table,
 * when the source, destination or mask pixmap reaches the size limits, or
 * when a component-alpha mask is combined with an operator that reads
 * source alpha and has a non-zero source blend factor. The source and mask
 * pictures are then validated with R600CheckCompositeTexture and the
 * destination format with R600GetDestFormat. */
static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1310
PicturePtr pDstPicture)
1313
/* ScreenPtr pScreen = pDstPicture->pDrawable->pScreen; */
1314
PixmapPtr pSrcPixmap, pDstPixmap;
1315
/* ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; */
1316
/* RHDPtr rhdPtr = RHDPTR(pScrn); */
1317
int max_tex_w, max_tex_h, max_dst_w, max_dst_h;
1319
/* Check for unsupported compositing operations. */
1320
if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
1321
RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1323
pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1330
/* note: uses >=, so a pixmap exactly at the limit is also rejected */
if (pSrcPixmap->drawable.width >= max_tex_w ||
1331
pSrcPixmap->drawable.height >= max_tex_h) {
1332
RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1333
pSrcPixmap->drawable.width,
1334
pSrcPixmap->drawable.height));
1337
pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1339
if (pDstPixmap->drawable.width >= max_dst_w ||
1340
pDstPixmap->drawable.height >= max_dst_h) {
1341
RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1342
pDstPixmap->drawable.width,
1343
pDstPixmap->drawable.height));
1347
PixmapPtr pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1349
if (pMaskPixmap->drawable.width >= max_tex_w ||
1350
pMaskPixmap->drawable.height >= max_tex_h) {
1351
RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1352
pMaskPixmap->drawable.width,
1353
pMaskPixmap->drawable.height));
1356
if (pMaskPicture->componentAlpha) {
1357
/* Check if it's component alpha that relies on a source alpha and
1358
* on the source value. We can only get one of those into the
1359
* single source value that we get to blend with.
1361
if (R600BlendOp[op].src_alpha &&
1362
(R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1363
(BLEND_ZERO << COLOR_SRCBLEND_shift)) {
1364
RADEON_FALLBACK(("Component alpha not supported with source "
1365
"alpha and source value blending.\n"));
1369
if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1373
if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1376
if (!R600GetDestFormat(pDstPicture, &tmp1))
1383
/* Set up the 3D engine state for a composite operation.
 *
 * Records mask / component-alpha / source-alpha state and the destination
 * address, pitch and size in accel_state, validates destination alignment
 * and format, obtains a command buffer, and emits: default state, the
 * source (and optional mask) texture setup, vertex and pixel shader setup,
 * blend control, the render target description and the interpolator
 * configuration. Resets accel_state->vb_index to 0 so R600Composite starts
 * filling the vertex buffer from the beginning. */
static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
1384
PicturePtr pMaskPicture, PicturePtr pDstPicture,
1385
PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1387
ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1388
RHDPtr rhdPtr = RHDPTR(pScrn);
1389
struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
1390
uint32_t blendcntl, dst_format;
1391
cb_config_t cb_conf;
1392
shader_config_t vs_conf, ps_conf;
1397
/* remember mask-related state; R600TextureSetup, R600Composite and
 * R600DoneComposite read these flags */
accel_state->has_mask = TRUE;
1398
if (pMaskPicture->componentAlpha) {
1399
accel_state->component_alpha = TRUE;
1400
if (R600BlendOp[op].src_alpha)
1401
accel_state->src_alpha = TRUE;
1403
accel_state->src_alpha = FALSE;
1405
accel_state->component_alpha = FALSE;
1406
accel_state->src_alpha = FALSE;
1409
accel_state->has_mask = FALSE;
1410
accel_state->component_alpha = FALSE;
1411
accel_state->src_alpha = FALSE;
1414
/* destination address, pitch (in pixels) and size (in bytes) */
accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
1415
accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1416
accel_state->dst_size = exaGetPixmapPitch(pDst) * pDst->drawable.height;
1418
if (accel_state->dst_pitch & 7)
1419
RADEON_FALLBACK(("Bad dst pitch 0x%x\n", (int)accel_state->dst_pitch));
1421
if (accel_state->dst_mc_addr & 0xff)
1422
RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)accel_state->dst_mc_addr));
1424
if (!R600GetDestFormat(pDstPicture, &dst_format))
1431
/* all checks passed: obtain a command buffer and start emitting state */
accel_state->ib = RHDDRMCPBuffer(pScrn->scrnIndex);
1434
start_3d(pScrn, accel_state->ib);
1436
/* cp_set_surface_sync(pScrn, accel_state->ib); */
1438
set_default_state(pScrn, accel_state->ib);
1440
/* Scissor / viewport */
1441
EREG (accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit);
1442
EREG (accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit);
1444
/* texture setup can still fail; the buffer obtained above is discarded */
if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
1445
R600IBDiscard(pScrn, accel_state->ib);
1449
if (pMask != NULL) {
1450
if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
1451
R600IBDiscard(pScrn, accel_state->ib);
1455
accel_state->is_transform[1] = FALSE;
1458
/* VS boolean constant bit 0 is set together with the mask pixel shader
 * and cleared together with the plain composite pixel shader */
set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
1459
accel_state->ps_mc_addr = rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart + accel_state->shaders->offset +
1460
accel_state->comp_mask_ps_offset;
1462
set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
1463
accel_state->ps_mc_addr = rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart + accel_state->shaders->offset +
1464
accel_state->comp_ps_offset;
1467
accel_state->vs_mc_addr = rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart + accel_state->shaders->offset +
1468
accel_state->comp_vs_offset;
1470
accel_state->vs_size = 512;
1471
accel_state->ps_size = 512;
1475
/* flush SQ cache */
1476
cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
1477
accel_state->vs_size, accel_state->vs_mc_addr);
1479
vs_conf.shader_addr = accel_state->vs_mc_addr;
1480
vs_conf.num_gprs = 3;
1481
vs_conf.stack_size = 1;
1482
vs_setup (pScrn, accel_state->ib, &vs_conf);
1484
/* flush SQ cache */
1485
cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
1486
accel_state->ps_size, accel_state->ps_mc_addr);
1488
ps_conf.shader_addr = accel_state->ps_mc_addr;
1489
ps_conf.num_gprs = 3;
1490
ps_conf.stack_size = 0;
1491
ps_conf.uncached_first_inst = 1;
1492
ps_conf.clamp_consts = 0;
1493
ps_conf.export_mode = 2;
1494
ps_setup (pScrn, accel_state->ib, &ps_conf);
1496
EREG (accel_state->ib, CB_SHADER_MASK, (0xf << OUTPUT0_ENABLE_shift));
1497
EREG (accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit));
1499
blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
1501
/* RHD_R600 writes CB_BLEND_CONTROL; the other path sets PER_MRT_BLEND_bit
 * and writes CB_BLEND0_CONTROL */
if (rhdPtr->ChipSet == RHD_R600) {
1502
/* no per-MRT blend on R600 */
1503
EREG (accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift));
1504
EREG (accel_state->ib, CB_BLEND_CONTROL, blendcntl);
1506
EREG (accel_state->ib, CB_COLOR_CONTROL, (RADEON_ROP[3] |
1507
(1 << TARGET_BLEND_ENABLE_shift) |
1508
PER_MRT_BLEND_bit));
1509
EREG (accel_state->ib, CB_BLEND0_CONTROL, blendcntl);
1513
/* render target description; width is given as the pitch in pixels */
cb_conf.w = accel_state->dst_pitch;
1514
cb_conf.h = pDst->drawable.height;
1515
cb_conf.base = accel_state->dst_mc_addr;
1516
cb_conf.format = dst_format;
1518
switch (pDstPicture->format) {
1524
cb_conf.comp_swap = 1; /* ARGB */
1527
cb_conf.comp_swap = 2; /* RGB */
1530
cb_conf.comp_swap = 3; /* A */
1533
cb_conf.source_format = 1;
1534
cb_conf.blend_clamp = 1;
1535
set_render_target(pScrn, accel_state->ib, &cb_conf);
1537
EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit |
1538
(POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) |
1539
(POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
1540
EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
1541
DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
1543
/* Interpolator setup */
1545
/* export 2 tex coords from VS */
1546
EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((2 - 1) << VS_EXPORT_COUNT_shift));
1547
/* src = semantic id 0; mask = semantic id 1 */
1548
EREG(accel_state->ib, SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
1549
(1 << SEMANTIC_1_shift)));
1550
/* input 2 tex coords from VS */
1551
EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (2 << NUM_INTERP_shift));
1553
/* export 1 tex coords from VS */
1554
EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
1555
/* src = semantic id 0 */
1556
EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
1557
/* input 1 tex coords from VS */
1558
EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (1 << NUM_INTERP_shift));
1560
EREG(accel_state->ib, SPI_PS_IN_CONTROL_1, 0);
1561
/* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
1562
EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) |
1563
(0x01 << DEFAULT_VAL_shift) |
1565
/* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
1566
EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (1 <<2), ((1 << SEMANTIC_shift) |
1567
(0x01 << DEFAULT_VAL_shift) |
1569
EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0);
1571
/* no vertices queued yet */
accel_state->vb_index = 0;
1576
/* Queue one composite rectangle.
 *
 * Computes the source (and, with a mask, the mask) texture coordinates of
 * three corners of the rectangle - top-left, bottom-left, bottom-right -
 * applies the per-unit transform when one was recorded by R600TextureSetup,
 * normalises the coordinates by the texture size, and appends three
 * vertices to the vertex buffer kept in the second half of the current
 * command buffer. Each vertex is 24 bytes with a mask (dst xy, src st,
 * mask st) and 16 bytes without (dst xy, src st).
 *
 * Before writing, both paths check that three more vertices still fit in
 * that half of the buffer; if not, the queued work is flushed through
 * R600DoneComposite and a fresh buffer is obtained. The unmasked path
 * previously wrote without this check. */
static void R600Composite(PixmapPtr pDst,
1578
int maskX, int maskY,
1582
ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1583
RHDPtr rhdPtr = RHDPTR(pScrn);
1584
struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
1586
xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight;
1588
/* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
1589
srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
1591
srcTopLeft.x = IntToxFixed(srcX);
1592
srcTopLeft.y = IntToxFixed(srcY);
1593
srcTopRight.x = IntToxFixed(srcX + w);
1594
srcTopRight.y = IntToxFixed(srcY);
1595
srcBottomLeft.x = IntToxFixed(srcX);
1596
srcBottomLeft.y = IntToxFixed(srcY + h);
1597
srcBottomRight.x = IntToxFixed(srcX + w);
1598
srcBottomRight.y = IntToxFixed(srcY + h);
1600
/* XXX do transform in vertex shader */
1601
if (accel_state->is_transform[0]) {
1602
transformPoint(accel_state->transform[0], &srcTopLeft);
1603
transformPoint(accel_state->transform[0], &srcTopRight);
1604
transformPoint(accel_state->transform[0], &srcBottomLeft);
1605
transformPoint(accel_state->transform[0], &srcBottomRight);
1608
if (accel_state->has_mask) {
1609
xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight;
1611
/* flush and start a new buffer when 3 more 24-byte vertices would not fit */
if (((accel_state->vb_index + 3) * 24) > (accel_state->ib->total / 2)) {
1612
R600DoneComposite(pDst);
1613
accel_state->vb_index = 0;
1614
accel_state->ib = RHDDRMCPBuffer(pScrn->scrnIndex);
1617
/* vertex data lives in the second half of the command buffer */
vb = (pointer)((char*)accel_state->ib->address +
1618
(accel_state->ib->total / 2) +
1619
accel_state->vb_index * 24);
1621
maskTopLeft.x = IntToxFixed(maskX);
1622
maskTopLeft.y = IntToxFixed(maskY);
1623
maskTopRight.x = IntToxFixed(maskX + w);
1624
maskTopRight.y = IntToxFixed(maskY);
1625
maskBottomLeft.x = IntToxFixed(maskX);
1626
maskBottomLeft.y = IntToxFixed(maskY + h);
1627
maskBottomRight.x = IntToxFixed(maskX + w);
1628
maskBottomRight.y = IntToxFixed(maskY + h);
1630
if (accel_state->is_transform[1]) {
1631
transformPoint(accel_state->transform[1], &maskTopLeft);
1632
transformPoint(accel_state->transform[1], &maskTopRight);
1633
transformPoint(accel_state->transform[1], &maskBottomLeft);
1634
transformPoint(accel_state->transform[1], &maskBottomRight);
1637
/* vertex 0: top-left */
vb[0] = (float)dstX;
1638
vb[1] = (float)dstY;
1639
vb[2] = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0];
1640
vb[3] = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0];
1641
vb[4] = xFixedToFloat(maskTopLeft.x) / accel_state->texW[1];
1642
vb[5] = xFixedToFloat(maskTopLeft.y) / accel_state->texH[1];
1644
/* vertex 1: bottom-left */
vb[6] = (float)dstX;
1645
vb[7] = (float)(dstY + h);
1646
vb[8] = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0];
1647
vb[9] = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0];
1648
vb[10] = xFixedToFloat(maskBottomLeft.x) / accel_state->texW[1];
1649
vb[11] = xFixedToFloat(maskBottomLeft.y) / accel_state->texH[1];
1651
/* vertex 2: bottom-right */
vb[12] = (float)(dstX + w);
1652
vb[13] = (float)(dstY + h);
1653
vb[14] = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0];
1654
vb[15] = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0];
1655
vb[16] = xFixedToFloat(maskBottomRight.x) / accel_state->texW[1];
1656
vb[17] = xFixedToFloat(maskBottomRight.y) / accel_state->texH[1];
1659
/* unmasked path: same capacity check as above, with 16-byte vertices */
if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) {
R600DoneComposite(pDst);
accel_state->vb_index = 0;
accel_state->ib = RHDDRMCPBuffer(pScrn->scrnIndex);
}
vb = (pointer)((char*)accel_state->ib->address +
1660
(accel_state->ib->total / 2) +
1661
accel_state->vb_index * 16);
1663
/* vertex 0: top-left */
vb[0] = (float)dstX;
1664
vb[1] = (float)dstY;
1665
vb[2] = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0];
1666
vb[3] = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0];
1668
/* vertex 1: bottom-left */
vb[4] = (float)dstX;
1669
vb[5] = (float)(dstY + h);
1670
vb[6] = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0];
1671
vb[7] = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0];
1673
/* vertex 2: bottom-right */
vb[8] = (float)(dstX + w);
1674
vb[9] = (float)(dstY + h);
1675
vb[10] = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0];
1676
vb[11] = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0];
1679
/* three vertices were appended for this rectangle */
accel_state->vb_index += 3;
1683
/* Finish a composite operation.
 *
 * If no vertices were queued the command buffer is discarded. Otherwise the
 * vertex buffer (second half of the command buffer, addressed through the
 * GART location) is described to the hardware, the relevant cache is
 * synced, a RECTLIST auto-index draw is issued, the destination surface is
 * synced and the buffer is handed to R600CPFlushIndirect. */
static void R600DoneComposite(PixmapPtr pDst)
1685
ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1686
RHDPtr rhdPtr = RHDPTR(pScrn);
1687
struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
1688
draw_config_t draw_conf;
1689
vtx_resource_t vtx_res;
1694
/* nothing was queued by R600Composite */
if (accel_state->vb_index == 0) {
1695
R600IBDiscard(pScrn, accel_state->ib);
1699
/* address of the second half of this command buffer */
accel_state->vb_mc_addr = RHDDRIGetIntGARTLocation(pScrn) +
1700
(accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2);
1703
/* Vertex buffer setup */
1704
/* 24-byte vertices with a mask, 16-byte vertices without; sizes are
 * converted from bytes to 4-byte units by the divisions by 4 */
if (accel_state->has_mask) {
1705
accel_state->vb_size = accel_state->vb_index * 24;
1706
vtx_res.id = SQ_VTX_RESOURCE_vs;
1707
vtx_res.vtx_size_dw = 24 / 4;
1708
vtx_res.vtx_num_entries = accel_state->vb_size / 4;
1709
vtx_res.mem_req_size = 1;
1710
vtx_res.vb_addr = accel_state->vb_mc_addr;
1712
accel_state->vb_size = accel_state->vb_index * 16;
1713
vtx_res.id = SQ_VTX_RESOURCE_vs;
1714
vtx_res.vtx_size_dw = 16 / 4;
1715
vtx_res.vtx_num_entries = accel_state->vb_size / 4;
1716
vtx_res.mem_req_size = 1;
1717
vtx_res.vb_addr = accel_state->vb_mc_addr;
1719
/* flush vertex cache */
1720
/* the listed chipsets sync with TC_ACTION_ENA_bit; the second call uses
 * VC_ACTION_ENA_bit (presumably the else branch for all other chipsets) */
if ((rhdPtr->ChipSet == RHD_RV610) ||
1721
(rhdPtr->ChipSet == RHD_RV620) ||
1722
(rhdPtr->ChipSet == RHD_M72) ||
1723
(rhdPtr->ChipSet == RHD_M74) ||
1724
(rhdPtr->ChipSet == RHD_M82) ||
1725
(rhdPtr->ChipSet == RHD_RS780) ||
1726
(rhdPtr->ChipSet == RHD_RV710))
1727
cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
1728
accel_state->vb_size, accel_state->vb_mc_addr);
1730
cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit,
1731
accel_state->vb_size, accel_state->vb_mc_addr);
1733
set_vtx_resource (pScrn, accel_state->ib, &vtx_res);
1735
draw_conf.prim_type = DI_PT_RECTLIST;
1736
draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
1737
draw_conf.num_instances = 1;
1738
/* total 4-byte units divided by units per vertex = number of vertices */
draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
1739
draw_conf.index_type = DI_INDEX_SIZE_16_BIT;
1741
draw_auto(pScrn, accel_state->ib, &draw_conf);
1743
wait_3d_idle_clean(pScrn, accel_state->ib);
1745
/* sync the destination surface written by the draw */
cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
1746
accel_state->dst_size, accel_state->dst_mc_addr);
1748
R600CPFlushIndirect(pScrn, accel_state->ib);
1752
/* Copy a w x h rectangle from system memory (src, src_pitch) to the VRAM
 * surface at dst_mc_addr, placing it at (x, y).
 *
 * Rows are first copied into a scratch command buffer obtained from
 * RHDDRMCPBuffer and then blitted to VRAM with R600DoPrepareCopy /
 * R600AppendCopyVertex. The scratch buffer is used as two halves:
 * scratch_offset alternates between 0 and total/2, and each pass handles at
 * most as many rows as fit in one half. Scratch rows are padded to a
 * multiple of 256 bytes. The scratch buffer is discarded at the end. */
R600CopyToVRAM(ScrnInfoPtr pScrn,
1753
char *src, int src_pitch,
1754
uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_height, int bpp,
1755
int x, int y, int w, int h)
1757
struct RhdCS *CS = RHDPTR(pScrn)->CS;
1758
uint32_t scratch_mc_addr;
1759
/* bytes per row of payload, and the same rounded up to 256 bytes */
int wpass = w * (bpp/8);
1760
int scratch_pitch_bytes = (wpass + 255) & ~255;
1761
uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1762
int scratch_offset = 0, hpass, temph;
1769
/* destination address must be 256-byte aligned */
if (dst_mc_addr & 0xff)
1772
scratch = RHDDRMCPBuffer(pScrn->scrnIndex);
1773
if (scratch == NULL)
1776
scratch_mc_addr = RHDDRIGetIntGARTLocation(pScrn) + (scratch->idx * scratch->total);
1777
/* rows handled per pass: limited by half of the scratch buffer */
temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1778
dst = (char *)scratch->address;
1780
/* memcopy from sys to scratch */
1782
memcpy (dst, src, wpass);
1784
dst += scratch_pitch_bytes;
1788
/* address of the half that was just filled */
uint32_t offset = scratch_mc_addr + scratch_offset;
1789
int oldhpass = hpass;
1791
temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1794
/* switch to the other half of the scratch buffer */
scratch_offset = scratch->total/2 - scratch_offset;
1795
dst = (char *)scratch->address + scratch_offset;
1796
/* wait for the engine to be idle */
1798
/* memcopy from sys to scratch */
1800
memcpy (dst, src, wpass);
1802
dst += scratch_pitch_bytes;
1805
/* blit from scratch to vram */
1806
R600DoPrepareCopy(pScrn,
1807
scratch_pitch, w, oldhpass, offset, bpp,
1808
dst_pitch, dst_height, dst_mc_addr, bpp,
1810
R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass);
1815
R600IBDiscard(pScrn, scratch);
1821
/* Upload hook: derives the destination pitch (in pixels), memory address,
 * height and bits per pixel from pDst and forwards the request to
 * R600CopyToVRAM, returning its result. */
R600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
1822
char *src, int src_pitch)
1824
ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1825
RHDPtr rhdPtr = RHDPTR(pScrn);
1826
uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1827
uint32_t dst_mc_addr = rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart + exaGetPixmapOffset(pDst);
1828
uint32_t dst_height = pDst->drawable.height;
1829
int bpp = pDst->drawable.bitsPerPixel;
1831
return R600CopyToVRAM(pScrn,
1833
dst_pitch, dst_mc_addr, dst_height, bpp,
1838
/* Download hook: copy a w x h rectangle at (x, y) of pSrc into system
 * memory (dst, dst_pitch).
 *
 * Rows are blitted from VRAM into a scratch command buffer obtained from
 * RHDDRMCPBuffer and then copied to dst with memcpy. The scratch buffer is
 * used as two halves: scratch_offset alternates between 0 and total/2, and
 * each pass handles at most as many rows as fit in one half. The scratch
 * row pitch is dst_pitch rounded up to a multiple of 256 bytes. The scratch
 * buffer is discarded at the end. */
R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
1839
char *dst, int dst_pitch)
1841
ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1842
RHDPtr rhdPtr = RHDPTR(pScrn);
1843
struct RhdCS *CS = rhdPtr->CS;
1844
uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1845
uint32_t src_mc_addr = rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart + exaGetPixmapOffset(pSrc);
1846
uint32_t src_width = pSrc->drawable.width;
1847
uint32_t src_height = pSrc->drawable.height;
1848
int bpp = pSrc->drawable.bitsPerPixel;
1849
uint32_t scratch_mc_addr;
1850
int scratch_pitch_bytes = (dst_pitch + 255) & ~255;
1851
int scratch_offset = 0, hpass;
1852
uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1853
/* bytes of payload per row */
int wpass = w * (bpp/8);
1859
scratch = RHDDRMCPBuffer(pScrn->scrnIndex);
1860
if (scratch == NULL)
1863
scratch_mc_addr = RHDDRIGetIntGARTLocation(pScrn) + (scratch->idx * scratch->total);
1864
/* rows handled per pass: limited by half of the scratch buffer */
hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1866
/* blit from vram to scratch */
1867
R600DoPrepareCopy(pScrn,
1868
src_pitch, src_width, src_height, src_mc_addr, bpp,
1869
scratch_pitch, hpass, scratch_mc_addr, bpp,
1871
R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1875
/* rows of the previous pass are read from the half that was just filled */
char *src = (char *)scratch->address + scratch_offset;
1876
int oldhpass = hpass;
1879
hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1882
/* switch to the other half of the scratch buffer */
scratch_offset = scratch->total/2 - scratch_offset;
1883
/* blit from vram to scratch */
1884
R600DoPrepareCopy(pScrn,
1885
src_pitch, src_width, src_height, src_mc_addr, bpp,
1886
scratch_pitch, hpass, scratch_mc_addr + scratch_offset, bpp,
1888
R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1892
/* wait for the engine to be idle */
1894
/* memcopy from scratch to sys */
1895
while (oldhpass--) {
1896
memcpy (dst, src, wpass);
1898
src += scratch_pitch_bytes;
1902
R600IBDiscard(pScrn, scratch);
1909
/* Screen teardown hook: shuts down the EXA driver for pScreen by calling
 * exaDriverFini. */
R6xxEXACloseScreen(ScreenPtr pScreen)
1911
exaDriverFini(pScreen);
1915
/* Free the EXA driver record (EXAInfo) and the acceleration state
 * (TwoDPrivate) attached to this screen, clearing both pointers so a second
 * call finds nothing left to free. */
R6xxEXADestroy(ScrnInfoPtr pScrn)
1917
RHDPtr rhdPtr = RHDPTR(pScrn);
1919
if (rhdPtr->EXAInfo) {
1920
xfree(rhdPtr->EXAInfo);
1921
rhdPtr->EXAInfo = NULL;
1924
if (rhdPtr->TwoDPrivate) {
1925
xfree(rhdPtr->TwoDPrivate);
1926
rhdPtr->TwoDPrivate = NULL;
1930
/* no need to needlessly flush the caches/wait for idle
1931
* the drawing code does this already (and mesa code should be designed to do so as well)
1932
* excessive idling/flushing seems to cause stability problems on
1933
* r7xx and drawing glitches on r6xx.
1936
/* Cache-flush hook that deliberately does nothing; the self-assignment only
 * marks the parameter as used. */
R6xxCacheFlush(struct RhdCS *CS)
1938
CS = CS; /* nop - avoid compiler warning */
1942
/* Wait-for-idle hook that deliberately does nothing; the self-assignment
 * only marks the parameter as used. */
R6xxEngineWaitIdleFull(struct RhdCS *CS)
1944
CS = CS; /* nop - avoid compiler warning */
1948
/* MarkSync hook: increments the per-screen sync marker counter kept in the
 * acceleration state and returns the new value. */
R600EXAMarkSync(ScreenPtr pScreen)
1950
struct r6xx_accel_state *accel_state = RHDPTR(xf86Screens[pScreen->myNum])->TwoDPrivate;
1952
return ++accel_state->exaSyncMarker;
1957
/* WaitMarker hook: when `marker` differs from the last marker that was
 * synced, performs the sync work on the command stream and records `marker`
 * as synced, so repeated calls with the same marker skip the work. */
R600EXASync(ScreenPtr pScreen, int marker)
1959
ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
1960
struct r6xx_accel_state *accel_state = RHDPTR(pScrn)->TwoDPrivate;
1962
if (accel_state->exaMarkerSynced != marker) {
1963
struct RhdCS *CS = RHDPTR(pScrn)->CS;
1967
accel_state->exaMarkerSynced = marker;
1972
/* Reserve offscreen memory for the shader programs with exaOffscreenAlloc
 * (alignment argument 256) and store the allocation in
 * accel_state->shaders; a NULL result is tested afterwards. */
R600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
1974
RHDPtr rhdPtr = RHDPTR(pScrn);
1975
struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
1976
/* 512 bytes per shader for now */
1979
accel_state->shaders = NULL;
1981
accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256,
1984
if (accel_state->shaders == NULL)
1990
/* Write every shader program into the offscreen shader allocation.
 *
 * `shader` points at the start of the allocation in the CPU mapping of the
 * framebuffer. Each program gets its own slot, 512 bytes apart; the slot's
 * offset is stored in accel_state and the matching R600_*_vs / R600_*_ps
 * generator is called with a pointer to the slot. Dividing the offset by 4
 * presumably converts it to an index into 32-bit words - confirm against
 * the declaration of `shader`. */
R600LoadShaders(ScrnInfoPtr pScrn)
1992
RHDPtr rhdPtr = RHDPTR(pScrn);
1993
struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
1994
enum RHD_CHIPSETS ChipSet = rhdPtr->ChipSet;
1997
shader = (pointer)((char *)rhdPtr->FbBase + rhdPtr->FbScanoutStart + accel_state->shaders->offset);
1999
/* solid vs --------------------------------------- */
2000
accel_state->solid_vs_offset = 0;
2001
R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
2003
/* solid ps --------------------------------------- */
2004
accel_state->solid_ps_offset = 512;
2005
R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
2007
/* copy vs --------------------------------------- */
2008
accel_state->copy_vs_offset = 1024;
2009
R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
2011
/* copy ps --------------------------------------- */
2012
accel_state->copy_ps_offset = 1536;
2013
R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
2015
/* comp vs --------------------------------------- */
2016
accel_state->comp_vs_offset = 2048;
2017
R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
2019
/* comp ps --------------------------------------- */
2020
accel_state->comp_ps_offset = 2560;
2021
R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
2023
/* comp mask ps --------------------------------------- */
2024
accel_state->comp_mask_ps_offset = 3072;
2025
R600_comp_mask_ps(ChipSet, shader + accel_state->comp_mask_ps_offset / 4);
2027
/* xv vs --------------------------------------- */
2028
accel_state->xv_vs_offset = 3584;
2029
R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
2031
/* xv ps --------------------------------------- */
2032
accel_state->xv_ps_offset = 4096;
2033
R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
2039
/* PrepareAccess hook: writes 0x1 to HDP_MEM_COHERENCY_FLUSH_CNTL before the
 * pixmap is accessed by the CPU. pPix is only used to find the screen. */
R600PrepareAccess(PixmapPtr pPix, int index)
2041
ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2042
RHDPtr rhdPtr = RHDPTR(pScrn);
2044
/* flush HDP read/write caches */
2045
RHDRegWrite(rhdPtr, HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2051
/* FinishAccess hook: writes 0x1 to HDP_MEM_COHERENCY_FLUSH_CNTL after CPU
 * access to the pixmap ends. pPix is only used to find the screen. */
R600FinishAccess(PixmapPtr pPix, int index)
2053
ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2054
RHDPtr rhdPtr = RHDPTR(pScrn);
2056
/* flush HDP read/write caches */
2057
RHDRegWrite(rhdPtr, HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2063
R6xxEXAInit(ScrnInfoPtr pScrn, ScreenPtr pScreen)
2065
RHDPtr rhdPtr = RHDPTR(pScrn);
2066
struct RhdCS *CS = rhdPtr->CS;
2067
ExaDriverRec *EXAInfo;
2068
struct r6xx_accel_state *accel_state;
2072
EXAInfo = exaDriverAlloc();
2073
if (EXAInfo == NULL || !CS)
2076
accel_state = xnfcalloc(1, sizeof(struct r6xx_accel_state));
2078
EXAInfo->exa_major = EXA_VERSION_MAJOR;
2079
EXAInfo->exa_minor = EXA_VERSION_MINOR;
2081
EXAInfo->flags = EXA_OFFSCREEN_PIXMAPS;
2082
EXAInfo->pixmapOffsetAlign = 256;
2083
EXAInfo->pixmapPitchAlign = 256;
2085
#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
2086
EXAInfo->maxPitchBytes = 32768;
2087
EXAInfo->maxX = 8192;
2089
EXAInfo->maxX = 8192;
2091
EXAInfo->maxY = 8192;
2093
EXAInfo->memoryBase = (CARD8 *) rhdPtr->FbBase + rhdPtr->FbScanoutStart;
2094
EXAInfo->offScreenBase = rhdPtr->FbOffscreenStart - rhdPtr->FbScanoutStart;
2095
EXAInfo->memorySize = rhdPtr->FbScanoutSize + rhdPtr->FbOffscreenSize;
2097
EXAInfo->PrepareSolid = R600PrepareSolid;
2098
EXAInfo->Solid = R600Solid;
2099
EXAInfo->DoneSolid = R600DoneSolid;
2101
EXAInfo->PrepareCopy = R600PrepareCopy;
2102
EXAInfo->Copy = R600Copy;
2103
EXAInfo->DoneCopy = R600DoneCopy;
2105
EXAInfo->CheckComposite = R600CheckComposite;
2106
EXAInfo->PrepareComposite = R600PrepareComposite;
2107
EXAInfo->Composite = R600Composite;
2108
EXAInfo->DoneComposite = R600DoneComposite;
2110
if (rhdPtr->cardType != RHD_CARD_AGP) {
2111
EXAInfo->UploadToScreen = R600UploadToScreen;
2112
EXAInfo->DownloadFromScreen = R600DownloadFromScreen;
2115
EXAInfo->PrepareAccess = R600PrepareAccess;
2116
EXAInfo->FinishAccess = R600FinishAccess;
2118
EXAInfo->MarkSync = R600EXAMarkSync;
2119
EXAInfo->WaitMarker = R600EXASync;
2121
if (!exaDriverInit(pScreen, EXAInfo)) {
2127
RHDPTR(pScrn)->EXAInfo = EXAInfo;
2129
accel_state->XHas3DEngineState = FALSE;
2130
accel_state->copy_area = NULL;
2132
rhdPtr->TwoDPrivate = accel_state;
2134
if (!R600AllocShaders(pScrn, pScreen)) {
2140
if (!R600LoadShaders(pScrn)) {
2146
exaMarkSync(pScreen);