1
/* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
3
* Copyright (C) The Weather Channel, Inc. 2002.
4
* Copyright (C) 2004 Nicolai Haehnle.
7
* The Weather Channel (TM) funded Tungsten Graphics to develop the
8
* initial release of the Radeon 8500 driver under the XFree86 license.
9
* This notice must be preserved.
11
* Permission is hereby granted, free of charge, to any person obtaining a
12
* copy of this software and associated documentation files (the "Software"),
13
* to deal in the Software without restriction, including without limitation
14
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
15
* and/or sell copies of the Software, and to permit persons to whom the
16
* Software is furnished to do so, subject to the following conditions:
18
* The above copyright notice and this permission notice (including the next
19
* paragraph) shall be included in all copies or substantial portions of the
22
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
25
* PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28
* DEALINGS IN THE SOFTWARE.
31
* Nicolai Haehnle <prefect_@gmx.net>
36
#include "drm_buffer.h"
37
#include "radeon_drm.h"
38
#include "radeon_drv.h"
41
#include <asm/unaligned.h>
43
// Upper bound on the number of cliprects handled in one batch:
// r300_emit_cliprects clamps its per-call count to this value and
// r300_emit_packet3 advances its box index by it.
#define R300_SIMULTANEOUS_CLIPRECTS 4
45
/* Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects
47
static const int r300_cliprect_cntl[4] = {
55
* Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
56
* buffer, starting with index n.
58
// Emits cliprect state for the boxes of cmdbuf starting at index n: a packet0
// on R300_RE_CLIPRECT_TL_0 carrying two dwords per box, a write of
// R300_RE_CLIPRECT_CNTL, a scissors packet, and a destination cache flush
// followed by a 3D idle-clean wait.
// NOTE(review): the embedded original line numbers jump (61 to 66, 79 to 83,
// 86 to 96, ...), so declarations, braces, return statements and some
// assignments of this function are not visible in this listing. Confirm any
// statement about branch structure against the complete file.
static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
59
drm_radeon_kcmd_buffer_t *cmdbuf, int n)
61
struct drm_clip_rect box;
66
// Boxes remaining from index n, capped at the per-batch maximum.
nr = cmdbuf->nbox - n;
67
if (nr > R300_SIMULTANEOUS_CLIPRECTS)
68
nr = R300_SIMULTANEOUS_CLIPRECTS;
70
DRM_DEBUG("%i cliprects\n", nr);
73
// Ring space requested: two dwords per box plus six dwords of fixed overhead.
BEGIN_RING(6 + nr * 2);
74
OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));
76
for (i = 0; i < nr; ++i) {
77
// Each box is copied from the user-supplied array cmdbuf->boxes.
if (DRM_COPY_FROM_USER_UNCHECKED
78
(&box, &cmdbuf->boxes[n + i], sizeof(box))) {
79
DRM_ERROR("copy cliprect faulted\n");
83
box.x2--; /* Hardware expects inclusive bottom-right corner */
86
// NOTE(review): original lines 87 to 95 are not visible, so it cannot be told
// from this listing which branch of the family check the offset-and-mask
// assignments below belong to; the mask operands are not visible either.
if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
96
box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) &
98
box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) &
100
box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) &
102
box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
106
// Top-left corner dword, then bottom-right corner dword, x and y packed
// with the R300_CLIPRECT_*_SHIFT positions.
OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
107
(box.y1 << R300_CLIPRECT_Y_SHIFT));
108
OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
109
(box.y2 << R300_CLIPRECT_Y_SHIFT));
113
// The control value is looked up by box count; index nr - 1 requires nr >= 1.
OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);
115
/* TODO/SECURITY: Force scissors to a safe value, otherwise the
116
* client might be able to trample over memory.
117
* The impact should be very limited, but I'd rather be safe than
120
OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
122
OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
125
/* Why we allow zero cliprect rendering:
126
* There are some commands in a command buffer that must be submitted
127
* even when there are no cliprects, e.g. DMA buffer discard
128
* or state setting (though state setting could be avoided by
129
* simulating a loss of context).
131
* Now since the cmdbuf interface is so chaotic right now (and is
132
* bound to remain that way for a bit until things settle down),
133
* it is basically impossible to filter out the commands that are
134
* necessary and those that aren't.
136
* So I choose the safe way and don't do any filtering at all;
137
* instead, I simply set up the engine so that all rendering
138
* can't produce any fragments.
141
OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
145
/* flush cache and wait idle clean after cliprect change */
147
OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
148
OUT_RING(R300_RB3D_DC_FLUSH);
151
OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
152
OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
155
// Record that a flush has been emitted.
dev_priv->track_flush |= RADEON_FLUSH_EMITED;
160
// One flag byte per 32-bit register of a 0x10000-byte register space, indexed
// by (register byte offset >> 2). Filled in by r300_init_reg_flags and read
// by r300_check_range and r300_emit_carefully_checked_packet0.
static u8 r300_reg_flags[0x10000 >> 2];
162
// Builds the r300_reg_flags table: the table is cleared and then register
// ranges are OR-ed with a mark. ADD_RANGE applies MARK_SAFE; ranges added
// with MARK_CHECK_OFFSET are the ones whose payload
// r300_emit_carefully_checked_packet0 passes to radeon_check_offset.
// dev supplies dev_private, whose flags select chip-family specific ranges.
// NOTE(review): the definition of MARK_SAFE, the declaration of the loop
// index i and all braces are not visible in this listing.
void r300_init_reg_flags(struct drm_device *dev)
165
drm_radeon_private_t *dev_priv = dev->dev_private;
167
// The element type is u8, so this byte count covers the whole table.
memset(r300_reg_flags, 0, 0x10000 >> 2);
168
// ADD_RANGE_MARK expands to a bare for statement over count consecutive
// table entries starting at reg >> 2; it is not wrapped in do/while(0).
#define ADD_RANGE_MARK(reg, count,mark) \
169
for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
170
r300_reg_flags[i]|=(mark);
173
#define MARK_CHECK_OFFSET 2
175
#define ADD_RANGE(reg, count) ADD_RANGE_MARK(reg, count, MARK_SAFE)
177
/* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
178
ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
179
ADD_RANGE(R300_VAP_CNTL, 1);
180
ADD_RANGE(R300_SE_VTE_CNTL, 2);
181
ADD_RANGE(0x2134, 2);
182
ADD_RANGE(R300_VAP_CNTL_STATUS, 1);
183
ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
184
ADD_RANGE(0x21DC, 1);
185
ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
186
ADD_RANGE(R300_VAP_CLIP_X_0, 4);
187
ADD_RANGE(R300_VAP_PVS_STATE_FLUSH_REG, 1);
188
ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
189
ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
190
ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
191
ADD_RANGE(R300_GB_ENABLE, 1);
192
ADD_RANGE(R300_GB_MSPOS0, 5);
193
ADD_RANGE(R300_TX_INVALTAGS, 1);
194
ADD_RANGE(R300_TX_ENABLE, 1);
195
ADD_RANGE(0x4200, 4);
196
ADD_RANGE(0x4214, 1);
197
ADD_RANGE(R300_RE_POINTSIZE, 1);
198
ADD_RANGE(0x4230, 3);
199
ADD_RANGE(R300_RE_LINE_CNT, 1);
200
ADD_RANGE(R300_RE_UNK4238, 1);
201
ADD_RANGE(0x4260, 3);
202
ADD_RANGE(R300_RE_SHADE, 4);
203
ADD_RANGE(R300_RE_POLYGON_MODE, 5);
204
ADD_RANGE(R300_RE_ZBIAS_CNTL, 1);
205
ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
206
ADD_RANGE(R300_RE_OCCLUSION_CNTL, 1);
207
ADD_RANGE(R300_RE_CULL_CNTL, 1);
208
ADD_RANGE(0x42C0, 2);
209
ADD_RANGE(R300_RS_CNTL_0, 2);
211
ADD_RANGE(R300_SU_REG_DEST, 1);
212
// This range is added only when the chip family is exactly CHIP_RV530.
if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV530)
213
ADD_RANGE(RV530_FG_ZBREG_DEST, 1);
215
ADD_RANGE(R300_SC_HYPERZ, 2);
216
ADD_RANGE(0x43E8, 1);
218
ADD_RANGE(0x46A4, 5);
220
ADD_RANGE(R300_RE_FOG_STATE, 1);
221
ADD_RANGE(R300_FOG_COLOR_R, 3);
222
ADD_RANGE(R300_PP_ALPHA_TEST, 2);
223
ADD_RANGE(0x4BD8, 1);
224
ADD_RANGE(R300_PFS_PARAM_0_X, 64);
225
ADD_RANGE(0x4E00, 1);
226
ADD_RANGE(R300_RB3D_CBLEND, 2);
227
ADD_RANGE(R300_RB3D_COLORMASK, 1);
228
ADD_RANGE(R300_RB3D_BLEND_COLOR, 3);
229
ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET); /* check offset */
230
ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
231
ADD_RANGE(0x4E50, 9);
232
ADD_RANGE(0x4E88, 1);
233
ADD_RANGE(0x4EA0, 2);
234
ADD_RANGE(R300_ZB_CNTL, 3);
235
ADD_RANGE(R300_ZB_FORMAT, 4);
236
ADD_RANGE_MARK(R300_ZB_DEPTHOFFSET, 1, MARK_CHECK_OFFSET); /* check offset */
237
ADD_RANGE(R300_ZB_DEPTHPITCH, 1);
238
ADD_RANGE(R300_ZB_DEPTHCLEARVALUE, 1);
239
ADD_RANGE(R300_ZB_ZMASK_OFFSET, 13);
240
ADD_RANGE(R300_ZB_ZPASS_DATA, 2); /* ZB_ZPASS_DATA, ZB_ZPASS_ADDR */
242
ADD_RANGE(R300_TX_FILTER_0, 16);
243
ADD_RANGE(R300_TX_FILTER1_0, 16);
244
ADD_RANGE(R300_TX_SIZE_0, 16);
245
ADD_RANGE(R300_TX_FORMAT_0, 16);
246
ADD_RANGE(R300_TX_PITCH_0, 16);
247
/* Texture offset is dangerous and needs more checking */
248
ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
249
ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
250
ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
252
/* Sporadic registers used as primitives are emitted */
253
ADD_RANGE(R300_ZB_ZCACHE_CTLSTAT, 1);
254
ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
255
ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
256
ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
258
// Ranges that follow the family check are for CHIP_RV515 and newer.
if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
259
ADD_RANGE(R500_VAP_INDEX_OFFSET, 1);
260
ADD_RANGE(R500_US_CONFIG, 2);
261
ADD_RANGE(R500_US_CODE_ADDR, 3);
262
ADD_RANGE(R500_US_FC_CTRL, 1);
263
ADD_RANGE(R500_RS_IP_0, 16);
264
ADD_RANGE(R500_RS_INST_0, 16);
265
ADD_RANGE(R500_RB3D_COLOR_CLEAR_VALUE_AR, 2);
266
ADD_RANGE(R500_RB3D_CONSTANT_COLOR_AR, 2);
267
ADD_RANGE(R500_ZB_FIFO_SIZE, 2);
// NOTE(review): original line 268 is not visible, so this listing does not
// show whether the PFS/RS ranges below are an alternative to the R500 ranges
// above or are added unconditionally. Confirm against the complete file.
269
ADD_RANGE(R300_PFS_CNTL_0, 3);
270
ADD_RANGE(R300_PFS_NODE_0, 4);
271
ADD_RANGE(R300_PFS_TEXI_0, 64);
272
ADD_RANGE(R300_PFS_INSTR0_0, 64);
273
ADD_RANGE(R300_PFS_INSTR1_0, 64);
274
ADD_RANGE(R300_PFS_INSTR2_0, 64);
275
ADD_RANGE(R300_PFS_INSTR3_0, 64);
276
ADD_RANGE(R300_RS_INTERP_0, 8);
277
ADD_RANGE(R300_RS_ROUTE_0, 8);
282
// Walks the r300_reg_flags entries for count consecutive registers starting
// at byte offset reg and compares each entry against MARK_SAFE.
// NOTE(review): the statement executed on a mismatch and the return
// statements are not visible in this listing. The caller r300_emit_packet0
// switches to per-register checking when the result is nonzero.
static __inline__ int r300_check_range(unsigned reg, int count)
287
for (i = (reg >> 2); i < (reg >> 2) + count; i++)
288
if (r300_reg_flags[i] != MARK_SAFE)
293
// Emits a packet0 after inspecting the r300_reg_flags entry of every register
// it writes. Called by r300_emit_packet0 when r300_check_range returns
// nonzero for the requested range.
// NOTE(review): the parameter names, local declarations, the case labels
// other than MARK_CHECK_OFFSET and all return statements are not visible in
// this listing.
static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
295
drm_radeon_kcmd_buffer_t
297
drm_r300_cmd_header_t
306
// Dword count and register byte offset both come from the user header.
sz = header.packet0.count;
307
reg = (header.packet0.reghi << 8) | header.packet0.reglo;
309
// Counts outside 0..64 are rejected with an error message.
if ((sz > 64) || (sz < 0)) {
310
DRM_ERROR("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
315
for (i = 0; i < sz; i++) {
316
switch (r300_reg_flags[(reg >> 2) + i]) {
319
case MARK_CHECK_OFFSET:
320
// The i-th payload dword is read in place from the command buffer and must
// pass radeon_check_offset.
value = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
321
if (!radeon_check_offset(dev_priv, *value)) {
322
DRM_ERROR("Offset failed range check (reg=%04x sz=%d)\n",
328
DRM_ERROR("Register %04x failed check as flag=%02x\n",
329
reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
335
// Packet header followed by sz payload dwords copied from the buffer.
OUT_RING(CP_PACKET0(reg, sz - 1));
336
OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
343
* Emits a packet0 setting arbitrary registers.
344
* Called by r300_do_cp_cmdbuf.
346
* Note that checks are performed on contents and addresses of the registers
348
// Validates and emits a user-supplied packet0, i.e. a write of sz consecutive
// registers starting at byte offset reg. Ranges that r300_check_range accepts
// are emitted directly; otherwise the work is handed to
// r300_emit_carefully_checked_packet0.
// NOTE(review): declarations, the statements guarded by the first two if
// lines and the return statements are not visible in this listing.
static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
349
drm_radeon_kcmd_buffer_t *cmdbuf,
350
drm_r300_cmd_header_t header)
356
sz = header.packet0.count;
357
reg = (header.packet0.reghi << 8) | header.packet0.reglo;
362
// The payload of sz dwords must still be present in the command buffer.
if (sz * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
365
// The register range must end below 0x10000, the byte span covered by
// r300_reg_flags.
if (reg + sz * 4 >= 0x10000) {
366
DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
371
if (r300_check_range(reg, sz)) {
372
/* go and check everything */
373
return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
376
/* the rest of the data is safe to emit, whatever the values the user passed */
379
OUT_RING(CP_PACKET0(reg, sz - 1));
380
OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
387
* Uploads user-supplied vertex program instructions or parameters onto
389
* Called by r300_do_cp_cmdbuf.
391
// Copies sz entries of 16 bytes each from the command buffer to
// R300_VAP_PVS_UPLOAD_DATA, after setting R300_VAP_PVS_UPLOAD_ADDRESS to addr.
// The upload is preceded by a destination cache flush, a 3D idle-clean wait
// and a write to R300_VAP_PVS_STATE_FLUSH_REG, and followed by another write
// to that register.
// NOTE(review): declarations, the values written to the flush register, some
// BEGIN_RING/ADVANCE_RING lines and the return statements are not visible in
// this listing.
static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
392
drm_radeon_kcmd_buffer_t *cmdbuf,
393
drm_r300_cmd_header_t header)
399
// Entry count and upload address come from the user header.
sz = header.vpu.count;
400
addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
404
// Each entry is 16 bytes; the whole payload must still be in the buffer.
if (sz * 16 > drm_buffer_unprocessed(cmdbuf->buffer))
407
/* VAP is very sensitive so we purge cache before we program it
408
* and we also flush its state before & after */
410
OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
411
OUT_RING(R300_RB3D_DC_FLUSH);
412
OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
413
OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
414
OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
418
dev_priv->track_flush |= RADEON_FLUSH_EMITED;
420
// Three dwords of address write and table header plus four dwords per entry.
BEGIN_RING(3 + sz * 4);
421
OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
422
OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
423
OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz * 4);
427
OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
435
* Emit a clear packet from userspace.
436
* Called by r300_emit_packet3.
438
// Emits an R200_3D_DRAW_IMMD_2 packet that draws one point primitive whose
// eight payload dwords are copied from the command buffer, then a destination
// cache flush and a 3D idle-clean wait.
// NOTE(review): declarations, BEGIN_RING/ADVANCE_RING lines and the return
// statements are not visible in this listing.
static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
439
drm_radeon_kcmd_buffer_t *cmdbuf)
443
// Eight dwords of vertex data must still be present in the buffer.
if (8 * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
447
OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
448
OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
449
(1 << R300_PRIM_NUM_VERTICES_SHIFT));
450
OUT_RING_DRM_BUFFER(cmdbuf->buffer, 8);
454
OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
455
OUT_RING(R300_RB3D_DC_FLUSH);
456
OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
457
OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
460
dev_priv->track_flush |= RADEON_FLUSH_EMITED;
465
// Validates a 3D_LOAD_VBPNTR packet3 and copies it to the ring. Every offset
// dword found while walking the payload must pass radeon_check_offset, and
// the walk must consume exactly count + 1 dwords and narrays arrays.
// NOTE(review): the last parameter, the local declarations, the updates of k
// and i between the visible checks, the emission of the packet header and
// the return statements are not visible in this listing.
static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
466
drm_radeon_kcmd_buffer_t *cmdbuf,
470
// Largest payload, in dwords, accepted for this packet.
#define MAX_ARRAY_PACKET 64
475
count = (header & RADEON_CP_PACKET_COUNT_MASK) >> 16;
477
if ((count + 1) > MAX_ARRAY_PACKET) {
478
DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
482
/* carefully check packet contents */
484
/* We have already read the header so advance the buffer. */
485
drm_buffer_advance(cmdbuf->buffer, 4);
487
// The first payload dword is used as the number of arrays.
narrays = *(u32 *)drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
490
while ((k < narrays) && (i < (count + 1))) {
491
i++; /* skip attribute field */
492
data = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
493
if (!radeon_check_offset(dev_priv, *data)) {
495
("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
503
/* have one more to process, they come in pairs */
504
data = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
505
if (!radeon_check_offset(dev_priv, *data)) {
507
("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
514
/* do the counts match what we expect ? */
515
if ((k != narrays) || (i != (count + 1))) {
517
("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
518
k, i, narrays, count + 1);
522
/* all clear, output packet */
524
// Ring space for count + 2 dwords; count + 1 of them are copied from the
// buffer here.
BEGIN_RING(count + 2);
526
OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 1);
532
// Checks the offsets carried by a CNTL_BITBLT_MULTI packet3 and then copies
// count + 2 dwords of it from the command buffer to the ring.
// NOTE(review): declarations, the outer condition on original line 542, the
// tests of ret and the return statements are not visible in this listing.
static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
533
drm_radeon_kcmd_buffer_t *cmdbuf)
535
// Dword 0 is the packet3 header, read in place from the buffer.
u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
540
count = (*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16;
544
u32 *cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
545
// If either pitch/offset control bit is set in dword 1, dword 2 shifted
// left by 10 is treated as an offset and passed to radeon_check_offset.
if (*cmd1 & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
546
| RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
548
u32 *cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
549
offset = *cmd2 << 10;
550
ret = !radeon_check_offset(dev_priv, offset);
552
DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
557
// If both control bits are set, dword 3 is checked the same way.
if ((*cmd1 & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
558
(*cmd1 & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
559
u32 *cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
560
offset = *cmd3 << 10;
561
ret = !radeon_check_offset(dev_priv, offset);
563
DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
571
OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
577
// Validates and emits a 3D_DRAW_INDX_2 packet3. The upper 16 bits of dword 1
// give an index count, which is converted to a dword count (halved, rounding
// up, unless R300_VAP_VF_CNTL__INDEX_SIZE_32bit is set) and compared with
// the packet count field when that field is nonzero. The later lines expect
// a following raw packet3 INDX_BUFFER command, validate its register address
// dword, its offset and its size, and copy its four dwords to the ring.
// NOTE(review): declarations, BEGIN_RING/ADVANCE_RING lines, the condition
// under which the INDX_BUFFER part runs and the return statements are not
// visible in this listing.
static __inline__ int r300_emit_draw_indx_2(drm_radeon_private_t *dev_priv,
578
drm_radeon_kcmd_buffer_t *cmdbuf)
580
u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
581
u32 *cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
586
count = (*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16;
588
expected_count = *cmd1 >> 16;
589
// Without the 32-bit index flag the count is halved, rounding up.
if (!(*cmd1 & R300_VAP_VF_CNTL__INDEX_SIZE_32bit))
590
expected_count = (expected_count+1)/2;
592
// A zero count field skips this comparison.
if (count && count != expected_count) {
593
DRM_ERROR("3D_DRAW_INDX_2: packet size %i, expected %i\n",
594
count, expected_count);
599
OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
603
drm_r300_cmd_header_t stack_header, *header;
604
u32 *cmd1, *cmd2, *cmd3;
606
// The stream must still hold a command header plus four dwords.
if (drm_buffer_unprocessed(cmdbuf->buffer)
607
< 4*4 + sizeof(stack_header)) {
608
DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER, but stream is too short.\n");
612
header = drm_buffer_read_object(cmdbuf->buffer,
613
sizeof(stack_header), &stack_header);
615
cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
616
cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
617
cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
618
cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
620
// The next command must be a raw packet3 whose header is exactly
// CP_PACKET3(RADEON_CP_INDX_BUFFER, 2).
if (header->header.cmd_type != R300_CMD_PACKET3 ||
621
header->packet3.packet != R300_CMD_PACKET3_RAW ||
622
*cmd != CP_PACKET3(RADEON_CP_INDX_BUFFER, 2)) {
623
DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER.\n");
627
// Dword 1 must have bit 31 set and its low 16 bits equal to 0x0810.
if ((*cmd1 & 0x8000ffff) != 0x80000810) {
628
DRM_ERROR("Invalid indx_buffer reg address %08X\n",
632
if (!radeon_check_offset(dev_priv, *cmd2)) {
633
DRM_ERROR("Invalid indx_buffer offset is %08X\n",
637
// The buffer size dword must equal the dword count computed above.
if (*cmd3 != expected_count) {
638
DRM_ERROR("INDX_BUFFER: buffer size %i, expected %i\n",
639
*cmd3, expected_count);
644
OUT_RING_DRM_BUFFER(cmdbuf->buffer, 4);
651
// Checks a raw packet3 supplied by user space and dispatches on its opcode
// bits (header & 0xff00). Some opcodes are handed to dedicated validators,
// a lone INDX_BUFFER is reported as illegal, unknown opcodes are reported as
// errors, and the remaining packets are copied to the ring as count + 2
// dwords.
// NOTE(review): declarations, the break/return statements that separate the
// case labels and the default label are not visible in this listing, so the
// fall-through behaviour between cases cannot be read from it.
static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
652
drm_radeon_kcmd_buffer_t *cmdbuf)
658
// At least the four-byte packet header must be left in the buffer.
if (4 > drm_buffer_unprocessed(cmdbuf->buffer))
661
/* Fixme !! This simply emits a packet without much checking.
662
We need to be smarter. */
664
/* obtain first word - actual packet3 header */
665
header = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
667
/* Is it packet 3 ? */
668
if ((*header >> 30) != 0x3) {
669
DRM_ERROR("Not a packet3 header (0x%08x)\n", *header);
673
// The count field is the 14 bits starting at bit 16.
count = (*header >> 16) & 0x3fff;
675
/* Check again now that we know how much data to expect */
676
if ((count + 2) * 4 > drm_buffer_unprocessed(cmdbuf->buffer)) {
678
("Expected packet3 of length %d but have only %d bytes left\n",
679
(count + 2) * 4, drm_buffer_unprocessed(cmdbuf->buffer));
683
/* Is it a packet type we know about ? */
684
switch (*header & 0xff00) {
685
case RADEON_3D_LOAD_VBPNTR: /* load vertex array pointers */
686
return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, *header);
688
case RADEON_CNTL_BITBLT_MULTI:
689
return r300_emit_bitblt_multi(dev_priv, cmdbuf);
691
case RADEON_CP_INDX_BUFFER:
692
DRM_ERROR("packet3 INDX_BUFFER without preceding 3D_DRAW_INDX_2 is illegal.\n");
694
case RADEON_CP_3D_DRAW_IMMD_2:
695
/* triggers drawing using in-packet vertex data */
696
case RADEON_CP_3D_DRAW_VBUF_2:
697
/* triggers drawing of vertex buffers setup elsewhere */
698
dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
699
RADEON_PURGE_EMITED);
701
case RADEON_CP_3D_DRAW_INDX_2:
702
/* triggers drawing using indices to vertex buffer */
703
/* whenever we send vertex we clear flush & purge */
704
dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
705
RADEON_PURGE_EMITED);
706
return r300_emit_draw_indx_2(dev_priv, cmdbuf);
707
case RADEON_WAIT_FOR_IDLE:
709
/* these packets are safe */
712
DRM_ERROR("Unknown packet3 header (0x%08x)\n", *header);
716
// Header dword plus count + 1 payload dwords are copied unchanged.
BEGIN_RING(count + 2);
717
OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
724
* Emit a rendering packet3 from userspace.
725
* Called by r300_do_cp_cmdbuf.
727
// Emits a packet3 command (clear or raw) from user space, repeating the
// emission once per batch of R300_SIMULTANEOUS_CLIPRECTS cliprects when the
// command buffer holds more cliprects than fit in one batch.
// NOTE(review): the declarations of n and ret, the opening of the do loop,
// the tests of ret, the break statements and the return statements are not
// visible in this listing.
static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
728
drm_radeon_kcmd_buffer_t *cmdbuf,
729
drm_r300_cmd_header_t header)
733
// Read position at entry; restored inside the loop, presumably so the same
// packet payload is read again for every cliprect batch.
int orig_iter = cmdbuf->buffer->iterator;
735
/* This is a do-while-loop so that we run the interior at least once,
736
* even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
740
if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
741
ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
745
cmdbuf->buffer->iterator = orig_iter;
748
switch (header.packet3.packet) {
749
case R300_CMD_PACKET3_CLEAR:
750
DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
751
ret = r300_emit_clear(dev_priv, cmdbuf);
753
DRM_ERROR("r300_emit_clear failed\n");
758
case R300_CMD_PACKET3_RAW:
759
DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
760
ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
762
DRM_ERROR("r300_emit_raw_packet3 failed\n");
768
DRM_ERROR("bad packet3 type %i at byte %d\n",
769
header.packet3.packet,
770
cmdbuf->buffer->iterator - (int)sizeof(header));
774
// Move on to the next batch of cliprects.
n += R300_SIMULTANEOUS_CLIPRECTS;
775
} while (n < cmdbuf->nbox);
780
/* Some of the R300 chips seem to be extremely touchy about the two registers
781
* that are configured in r300_pacify.
782
* Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
783
* sends a command buffer that contains only state setting commands and a
784
* vertex program/parameter upload sequence, this will eventually lead to a
785
* lockup, unless the sequence is bracketed by calls to r300_pacify.
786
* So we should take great care to *always* call r300_pacify before
787
* *anything* 3D related, and again afterwards. This is what the
788
* call bracket in r300_do_cp_cmdbuf is for.
792
* Emit the sequence to pacify R300.
794
// Emits cache flush packets for the Z cache, the 3D destination cache, the
// texture tags and the 2D destination cache, with RADEON_WAIT_UNTIL packets
// in between, then records in track_flush that flush and purge were emitted.
// The FREE (purge) bits are added to the flush values only when
// RADEON_PURGE_EMITED is not already set in track_flush.
// NOTE(review): BEGIN_RING/ADVANCE_RING lines and most of the values written
// after each packet header are not visible in this listing.
static void r300_pacify(drm_radeon_private_t *dev_priv)
796
uint32_t cache_z, cache_3d, cache_2d;
799
cache_z = R300_ZC_FLUSH;
800
cache_2d = R300_RB2D_DC_FLUSH;
801
cache_3d = R300_RB3D_DC_FLUSH;
802
if (!(dev_priv->track_flush & RADEON_PURGE_EMITED)) {
803
/* we can purge, primitives were drawn since last purge */
804
cache_z |= R300_ZC_FREE;
805
cache_2d |= R300_RB2D_DC_FREE;
806
cache_3d |= R300_RB3D_DC_FREE;
809
/* flush & purge zbuffer */
811
OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0));
814
/* flush & purge 3d */
816
OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
819
/* flush & purge texture */
821
OUT_RING(CP_PACKET0(R300_TX_INVALTAGS, 0));
824
/* FIXME: is this one really needed ? */
826
OUT_RING(CP_PACKET0(R300_RB3D_AARESOLVE_CTL, 0));
830
OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
831
OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
833
/* flush & purge 2d through E2 as RB2D will trigger lockup */
835
OUT_RING(CP_PACKET0(R300_DSTCACHE_CTLSTAT, 0));
837
OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
838
OUT_RING(RADEON_WAIT_2D_IDLECLEAN |
839
RADEON_WAIT_HOST_IDLECLEAN);
841
/* set flush & purge flags */
842
dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED;
846
* Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
847
* The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
848
* be careful about how this function is called.
850
// Stamps buf with a new age: last_dispatch in the shared area of the master
// is incremented and the new value is stored in the private data of the
// buffer.
// NOTE(review): original lines 856 to 859 are not visible, so any further
// updates to the buffer state are not shown in this listing. The dev
// parameter is not used by the visible lines.
static void r300_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
852
drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
853
struct drm_radeon_master_private *master_priv = master->driver_priv;
855
buf_priv->age = ++master_priv->sarea_priv->last_dispatch;
860
// Translates header.wait.flags into a RADEON_WAIT_UNTIL mask and emits it as
// a single packet0.
// NOTE(review): the declaration of wait_until, the statement guarded by the
// zero-flags test, the first two case labels, the break statements and the
// default handling are not visible in this listing.
static void r300_cmd_wait(drm_radeon_private_t * dev_priv,
861
drm_r300_cmd_header_t header)
866
if (!header.wait.flags)
871
switch(header.wait.flags) {
873
wait_until = RADEON_WAIT_2D_IDLE;
876
wait_until = RADEON_WAIT_3D_IDLE;
878
case R300_NEW_WAIT_2D_3D:
879
wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_3D_IDLE;
881
case R300_NEW_WAIT_2D_2D_CLEAN:
882
wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN;
884
case R300_NEW_WAIT_3D_3D_CLEAN:
885
wait_until = RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN;
887
case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
888
wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN;
889
wait_until |= RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN;
896
OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
897
OUT_RING(wait_until);
901
// Handles a scratch command. The payload is a 64-bit value used as a user
// space pointer (ref_age_base) followed by n_bufs 32-bit buffer indices.
// The age counter of the selected scratch register is incremented, written
// to user memory for every listed buffer, and finally emitted to the ring as
// a write to RADEON_SCRATCH_REG0 + reg * 4.
// NOTE(review): several declarations, the size arguments of the user copies,
// the use made of h_pending and the return statements are not visible in
// this listing.
static int r300_scratch(drm_radeon_private_t *dev_priv,
902
drm_radeon_kcmd_buffer_t *cmdbuf,
903
drm_r300_cmd_header_t header)
906
u32 i, *buf_idx, h_pending;
911
// The pointer and all n_bufs indices must still be present in the buffer.
if (drm_buffer_unprocessed(cmdbuf->buffer) <
912
(sizeof(u64) + header.scratch.n_bufs * sizeof(*buf_idx))) {
916
// Scratch register numbers of 5 and above take this branch.
if (header.scratch.reg >= 5) {
920
dev_priv->scratch_ages[header.scratch.reg]++;
922
ptr_addr = drm_buffer_read_object(cmdbuf->buffer,
923
sizeof(stack_ptr_addr), &stack_ptr_addr);
924
// get_unaligned is used to read the 64-bit value before it is cast to a
// pointer.
ref_age_base = (u32 *)(unsigned long)get_unaligned(ptr_addr);
926
for (i=0; i < header.scratch.n_bufs; i++) {
927
buf_idx = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
928
// NOTE(review): this doubles the index in place inside the command buffer.
*buf_idx *= 2; /* 8 bytes per buf */
930
// First dword of the pair receives the new age of the scratch register.
if (DRM_COPY_TO_USER(ref_age_base + *buf_idx,
931
&dev_priv->scratch_ages[header.scratch.reg],
935
// Second dword of the pair is read into h_pending and written back below.
if (DRM_COPY_FROM_USER(&h_pending,
936
ref_age_base + *buf_idx + 1,
945
if (DRM_COPY_TO_USER(ref_age_base + *buf_idx + 1,
950
drm_buffer_advance(cmdbuf->buffer, sizeof(*buf_idx));
954
OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0 ) );
955
OUT_RING( dev_priv->scratch_ages[header.scratch.reg] );
962
* Uploads user-supplied R500 fragment program instructions or constants onto
964
* Called by r300_do_cp_cmdbuf.
966
// Copies sz entries from the command buffer to R500_GA_US_VECTOR_DATA after
// writing the start index to R500_GA_US_VECTOR_INDEX. An entry is 4 dwords
// when the R500FP_CONSTANT_TYPE flag is set and 6 dwords otherwise.
// NOTE(review): declarations, the statement guarded by the size check,
// ADVANCE_RING and the return statements are not visible in this listing.
static inline int r300_emit_r500fp(drm_radeon_private_t *dev_priv,
967
drm_radeon_kcmd_buffer_t *cmdbuf,
968
drm_r300_cmd_header_t header)
977
sz = header.r500fp.count;
978
/* address is 9 bits 0 - 8; bit 0 (mask 0x1) of adrhi_flags supplies address bit 8 */
979
addr = ((header.r500fp.adrhi_flags & 1) << 8) | header.r500fp.adrlo;
981
type = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
982
isclamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
984
// The type and clamp flags are placed in bits 16 and 17 of the index value.
addr |= (type << 16);
985
addr |= (isclamp << 17);
987
stride = type ? 4 : 6;
989
DRM_DEBUG("r500fp %d %d type: %d\n", sz, addr, type);
992
// The payload of sz * stride dwords must still be present in the buffer.
if (sz * stride * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
995
BEGIN_RING(3 + sz * stride);
996
OUT_RING_REG(R500_GA_US_VECTOR_INDEX, addr);
997
OUT_RING(CP_PACKET0_TABLE(R500_GA_US_VECTOR_DATA, sz * stride - 1));
998
OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz * stride);
1007
* Parses and validates a user-supplied command buffer and emits appropriate
1008
* commands on the DMA ring buffer.
1009
* Called by the ioctl handler function radeon_cp_cmdbuf.
1011
int r300_do_cp_cmdbuf(struct drm_device *dev,
1012
struct drm_file *file_priv,
1013
drm_radeon_kcmd_buffer_t *cmdbuf)
1015
drm_radeon_private_t *dev_priv = dev->dev_private;
1016
struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
1017
struct drm_device_dma *dma = dev->dma;
1018
struct drm_buf *buf = NULL;
1019
int emit_dispatch_age = 0;
1025
r300_pacify(dev_priv);
1027
if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
1028
ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
1033
while (drm_buffer_unprocessed(cmdbuf->buffer)
1034
>= sizeof(drm_r300_cmd_header_t)) {
1036
drm_r300_cmd_header_t *header, stack_header;
1038
header = drm_buffer_read_object(cmdbuf->buffer,
1039
sizeof(stack_header), &stack_header);
1041
switch (header->header.cmd_type) {
1042
case R300_CMD_PACKET0:
1043
DRM_DEBUG("R300_CMD_PACKET0\n");
1044
ret = r300_emit_packet0(dev_priv, cmdbuf, *header);
1046
DRM_ERROR("r300_emit_packet0 failed\n");
1052
DRM_DEBUG("R300_CMD_VPU\n");
1053
ret = r300_emit_vpu(dev_priv, cmdbuf, *header);
1055
DRM_ERROR("r300_emit_vpu failed\n");
1060
case R300_CMD_PACKET3:
1061
DRM_DEBUG("R300_CMD_PACKET3\n");
1062
ret = r300_emit_packet3(dev_priv, cmdbuf, *header);
1064
DRM_ERROR("r300_emit_packet3 failed\n");
1069
case R300_CMD_END3D:
1070
DRM_DEBUG("R300_CMD_END3D\n");
1072
Ideally userspace driver should not need to issue this call,
1073
i.e. the drm driver should issue it automatically and prevent
1076
In practice, we do not understand why this call is needed and what
1077
it does (except for some vague guesses that it has to do with cache
1078
coherence) and so the user space driver does it.
1080
Once we are sure which uses prevent lockups the code could be moved
1081
into the kernel and the userspace driver will not
1082
need to use this command.
1084
Note that issuing this command does not hurt anything
1085
except, possibly, performance */
1086
r300_pacify(dev_priv);
1089
case R300_CMD_CP_DELAY:
1090
/* simple enough, we can do it here */
1091
DRM_DEBUG("R300_CMD_CP_DELAY\n");
1096
BEGIN_RING(header->delay.count);
1097
for (i = 0; i < header->delay.count; i++)
1098
OUT_RING(RADEON_CP_PACKET2);
1103
case R300_CMD_DMA_DISCARD:
1104
DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
1105
idx = header->dma.buf_idx;
1106
if (idx < 0 || idx >= dma->buf_count) {
1107
DRM_ERROR("buffer index %d (of %d max)\n",
1108
idx, dma->buf_count - 1);
1113
buf = dma->buflist[idx];
1114
if (buf->file_priv != file_priv || buf->pending) {
1115
DRM_ERROR("bad buffer %p %p %d\n",
1116
buf->file_priv, file_priv,
1122
emit_dispatch_age = 1;
1123
r300_discard_buffer(dev, file_priv->master, buf);
1127
DRM_DEBUG("R300_CMD_WAIT\n");
1128
r300_cmd_wait(dev_priv, *header);
1131
case R300_CMD_SCRATCH:
1132
DRM_DEBUG("R300_CMD_SCRATCH\n");
1133
ret = r300_scratch(dev_priv, cmdbuf, *header);
1135
DRM_ERROR("r300_scratch failed\n");
1140
case R300_CMD_R500FP:
1141
if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV515) {
1142
DRM_ERROR("Calling r500 command on r300 card\n");
1146
DRM_DEBUG("R300_CMD_R500FP\n");
1147
ret = r300_emit_r500fp(dev_priv, cmdbuf, *header);
1149
DRM_ERROR("r300_emit_r500fp failed\n");
1154
DRM_ERROR("bad cmd_type %i at byte %d\n",
1155
header->header.cmd_type,
1156
cmdbuf->buffer->iterator - (int)sizeof(*header));
1165
r300_pacify(dev_priv);
1167
/* We emit the vertex buffer age here, outside the pacifier "brackets"
1169
* (1) This may coalesce multiple age emissions into a single one and
1170
* (2) more importantly, some chips lock up hard when scratch registers
1171
* are written inside the pacifier bracket.
1173
if (emit_dispatch_age) {
1176
/* Emit the vertex buffer age */
1178
RADEON_DISPATCH_AGE(master_priv->sarea_priv->last_dispatch);