/**************************************************************************

Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/
/**
 * \file
 *
 * \author Keith Whitwell <keith@tungstengraphics.com>
 */
43
#include "swrast_setup/swrast_setup.h"
44
#include "math/m_translate.h"
46
#include "tnl/t_context.h"
48
#include "r300_context.h"
49
#include "radeon_ioctl.h"
50
#include "r300_state.h"
51
#include "r300_emit.h"
52
#include "r300_ioctl.h"
/* The driver reuses Mesa's SWIZZLE_* values directly as the hardware's
 * INPUT_ROUTE select codes (see t_swizzle() below), so the two sets of
 * constants must stay numerically identical.  Fail the build if they drift.
 */
#if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
    SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
    SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \
    SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \
    SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \
    SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE
#error Cannot change these!
#endif

/* Debug-category alias used by this file's verbose vertex tracing. */
#define DEBUG_ALL DEBUG_VERTS
/* Copy `nr` 32-bit words from `src` to `dst`, advancing `dst` past the
 * copied region as a side effect (callers rely on this).
 * On x86 the copy is a single `rep movsl`; elsewhere a plain loop.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )		\
do {						\
	int j;					\
	for ( j = 0 ; j < nr ; j++ )		\
		dst[j] = ((int *)src)[j];	\
	dst += nr;				\
} while (0)
#endif
89
static void r300EmitVec4(GLcontext * ctx,
90
struct r300_dma_region *rvb,
91
GLvoid * data, int stride, int count)
94
int *out = (int *)(rvb->address + rvb->start);
96
if (RADEON_DEBUG & DEBUG_VERTS)
97
fprintf(stderr, "%s count %d stride %d\n",
98
__FUNCTION__, count, stride);
101
COPY_DWORDS(out, data, count);
103
for (i = 0; i < count; i++) {
104
out[0] = *(int *)data;
110
static void r300EmitVec8(GLcontext * ctx,
111
struct r300_dma_region *rvb,
112
GLvoid * data, int stride, int count)
115
int *out = (int *)(rvb->address + rvb->start);
117
if (RADEON_DEBUG & DEBUG_VERTS)
118
fprintf(stderr, "%s count %d stride %d\n",
119
__FUNCTION__, count, stride);
122
COPY_DWORDS(out, data, count * 2);
124
for (i = 0; i < count; i++) {
125
out[0] = *(int *)data;
126
out[1] = *(int *)(data + 4);
132
static void r300EmitVec12(GLcontext * ctx,
133
struct r300_dma_region *rvb,
134
GLvoid * data, int stride, int count)
137
int *out = (int *)(rvb->address + rvb->start);
139
if (RADEON_DEBUG & DEBUG_VERTS)
140
fprintf(stderr, "%s count %d stride %d out %p data %p\n",
141
__FUNCTION__, count, stride, (void *)out, (void *)data);
144
COPY_DWORDS(out, data, count * 3);
146
for (i = 0; i < count; i++) {
147
out[0] = *(int *)data;
148
out[1] = *(int *)(data + 4);
149
out[2] = *(int *)(data + 8);
155
static void r300EmitVec16(GLcontext * ctx,
156
struct r300_dma_region *rvb,
157
GLvoid * data, int stride, int count)
160
int *out = (int *)(rvb->address + rvb->start);
162
if (RADEON_DEBUG & DEBUG_VERTS)
163
fprintf(stderr, "%s count %d stride %d\n",
164
__FUNCTION__, count, stride);
167
COPY_DWORDS(out, data, count * 4);
169
for (i = 0; i < count; i++) {
170
out[0] = *(int *)data;
171
out[1] = *(int *)(data + 4);
172
out[2] = *(int *)(data + 8);
173
out[3] = *(int *)(data + 12);
179
static void r300EmitVec(GLcontext * ctx,
180
struct r300_dma_region *rvb,
181
GLvoid * data, int size, int stride, int count)
183
r300ContextPtr rmesa = R300_CONTEXT(ctx);
185
if (RADEON_DEBUG & DEBUG_VERTS)
186
fprintf(stderr, "%s count %d size %d stride %d\n",
187
__FUNCTION__, count, size, stride);
189
/* Gets triggered when playing with future_hw_tcl_on ... */
193
r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
195
rvb->aos_offset = GET_START(rvb);
198
r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4); /* alignment? */
199
rvb->aos_offset = GET_START(rvb);
200
rvb->aos_stride = size;
207
r300EmitVec4(ctx, rvb, data, stride, count);
210
r300EmitVec8(ctx, rvb, data, stride, count);
213
r300EmitVec12(ctx, rvb, data, stride, count);
216
r300EmitVec16(ctx, rvb, data, stride, count);
226
static GLuint t_type(struct dt *dt)
229
case GL_UNSIGNED_BYTE:
230
return AOS_FORMAT_UBYTE;
232
return AOS_FORMAT_USHORT;
234
return AOS_FORMAT_FLOAT;
240
return AOS_FORMAT_FLOAT;
243
static GLuint t_vir0_size(struct dt *dt)
246
case GL_UNSIGNED_BYTE:
260
static GLuint t_aos_size(struct dt *dt)
263
case GL_UNSIGNED_BYTE:
277
static GLuint t_vir0(uint32_t * dst, struct dt *dt, int *inputs,
278
GLint * tab, GLuint nr)
282
for (i = 0; i + 1 < nr; i += 2) {
283
dw = t_vir0_size(&dt[tab[i]]) | (inputs[tab[i]] << 8) |
284
(t_type(&dt[tab[i]]) << 14);
286
(t_vir0_size(&dt[tab[i + 1]]) |
287
(inputs[tab[i + 1]] << 8) | (t_type(&dt[tab[i + 1]])
291
dw |= (1 << (13 + 16));
297
dw = t_vir0_size(&dt[tab[nr - 1]]) | (inputs[tab[nr - 1]]
299
(t_type(&dt[tab[nr - 1]]) << 14);
305
return (nr + 1) >> 1;
308
static GLuint t_swizzle(int swizzle[4])
310
return (swizzle[0] << R300_INPUT_ROUTE_X_SHIFT) |
311
(swizzle[1] << R300_INPUT_ROUTE_Y_SHIFT) |
312
(swizzle[2] << R300_INPUT_ROUTE_Z_SHIFT) |
313
(swizzle[3] << R300_INPUT_ROUTE_W_SHIFT);
316
static GLuint t_vir1(uint32_t * dst, int swizzle[][4], GLuint nr)
320
for (i = 0; i + 1 < nr; i += 2) {
321
dst[i >> 1] = t_swizzle(swizzle[i]) | R300_INPUT_ROUTE_ENABLE;
323
(t_swizzle(swizzle[i + 1]) | R300_INPUT_ROUTE_ENABLE)
329
t_swizzle(swizzle[nr - 1]) | R300_INPUT_ROUTE_ENABLE;
331
return (nr + 1) >> 1;
334
static GLuint t_emit_size(struct dt *dt)
339
static GLuint t_vic(GLcontext * ctx, GLuint InputsRead)
341
r300ContextPtr r300 = R300_CONTEXT(ctx);
344
if (InputsRead & (1 << VERT_ATTRIB_POS))
345
vic_1 |= R300_INPUT_CNTL_POS;
347
if (InputsRead & (1 << VERT_ATTRIB_NORMAL))
348
vic_1 |= R300_INPUT_CNTL_NORMAL;
350
if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
351
vic_1 |= R300_INPUT_CNTL_COLOR;
353
r300->state.texture.tc_count = 0;
354
for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
355
if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) {
356
r300->state.texture.tc_count++;
357
vic_1 |= R300_INPUT_CNTL_TC0 << i;
/* Emit vertex data to GART memory
 * Route inputs to the vertex processor
 * This function should never return R300_FALLBACK_TCL when using software tcl.
 */
368
int r300EmitArrays(GLcontext * ctx)
370
r300ContextPtr rmesa = R300_CONTEXT(ctx);
371
r300ContextPtr r300 = rmesa;
372
struct radeon_vertex_buffer *VB = &rmesa->state.VB;
374
GLuint count = VB->Count;
376
GLuint InputsRead = 0, OutputsWritten = 0;
378
int vir_inputs[VERT_ATTRIB_MAX];
379
GLint tab[VERT_ATTRIB_MAX];
380
int swizzle[VERT_ATTRIB_MAX][4];
383
struct r300_vertex_program *prog =
384
(struct r300_vertex_program *)
385
CURRENT_VERTEX_SHADER(ctx);
386
inputs = prog->inputs;
387
InputsRead = CURRENT_VERTEX_SHADER(ctx)->key.InputsRead;
388
OutputsWritten = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
390
DECLARE_RENDERINPUTS(inputs_bitset);
391
inputs = r300->state.sw_tcl_inputs;
393
RENDERINPUTS_COPY(inputs_bitset,
394
TNL_CONTEXT(ctx)->render_inputs_bitset);
396
assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_POS));
397
InputsRead |= 1 << VERT_ATTRIB_POS;
398
OutputsWritten |= 1 << VERT_RESULT_HPOS;
400
assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_NORMAL)
403
assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_COLOR0));
404
InputsRead |= 1 << VERT_ATTRIB_COLOR0;
405
OutputsWritten |= 1 << VERT_RESULT_COL0;
407
if (RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_COLOR1)) {
408
InputsRead |= 1 << VERT_ATTRIB_COLOR1;
409
OutputsWritten |= 1 << VERT_RESULT_COL1;
412
for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
413
if (RENDERINPUTS_TEST
414
(inputs_bitset, _TNL_ATTRIB_TEX(i))) {
415
InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
416
OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
419
for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++)
420
if (InputsRead & (1 << i))
426
(r300->radeon.radeonScreen->
427
chip_flags & RADEON_CHIPSET_TCL)) {
428
/* Fixed, apply to vir0 only */
429
memcpy(vir_inputs, inputs,
430
VERT_ATTRIB_MAX * sizeof(int));
433
if (InputsRead & VERT_ATTRIB_POS)
434
inputs[VERT_ATTRIB_POS] = 0;
436
if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
437
inputs[VERT_ATTRIB_COLOR0] = 2;
439
if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
440
inputs[VERT_ATTRIB_COLOR1] = 3;
442
for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
443
if (InputsRead & (1 << i))
444
inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
447
RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset,
451
assert(OutputsWritten);
453
for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++)
454
if (InputsRead & (1 << i))
457
if (nr > R300_MAX_AOS_ARRAYS)
458
return R300_FALLBACK_TCL;
460
for (i = 0; i < nr; i++) {
462
int comp_size, fix, found = 0;
464
swizzle[i][0] = SWIZZLE_ZERO;
465
swizzle[i][1] = SWIZZLE_ZERO;
466
swizzle[i][2] = SWIZZLE_ZERO;
467
swizzle[i][3] = SWIZZLE_ONE;
469
for (ci = 0; ci < VB->AttribPtr[tab[i]].size; ci++)
473
#define SWAP_INT(a, b) do { \
480
if (VB->AttribPtr[tab[i]].type == GL_UNSIGNED_BYTE) {
481
SWAP_INT(swizzle[i][0], swizzle[i][3]);
482
SWAP_INT(swizzle[i][1], swizzle[i][2]);
484
#endif /* MESA_BIG_ENDIAN */
486
if (r300IsGartMemory(rmesa, VB->AttribPtr[tab[i]].data,
487
/*(count-1)*stride */ 4)) {
488
if (VB->AttribPtr[tab[i]].stride % 4)
489
return R300_FALLBACK_TCL;
491
rmesa->state.aos[i].address =
492
VB->AttribPtr[tab[i]].data;
493
rmesa->state.aos[i].start = 0;
494
rmesa->state.aos[i].aos_offset =
495
r300GartOffsetFromVirtual(rmesa,
497
AttribPtr[tab[i]].data);
498
rmesa->state.aos[i].aos_stride =
499
VB->AttribPtr[tab[i]].stride / 4;
501
rmesa->state.aos[i].aos_size =
502
t_emit_size(&VB->AttribPtr[tab[i]]);
504
/* TODO: r300EmitVec can only handle 4 byte vectors */
505
if (VB->AttribPtr[tab[i]].type != GL_FLOAT)
506
return R300_FALLBACK_TCL;
508
r300EmitVec(ctx, &rmesa->state.aos[i],
509
VB->AttribPtr[tab[i]].data,
510
t_emit_size(&VB->AttribPtr[tab[i]]),
511
VB->AttribPtr[tab[i]].stride, count);
514
rmesa->state.aos[i].aos_size =
515
t_aos_size(&VB->AttribPtr[tab[i]]);
517
comp_size = _mesa_sizeof_type(VB->AttribPtr[tab[i]].type);
519
for (fix = 0; fix <= 4 - VB->AttribPtr[tab[i]].size; fix++) {
520
if ((rmesa->state.aos[i].aos_offset -
521
comp_size * fix) % 4)
530
WARN_ONCE("Feeling lucky?\n");
533
rmesa->state.aos[i].aos_offset -= comp_size * fix;
535
for (ci = 0; ci < VB->AttribPtr[tab[i]].size; ci++)
536
swizzle[i][ci] += fix;
539
("Cannot handle offset %x with stride %d, comp %d\n",
540
rmesa->state.aos[i].aos_offset,
541
rmesa->state.aos[i].aos_stride,
542
VB->AttribPtr[tab[i]].size);
543
return R300_FALLBACK_TCL;
547
/* setup INPUT_ROUTE */
548
R300_STATECHANGE(r300, vir[0]);
549
((drm_r300_cmd_header_t *) r300->hw.vir[0].cmd)->packet0.count =
550
t_vir0(&r300->hw.vir[0].cmd[R300_VIR_CNTL_0], VB->AttribPtr,
553
R300_STATECHANGE(r300, vir[1]);
554
((drm_r300_cmd_header_t *) r300->hw.vir[1].cmd)->packet0.count =
555
t_vir1(&r300->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, nr);
557
/* Set up input_cntl */
558
/* I don't think this is needed for vertex buffers, but it doesn't hurt anything */
559
R300_STATECHANGE(r300, vic);
560
r300->hw.vic.cmd[R300_VIC_CNTL_0] = 0x5555; /* Hard coded value, no idea what it means */
561
r300->hw.vic.cmd[R300_VIC_CNTL_1] = t_vic(ctx, InputsRead);
563
/* Stage 3: VAP output */
565
R300_STATECHANGE(r300, vof);
567
r300->hw.vof.cmd[R300_VOF_CNTL_0] = 0;
568
r300->hw.vof.cmd[R300_VOF_CNTL_1] = 0;
570
if (OutputsWritten & (1 << VERT_RESULT_HPOS))
571
r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
572
R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
574
if (OutputsWritten & (1 << VERT_RESULT_COL0))
575
r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
576
R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT;
578
if (OutputsWritten & (1 << VERT_RESULT_COL1))
579
r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
580
R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
582
/*if(OutputsWritten & (1 << VERT_RESULT_BFC0))
583
r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT;
585
if(OutputsWritten & (1 << VERT_RESULT_BFC1))
586
r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; */
587
//if(OutputsWritten & (1 << VERT_RESULT_FOGC))
589
if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
590
r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
591
R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
593
for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
594
if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i)))
595
r300->hw.vof.cmd[R300_VOF_CNTL_1] |= (4 << (3 * i));
597
rmesa->state.aos_count = nr;
599
return R300_FALLBACK_NONE;
603
void r300UseArrays(GLcontext * ctx)
605
r300ContextPtr rmesa = R300_CONTEXT(ctx);
608
if (rmesa->state.elt_dma.buf)
609
r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id);
611
for (i = 0; i < rmesa->state.aos_count; i++) {
612
if (rmesa->state.aos[i].buf)
613
r300_mem_use(rmesa, rmesa->state.aos[i].buf->id);
618
void r300ReleaseArrays(GLcontext * ctx)
620
r300ContextPtr rmesa = R300_CONTEXT(ctx);
623
r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__);
624
for (i = 0; i < rmesa->state.aos_count; i++) {
625
r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);