1
/**************************************************************************
3
Copyright (C) 2004 Nicolai Haehnle.
4
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
6
The Weather Channel (TM) funded Tungsten Graphics to develop the
7
initial release of the Radeon 8500 driver under the XFree86 license.
8
This notice must be preserved.
12
Permission is hereby granted, free of charge, to any person obtaining a
13
copy of this software and associated documentation files (the "Software"),
14
to deal in the Software without restriction, including without limitation
15
on the rights to use, copy, modify, merge, publish, distribute, sub
16
license, and/or sell copies of the Software, and to permit persons to whom
17
the Software is furnished to do so, subject to the following conditions:
19
The above copyright notice and this permission notice (including the next
20
paragraph) shall be included in all copies or substantial portions of the
23
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
26
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
27
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
28
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
29
USE OR OTHER DEALINGS IN THE SOFTWARE.
31
**************************************************************************/
34
#include "radeon_common.h"
35
#include "main/simple_list.h"
37
#if defined(USE_X86_ASM)
/* Bulk-copy `nr` 32-bit words using the x86 string-move instruction.
 * Side effect: advances `dst` past the copied words (it is an output
 * operand of the asm).  `src` and the count temporaries are consumed. */
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
/* Portable fallback: copy `nr` dwords, then advance `dst` past them to
 * match the pointer-advance side effect of the asm version above. */
#define COPY_DWORDS( dst, src, nr )		\
do {						\
	int j;					\
	for ( j = 0 ; j < nr ; j++ )		\
		dst[j] = ((int *)src)[j];	\
	dst += nr;				\
} while (0)
#endif
57
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
61
if (RADEON_DEBUG & RADEON_VERTS)
62
fprintf(stderr, "%s count %d stride %d out %p data %p\n",
63
__FUNCTION__, count, stride, (void *)out, (void *)data);
66
COPY_DWORDS(out, data, count);
68
for (i = 0; i < count; i++) {
69
out[0] = *(int *)data;
75
void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
79
if (RADEON_DEBUG & RADEON_VERTS)
80
fprintf(stderr, "%s count %d stride %d out %p data %p\n",
81
__FUNCTION__, count, stride, (void *)out, (void *)data);
84
COPY_DWORDS(out, data, count * 2);
86
for (i = 0; i < count; i++) {
87
out[0] = *(int *)data;
88
out[1] = *(int *)(data + 4);
94
void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
98
if (RADEON_DEBUG & RADEON_VERTS)
99
fprintf(stderr, "%s count %d stride %d out %p data %p\n",
100
__FUNCTION__, count, stride, (void *)out, (void *)data);
103
COPY_DWORDS(out, data, count * 3);
106
for (i = 0; i < count; i++) {
107
out[0] = *(int *)data;
108
out[1] = *(int *)(data + 4);
109
out[2] = *(int *)(data + 8);
115
void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
119
if (RADEON_DEBUG & RADEON_VERTS)
120
fprintf(stderr, "%s count %d stride %d out %p data %p\n",
121
__FUNCTION__, count, stride, (void *)out, (void *)data);
124
COPY_DWORDS(out, data, count * 4);
126
for (i = 0; i < count; i++) {
127
out[0] = *(int *)data;
128
out[1] = *(int *)(data + 4);
129
out[2] = *(int *)(data + 8);
130
out[3] = *(int *)(data + 12);
136
void rcommon_emit_vector(struct gl_context * ctx, struct radeon_aos *aos,
137
const GLvoid * data, int size, int stride, int count)
139
radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
143
radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
147
radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
151
aos->components = size;
154
radeon_bo_map(aos->bo, 1);
155
out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
157
case 1: radeonEmitVec4(out, data, stride, count); break;
158
case 2: radeonEmitVec8(out, data, stride, count); break;
159
case 3: radeonEmitVec12(out, data, stride, count); break;
160
case 4: radeonEmitVec16(out, data, stride, count); break;
165
radeon_bo_unmap(aos->bo);
168
/**
 * Initialize the context's DMA buffer bookkeeping: three empty BO lists
 * (free / wait / reserved) and the default minimum allocation size.
 */
void radeon_init_dma(radeonContextPtr rmesa)
{
	make_empty_list(&rmesa->dma.free);
	make_empty_list(&rmesa->dma.wait);
	make_empty_list(&rmesa->dma.reserved);
	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}
/**
 * Make a DMA buffer of at least `size` bytes current (head of the
 * reserved list), either by reusing a large-enough buffer from the free
 * list or by opening a new BO.  Resets the current-used/vertex pointers
 * and maps the new current buffer.
 */
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	struct radeon_dma_bo *dma_bo = NULL;
	/* we set minimum sizes to at least requested size
	   aligned to next 16 bytes. */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);

	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %Zi\n",
			__FUNCTION__, size, rmesa->dma.minimum_size);

	if (is_empty_list(&rmesa->dma.free)
	      || last_elem(&rmesa->dma.free)->bo->size < size) {
		dma_bo = CALLOC_STRUCT(radeon_dma_bo);
		assert(dma_bo);

again_alloc:
		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
					    0, rmesa->dma.minimum_size, 4,
					    RADEON_GEM_DOMAIN_GTT, 0);

		if (!dma_bo->bo) {
			/* BO space exhausted: flush pending commands and retry */
			rcommonFlushCmdBuf(rmesa, __FUNCTION__);
			goto again_alloc;
		}
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	} else {
		/* We push and pop buffers from end of list so we can keep
		   counter on unused buffers for later freeing them from
		   begin of list */
		dma_bo = last_elem(&rmesa->dma.free);
		remove_from_list(dma_bo);
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	}

	rmesa->dma.current_used = 0;
	rmesa->dma.current_vertexptr = 0;

	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
					  first_elem(&rmesa->dma.reserved)->bo,
					  RADEON_GEM_DOMAIN_GTT, 0))
		fprintf(stderr,"failure to revalidate BOs - badness\n");

	if (is_empty_list(&rmesa->dma.reserved)) {
		/* Cmd buff have been flushed in radeon_revalidate_bos */
		goto again_alloc;
	}
	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}
/* Allocates a region from rmesa->dma.current. If there isn't enough
227
* space in current, grab a new buffer (and discard what was left of current)
229
void radeonAllocDmaRegion(radeonContextPtr rmesa,
230
struct radeon_bo **pbo, int *poffset,
231
int bytes, int alignment)
233
if (RADEON_DEBUG & RADEON_IOCTL)
234
fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
236
if (rmesa->dma.flush)
237
rmesa->dma.flush(rmesa->glCtx);
239
assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);
242
rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;
244
if (is_empty_list(&rmesa->dma.reserved)
245
|| rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
246
radeonRefillCurrentDmaRegion(rmesa, bytes);
248
*poffset = rmesa->dma.current_used;
249
*pbo = first_elem(&rmesa->dma.reserved)->bo;
252
/* Always align to at least 16 bytes */
253
rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
254
rmesa->dma.current_vertexptr = rmesa->dma.current_used;
256
assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
259
/**
 * Release every DMA buffer object on the free, wait and reserved lists,
 * dropping the BO references and freeing the list nodes.  Called at
 * context teardown.
 */
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	if (RADEON_DEBUG & RADEON_DMA)
		fprintf(stderr, "%s\n", __FUNCTION__);

	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}
/**
 * Give back the last `return_bytes` bytes of the current DMA region so
 * they can be reused by the next allocation.  No-op when nothing is
 * currently reserved.
 */
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	if (is_empty_list(&rmesa->dma.reserved))
		return;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
	rmesa->dma.current_used -= return_bytes;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}
/**
 * Return non-zero when the GPU is done with `bo`.
 * Kernels/libdrm without busy-query support report -EINVAL; we warn once
 * and treat the buffer as idle (only -EBUSY means "still in use").
 */
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
	uint32_t domain;
	int ret = radeon_bo_is_busy(bo, &domain);
	if (ret == -EINVAL) {
		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
			"This may cause small performance drop for you.\n");
	}
	return ret != -EBUSY;
}
/**
 * Age the DMA buffer lists at end-of-frame:
 *  - wait list -> free list once the GPU is idle on a buffer,
 *  - reserved list -> wait list (unmapping each buffer),
 *  - buffers unused for DMA_BO_FREE_TIME ticks, or smaller than the
 *    current minimum_size, are destroyed outright.
 */
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int time = rmesa->dma.free.expire_counter;

	if (RADEON_DEBUG & RADEON_DMA) {
		size_t free = 0,
		       wait = 0,
		       reserved = 0;
		foreach(dma_bo, &rmesa->dma.free)
			++free;

		foreach(dma_bo, &rmesa->dma.wait)
			++wait;

		foreach(dma_bo, &rmesa->dma.reserved)
			++reserved;

		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
		      __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
	}

	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
		/* request updated cs processing information from kernel */
		legacy_track_pending(rmesa->radeonScreen->bom, 0);
	}

	/* move waiting bos to free list.
	   wait list provides gpu time to handle data before reuse */
	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		if (dma_bo->expire_counter == time) {
			WARN_ONCE("Leaking dma buffer object!\n");
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		/* free objects that are too small to be used because of large request */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}

		if (!radeon_bo_is_idle(dma_bo->bo)) {
			/* under DRI2 the wait list is FIFO w.r.t. GPU progress,
			   so everything after a busy buffer is busy too */
			if (rmesa->radeonScreen->driScreen->dri2.enabled)
				break;
			continue;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, dma_bo);
	}

	/* move reserved to wait list */
	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		radeon_bo_unmap(dma_bo->bo);
		/* free objects that are too small to be used because of large request */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.wait, dma_bo);
	}

	/* free bos that have been unused for some time */
	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		/* list is ordered oldest-first; stop at the first young buffer */
		if (dma_bo->expire_counter != time)
			break;
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}
/* Flush vertices in the current dma region.
392
void rcommon_flush_last_swtcl_prim( struct gl_context *ctx )
394
radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
395
struct radeon_dma *dma = &rmesa->dma;
397
if (RADEON_DEBUG & RADEON_IOCTL)
398
fprintf(stderr, "%s\n", __FUNCTION__);
401
radeon_bo_unmap(rmesa->swtcl.bo);
403
if (!is_empty_list(&dma->reserved)) {
404
GLuint current_offset = dma->current_used;
406
assert (dma->current_used +
407
rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
408
dma->current_vertexptr);
410
if (dma->current_used != dma->current_vertexptr) {
411
dma->current_used = dma->current_vertexptr;
413
rmesa->vtbl.swtcl_flush(ctx, current_offset);
415
rmesa->swtcl.numverts = 0;
417
radeon_bo_unref(rmesa->swtcl.bo);
418
rmesa->swtcl.bo = NULL;
420
/* Alloc space in the current dma region.
423
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
425
GLuint bytes = vsize * nverts;
427
if (RADEON_DEBUG & RADEON_IOCTL)
428
fprintf(stderr, "%s\n", __FUNCTION__);
430
if(is_empty_list(&rmesa->dma.reserved)
431
||rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
432
if (rmesa->dma.flush) {
433
rmesa->dma.flush(rmesa->glCtx);
436
radeonRefillCurrentDmaRegion(rmesa, bytes);
441
if (!rmesa->dma.flush) {
442
/* if cmdbuf flushed DMA restart */
443
rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
444
rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
447
ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
448
ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
449
ASSERT( rmesa->dma.current_used +
450
rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
451
rmesa->dma.current_vertexptr );
453
if (!rmesa->swtcl.bo) {
454
rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
455
radeon_bo_ref(rmesa->swtcl.bo);
456
radeon_bo_map(rmesa->swtcl.bo, 1);
459
head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
460
rmesa->dma.current_vertexptr += bytes;
461
rmesa->swtcl.numverts += nverts;
465
void radeonReleaseArrays( struct gl_context *ctx, GLuint newinputs )
467
radeonContextPtr radeon = RADEON_CONTEXT( ctx );
469
if (RADEON_DEBUG & RADEON_IOCTL)
470
fprintf(stderr, "%s\n", __FUNCTION__);
472
if (radeon->dma.flush) {
473
radeon->dma.flush(radeon->glCtx);
475
for (i = 0; i < radeon->tcl.aos_count; i++) {
476
if (radeon->tcl.aos[i].bo) {
477
radeon_bo_unref(radeon->tcl.aos[i].bo);
478
radeon->tcl.aos[i].bo = NULL;