2
* Copyright © 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
3
* Copyright © 2009 Joakim Sindholt <opensource@zhasha.com>
4
* Copyright © 2011 Marek Olšák <maraeo@gmail.com>
5
* Copyright © 2015 Advanced Micro Devices, Inc.
8
* Permission is hereby granted, free of charge, to any person obtaining
9
* a copy of this software and associated documentation files (the
10
* "Software"), to deal in the Software without restriction, including
11
* without limitation the rights to use, copy, modify, merge, publish,
12
* distribute, sub license, and/or sell copies of the Software, and to
13
* permit persons to whom the Software is furnished to do so, subject to
14
* the following conditions:
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
18
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
20
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23
* USE OR OTHER DEALINGS IN THE SOFTWARE.
25
* The above copyright notice and this permission notice (including the
26
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
30
#include "amdgpu_cs.h"
31
#include "amdgpu_public.h"
33
#include "util/os_file.h"
34
#include "util/os_misc.h"
35
#include "util/u_cpu_detect.h"
36
#include "util/u_hash_table.h"
37
#include "util/hash_table.h"
38
#include "util/xmlconfig.h"
39
#include "drm-uapi/amdgpu_drm.h"
44
#include "ac_llvm_util.h"
47
/* Process-wide table of winsys instances. amdgpu_winsys_create looks entries
 * up by amdgpu device handle and inserts a newly created amdgpu_winsys under
 * that handle; amdgpu_winsys_destroy_locked removes the entry again and
 * destroys the table once it has no entries left. */
static struct hash_table *dev_tab = NULL;
48
/* Taken around the lookups, insertions and removals on dev_tab. */
static simple_mtx_t dev_tab_mutex = _SIMPLE_MTX_INITIALIZER_NP;
51
/* RADEON_ALL_BOS boolean debug option, default false. Presumably this macro
 * defines debug_get_option_all_bos(), which do_winsys_init stores into
 * ws->debug_all_bos - confirm against the macro definition. */
DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false)
54
/* Apply the SI_FORCE_FAMILY debug option.
 *
 * The option value is compared with ac_get_llvm_processor_name() for every
 * chip from CHIP_TAHITI up to, but not including, CHIP_LAST. For a matching
 * chip the winsys info is renamed to "NOOP"/"noop", chip_class is derived
 * from the chip's position in the enum, and RADEON_NOOP=1 is exported into
 * the environment.
 *
 * NOTE(review): debug_get_option is called with a NULL default, so family is
 * presumably NULL when the variable is unset; strcmp must not be reached in
 * that case - confirm the guard.
 */
static void handle_env_var_force_family(struct amdgpu_winsys *ws)
56
const char *family = debug_get_option("SI_FORCE_FAMILY", NULL);
62
for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
63
if (!strcmp(family, ac_get_llvm_processor_name(i))) {
64
/* Override family and chip_class. */
66
ws->info.name = "NOOP";
67
strcpy(ws->info.lowercase_name , "noop");
69
/* Thresholds are tested from the highest enum value downwards, so the
 * first comparison that succeeds selects the chip class. */
if (i >= CHIP_SIENNA_CICHLID)
70
ws->info.chip_class = GFX10_3;
71
else if (i >= CHIP_NAVI10)
72
ws->info.chip_class = GFX10;
73
else if (i >= CHIP_VEGA10)
74
ws->info.chip_class = GFX9;
75
else if (i >= CHIP_TONGA)
76
ws->info.chip_class = GFX8;
77
else if (i >= CHIP_BONAIRE)
78
ws->info.chip_class = GFX7;
80
/* Presumably the final else branch, i.e. chips below CHIP_BONAIRE. */
ws->info.chip_class = GFX6;
82
/* Don't submit any IBs. */
83
setenv("RADEON_NOOP", "1", 1);
88
/* Presumably reached only when no chip name matched - confirm. */
fprintf(stderr, "radeonsi: Unknown family: %s\n", family);
92
/* Helper function to do the ioctls needed for setup and init. */
/* Queries the GPU info into ws->info / ws->amdinfo, applies the
 * SI_FORCE_FAMILY override, creates the address library and reads the debug
 * switches (check_vm, RADEON_NOOP, RADEON_ALL_BOS, reserve_vmid, zerovram)
 * into the winsys.
 *
 * ws     - winsys being initialized; ws->dev is used for the GPU query.
 * config - screen config; its driconf options provide "radeonsi_zerovram".
 *
 * Presumably returns true on success and false on failure - TODO confirm.
 */
93
static bool do_winsys_init(struct amdgpu_winsys *ws,
94
const struct pipe_screen_config *config,
97
if (!ac_query_gpu_info(fd, ws->dev, &ws->info, &ws->amdinfo))
100
/* TODO: Enable this once the kernel handles it efficiently. */
101
if (ws->info.has_dedicated_vram)
102
ws->info.has_local_buffers = false;
104
/* Must run before RADEON_NOOP is read below: a forced family exports
 * RADEON_NOOP=1 through setenv. */
handle_env_var_force_family(ws);
106
ws->addrlib = ac_addrlib_create(&ws->info, &ws->info.max_alignment);
108
fprintf(stderr, "amdgpu: Cannot create addrlib.\n");
112
/* check_vm is enabled by either the R600_DEBUG or the AMD_DEBUG variable. */
ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL ||
113
strstr(debug_get_option("AMD_DEBUG", ""), "check_vm") != NULL;
114
ws->noop_cs = debug_get_bool_option("RADEON_NOOP", false);
116
ws->debug_all_bos = debug_get_option_all_bos();
118
/* A VMID is reserved on explicit request and also whenever AMD_DEBUG
 * contains "sqtt". */
ws->reserve_vmid = strstr(debug_get_option("R600_DEBUG", ""), "reserve_vmid") != NULL ||
119
strstr(debug_get_option("AMD_DEBUG", ""), "reserve_vmid") != NULL ||
120
strstr(debug_get_option("AMD_DEBUG", ""), "sqtt") != NULL;
121
/* Enabled by R600_DEBUG containing "zerovram" or by the driconf option. */
ws->zero_all_vram_allocs = strstr(debug_get_option("R600_DEBUG", ""), "zerovram") != NULL ||
122
driQueryOptionb(config->options, "radeonsi_zerovram");
127
/* Presumably the failure path: the device handle is released - confirm. */
amdgpu_device_deinitialize(ws->dev);
132
/* Release what the winsys owns: the reserved VMID (if one was reserved), the
 * CS queue (if initialized), the locks, the slab allocators that have groups,
 * the BO cache, the BO export table, the address library and finally the
 * amdgpu device handle.
 *
 * NOTE(review): amdgpu_winsys_create initializes bo_slabs_encrypted when
 * has_tmz_support is set, but no matching pb_slabs_deinit is visible here -
 * confirm whether those allocators are released elsewhere.
 */
static void do_winsys_deinit(struct amdgpu_winsys *ws)
134
if (ws->reserve_vmid)
135
amdgpu_vm_unreserve_vmid(ws->dev, 0);
137
/* The queue may not exist yet when init failed part-way. */
if (util_queue_is_initialized(&ws->cs_queue))
138
util_queue_destroy(&ws->cs_queue);
140
simple_mtx_destroy(&ws->bo_fence_lock);
141
for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
142
/* Only allocators whose groups pointer is set are torn down. */
if (ws->bo_slabs[i].groups)
143
pb_slabs_deinit(&ws->bo_slabs[i]);
145
pb_cache_deinit(&ws->bo_cache);
146
_mesa_hash_table_destroy(ws->bo_export_table, NULL);
147
simple_mtx_destroy(&ws->sws_list_lock);
149
simple_mtx_destroy(&ws->global_bo_list_lock);
151
simple_mtx_destroy(&ws->bo_export_table_lock);
153
ac_addrlib_destroy(ws->addrlib);
154
amdgpu_device_deinitialize(ws->dev);
158
/* Drop one reference on the amdgpu_winsys behind rws.
 *
 * pipe_reference reports in `destroy` whether this was the last reference.
 * In that case the device is removed from dev_tab (and the table itself is
 * destroyed once it has no entries), and do_winsys_deinit tears the winsys
 * down - presumably only when destroy is true; confirm.
 *
 * rws    - screen winsys whose parent amdgpu_winsys is unreferenced.
 * locked - presumably tells whether the caller already holds dev_tab_mutex;
 *          confirm how it gates the lock/unlock calls below.
 */
static void amdgpu_winsys_destroy_locked(struct radeon_winsys *rws, bool locked)
160
struct amdgpu_screen_winsys *sws = amdgpu_screen_winsys(rws);
161
struct amdgpu_winsys *ws = sws->aws;
164
/* When the reference counter drops to zero, remove the device pointer
166
* This must happen while the mutex is locked, so that
167
* amdgpu_winsys_create in another thread doesn't get the winsys
168
* from the table when the counter drops to 0.
171
simple_mtx_lock(&dev_tab_mutex);
173
destroy = pipe_reference(&ws->reference, NULL);
174
if (destroy && dev_tab) {
175
_mesa_hash_table_remove_key(dev_tab, ws->dev);
176
if (_mesa_hash_table_num_entries(dev_tab) == 0) {
177
_mesa_hash_table_destroy(dev_tab, NULL);
183
simple_mtx_unlock(&dev_tab_mutex);
186
do_winsys_deinit(ws);
192
/* radeon_winsys destroy hook (installed as ws->base.destroy in
 * amdgpu_winsys_create): forwards to amdgpu_winsys_destroy_locked with
 * locked = false. */
static void amdgpu_winsys_destroy(struct radeon_winsys *rws)
194
amdgpu_winsys_destroy_locked(rws, false);
197
/* radeon_winsys query_info hook. Updates ws->info.smart_access_memory
 * according to the two flags before the info is handed out.
 *
 * rws                         - winsys to query.
 * info                        - output; presumably filled from ws->info -
 *                               TODO confirm.
 * enable_smart_access_memory  - turn the feature on, honored only when all
 *                               VRAM is CPU-visible.
 * disable_smart_access_memory - turn the feature off; checked first, so it
 *                               wins when both flags are set.
 */
static void amdgpu_winsys_query_info(struct radeon_winsys *rws,
198
struct radeon_info *info,
199
bool enable_smart_access_memory,
200
bool disable_smart_access_memory)
202
struct amdgpu_winsys *ws = amdgpu_winsys(rws);
204
if (disable_smart_access_memory)
205
ws->info.smart_access_memory = false;
206
else if (enable_smart_access_memory && ws->info.all_vram_visible)
207
ws->info.smart_access_memory = true;
212
/* radeon_winsys cs_request_feature hook (installed as
 * ws->base.cs_request_feature in amdgpu_winsys_create). */
static bool amdgpu_cs_request_feature(struct radeon_cmdbuf *rcs,
213
enum radeon_feature_id fid,
219
/* radeon_winsys query_value hook: return one statistic selected by `value`.
 *
 * Most values are counters kept in struct amdgpu_winsys. The others are
 * fetched from the kernel through amdgpu_query_info (8-byte results),
 * amdgpu_query_heap_info (heap_usage of the selected heap) or
 * amdgpu_query_sensor_info (4-byte results).
 *
 * NOTE(review): the return codes of the amdgpu_query_* calls are not checked
 * in the visible lines, so `heap` / `retval` keep whatever they held before
 * when a query fails - confirm that this is acceptable.
 */
static uint64_t amdgpu_query_value(struct radeon_winsys *rws,
220
enum radeon_value_id value)
222
struct amdgpu_winsys *ws = amdgpu_winsys(rws);
223
struct amdgpu_heap_info heap;
227
/* Counters maintained by the winsys itself. */
case RADEON_REQUESTED_VRAM_MEMORY:
228
return ws->allocated_vram;
229
case RADEON_REQUESTED_GTT_MEMORY:
230
return ws->allocated_gtt;
231
case RADEON_MAPPED_VRAM:
232
return ws->mapped_vram;
233
case RADEON_MAPPED_GTT:
234
return ws->mapped_gtt;
235
case RADEON_SLAB_WASTED_VRAM:
236
return ws->slab_wasted_vram;
237
case RADEON_SLAB_WASTED_GTT:
238
return ws->slab_wasted_gtt;
239
case RADEON_BUFFER_WAIT_TIME_NS:
240
return ws->buffer_wait_time;
241
case RADEON_NUM_MAPPED_BUFFERS:
242
return ws->num_mapped_buffers;
243
case RADEON_TIMESTAMP:
244
amdgpu_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval);
246
case RADEON_NUM_GFX_IBS:
247
return ws->num_gfx_IBs;
248
case RADEON_NUM_SDMA_IBS:
249
return ws->num_sdma_IBs;
250
case RADEON_GFX_BO_LIST_COUNTER:
251
return ws->gfx_bo_list_counter;
252
case RADEON_GFX_IB_SIZE_COUNTER:
253
return ws->gfx_ib_size_counter;
254
case RADEON_NUM_BYTES_MOVED:
255
amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_BYTES_MOVED, 8, &retval);
257
case RADEON_NUM_EVICTIONS:
258
amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_EVICTIONS, 8, &retval);
260
case RADEON_NUM_VRAM_CPU_PAGE_FAULTS:
261
amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS, 8, &retval);
263
/* Heap usage as reported by the kernel. */
case RADEON_VRAM_USAGE:
264
amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &heap);
265
return heap.heap_usage;
266
case RADEON_VRAM_VIS_USAGE:
267
amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM,
268
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &heap);
269
return heap.heap_usage;
270
case RADEON_GTT_USAGE:
271
amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &heap);
272
return heap.heap_usage;
273
/* Sensor queries write only 4 bytes into retval; NOTE(review): this relies
 * on the remaining bytes of retval being zero - confirm its declaration. */
case RADEON_GPU_TEMPERATURE:
274
amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GPU_TEMP, 4, &retval);
276
case RADEON_CURRENT_SCLK:
277
amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GFX_SCLK, 4, &retval);
279
case RADEON_CURRENT_MCLK:
280
amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GFX_MCLK, 4, &retval);
282
case RADEON_CS_THREAD_TIME:
283
return util_queue_get_thread_time_nano(&ws->cs_queue, 0);
288
/* radeon_winsys read_registers hook: read num_registers register values into
 * `out` through amdgpu_read_mm_registers.
 *
 * reg_offset is divided by 4 before it is passed on - presumably converting
 * a byte offset into a dword index; confirm. The 0xffffffff and 0 arguments
 * are presumably libdrm's instance and flags parameters - TODO confirm.
 *
 * Returns true when amdgpu_read_mm_registers returns 0.
 */
static bool amdgpu_read_registers(struct radeon_winsys *rws,
290
unsigned num_registers, uint32_t *out)
292
struct amdgpu_winsys *ws = amdgpu_winsys(rws);
294
return amdgpu_read_mm_registers(ws->dev, reg_offset / 4, num_registers,
295
0xffffffff, 0, out) == 0;
298
/* radeon_winsys unref hook: drop one reference on the screen winsys.
 *
 * The reference is dropped while aws->sws_list_lock is held; `ret` receives
 * pipe_reference's result. The list walk unlinks sws from aws->sws_list
 * (presumably only when ret is true - confirm). After the lock is released
 * and when ret is true, every handle stored in sws->kms_handles is closed
 * with DRM_IOCTL_GEM_CLOSE on sws->fd and the table is destroyed.
 *
 * NOTE(review): `args` is not zero-initialized and only its handle member is
 * assigned in the visible lines - confirm that any other members of
 * struct drm_gem_close may be left indeterminate.
 *
 * Presumably returns ret - TODO confirm.
 */
static bool amdgpu_winsys_unref(struct radeon_winsys *rws)
300
struct amdgpu_screen_winsys *sws = amdgpu_screen_winsys(rws);
301
struct amdgpu_winsys *aws = sws->aws;
304
simple_mtx_lock(&aws->sws_list_lock);
306
ret = pipe_reference(&sws->reference, NULL);
308
struct amdgpu_screen_winsys **sws_iter;
309
/* NOTE(review): redundant - repeats the aws declaration above with the same
 * initializer and shadows it. */
struct amdgpu_winsys *aws = sws->aws;
311
/* Remove this amdgpu_screen_winsys from amdgpu_winsys' list, so that
312
* amdgpu_winsys_create can't re-use it anymore
314
for (sws_iter = &aws->sws_list; *sws_iter; sws_iter = &(*sws_iter)->next) {
315
if (*sws_iter == sws) {
316
*sws_iter = sws->next;
322
simple_mtx_unlock(&aws->sws_list_lock);
324
if (ret && sws->kms_handles) {
325
struct drm_gem_close args;
327
hash_table_foreach(sws->kms_handles, entry) {
328
args.handle = (uintptr_t)entry->data;
329
drmIoctl(sws->fd, DRM_IOCTL_GEM_CLOSE, &args);
331
_mesa_hash_table_destroy(sws->kms_handles, NULL);
337
/* radeon_winsys pin_threads_to_L3_cache hook: set the affinity of the first
 * CS queue thread (cs_queue.threads[0]) to the CPU mask that
 * util_get_cpu_caps() reports for L3 cache index `cache`. The previous mask
 * is not requested (NULL is passed for it). */
static void amdgpu_pin_threads_to_L3_cache(struct radeon_winsys *rws,
340
struct amdgpu_winsys *ws = amdgpu_winsys(rws);
342
util_set_thread_affinity(ws->cs_queue.threads[0],
343
util_get_cpu_caps()->L3_affinity_mask[cache],
344
NULL, util_get_cpu_caps()->num_cpu_mask_bits);
347
/* Hash callback passed to _mesa_hash_table_create for the per-screen
 * kms_handles table. The key is a struct amdgpu_winsys_bo; its
 * u.real.kms_handle value is used directly as the hash. */
static uint32_t kms_handle_hash(const void *key)
349
const struct amdgpu_winsys_bo *bo = key;
351
return bo->u.real.kms_handle;
354
/* Key-equality callback; presumably passed together with kms_handle_hash
 * when the kms_handles table is created - TODO confirm. */
static bool kms_handle_equals(const void *a, const void *b)
359
/* radeon_winsys cs_is_secure hook: return the `secure` flag of the context
 * that the command stream's csc pointer refers to. */
static bool amdgpu_cs_is_secure(struct radeon_cmdbuf *rcs)
361
struct amdgpu_cs *cs = amdgpu_cs(rcs);
362
return cs->csc->secure;
365
/* Create, or look up and reference, the winsys for a DRM file descriptor.
 *
 * fd            - DRM fd; it is duplicated with os_dupfd_cloexec and the
 *                 duplicate is stored in the screen winsys.
 * config        - passed on to do_winsys_init and to screen_create.
 * screen_create - callback that creates the pipe screen as the last step.
 *
 * Everything from the dev_tab lookup onwards runs with dev_tab_mutex held.
 * amdgpu_device_initialize yields the device handle that serves as the
 * dev_tab key. If an amdgpu_winsys is already registered for it, the extra
 * device handle is released and the existing screen winsys list is searched
 * with os_same_file_description. Otherwise a new amdgpu_winsys is allocated,
 * initialized (BO cache, slab allocators, locks, CS queue) and inserted into
 * dev_tab. Finally the radeon_winsys hooks are installed, the screen winsys
 * is linked into aws->sws_list and the screen is created.
 *
 * Presumably returns the radeon_winsys on success and NULL on failure -
 * TODO confirm.
 */
PUBLIC struct radeon_winsys *
366
amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
367
radeon_screen_create_t screen_create)
369
struct amdgpu_screen_winsys *ws;
370
struct amdgpu_winsys *aws;
371
amdgpu_device_handle dev;
372
uint32_t drm_major, drm_minor;
375
ws = CALLOC_STRUCT(amdgpu_screen_winsys);
379
pipe_reference_init(&ws->reference, 1);
380
ws->fd = os_dupfd_cloexec(fd);
382
/* Look up the winsys from the dev table. */
383
simple_mtx_lock(&dev_tab_mutex);
385
dev_tab = util_hash_table_create_ptr_keys();
387
/* Initialize the amdgpu device. This should always return the same pointer
388
* for the same fd. */
389
r = amdgpu_device_initialize(ws->fd, &drm_major, &drm_minor, &dev);
391
fprintf(stderr, "amdgpu: amdgpu_device_initialize failed.\n");
395
/* Lookup a winsys if we have already created one for this device. */
396
aws = util_hash_table_get(dev_tab, dev);
398
struct amdgpu_screen_winsys *sws_iter;
400
/* Release the device handle, because we don't need it anymore.
401
* This function is returning an existing winsys instance, which
402
* has its own device handle.
404
amdgpu_device_deinitialize(dev);
406
simple_mtx_lock(&aws->sws_list_lock);
407
for (sws_iter = aws->sws_list; sws_iter; sws_iter = sws_iter->next) {
408
r = os_same_file_description(sws_iter->fd, ws->fd);
414
pipe_reference(NULL, &ws->reference);
415
simple_mtx_unlock(&aws->sws_list_lock);
421
os_log_message("amdgpu: os_same_file_description couldn't "
422
"determine if two DRM fds reference the same "
423
"file description.\n"
424
"If they do, bad things may happen!\n");
429
simple_mtx_unlock(&aws->sws_list_lock);
431
ws->kms_handles = _mesa_hash_table_create(NULL, kms_handle_hash,
433
if (!ws->kms_handles)
436
pipe_reference(NULL, &aws->reference);
438
/* Create a new winsys. */
439
aws = CALLOC_STRUCT(amdgpu_winsys);
445
aws->info.drm_major = drm_major;
446
aws->info.drm_minor = drm_minor;
447
aws->dummy_ws.aws = aws; /* only the pointer is used */
449
/* NOTE(review): the caller's fd is passed here, not the duplicate stored in
 * ws->fd - confirm that this is intended. */
if (!do_winsys_init(aws, config, fd))
452
/* Create managers. */
453
pb_cache_init(&aws->bo_cache, RADEON_MAX_CACHED_HEAPS,
454
500000, aws->check_vm ? 1.0f : 2.0f, 0,
455
(aws->info.vram_size + aws->info.gart_size) / 8, aws,
456
/* Cast to void* because one of the function parameters
457
* is a struct pointer instead of void*. */
458
(void*)amdgpu_bo_destroy, (void*)amdgpu_bo_can_reclaim);
460
unsigned min_slab_order = 8; /* 256 bytes */
461
unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */
462
unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) /
465
/* Divide the size order range among slab managers. */
466
for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
467
unsigned min_order = min_slab_order;
468
unsigned max_order = MIN2(min_order + num_slab_orders_per_allocator,
471
if (!pb_slabs_init(&aws->bo_slabs[i],
472
min_order, max_order,
473
RADEON_MAX_SLAB_HEAPS, true,
475
amdgpu_bo_can_reclaim_slab,
476
amdgpu_bo_slab_alloc_normal,
477
/* Cast to void* because one of the function parameters
478
* is a struct pointer instead of void*. */
479
(void*)amdgpu_bo_slab_free)) {
480
/* NOTE(review): dev_tab_mutex is still held at this point (it is released on
 * the next line), while amdgpu_winsys_destroy forwards to
 * amdgpu_winsys_destroy_locked with locked = false, which calls
 * simple_mtx_lock(&dev_tab_mutex) - confirm that this cannot self-deadlock.
 * The screen_create failure path further down passes locked = true instead.
 * That callee also dereferences sws->aws, and no assignment to ws->aws is
 * visible before this point - confirm. The same pattern recurs in the later
 * error paths of this function. */
amdgpu_winsys_destroy(&ws->base);
481
simple_mtx_unlock(&dev_tab_mutex);
485
/* The encrypted slab allocators are set up only with TMZ support. */
if (aws->info.has_tmz_support &&
486
!pb_slabs_init(&aws->bo_slabs_encrypted[i],
487
min_order, max_order,
488
RADEON_MAX_SLAB_HEAPS, true,
490
amdgpu_bo_can_reclaim_slab,
491
amdgpu_bo_slab_alloc_encrypted,
492
/* Cast to void* because one of the function parameters
493
* is a struct pointer instead of void*. */
494
(void*)amdgpu_bo_slab_free)) {
495
amdgpu_winsys_destroy(&ws->base);
496
simple_mtx_unlock(&dev_tab_mutex);
500
/* The next allocator starts right above the largest order of this one. */
min_slab_order = max_order + 1;
503
/* The smallest allocation is the first allocator's minimum order. */
aws->info.min_alloc_size = 1 << aws->bo_slabs[0].min_order;
506
pipe_reference_init(&aws->reference, 1);
508
list_inithead(&aws->global_bo_list);
510
aws->bo_export_table = util_hash_table_create_ptr_keys();
512
(void) simple_mtx_init(&aws->sws_list_lock, mtx_plain);
514
(void) simple_mtx_init(&aws->global_bo_list_lock, mtx_plain);
516
(void) simple_mtx_init(&aws->bo_fence_lock, mtx_plain);
517
(void) simple_mtx_init(&aws->bo_export_table_lock, mtx_plain);
519
if (!util_queue_init(&aws->cs_queue, "cs", 8, 1,
520
UTIL_QUEUE_INIT_RESIZE_IF_FULL, NULL)) {
521
amdgpu_winsys_destroy(&ws->base);
522
simple_mtx_unlock(&dev_tab_mutex);
526
/* Register the new winsys under its device handle. */
_mesa_hash_table_insert(dev_tab, dev, aws);
528
if (aws->reserve_vmid) {
529
r = amdgpu_vm_reserve_vmid(dev, 0);
531
amdgpu_winsys_destroy(&ws->base);
532
simple_mtx_unlock(&dev_tab_mutex);
541
/* Install the radeon_winsys hooks implemented in this file. */
ws->base.unref = amdgpu_winsys_unref;
542
ws->base.destroy = amdgpu_winsys_destroy;
543
ws->base.query_info = amdgpu_winsys_query_info;
544
ws->base.cs_request_feature = amdgpu_cs_request_feature;
545
ws->base.query_value = amdgpu_query_value;
546
ws->base.read_registers = amdgpu_read_registers;
547
ws->base.pin_threads_to_L3_cache = amdgpu_pin_threads_to_L3_cache;
548
ws->base.cs_is_secure = amdgpu_cs_is_secure;
550
amdgpu_bo_init_functions(ws);
551
amdgpu_cs_init_functions(ws);
552
amdgpu_surface_init_functions(ws);
554
/* Link this screen winsys at the head of the parent's list. */
simple_mtx_lock(&aws->sws_list_lock);
555
ws->next = aws->sws_list;
557
simple_mtx_unlock(&aws->sws_list_lock);
559
/* Create the screen at the end. The winsys must be initialized
562
* Alternatively, we could create the screen based on "ws->gen"
563
* and link all drivers into one binary blob. */
564
ws->base.screen = screen_create(&ws->base, config);
565
if (!ws->base.screen) {
566
amdgpu_winsys_destroy_locked(&ws->base, true);
567
simple_mtx_unlock(&dev_tab_mutex);
572
/* We must unlock the mutex once the winsys is fully initialized, so that
573
* other threads attempting to create the winsys from the same fd will
574
* get a fully initialized winsys and not just half-way initialized. */
575
simple_mtx_unlock(&dev_tab_mutex);
583
_mesa_hash_table_destroy(ws->kms_handles, NULL);
586
simple_mtx_unlock(&dev_tab_mutex);