-tu_drm_get_param(const struct tu_physical_device *dev,
+tu_drm_get_param(int fd, uint32_t param, uint64_t *value)
    /* Technically this requires a pipe, but the kernel only supports one pipe
     * anyway at the time of writing and most of these are clearly pipe
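The rest of the helper's body falls outside this hunk. A minimal sketch of what the fd-based version presumably looks like, using the standard msm GET_PARAM ioctl through libdrm; only the signature above comes from the diff, the body here is an assumption:

static int
tu_drm_get_param(int fd, uint32_t param, uint64_t *value)
{
   /* Sketch: fetch one MSM_PARAM_* value from the kernel. */
   struct drm_msm_param req = {
      .pipe = MSM_PIPE_3D0,
      .param = param,
   };

   int ret = drmCommandWriteRead(fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
   if (ret)
      return ret;

   *value = req.value;
   return 0;
}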
                    uint64_t *va_start, uint64_t *va_size)
-   int ret = tu_drm_get_param(dev, MSM_PARAM_VA_START, &value);
+   int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_VA_START, &value);
    *va_start = value;
-   ret = tu_drm_get_param(dev, MSM_PARAM_VA_SIZE, &value);
+   ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_VA_SIZE, &value);

+msm_device_init(struct tu_device *dev)
+   int fd = open(dev->physical_device->fd_path, O_RDWR | O_CLOEXEC);
+      return vk_startup_errorf(
+         dev->physical_device->instance, VK_ERROR_INITIALIZATION_FAILED,
+         "failed to open device %s", dev->physical_device->fd_path);
+   int ret = tu_drm_get_param(fd, MSM_PARAM_FAULTS, &dev->fault_count);
+      return vk_startup_errorf(dev->physical_device->instance,
+                               VK_ERROR_INITIALIZATION_FAILED,
+                               "Failed to get initial fault count: %d", ret);

+msm_device_finish(struct tu_device *dev)
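The body of the finish hook is not part of the excerpt; since the init hook above opens a device-local fd, its counterpart presumably just closes it again. A sketch under that assumption:

static void
msm_device_finish(struct tu_device *dev)
{
   /* Assumed: release the per-device fd opened in msm_device_init(). */
   close(dev->fd);
}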
 msm_device_get_gpu_timestamp(struct tu_device *dev, uint64_t *ts)
-   return tu_drm_get_param(dev->physical_device, MSM_PARAM_TIMESTAMP, ts);
+   return tu_drm_get_param(dev->fd, MSM_PARAM_TIMESTAMP, ts);

 msm_device_get_suspend_count(struct tu_device *dev, uint64_t *suspend_count)
-   int ret = tu_drm_get_param(dev->physical_device, MSM_PARAM_SUSPENDS, suspend_count);
+   int ret = tu_drm_get_param(dev->fd, MSM_PARAM_SUSPENDS, suspend_count);

 msm_device_check_status(struct tu_device *device)
-   struct tu_physical_device *physical_device = device->physical_device;
-   uint64_t last_fault_count = physical_device->fault_count;
-   int ret = tu_drm_get_param(physical_device, MSM_PARAM_FAULTS, &physical_device->fault_count);
+   uint64_t last_fault_count = device->fault_count;
+   int ret = tu_drm_get_param(device->fd, MSM_PARAM_FAULTS, &device->fault_count);
       return vk_device_set_lost(&device->vk, "error getting GPU fault count: %d", ret);
-   if (last_fault_count != physical_device->fault_count)
+   if (last_fault_count != device->fault_count)
       return vk_device_set_lost(&device->vk, "GPU faulted or hung");
    return VK_SUCCESS;

                   VkDeviceSize size,
                   enum tu_mem_sync_op op);

+get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
+   clock_gettime(CLOCK_MONOTONIC, &t);
+   tv->tv_sec = t.tv_sec + ns / 1000000000;
+   tv->tv_nsec = t.tv_nsec + ns % 1000000000;

+tu_wait_fence(struct tu_device *dev,
+   /* fence was created when no work was yet submitted */
+   struct drm_msm_wait_fence req = {
+   get_abs_timeout(&req.timeout, timeout_ns);
+   ret = drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req));
+   if (ret == -ETIMEDOUT) {
+      mesa_loge("tu_wait_fence failed! %d (%s)", ret, strerror(errno));
+      return VK_ERROR_UNKNOWN;
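Only fragments of the new tu_wait_fence() helper appear above; a sketch of how they presumably fit together. The early-out condition, the initializer fields, and the timed-out mapping are assumptions (the initializer fields are mirrored from the msm_device_wait_u_trace() body removed further down):

static VkResult
tu_wait_fence(struct tu_device *dev, uint32_t queue_id, int fence,
              uint64_t timeout_ns)
{
   /* fence was created when no work was yet submitted */
   if (fence < 0)
      return VK_SUCCESS;

   struct drm_msm_wait_fence req = {
      .fence = fence,
      .queueid = queue_id,
   };

   get_abs_timeout(&req.timeout, timeout_ns);

   int ret = drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req));
   if (ret == -ETIMEDOUT)
      return VK_TIMEOUT;   /* assumed mapping of a timed-out wait */
   if (ret) {
      mesa_loge("tu_wait_fence failed! %d (%s)", ret, strerror(errno));
      return VK_ERROR_UNKNOWN;
   }
   return VK_SUCCESS;
}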
+tu_free_zombie_vma_locked(struct tu_device *dev, bool wait)
+   if (!u_vector_length(&dev->zombie_vmas))
+      struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
+         u_vector_head(&dev->zombie_vmas);
+      /* Wait for 3s (arbitrary timeout) */
+      VkResult ret = tu_wait_fence(dev, dev->queues[0]->msm_queue_id,
+                                   vma->fence, 3000000000);
+      if (ret != VK_SUCCESS)
+   int last_signaled_fence = -1;
+   while (u_vector_length(&dev->zombie_vmas) > 0) {
+      struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
+         u_vector_tail(&dev->zombie_vmas);
+      if (vma->fence > last_signaled_fence) {
+            tu_wait_fence(dev, dev->queues[0]->msm_queue_id, vma->fence, 0);
+         if (ret != VK_SUCCESS)
+         last_signaled_fence = vma->fence;
+      /* Ensure that internal kernel's vma is freed. */
+      struct drm_msm_gem_info req = {
+         .handle = vma->gem_handle,
+         .info = MSM_INFO_SET_IOVA,
+         drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
+         mesa_loge("MSM_INFO_SET_IOVA(0) failed! %d (%s)", ret,
+         return VK_ERROR_UNKNOWN;
+      tu_gem_close(dev, vma->gem_handle);
+      util_vma_heap_free(&dev->vma, vma->iova, vma->size);
+      u_vector_remove(&dev->zombie_vmas);
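The per-entry bookkeeping struct is not defined in this excerpt; judging from the fields used above and in the BO-finish path further down, it presumably looks roughly like this (field names come from the diff, the layout is assumed):

struct tu_zombie_vma {
   int fence;            /* queue fence that must signal before the VA is reused */
   uint32_t gem_handle;  /* GEM handle kept open until then */
   uint64_t iova;        /* address range to return to dev->vma */
   uint64_t size;
};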
 tu_allocate_userspace_iova(struct tu_device *dev,
                            uint32_t gem_handle,
                            enum tu_bo_alloc_flags flags,
-   mtx_lock(&dev->physical_device->vma_mutex);
+   mtx_lock(&dev->vma_mutex);
+   tu_free_zombie_vma_locked(dev, false);
    if (flags & TU_BO_ALLOC_REPLAYABLE) {
       if (client_iova) {
-         if (util_vma_heap_alloc_addr(&dev->physical_device->vma, client_iova,
+         if (util_vma_heap_alloc_addr(&dev->vma, client_iova, size)) {
             *iova = client_iova;
-            return VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS;
+            /* Address may be already freed by us, but not considered as
+             * freed by the kernel. We have to wait until all work that
+             * may hold the address is done. Since addresses are meant to
+             * be replayed only by debug tooling, it should be ok to wait.
+             */
+            if (tu_free_zombie_vma_locked(dev, true) == VK_SUCCESS &&
+                util_vma_heap_alloc_addr(&dev->vma, client_iova, size)) {
+               mtx_unlock(&dev->vma_mutex);
+               return VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS;
       /* We have to separate replayable IOVAs from ordinary ones in order
        * for them not to clash. The easiest way to do this is to allocate
        * them from the other end of the address space.
-      dev->physical_device->vma.alloc_high = true;
-         util_vma_heap_alloc(&dev->physical_device->vma, size, 0x1000);
+      dev->vma.alloc_high = true;
+      *iova = util_vma_heap_alloc(&dev->vma, size, 0x1000);
-      dev->physical_device->vma.alloc_high = false;
-      *iova = util_vma_heap_alloc(&dev->physical_device->vma, size, 0x1000);
+      dev->vma.alloc_high = false;
+      *iova = util_vma_heap_alloc(&dev->vma, size, 0x1000);

-   mtx_unlock(&dev->physical_device->vma_mutex);
+   mtx_unlock(&dev->vma_mutex);

       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
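Unbinding with MSM_INFO_SET_IOVA and a value of 0, as done in the zombie-free path above, has a counterpart at allocation time: once the heap hands out an address, it presumably gets pinned to the GEM object through the same GEM_INFO request, roughly like this (a sketch; the surrounding error handling is assumed):

/* Sketch: ask the kernel to map the BO at the userspace-chosen address. */
struct drm_msm_gem_info req = {
   .handle = gem_handle,
   .info = MSM_INFO_SET_IOVA,
   .value = *iova,
};

int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
if (ret < 0)
   return VK_ERROR_OUT_OF_HOST_MEMORY;   /* assumed error mapping */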
       vk_realloc(&dev->vk.alloc, dev->bo_list, new_len * sizeof(*dev->bo_list),
                  8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+      mtx_unlock(&dev->bo_mutex);
       tu_gem_close(dev, gem_handle);
       return VK_ERROR_OUT_OF_HOST_MEMORY;

    if (real_size < 0 || (uint64_t) real_size < size)
       return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+   /* iova allocation needs to consider the object's *real* size: */
    /* Importing the same dmabuf several times would yield the same
     * gem_handle. Thus there could be a race when destroying
     * BO and importing the same dmabuf from different threads.

    mtx_unlock(&dev->bo_mutex);

    if (dev->physical_device->has_set_iova) {
-      mtx_lock(&dev->physical_device->vma_mutex);
-      util_vma_heap_free(&dev->physical_device->vma, bo->iova, bo->size);
-      mtx_unlock(&dev->physical_device->vma_mutex);
+      mtx_lock(&dev->vma_mutex);
+      struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
+         u_vector_add(&dev->zombie_vmas);
+      vma->gem_handle = bo->gem_handle;
+      vma->iova = bo->iova;
+      vma->size = bo->size;
+      vma->fence = p_atomic_read(&dev->queues[0]->fence);
+      mtx_unlock(&dev->vma_mutex);
+      memset(bo, 0, sizeof(*bo));
+      /* Our BO structs are stored in a sparse array in the physical device,
+       * so we don't want to free the BO pointer, instead we want to reset it
+       * to 0, to signal that array entry as being free.
+       */
+      uint32_t gem_handle = bo->gem_handle;
+      memset(bo, 0, sizeof(*bo));
+      tu_gem_close(dev, gem_handle);
-   /* Our BO structs are stored in a sparse array in the physical device,
-    * so we don't want to free the BO pointer, instead we want to reset it
-    * to 0, to signal that array entry as being free.
-    */
-   uint32_t gem_handle = bo->gem_handle;
-   memset(bo, 0, sizeof(*bo));
-   tu_gem_close(dev, gem_handle);

    u_rwlock_rdunlock(&dev->dma_bo_lock);
    return VK_SUCCESS;

-get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
-   clock_gettime(CLOCK_MONOTONIC, &t);
-   tv->tv_sec = t.tv_sec + ns / 1000000000;
-   tv->tv_nsec = t.tv_nsec + ns % 1000000000;

 static VkResult
 msm_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj)
-   struct drm_msm_wait_fence req = {
-      .fence = syncobj->fence,
-      .queueid = syncobj->msm_queue_id,
-   get_abs_timeout(&req.timeout, 1000000000);
-   ret = drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req));
-   if (ret && (ret != -ETIMEDOUT)) {
-      fprintf(stderr, "wait-fence failed! %d (%s)", ret, strerror(errno));
+   return tu_wait_fence(dev, syncobj->msm_queue_id, syncobj->fence, 1000000000);

 static VkResult

 static const struct tu_knl msm_knl_funcs = {
+   .device_init = msm_device_init,
+   .device_finish = msm_device_finish,
    .device_get_gpu_timestamp = msm_device_get_gpu_timestamp,
    .device_get_suspend_count = msm_device_get_suspend_count,
    .device_check_status = msm_device_check_status,
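For orientation, the dispatch table being filled in above presumably has roughly the following shape for the hooks touched by this change; the function-pointer types are inferred from the msm_* implementations in this diff, and everything else about the struct is an assumption:

struct tu_knl {
   VkResult (*device_init)(struct tu_device *dev);
   void (*device_finish)(struct tu_device *dev);
   int (*device_get_gpu_timestamp)(struct tu_device *dev, uint64_t *ts);
   int (*device_get_suspend_count)(struct tu_device *dev,
                                   uint64_t *suspend_count);
   VkResult (*device_check_status)(struct tu_device *dev);
   /* remaining backend hooks omitted */
};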
-   if (tu_drm_get_param(device, MSM_PARAM_CHIP_ID, &device->dev_id.chip_id)) {
+   if (tu_drm_get_param(fd, MSM_PARAM_CHIP_ID, &device->dev_id.chip_id)) {
       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                  "could not get CHIP ID");

-    * device->has_set_iova = !tu_drm_get_va_prop(device, &device->va_start,
-    *                                            &device->va_size);
-    * If BO is freed while kernel considers it busy, our VMA state gets
-    * desynchronized from kernel's VMA state, because kernel waits
-    * until BO stops being busy. And whether BO is busy decided at
-    * submission granularity.
-    * Disable this capability until solution is found.
-    */
-   device->has_set_iova = false;
+   device->has_set_iova = !tu_drm_get_va_prop(device, &device->va_start,

    /* Even if kernel is new enough, the GPU itself may not support it. */
    device->has_cached_coherent_memory =

-   ret = tu_drm_get_param(device, MSM_PARAM_FAULTS, &device->fault_count);
-      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
-                                 "Failed to get initial fault count: %d", ret);

    device->submitqueue_priority_count = tu_drm_get_priorities(device);

    device->syncobj_type = vk_drm_syncobj_get_type(fd);