@@ -358,8 +358,6 @@
-   if (device->info->verx10 >= 125) {
-      /* Make sure VMA addresses are 2MiB aligned for the block pool */
-      assert(anv_is_aligned(start_address, 2 * 1024 * 1024));
-      assert(anv_is_aligned(initial_size, 2 * 1024 * 1024));
-   }
+   /* Make sure VMA addresses are aligned for the block pool */
+   assert(anv_is_aligned(start_address, device->info->mem_alignment));
+   assert(anv_is_aligned(initial_size, device->info->mem_alignment));
 
    pool->name = name;
    pool->device = device;
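
For reference, the assertion relies on the usual power-of-two alignment test. A minimal sketch, assuming anv_is_aligned behaves like the standard mask trick (is_aligned_sketch is a hypothetical stand-in, not Mesa's helper):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* A value is aligned to a power-of-two boundary iff its low bits
 * below that boundary are all zero. */
static inline bool
is_aligned_sketch(uint64_t value, uint64_t alignment)
{
   /* The mask trick only holds for power-of-two alignments. */
   assert(alignment != 0 && (alignment & (alignment - 1)) == 0);
   return (value & (alignment - 1)) == 0;
}
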
@@ -376,8 +374,7 @@
       ANV_BO_ALLOC_FIXED_ADDRESS |
       ANV_BO_ALLOC_MAPPED |
       ANV_BO_ALLOC_SNOOPED |
-      ANV_BO_ALLOC_CAPTURE |
-      (device->info->has_local_mem ? ANV_BO_ALLOC_WRITE_COMBINE : 0);
+      ANV_BO_ALLOC_CAPTURE;
 
    result = anv_block_pool_expand_range(pool, initial_size);
    if (result != VK_SUCCESS)
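
The pool's allocation flags are a plain bitmask, so this hunk just drops one term from the OR: the block pool no longer opts into write-combine itself on local-memory devices, leaving the mapping choice to the allocator. A sketch of the composition with illustrative bit values (not the real enum anv_bo_alloc_flags from anv_private.h):

#include <stdint.h>

/* Illustrative flag bits; the real enum values may differ. */
enum bo_alloc_flags_sketch {
   BO_ALLOC_FIXED_ADDRESS_SKETCH = 1u << 0,
   BO_ALLOC_MAPPED_SKETCH        = 1u << 1,
   BO_ALLOC_SNOOPED_SKETCH       = 1u << 2,
   BO_ALLOC_CAPTURE_SKETCH       = 1u << 3,
};

/* After this hunk the block pool always requests the same four flags,
 * with no device-dependent term. */
static const uint32_t block_pool_flags_sketch =
   BO_ALLOC_FIXED_ADDRESS_SKETCH | BO_ALLOC_MAPPED_SKETCH |
   BO_ALLOC_SNOOPED_SKETCH | BO_ALLOC_CAPTURE_SKETCH;
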
@@ -638,9 +635,7 @@
    /* We don't want to ever see signed overflow */
    assert(start_offset < INT32_MAX - (int32_t)BLOCK_POOL_MEMFD_SIZE);
 
-   uint32_t initial_size = block_size * 16;
-   if (device->info->verx10 >= 125)
-      initial_size = MAX2(initial_size, 2 * 1024 * 1024);
+   uint32_t initial_size = MAX2(block_size * 16, device->info->mem_alignment);
 
    VkResult result = anv_block_pool_init(&pool->block_pool, device, name,
                                          base_address + start_offset,
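
A quick worked example of the new sizing rule, under the assumption of 4 KiB blocks on a platform whose kernel driver requires 2 MiB VMA alignment (both values are hypothetical):

#include <stdint.h>
#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
   /* Hypothetical inputs. */
   uint32_t block_size    = 4096;
   uint32_t mem_alignment = 2 * 1024 * 1024;

   /* block_size * 16 is only 64 KiB, so the device alignment wins. */
   uint32_t initial_size = MAX2(block_size * 16, mem_alignment);
   printf("initial_size = %u\n", initial_size); /* prints 2097152 */
   return 0;
}
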
@@ -1352,7 +1346,6 @@
    uint64_t bo_flags = EXEC_OBJECT_PINNED;
 
-   if (!(alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS) &&
-       pdevice->supports_48bit_addresses)
+   if (!(alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS))
       bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
 
    if (((alloc_flags & ANV_BO_ALLOC_CAPTURE) ||
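
With the pdevice->supports_48bit_addresses check gone, only the caller's explicit 32-bit-address request keeps a BO out of the full VA range, presumably because every device this path still serves reports 48-bit support. A minimal sketch of the simplified logic, with stand-in constants for the kernel execbuf flags:

#include <stdint.h>

/* Stand-in constants; the real values are the kernel's
 * EXEC_OBJECT_PINNED and EXEC_OBJECT_SUPPORTS_48B_ADDRESS. */
#define EXEC_OBJECT_PINNED_SKETCH        (1ull << 0)
#define EXEC_OBJECT_SUPPORTS_48B_SKETCH  (1ull << 1)
#define BO_ALLOC_32BIT_ADDRESS_SKETCH    (1u << 0)

static uint64_t
bo_flags_sketch(uint32_t alloc_flags)
{
   uint64_t bo_flags = EXEC_OBJECT_PINNED_SKETCH;

   /* Only an explicit 32-bit-address request restricts the BO now. */
   if (!(alloc_flags & BO_ALLOC_32BIT_ADDRESS_SKETCH))
      bo_flags |= EXEC_OBJECT_SUPPORTS_48B_SKETCH;

   return bo_flags;
}
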
@@ -1381,8 +1374,12 @@
    device->kmd_backend->gem_close(device, bo->gem_handle);
 }
 
-static void anv_bo_vma_free(struct anv_device *device, struct anv_bo *bo)
+static void
+anv_bo_vma_free(struct anv_device *device, struct anv_bo *bo)
 {
-   if (bo->offset != 0 && !bo->has_fixed_address)
-      anv_vma_free(device, bo->offset, bo->size + bo->_ccs_size);
+   if (bo->offset != 0 && !bo->has_fixed_address) {
+      assert(bo->vma_heap != NULL);
+      anv_vma_free(device, bo->vma_heap, bo->offset, bo->size + bo->_ccs_size);
+   }
+   bo->vma_heap = NULL;
 }
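
The new vma_heap field follows a common allocator pattern: record at allocation time which heap an address came from, so the free path can hand it back without re-deriving the heap from the address range. A minimal sketch with illustrative types (vma_heap_sketch stands in for what appears to be Mesa's util_vma_heap):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct vma_heap_sketch;   /* opaque; stands in for util_vma_heap */

struct bo_sketch {
   uint64_t offset;
   uint64_t size;
   struct vma_heap_sketch *vma_heap;   /* NULL while no VMA is held */
};

static void
bo_vma_free_sketch(struct bo_sketch *bo,
                   void (*heap_free)(struct vma_heap_sketch *,
                                     uint64_t addr, uint64_t size))
{
   if (bo->offset != 0) {
      /* The alloc path recorded the owning heap; freeing into any
       * other heap would corrupt both. */
      assert(bo->vma_heap != NULL);
      heap_free(bo->vma_heap, bo->offset, bo->size);
   }
   /* Clearing the pointer lets the alloc path assert it never
    * allocates a second VMA for the same BO. */
   bo->vma_heap = NULL;
}
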
@@ -1403,20 +1400,33 @@
                          enum anv_bo_alloc_flags alloc_flags,
                          uint64_t explicit_address)
 {
+   assert(bo->vma_heap == NULL);
    assert(explicit_address == intel_48b_address(explicit_address));
 
    uint32_t align = device->physical->info.mem_alignment;
 
-   /* Gen12 CCS surface addresses need to be 64K aligned. */
-   if (device->info->ver >= 12 && (alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS))
-      align = MAX2(64 * 1024, align);
+   /* If we're using the AUX map, make sure we follow the required
+    * alignment.
+    */
+   if (device->info->has_aux_map && (alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS))
+      align = MAX2(intel_aux_map_get_alignment(device->aux_map_ctx), align);
 
+   /* Opportunistically align addresses to 2Mb when above 1Mb. We do this
+    * because this gives an opportunity for the kernel to use Transparent Huge
+    * Pages (the 2MB page table layout) for faster memory access.
+    *
+    * Only available on ICL+.
+    */
+   if (device->info->ver >= 11 && (bo->size + bo->_ccs_size) >= 1 * 1024 * 1024)
+      align = MAX2(2 * 1024 * 1024, align);
+
    if (alloc_flags & ANV_BO_ALLOC_FIXED_ADDRESS) {
       bo->has_fixed_address = true;
-      bo->offset = explicit_address;
+      bo->offset = intel_canonical_address(explicit_address);
    } else {
       bo->offset = anv_vma_alloc(device, bo->size + bo->_ccs_size,
-                                 align, alloc_flags, explicit_address);
+                                 align, alloc_flags, explicit_address,
+                                 &bo->vma_heap);
       if (bo->offset == 0) {
          anv_bo_unmap_close(device, bo);
          return vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
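
Pulled out of context, the new alignment selection reduces to taking the maximum of several independent constraints. A self-contained sketch, with the device queries replaced by parameters (names are illustrative, not Mesa's):

#include <stdbool.h>
#include <stdint.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

static uint64_t
pick_bo_align_sketch(uint64_t mem_alignment,   /* devinfo base requirement */
                     uint64_t aux_map_align,   /* intel_aux_map_get_alignment() */
                     bool implicit_ccs,
                     bool has_aux_map,
                     int ver,
                     uint64_t total_size)      /* bo->size + bo->_ccs_size */
{
   uint64_t align = mem_alignment;

   /* AUX-map granularity must be honored for implicit CCS. */
   if (has_aux_map && implicit_ccs)
      align = MAX2(aux_map_align, align);

   /* Opportunistic 2 MiB alignment for BOs of 1 MiB and up, so the
    * kernel can back the range with transparent huge pages (ICL+). */
   if (ver >= 11 && total_size >= 1024 * 1024)
      align = MAX2(2 * 1024 * 1024, align);

   return align;
}
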
@@ -1442,17 +1452,16 @@
       anv_bo_alloc_flags_to_bo_flags(device, alloc_flags);
    assert(bo_flags == (bo_flags & ANV_BO_CACHE_SUPPORTED_FLAGS));
 
-   /* The kernel is going to give us whole pages anyway */
+   /* The kernel is going to give us whole pages anyway. And we
+    * also need 4KB alignment for 1MB AUX buffer that follows
+    * the main region. The 4KB also covers 64KB AUX granularity
+    * that has 256B AUX mapping to the main.
+    */
    size = align64(size, 4096);
 
    uint64_t ccs_size = 0;
    if (device->info->has_aux_map && (alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS)) {
-      /* Align the size up to the next multiple of 64K so we don't have any
-       * AUX-TT entries pointing from a 64K page to itself.
-       */
-      size = align64(size, 64 * 1024);
-
-      /* See anv_bo::_ccs_size */
       uint64_t aux_ratio =
         intel_aux_get_main_to_aux_ratio(device->aux_map_ctx);
+      /* See anv_bo::_ccs_size */
       ccs_size = align64(DIV_ROUND_UP(size, aux_ratio), 4096);
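
A worked example of the CCS sizing, assuming the commonly cited Gen12 main-to-aux ratio of 256:1 (the real value comes from intel_aux_get_main_to_aux_ratio(), so treat the constant as an assumption):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static uint64_t
align64(uint64_t v, uint64_t a)
{
   return (v + a - 1) & ~(a - 1);
}

int main(void)
{
   /* Assumed ratio: 256 main-surface bytes per byte of AUX data. */
   const uint64_t aux_ratio = 256;

   /* A hypothetical 16 MiB main surface, already page-aligned. */
   uint64_t size = align64(16 * 1024 * 1024, 4096);

   uint64_t ccs_size = align64(DIV_ROUND_UP(size, aux_ratio), 4096);
   printf("ccs_size = %" PRIu64 "\n", ccs_size); /* prints 65536 */
   return 0;
}
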
@@ -1508,6 +1517,5 @@
          (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0,
       .has_implicit_ccs = ccs_size > 0 ||
          (device->info->verx10 >= 125 && !(alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM)),
-      .map_wc = alloc_flags & ANV_BO_ALLOC_WRITE_COMBINE,
       .vram_only = nregions == 1 &&
          regions[0] == device->physical->vram_non_mappable.region,
@@ -1557,6 +1565,5 @@
    if (new_bo._ccs_size > 0) {
       assert(device->info->has_aux_map);
-      intel_aux_map_add_mapping(device->aux_map_ctx,
-                                intel_canonical_address(new_bo.offset),
+      intel_aux_map_add_mapping(device->aux_map_ctx, new_bo.offset,
                                 intel_canonical_address(new_bo.offset + new_bo.size),
                                 new_bo.size, 0 /* format_bits */);
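
new_bo.offset no longer needs wrapping here because the alloc path above now stores it in canonical form; only the computed end address is re-canonicalized. The transform itself is the standard 48-bit sign extension. A sketch of what intel_canonical_address appears to do, by my reading:

#include <stdint.h>

/* 48-bit canonical-address sign extension: bit 47 is replicated into
 * bits 63:48, the form the GPU expects for virtual addresses. */
static inline uint64_t
canonical_address_sketch(uint64_t addr)
{
   const int shift = 64 - 48;
   return (uint64_t)((int64_t)(addr << shift) >> shift);
}
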
@@ -1929,9 +1936,7 @@
       assert(device->physical->has_implicit_ccs);
       assert(device->info->has_aux_map);
       assert(bo->has_implicit_ccs);
-      intel_aux_map_unmap_range(device->aux_map_ctx,
-                                intel_canonical_address(bo->offset),
-                                bo->size);
+      intel_aux_map_unmap_range(device->aux_map_ctx, bo->offset, bo->size);
    }
 
    /* Memset the BO just in case. The refcount being zero should be enough to