/*
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 * Copyright © 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors:
 *    Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
 */

#include "zink_resource.h"
#include "zink_screen.h"
#include "util/u_hash_table.h"
struct zink_sparse_backing_chunk {

/*
 * Sub-allocation information for a real buffer used as backing memory of a
 */
struct zink_sparse_backing {
   struct list_head list;

   /* Sorted list of free chunks. */
   struct zink_sparse_backing_chunk *chunks;

struct zink_sparse_commitment {
   struct zink_sparse_backing *backing;

   struct zink_bo *buffer;
   struct zink_bo *entries;

ALWAYS_INLINE static struct zink_slab *
zink_slab(struct pb_slab *pslab)
   return (struct zink_slab*)pslab;

static struct pb_slabs *
get_slabs(struct zink_screen *screen, uint64_t size, enum zink_alloc_flag flags)
   //struct pb_slabs *bo_slabs = ((flags & RADEON_FLAG_ENCRYPTED) && screen->info.has_tmz_support) ?
   //screen->bo_slabs_encrypted : screen->bo_slabs;
   struct pb_slabs *bo_slabs = screen->pb.bo_slabs;

   /* Find the correct slab allocator for the given size. */
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      struct pb_slabs *slabs = &bo_slabs[i];

      if (size <= 1ULL << (slabs->min_order + slabs->num_orders - 1))

/* Return the power of two size of a slab entry matching the input size. */
get_slab_pot_entry_size(struct zink_screen *screen, unsigned size)
   unsigned entry_size = util_next_power_of_two(size);
   unsigned min_entry_size = 1 << screen->pb.bo_slabs[0].min_order;

   return MAX2(entry_size, min_entry_size);

/* Return the slab entry alignment. */
static unsigned get_slab_entry_alignment(struct zink_screen *screen, unsigned size)
   unsigned entry_size = get_slab_pot_entry_size(screen, size);

   if (size <= entry_size * 3 / 4)
      return entry_size / 4;
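
/*
 * Worked example for the two helpers above (illustrative only, not driver
 * logic): a 96 KB request has a power-of-two entry size of 128 KB, and since
 * 96 KB <= 3/4 of 128 KB it only needs entry_size / 4 = 32 KB alignment,
 * which is what lets it share a "3/4 of a power of two" slab bucket.  A
 * 100 KB request fails that test and would be expected to get the full
 * 128 KB alignment instead.
 */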
bo_destroy(struct zink_screen *screen, struct pb_buffer *pbuf)
   struct zink_bo *bo = zink_bo(pbuf);

   simple_mtx_lock(&screen->pb.bo_export_table_lock);
   _mesa_hash_table_remove_key(screen->pb.bo_export_table, bo);
   simple_mtx_unlock(&screen->pb.bo_export_table_lock);

   if (!bo->u.real.is_user_ptr && bo->u.real.cpu_ptr) {
      bo->u.real.map_count = 1;
      bo->u.real.cpu_ptr = NULL;
      zink_bo_unmap(screen, bo);

   VKSCR(FreeMemory)(screen->dev, bo->mem, NULL);

   simple_mtx_destroy(&bo->lock);

bo_can_reclaim(struct zink_screen *screen, struct pb_buffer *pbuf)
   struct zink_bo *bo = zink_bo(pbuf);

   return zink_screen_usage_check_completion(screen, bo->reads) && zink_screen_usage_check_completion(screen, bo->writes);

bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
   struct zink_bo *bo = container_of(entry, struct zink_bo, u.slab.entry);

   return bo_can_reclaim(priv, &bo->base);

bo_slab_free(struct zink_screen *screen, struct pb_slab *pslab)
   struct zink_slab *slab = zink_slab(pslab);
   ASSERTED unsigned slab_size = slab->buffer->base.size;

   assert(slab->base.num_entries * slab->entry_size <= slab_size);

   zink_bo_unref(screen, slab->buffer);

bo_slab_destroy(struct zink_screen *screen, struct pb_buffer *pbuf)
   struct zink_bo *bo = zink_bo(pbuf);

   //if (bo->base.usage & RADEON_FLAG_ENCRYPTED)
   //pb_slab_free(get_slabs(screen, bo->base.size, RADEON_FLAG_ENCRYPTED), &bo->u.slab.entry);

   pb_slab_free(get_slabs(screen, bo->base.size, 0), &bo->u.slab.entry);

clean_up_buffer_managers(struct zink_screen *screen)
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      pb_slabs_reclaim(&screen->pb.bo_slabs[i]);
      //if (screen->info.has_tmz_support)
      //pb_slabs_reclaim(&screen->bo_slabs_encrypted[i]);

   pb_cache_release_all_buffers(&screen->pb.bo_cache);

get_optimal_alignment(struct zink_screen *screen, uint64_t size, unsigned alignment)
   /* Increase the alignment for faster address translation and better memory
   alignment = MAX2(alignment, 4096);

   unsigned msb = util_last_bit(size);

   alignment = MAX2(alignment, 1u << (msb - 1));
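
/*
 * Rough illustration of the alignment bump above (assumes the surrounding
 * control flow follows the usual radeonsi-style heuristic): any sizeable
 * allocation is aligned to at least 4096 bytes, and a small request such as
 * 3 KB (util_last_bit == 12) would have its alignment raised to
 * 1u << 11 = 2 KB, i.e. the largest power of two not exceeding the size.
 */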
bo_destroy_or_cache(struct zink_screen *screen, struct pb_buffer *pbuf)
   struct zink_bo *bo = zink_bo(pbuf);

   assert(bo->mem); /* slab buffers have a separate vtbl */

   if (bo->u.real.use_reusable_pool)
      pb_cache_add_buffer(bo->cache_entry);

   bo_destroy(screen, pbuf);

static const struct pb_vtbl bo_vtbl = {
   /* Cast to void* because one of the function parameters is a struct pointer instead of void*. */
   (void*)bo_destroy_or_cache
   /* other functions are never called */

static struct zink_bo *
bo_create_internal(struct zink_screen *screen,
   struct zink_bo *bo = NULL;

   /* too big for vk alloc */
   if (size > UINT32_MAX)

   alignment = get_optimal_alignment(screen, size, alignment);

   VkMemoryAllocateInfo mai;
   mai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
   mai.allocationSize = size;
   mai.memoryTypeIndex = screen->heap_map[heap];
   if (screen->info.mem_props.memoryTypes[mai.memoryTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
      alignment = MAX2(alignment, screen->info.props.limits.minMemoryMapAlignment);
      mai.allocationSize = align64(mai.allocationSize, screen->info.props.limits.minMemoryMapAlignment);

   unsigned heap_idx = screen->info.mem_props.memoryTypes[screen->heap_map[heap]].heapIndex;
   if (mai.allocationSize > screen->info.mem_props.memoryHeaps[heap_idx].size) {
      mesa_loge("zink: can't allocate %"PRIu64" bytes from heap that's only %"PRIu64" bytes!\n", mai.allocationSize, screen->info.mem_props.memoryHeaps[heap_idx].size);

   /* all non-suballocated bo can cache */
   init_pb_cache = !pNext;

   bo = CALLOC(1, sizeof(struct zink_bo) + init_pb_cache * sizeof(struct pb_cache_entry));

   VkResult ret = VKSCR(AllocateMemory)(screen->dev, &mai, NULL, &bo->mem);
   if (!zink_screen_handle_vkresult(screen, ret)) {
      mesa_loge("zink: couldn't allocate memory! from heap %u", heap);

   bo->u.real.use_reusable_pool = true;
   pb_cache_init_entry(&screen->pb.bo_cache, bo->cache_entry, &bo->base, heap);

   simple_mtx_init(&bo->lock, mtx_plain);
   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = util_logbase2(alignment);
   bo->base.size = mai.allocationSize;
   bo->base.vtbl = &bo_vtbl;
   bo->base.placement = screen->heap_flags[heap];
   bo->base.usage = flags;
   bo->unique_id = p_atomic_inc_return(&screen->pb.next_bo_unique_id);

   bo_destroy(screen, (void*)bo);
/*
 * Attempt to allocate the given number of backing pages. Fewer pages may be
 * allocated (depending on the fragmentation of existing backing buffers),
 * which will be reflected by a change to *pnum_pages.
 */
static struct zink_sparse_backing *
sparse_backing_alloc(struct zink_screen *screen, struct zink_bo *bo,
                     uint32_t *pstart_page, uint32_t *pnum_pages)
   struct zink_sparse_backing *best_backing;
   uint32_t best_num_pages;

   /* This is a very simple and inefficient best-fit algorithm. */
   list_for_each_entry(struct zink_sparse_backing, backing, &bo->u.sparse.backing, list) {
      for (unsigned idx = 0; idx < backing->num_chunks; ++idx) {
         uint32_t cur_num_pages = backing->chunks[idx].end - backing->chunks[idx].begin;
         if ((best_num_pages < *pnum_pages && cur_num_pages > best_num_pages) ||
             (best_num_pages > *pnum_pages && cur_num_pages < best_num_pages)) {
            best_backing = backing;
            best_num_pages = cur_num_pages;

   /* Allocate a new backing buffer if necessary. */
   struct pb_buffer *buf;

   best_backing = CALLOC_STRUCT(zink_sparse_backing);

   best_backing->max_chunks = 4;
   best_backing->chunks = CALLOC(best_backing->max_chunks,
                                 sizeof(*best_backing->chunks));
   if (!best_backing->chunks) {

   assert(bo->u.sparse.num_backing_pages < DIV_ROUND_UP(bo->base.size, ZINK_SPARSE_BUFFER_PAGE_SIZE));

   size = MIN3(bo->base.size / 16,
               bo->base.size - (uint64_t)bo->u.sparse.num_backing_pages * ZINK_SPARSE_BUFFER_PAGE_SIZE);
   size = MAX2(size, ZINK_SPARSE_BUFFER_PAGE_SIZE);

   buf = zink_bo_create(screen, size, ZINK_SPARSE_BUFFER_PAGE_SIZE,
                        ZINK_HEAP_DEVICE_LOCAL, 0, NULL);
      FREE(best_backing->chunks);

   /* We might have gotten a bigger buffer than requested via caching. */
   pages = buf->size / ZINK_SPARSE_BUFFER_PAGE_SIZE;

   best_backing->bo = zink_bo(buf);
   best_backing->num_chunks = 1;
   best_backing->chunks[0].begin = 0;
   best_backing->chunks[0].end = pages;

   list_add(&best_backing->list, &bo->u.sparse.backing);
   bo->u.sparse.num_backing_pages += pages;

   best_num_pages = pages;

   *pnum_pages = MIN2(*pnum_pages, best_num_pages);
   *pstart_page = best_backing->chunks[best_idx].begin;
   best_backing->chunks[best_idx].begin += *pnum_pages;

   if (best_backing->chunks[best_idx].begin >= best_backing->chunks[best_idx].end) {
      memmove(&best_backing->chunks[best_idx], &best_backing->chunks[best_idx + 1],
              sizeof(*best_backing->chunks) * (best_backing->num_chunks - best_idx - 1));
      best_backing->num_chunks--;
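
/*
 * Worked example of the best-fit scan in sparse_backing_alloc() above
 * (illustrative only; assumes best_num_pages starts at 0): for a request of
 * *pnum_pages == 5 and free chunks visited in the order 3, 8, 20 pages, the
 * scan first remembers the 3-page chunk (bigger than the current best but
 * still short of the request), then replaces it with the 8-page chunk, and
 * ignores the 20-page chunk because 8 already covers the request.  Note the
 * result can depend on visit order, which is part of why the comment calls
 * this simple and inefficient.
 */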
sparse_free_backing_buffer(struct zink_screen *screen, struct zink_bo *bo,
                           struct zink_sparse_backing *backing)
   bo->u.sparse.num_backing_pages -= backing->bo->base.size / ZINK_SPARSE_BUFFER_PAGE_SIZE;

   list_del(&backing->list);
   zink_bo_unref(screen, backing->bo);
   FREE(backing->chunks);

/*
 * Return a range of pages from the given backing buffer back into the
 */
sparse_backing_free(struct zink_screen *screen, struct zink_bo *bo,
                    struct zink_sparse_backing *backing,
                    uint32_t start_page, uint32_t num_pages)
   uint32_t end_page = start_page + num_pages;

   unsigned high = backing->num_chunks;

   /* Find the first chunk with begin >= start_page. */
      unsigned mid = low + (high - low) / 2;

      if (backing->chunks[mid].begin >= start_page)

   assert(low >= backing->num_chunks || end_page <= backing->chunks[low].begin);
   assert(low == 0 || backing->chunks[low - 1].end <= start_page);

   if (low > 0 && backing->chunks[low - 1].end == start_page) {
      backing->chunks[low - 1].end = end_page;

      if (low < backing->num_chunks && end_page == backing->chunks[low].begin) {
         backing->chunks[low - 1].end = backing->chunks[low].end;
         memmove(&backing->chunks[low], &backing->chunks[low + 1],
                 sizeof(*backing->chunks) * (backing->num_chunks - low - 1));
         backing->num_chunks--;

   } else if (low < backing->num_chunks && end_page == backing->chunks[low].begin) {
      backing->chunks[low].begin = start_page;

      if (backing->num_chunks >= backing->max_chunks) {
         unsigned new_max_chunks = 2 * backing->max_chunks;
         struct zink_sparse_backing_chunk *new_chunks =
            REALLOC(backing->chunks,
                    sizeof(*backing->chunks) * backing->max_chunks,
                    sizeof(*backing->chunks) * new_max_chunks);

         backing->max_chunks = new_max_chunks;
         backing->chunks = new_chunks;

      memmove(&backing->chunks[low + 1], &backing->chunks[low],
              sizeof(*backing->chunks) * (backing->num_chunks - low));
      backing->chunks[low].begin = start_page;
      backing->chunks[low].end = end_page;
      backing->num_chunks++;

   if (backing->num_chunks == 1 && backing->chunks[0].begin == 0 &&
       backing->chunks[0].end == backing->bo->base.size / ZINK_SPARSE_BUFFER_PAGE_SIZE)
      sparse_free_backing_buffer(screen, bo, backing);
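
/*
 * Illustration of the chunk bookkeeping above (not driver logic): with a
 * sorted free list of [0,4) and [8,12), freeing pages [4,8) first extends
 * the left neighbour to [0,8) and then merges it with the right neighbour
 * into a single chunk [0,12).  If that chunk ends up covering the whole
 * backing buffer, the buffer itself is released via
 * sparse_free_backing_buffer().
 */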
bo_sparse_destroy(struct zink_screen *screen, struct pb_buffer *pbuf)
   struct zink_bo *bo = zink_bo(pbuf);

   assert(!bo->mem && bo->base.usage & ZINK_ALLOC_SPARSE);

   while (!list_is_empty(&bo->u.sparse.backing)) {
      sparse_free_backing_buffer(screen, bo,
                                 container_of(bo->u.sparse.backing.next,
                                              struct zink_sparse_backing, list));

   FREE(bo->u.sparse.commitments);
   simple_mtx_destroy(&bo->lock);

static const struct pb_vtbl bo_sparse_vtbl = {
   /* Cast to void* because one of the function parameters is a struct pointer instead of void*. */
   (void*)bo_sparse_destroy
   /* other functions are never called */

static struct pb_buffer *
bo_sparse_create(struct zink_screen *screen, uint64_t size)
   /* We use 32-bit page numbers; refuse to attempt allocating sparse buffers
    * that exceed this limit. This is not really a restriction: we don't have
    * that much virtual address space anyway.
    */
   if (size > (uint64_t)INT32_MAX * ZINK_SPARSE_BUFFER_PAGE_SIZE)

   bo = CALLOC_STRUCT(zink_bo);

   simple_mtx_init(&bo->lock, mtx_plain);
   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = util_logbase2(ZINK_SPARSE_BUFFER_PAGE_SIZE);
   bo->base.size = size;
   bo->base.vtbl = &bo_sparse_vtbl;
   bo->base.placement = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
   bo->unique_id = p_atomic_inc_return(&screen->pb.next_bo_unique_id);
   bo->base.usage = ZINK_ALLOC_SPARSE;

   bo->u.sparse.num_va_pages = DIV_ROUND_UP(size, ZINK_SPARSE_BUFFER_PAGE_SIZE);
   bo->u.sparse.commitments = CALLOC(bo->u.sparse.num_va_pages,
                                     sizeof(*bo->u.sparse.commitments));
   if (!bo->u.sparse.commitments)
      goto error_alloc_commitments;

   list_inithead(&bo->u.sparse.backing);

error_alloc_commitments:
   simple_mtx_destroy(&bo->lock);
zink_bo_create(struct zink_screen *screen, uint64_t size, unsigned alignment, enum zink_heap heap, enum zink_alloc_flag flags, const void *pNext)
   /* pull in sparse flag */
   flags |= zink_alloc_flags_from_heap(heap);

   //struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && screen->info.has_tmz_support) ?
   //screen->bo_slabs_encrypted : screen->bo_slabs;
   struct pb_slabs *slabs = screen->pb.bo_slabs;

   struct pb_slabs *last_slab = &slabs[NUM_SLAB_ALLOCATORS - 1];
   unsigned max_slab_entry_size = 1 << (last_slab->min_order + last_slab->num_orders - 1);

   /* Sub-allocate small buffers from slabs. */
   if (!(flags & (ZINK_ALLOC_NO_SUBALLOC | ZINK_ALLOC_SPARSE)) &&
       size <= max_slab_entry_size) {
      struct pb_slab_entry *entry;

      if (heap < 0 || heap >= ZINK_HEAP_MAX)

      unsigned alloc_size = size;

      /* Always use slabs for sizes less than 4 KB because the kernel aligns
       * everything to 4 KB.
       */
      if (size < alignment && alignment <= 4 * 1024)
         alloc_size = alignment;

      if (alignment > get_slab_entry_alignment(screen, alloc_size)) {
         /* 3/4 allocations can return too small alignment. Try again with a power of two
         unsigned pot_size = get_slab_pot_entry_size(screen, alloc_size);

         if (alignment <= pot_size) {
            /* This size works but wastes some memory to fulfil the alignment. */
            alloc_size = pot_size;

            goto no_slab; /* can't fulfil alignment requirements */

      struct pb_slabs *slabs = get_slabs(screen, alloc_size, flags);
      bool reclaim_all = false;
      if (heap == ZINK_HEAP_DEVICE_LOCAL_VISIBLE && !screen->resizable_bar) {
         unsigned low_bound = 128 * 1024 * 1024; //128MB is a very small BAR
         if (screen->info.driver_props.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY)
            low_bound *= 2; //nvidia has fat textures or something
         unsigned heapidx = screen->info.mem_props.memoryTypes[screen->heap_map[heap]].heapIndex;
         reclaim_all = screen->info.mem_props.memoryHeaps[heapidx].size <= low_bound;

      entry = pb_slab_alloc_reclaimed(slabs, alloc_size, heap, reclaim_all);

      /* Clean up buffer managers and try again. */
      clean_up_buffer_managers(screen);

      entry = pb_slab_alloc_reclaimed(slabs, alloc_size, heap, true);

      bo = container_of(entry, struct zink_bo, u.slab.entry);
      pipe_reference_init(&bo->base.reference, 1);
      bo->base.size = size;
      assert(alignment <= 1 << bo->base.alignment_log2);

   if (flags & ZINK_ALLOC_SPARSE) {
      assert(ZINK_SPARSE_BUFFER_PAGE_SIZE % alignment == 0);

      return bo_sparse_create(screen, size);

   /* Align size to page size. This is the minimum alignment for normal
    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
    * like constant/uniform buffers, can benefit from better and more reuse.
    */
   if (heap == ZINK_HEAP_DEVICE_LOCAL_VISIBLE) {
      size = align64(size, screen->info.props.limits.minMemoryMapAlignment);
      alignment = align(alignment, screen->info.props.limits.minMemoryMapAlignment);

   bool use_reusable_pool = !(flags & ZINK_ALLOC_NO_SUBALLOC);

   if (use_reusable_pool) {
      /* Get a buffer from the cache. */
      bo = (struct zink_bo*)
         pb_cache_reclaim_buffer(&screen->pb.bo_cache, size, alignment, 0, heap);

   /* Create a new one. */
   bo = bo_create_internal(screen, size, alignment, heap, flags, pNext);

   /* Clean up buffer managers and try again. */
   clean_up_buffer_managers(screen);

   bo = bo_create_internal(screen, size, alignment, heap, flags, pNext);
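
/*
 * Illustrative call site (the size, alignment and heap below are made up,
 * and the return type is assumed to be struct pb_buffer * like the other
 * pb-based allocators):
 *
 *    struct pb_buffer *buf =
 *       zink_bo_create(screen, 64 * 1024, 256, ZINK_HEAP_DEVICE_LOCAL_VISIBLE, 0, NULL);
 *
 * A request this small is served by the slab sub-allocators; larger requests
 * go through the pb_cache and finally bo_create_internal().
 */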
zink_bo_map(struct zink_screen *screen, struct zink_bo *bo)
   struct zink_bo *real;

   real = bo->u.slab.real;
   offset = bo->offset - real->offset;

   cpu = p_atomic_read(&real->u.real.cpu_ptr);

   simple_mtx_lock(&real->lock);
   /* Must re-check due to the possibility of a race. Re-check need not
    * be atomic thanks to the lock. */
   cpu = real->u.real.cpu_ptr;

   VkResult result = VKSCR(MapMemory)(screen->dev, real->mem, 0, real->base.size, 0, &cpu);
   if (result != VK_SUCCESS) {
      mesa_loge("ZINK: vkMapMemory failed");
      simple_mtx_unlock(&real->lock);

   p_atomic_set(&real->u.real.cpu_ptr, cpu);
   simple_mtx_unlock(&real->lock);

   p_atomic_inc(&real->u.real.map_count);

   return (uint8_t*)cpu + offset;

zink_bo_unmap(struct zink_screen *screen, struct zink_bo *bo)
   struct zink_bo *real = bo->mem ? bo : bo->u.slab.real;

   assert(real->u.real.map_count != 0 && "too many unmaps");

   if (p_atomic_dec_zero(&real->u.real.map_count)) {
      p_atomic_set(&real->u.real.cpu_ptr, NULL);
      VKSCR(UnmapMemory)(screen->dev, real->mem);
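
/*
 * Map/unmap usage sketch (illustrative only; 'data' and 'size' are
 * hypothetical): the calls are reference counted per underlying allocation,
 * so callers pair them:
 *
 *    void *ptr = zink_bo_map(screen, bo);
 *    if (ptr) {
 *       memcpy(ptr, data, size);
 *       zink_bo_unmap(screen, bo);
 *    }
 *
 * Only the first map of a real BO calls vkMapMemory and only the last unmap
 * calls vkUnmapMemory; slab entries map through their parent "real" BO at
 * the entry's offset.
 */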
buffer_commit_single(struct zink_screen *screen, struct zink_resource *res, struct zink_bo *bo, uint32_t bo_offset, uint32_t offset, uint32_t size, bool commit)
   VkBindSparseInfo sparse = {0};
   sparse.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
   sparse.bufferBindCount = res->obj->storage_buffer ? 2 : 1;

   VkSparseBufferMemoryBindInfo sparse_bind[2];
   sparse_bind[0].buffer = res->obj->buffer;
   sparse_bind[1].buffer = res->obj->storage_buffer;
   sparse_bind[0].bindCount = 1;
   sparse_bind[1].bindCount = 1;
   sparse.pBufferBinds = sparse_bind;

   VkSparseMemoryBind mem_bind;
   mem_bind.resourceOffset = offset;
   mem_bind.size = MIN2(res->base.b.width0 - offset, size);
   mem_bind.memory = commit ? (bo->mem ? bo->mem : bo->u.slab.real->mem) : VK_NULL_HANDLE;
   mem_bind.memoryOffset = bo_offset * ZINK_SPARSE_BUFFER_PAGE_SIZE + (commit ? (bo->mem ? 0 : bo->offset) : 0);

   sparse_bind[0].pBinds = &mem_bind;
   sparse_bind[1].pBinds = &mem_bind;

   VkQueue queue = screen->threaded ? screen->thread_queue : screen->queue;

   VkResult ret = VKSCR(QueueBindSparse)(queue, 1, &sparse, VK_NULL_HANDLE);
   return zink_screen_handle_vkresult(screen, ret);
buffer_bo_commit(struct zink_screen *screen, struct zink_resource *res, uint32_t offset, uint32_t size, bool commit)
   struct zink_bo *bo = res->obj->bo;
   assert(offset % ZINK_SPARSE_BUFFER_PAGE_SIZE == 0);
   assert(offset <= bo->base.size);
   assert(size <= bo->base.size - offset);
   assert(size % ZINK_SPARSE_BUFFER_PAGE_SIZE == 0 || offset + size == bo->base.size);

   struct zink_sparse_commitment *comm = bo->u.sparse.commitments;

   uint32_t va_page = offset / ZINK_SPARSE_BUFFER_PAGE_SIZE;
   uint32_t end_va_page = va_page + DIV_ROUND_UP(size, ZINK_SPARSE_BUFFER_PAGE_SIZE);

   while (va_page < end_va_page) {
      uint32_t span_va_page;

      /* Skip pages that are already committed. */
      if (comm[va_page].backing) {

      /* Determine length of uncommitted span. */
      span_va_page = va_page;
      while (va_page < end_va_page && !comm[va_page].backing)

      /* Fill the uncommitted span with chunks of backing memory. */
      while (span_va_page < va_page) {
         struct zink_sparse_backing *backing;
         uint32_t backing_start, backing_size;

         backing_size = va_page - span_va_page;
         backing = sparse_backing_alloc(screen, bo, &backing_start, &backing_size);

         if (!buffer_commit_single(screen, res, backing->bo, backing_start,
                                   (uint64_t)span_va_page * ZINK_SPARSE_BUFFER_PAGE_SIZE,
                                   (uint64_t)backing_size * ZINK_SPARSE_BUFFER_PAGE_SIZE, true)) {
            ok = sparse_backing_free(screen, bo, backing, backing_start, backing_size);
            assert(ok && "sufficient memory should already be allocated");

         while (backing_size) {
            comm[span_va_page].backing = backing;
            comm[span_va_page].page = backing_start;

   uint32_t base_page = va_page;
   while (va_page < end_va_page) {
      struct zink_sparse_backing *backing;
      uint32_t backing_start;

      /* Skip pages that are already uncommitted. */
      if (!comm[va_page].backing) {

      if (!done && !buffer_commit_single(screen, res, NULL, 0,
                                         (uint64_t)base_page * ZINK_SPARSE_BUFFER_PAGE_SIZE,
                                         (uint64_t)(end_va_page - base_page) * ZINK_SPARSE_BUFFER_PAGE_SIZE, false)) {

      /* Group contiguous spans of pages. */
      backing = comm[va_page].backing;
      backing_start = comm[va_page].page;
      comm[va_page].backing = NULL;

      while (va_page < end_va_page &&
             comm[va_page].backing == backing &&
             comm[va_page].page == backing_start + span_pages) {
         comm[va_page].backing = NULL;

      if (!sparse_backing_free(screen, bo, backing, backing_start, span_pages)) {
         /* Couldn't allocate tracking data structures, so we have to leak */
         fprintf(stderr, "zink: leaking sparse backing memory\n");
texture_commit_single(struct zink_screen *screen, struct zink_resource *res, VkSparseImageMemoryBind *ibind, unsigned num_binds, bool commit)
   VkBindSparseInfo sparse = {0};
   sparse.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
   sparse.imageBindCount = 1;

   VkSparseImageMemoryBindInfo sparse_ibind;
   sparse_ibind.image = res->obj->image;
   sparse_ibind.bindCount = num_binds;
   sparse_ibind.pBinds = ibind;
   sparse.pImageBinds = &sparse_ibind;

   VkQueue queue = screen->threaded ? screen->thread_queue : screen->queue;

   VkResult ret = VKSCR(QueueBindSparse)(queue, 1, &sparse, VK_NULL_HANDLE);
   return zink_screen_handle_vkresult(screen, ret);

texture_commit_miptail(struct zink_screen *screen, struct zink_resource *res, struct zink_bo *bo, uint32_t bo_offset, uint32_t offset, bool commit)
   VkBindSparseInfo sparse = {0};
   sparse.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
   sparse.imageOpaqueBindCount = 1;

   VkSparseImageOpaqueMemoryBindInfo sparse_bind;
   sparse_bind.image = res->obj->image;
   sparse_bind.bindCount = 1;
   sparse.pImageOpaqueBinds = &sparse_bind;

   VkSparseMemoryBind mem_bind;
   mem_bind.resourceOffset = offset;
   mem_bind.size = MIN2(ZINK_SPARSE_BUFFER_PAGE_SIZE, res->sparse.imageMipTailSize - offset);
   mem_bind.memory = commit ? (bo->mem ? bo->mem : bo->u.slab.real->mem) : VK_NULL_HANDLE;
   mem_bind.memoryOffset = bo_offset + (commit ? (bo->mem ? 0 : bo->offset) : 0);
   sparse_bind.pBinds = &mem_bind;

   VkQueue queue = screen->threaded ? screen->thread_queue : screen->queue;

   VkResult ret = VKSCR(QueueBindSparse)(queue, 1, &sparse, VK_NULL_HANDLE);
   return zink_screen_handle_vkresult(screen, ret);
zink_bo_commit(struct zink_screen *screen, struct zink_resource *res, unsigned level, struct pipe_box *box, bool commit)
   struct zink_bo *bo = res->obj->bo;

   if (screen->faked_e5sparse && res->base.b.format == PIPE_FORMAT_R9G9B9E5_FLOAT)

   simple_mtx_lock(&screen->queue_lock);
   simple_mtx_lock(&bo->lock);
   if (res->base.b.target == PIPE_BUFFER) {
      ok = buffer_bo_commit(screen, res, box->x, box->width, commit);

   int gwidth, gheight, gdepth;
   gwidth = res->sparse.formatProperties.imageGranularity.width;
   gheight = res->sparse.formatProperties.imageGranularity.height;
   gdepth = res->sparse.formatProperties.imageGranularity.depth;
   assert(gwidth && gheight && gdepth);

   struct zink_sparse_commitment *comm = bo->u.sparse.commitments;
   VkImageSubresource subresource = { res->aspect, level, 0 };
   unsigned nwidth = DIV_ROUND_UP(box->width, gwidth);
   unsigned nheight = DIV_ROUND_UP(box->height, gheight);
   unsigned ndepth = DIV_ROUND_UP(box->depth, gdepth);
   VkExtent3D lastBlockExtent = {
      (box->width % gwidth) ? box->width % gwidth : gwidth,
      (box->height % gheight) ? box->height % gheight : gheight,
      (box->depth % gdepth) ? box->depth % gdepth : gdepth

   /* TODO: msaa needs miptail */
   //VkSparseImageOpaqueMemoryBindInfo sparse_obind;
#define NUM_BATCHED_BINDS 50
   VkSparseImageMemoryBind ibind[NUM_BATCHED_BINDS];
   uint32_t backing_start[NUM_BATCHED_BINDS], backing_size[NUM_BATCHED_BINDS];
   struct zink_sparse_backing *backing[NUM_BATCHED_BINDS];

   bool commits_pending = false;
   uint32_t va_page_offset = 0;
   for (unsigned l = 0; l < level; l++) {
      unsigned mipwidth = DIV_ROUND_UP(MAX2(res->base.b.width0 >> l, 1), gwidth);
      unsigned mipheight = DIV_ROUND_UP(MAX2(res->base.b.height0 >> l, 1), gheight);
      unsigned mipdepth = DIV_ROUND_UP(res->base.b.array_size > 1 ? res->base.b.array_size : MAX2(res->base.b.depth0 >> l, 1), gdepth);
      va_page_offset += mipwidth * mipheight * mipdepth;
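
   /*
    * Illustrative page-index math for the loop above (not driver logic): for
    * a 256x256 2D texture with a 128x128x1 sparse granularity, level 0
    * occupies 2*2*1 = 4 granule pages, so a commit targeting level 1 starts
    * at va_page_offset == 4 and then adds the granule coordinates of the box
    * being (un)committed within that level.
    */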
   for (unsigned d = 0; d < ndepth; d++) {
      for (unsigned h = 0; h < nheight; h++) {
         for (unsigned w = 0; w < nwidth; w++) {
            ibind[i].subresource = subresource;
            ibind[i].offset.x = w * gwidth;
            ibind[i].offset.y = h * gheight;
            if (res->base.b.array_size > 1) {
               ibind[i].subresource.arrayLayer = d * gdepth;
               ibind[i].offset.z = 0;

            ibind[i].offset.z = d * gdepth;

            ibind[i].extent.width = (w == nwidth - 1) ? lastBlockExtent.width : gwidth;
            ibind[i].extent.height = (h == nheight - 1) ? lastBlockExtent.height : gheight;
            ibind[i].extent.depth = (d == ndepth - 1 && res->base.b.target != PIPE_TEXTURE_CUBE) ? lastBlockExtent.depth : gdepth;
            uint32_t va_page = va_page_offset +
               (d + (box->z / gdepth)) * ((MAX2(res->base.b.width0 >> level, 1) / gwidth) * (MAX2(res->base.b.height0 >> level, 1) / gheight)) +
               (h + (box->y / gheight)) * (MAX2(res->base.b.width0 >> level, 1) / gwidth) +
               (w + (box->x / gwidth));

            uint32_t end_va_page = va_page + 1;

            while (va_page < end_va_page) {
               uint32_t span_va_page;

               /* Skip pages that are already committed. */
               if (comm[va_page].backing) {

               /* Determine length of uncommitted span. */
               span_va_page = va_page;
               while (va_page < end_va_page && !comm[va_page].backing)

               /* Fill the uncommitted span with chunks of backing memory. */
               while (span_va_page < va_page) {
                  backing_size[i] = va_page - span_va_page;
                  backing[i] = sparse_backing_alloc(screen, bo, &backing_start[i], &backing_size[i]);

                  if (level >= res->sparse.imageMipTailFirstLod) {
                     uint32_t offset = res->sparse.imageMipTailOffset + d * res->sparse.imageMipTailStride;
                     ok = texture_commit_miptail(screen, res, backing[i]->bo, backing_start[i], offset, commit);

                  ibind[i].memory = backing[i]->bo->mem ? backing[i]->bo->mem : backing[i]->bo->u.slab.real->mem;
                  ibind[i].memoryOffset = backing_start[i] * ZINK_SPARSE_BUFFER_PAGE_SIZE +
                                          (backing[i]->bo->mem ? 0 : backing[i]->bo->offset);
                  commits_pending = true;

                  while (backing_size[i]) {
                     comm[span_va_page].backing = backing[i];
                     comm[span_va_page].page = backing_start[i];

            ibind[i].memory = VK_NULL_HANDLE;
            ibind[i].memoryOffset = 0;

            while (va_page < end_va_page) {
               /* Skip pages that are already uncommitted. */
               if (!comm[va_page].backing) {

               /* Group contiguous spans of pages. */
               backing[i] = comm[va_page].backing;
               backing_start[i] = comm[va_page].page;
               comm[va_page].backing = NULL;
               backing_size[i] = 1;

               while (va_page < end_va_page &&
                      comm[va_page].backing == backing[i] &&
                      comm[va_page].page == backing_start[i] + backing_size[i]) {
                  comm[va_page].backing = NULL;

               if (level >= res->sparse.imageMipTailFirstLod) {
                  uint32_t offset = res->sparse.imageMipTailOffset + d * res->sparse.imageMipTailStride;
                  ok = texture_commit_miptail(screen, res, NULL, 0, offset, commit);

               commits_pending = true;

            if (i == ARRAY_SIZE(ibind)) {
               if (!texture_commit_single(screen, res, ibind, ARRAY_SIZE(ibind), commit)) {
                  for (unsigned s = 0; s < i; s++) {
                     ok = sparse_backing_free(screen, backing[s]->bo, backing[s], backing_start[s], backing_size[s]);
                     /* Couldn't allocate tracking data structures, so we have to leak */
                     fprintf(stderr, "zink: leaking sparse backing memory\n");

               commits_pending = false;

   if (commits_pending && !texture_commit_single(screen, res, ibind, i, commit)) {
      for (unsigned s = 0; s < i; s++) {
         ok = sparse_backing_free(screen, backing[s]->bo, backing[s], backing_start[s], backing_size[s]);
         /* Couldn't allocate tracking data structures, so we have to leak */
         fprintf(stderr, "zink: leaking sparse backing memory\n");

   simple_mtx_unlock(&bo->lock);
   simple_mtx_unlock(&screen->queue_lock);
static const struct pb_vtbl bo_slab_vtbl = {
   /* Cast to void* because one of the function parameters is a struct pointer instead of void*. */
   (void*)bo_slab_destroy
   /* other functions are never called */

static struct pb_slab *
bo_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned group_index, bool encrypted)
   struct zink_screen *screen = priv;
   unsigned slab_size = 0;
   struct zink_slab *slab = CALLOC_STRUCT(zink_slab);

   //struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && screen->info.has_tmz_support) ?
   //screen->bo_slabs_encrypted : screen->bo_slabs;
   struct pb_slabs *slabs = screen->pb.bo_slabs;

   /* Determine the slab buffer size. */
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      unsigned max_entry_size = 1 << (slabs[i].min_order + slabs[i].num_orders - 1);

      if (entry_size <= max_entry_size) {
         /* The slab size is twice the size of the largest possible entry. */
         slab_size = max_entry_size * 2;

         if (!util_is_power_of_two_nonzero(entry_size)) {
            assert(util_is_power_of_two_nonzero(entry_size * 4 / 3));

            /* If the entry size is 3/4 of a power of two, we would waste space and not gain
             * anything if we allocated only twice the power of two for the backing buffer:
             * 2 * 3/4 = 1.5 usable with buffer size 2
             * Allocating 5 times the entry size leads us to the next power of two and results
             * in a much better memory utilization:
             * 5 * 3/4 = 3.75 usable with buffer size 4
             */
            if (entry_size * 5 > slab_size)
               slab_size = util_next_power_of_two(entry_size * 5);
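
            /*
             * Concrete numbers for the comment above (illustrative only): in
             * a bucket whose largest power-of-two entry is 256 KB, a 192 KB
             * entry (3/4 of 256 KB) in a 2 * 256 KB = 512 KB slab fits only
             * twice (384 KB used, 75% utilization), while 5 * 192 KB = 960 KB
             * rounds up to a 1 MB slab holding five entries (960 KB used,
             * ~94% utilization).
             */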
   assert(slab_size != 0);

   slab->buffer = zink_bo(zink_bo_create(screen, slab_size, slab_size, heap, 0, NULL));

   slab_size = slab->buffer->base.size;

   slab->base.num_entries = slab_size / entry_size;
   slab->base.num_free = slab->base.num_entries;
   slab->entry_size = entry_size;
   slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));

   list_inithead(&slab->base.free);

   /* C11 too hard for msvc, no __sync_fetch_and_add */
   base_id = p_atomic_add_return(&screen->pb.next_bo_unique_id, slab->base.num_entries) - slab->base.num_entries;

   base_id = __sync_fetch_and_add(&screen->pb.next_bo_unique_id, slab->base.num_entries);

   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct zink_bo *bo = &slab->entries[i];

      simple_mtx_init(&bo->lock, mtx_plain);
      bo->base.alignment_log2 = util_logbase2(get_slab_entry_alignment(screen, entry_size));
      bo->base.size = entry_size;
      bo->base.vtbl = &bo_slab_vtbl;
      bo->offset = slab->buffer->offset + i * entry_size;
      bo->unique_id = base_id + i;
      bo->u.slab.entry.slab = &slab->base;
      bo->u.slab.entry.group_index = group_index;
      bo->u.slab.entry.entry_size = entry_size;

      if (slab->buffer->mem) {
         /* The slab is not suballocated. */
         bo->u.slab.real = slab->buffer;

      /* The slab is allocated out of a bigger slab. */
      bo->u.slab.real = slab->buffer->u.slab.real;
      assert(bo->u.slab.real->mem);

      bo->base.placement = bo->u.slab.real->base.placement;

      list_addtail(&bo->u.slab.entry.head, &slab->base.free);

   /* Wasted alignment due to slabs with 3/4 allocations being aligned to a power of two. */
   assert(slab->base.num_entries * entry_size <= slab_size);

   zink_bo_unref(screen, slab->buffer);
static struct pb_slab *
bo_slab_alloc_normal(void *priv, unsigned heap, unsigned entry_size, unsigned group_index)
   return bo_slab_alloc(priv, heap, entry_size, group_index, false);

zink_bo_init(struct zink_screen *screen)
   uint64_t total_mem = 0;
   for (uint32_t i = 0; i < screen->info.mem_props.memoryHeapCount; ++i)
      total_mem += screen->info.mem_props.memoryHeaps[i].size;
   /* Create managers. */
   pb_cache_init(&screen->pb.bo_cache, ZINK_HEAP_MAX,
                 total_mem / 8, screen,
                 (void*)bo_destroy, (void*)bo_can_reclaim);

   unsigned min_slab_order = MIN_SLAB_ORDER; /* 256 bytes */
   unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */
   unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) /
                                            NUM_SLAB_ALLOCATORS;
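
   /*
    * Example of the split below (illustrative; assumes MIN_SLAB_ORDER == 8
    * and NUM_SLAB_ALLOCATORS == 3): num_slab_orders_per_allocator is
    * (20 - 8) / 3 = 4, so the allocators cover orders 8-12, 13-17 and 18-20
    * (256 B up to 1 MB entries), each one starting one order above the
    * previous allocator's maximum.
    */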
   /* Divide the size order range among slab managers. */
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      unsigned min_order = min_slab_order;
      unsigned max_order = MIN2(min_order + num_slab_orders_per_allocator,

      if (!pb_slabs_init(&screen->pb.bo_slabs[i],
                         min_order, max_order,
                         ZINK_HEAP_MAX, true,
                         bo_can_reclaim_slab,
                         bo_slab_alloc_normal,
                         (void*)bo_slab_free)) {

      min_slab_order = max_order + 1;

   screen->pb.min_alloc_size = 1 << screen->pb.bo_slabs[0].min_order;
   screen->pb.bo_export_table = util_hash_table_create_ptr_keys();
   simple_mtx_init(&screen->pb.bo_export_table_lock, mtx_plain);

zink_bo_deinit(struct zink_screen *screen)
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      if (screen->pb.bo_slabs[i].groups)
         pb_slabs_deinit(&screen->pb.bo_slabs[i]);

   pb_cache_deinit(&screen->pb.bo_cache);
   _mesa_hash_table_destroy(screen->pb.bo_export_table, NULL);
   simple_mtx_destroy(&screen->pb.bo_export_table_lock);