/****************************************************************************
 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * @file arena.h
 *
 * @brief Arena memory manager
 *        The arena is a convenient, fast manager for allocations that are tied
 *        to a single operation and can all be freed at once when that operation
 *        completes. Allocation is cheap: most of the time it is simply an
 *        increment of an offset, and individual allocations never need to be
 *        freed. All of the arena's memory is released in one call.
 *        (A usage sketch appears at the end of this file.)
 *
 ******************************************************************************/
#pragma once

#include <mutex>
#include <algorithm>

#include "core/utils.h"

static const size_t ARENA_BLOCK_ALIGN = 64;

struct ArenaBlock
{
    size_t      blockSize = 0;
    ArenaBlock* pNext     = nullptr;
};
static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN, "Increase BLOCK_ALIGN size");
class DefaultAllocator
{
public:
    ArenaBlock* AllocateAligned(size_t size, size_t align)
    {
        SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));

        ArenaBlock* p = new (AlignedMalloc(size, align)) ArenaBlock();
        p->blockSize  = size;
        return p;
    }

    void Free(ArenaBlock* pMem)
    {
        if (pMem)
        {
            // 0xdddddddd is the MSVC debug-heap fill pattern for freed memory,
            // so this assert helps catch double-frees and stomped headers.
            SWR_ASSUME_ASSERT(pMem->blockSize < size_t(0xdddddddd));
            AlignedFree(pMem);
        }
    }
};
// Caching allocator for arena. Holds freed blocks in size-bucketed lists so
// later arenas can reuse them instead of hitting the system allocator.
template <uint32_t NumBucketsT = 8, uint32_t StartBucketBitT = 12>
struct CachingAllocatorT : DefaultAllocator
{
    ArenaBlock* AllocateAligned(size_t size, size_t align)
    {
        SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));
        SWR_ASSUME_ASSERT(size <= uint32_t(-1));

        uint32_t bucket = GetBucketId(size);

        {
            // Search cached blocks
            std::lock_guard<std::mutex> l(m_mutex);
            ArenaBlock* pPrevBlock = &m_cachedBlocks[bucket];
            ArenaBlock* pBlock     = SearchBlocks(pPrevBlock, size, align);

            if (pBlock)
            {
                m_cachedSize -= pBlock->blockSize;
                if (pBlock == m_pLastCachedBlocks[bucket])
                {
                    m_pLastCachedBlocks[bucket] = pPrevBlock;
                }
            }
            else
            {
                // Not in the current cache; try the older generation.
                pPrevBlock = &m_oldCachedBlocks[bucket];
                pBlock     = SearchBlocks(pPrevBlock, size, align);

                if (pBlock)
                {
                    m_oldCachedSize -= pBlock->blockSize;
                    if (pBlock == m_pOldLastCachedBlocks[bucket])
                    {
                        m_pOldLastCachedBlocks[bucket] = pPrevBlock;
                    }
                }
            }

            if (pBlock)
            {
                // Unlink the cached block and hand it out.
                SWR_ASSUME_ASSERT(pPrevBlock && pPrevBlock->pNext == pBlock);
                pPrevBlock->pNext = pBlock->pNext;
                pBlock->pNext     = nullptr;

                return pBlock;
            }

            m_totalAllocated += size;

#if 0
            // Optional debug spew (Windows-only APIs; left disabled).
            {
                static uint32_t count = 0;
                char buf[128];
                sprintf_s(buf, "Arena Alloc %d 0x%llx bytes - 0x%llx total\n", ++count, uint64_t(size), uint64_t(m_totalAllocated));
                OutputDebugStringA(buf);
            }
#endif
        }

        if (bucket && bucket < (CACHE_NUM_BUCKETS - 1))
        {
            // Make all blocks in this bucket the same size
            size = size_t(1) << (bucket + 1 + CACHE_START_BUCKET_BIT);
        }

        return this->DefaultAllocator::AllocateAligned(size, align);
    }
    void Free(ArenaBlock* pMem)
    {
        if (pMem)
        {
            std::unique_lock<std::mutex> l(m_mutex);
            InsertCachedBlock(GetBucketId(pMem->blockSize), pMem);
        }
    }
    void FreeOldBlocks()
    {
        if (!m_cachedSize)
        {
            return;
        }
        std::lock_guard<std::mutex> l(m_mutex);

        bool doFree = (m_oldCachedSize > MAX_UNUSED_SIZE);

        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            if (doFree)
            {
                ArenaBlock* pBlock = m_oldCachedBlocks[i].pNext;
                while (pBlock)
                {
                    ArenaBlock* pNext = pBlock->pNext;
                    m_oldCachedSize -= pBlock->blockSize;
                    m_totalAllocated -= pBlock->blockSize;
                    this->DefaultAllocator::Free(pBlock);
                    pBlock = pNext;
                }

                m_oldCachedBlocks[i].pNext = nullptr;
                m_pOldLastCachedBlocks[i]  = &m_oldCachedBlocks[i];
            }

            if (m_pLastCachedBlocks[i] != &m_cachedBlocks[i])
            {
                if (i && i < (CACHE_NUM_BUCKETS - 1))
                {
                    // We know that all blocks in this bucket are the same size,
                    // so just move the whole list over.
                    m_pLastCachedBlocks[i]->pNext = m_oldCachedBlocks[i].pNext;
                    m_oldCachedBlocks[i].pNext    = m_cachedBlocks[i].pNext;
                    m_cachedBlocks[i].pNext       = nullptr;
                    if (m_pOldLastCachedBlocks[i]->pNext)
                    {
                        m_pOldLastCachedBlocks[i] = m_pLastCachedBlocks[i];
                    }
                    m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
                }
                else
                {
                    // The end buckets can have variable sized lists.
                    // Insert each block based on size.
                    ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
                    while (pBlock)
                    {
                        ArenaBlock* pNext = pBlock->pNext;
                        pBlock->pNext     = nullptr;
                        m_cachedSize -= pBlock->blockSize;
                        InsertCachedBlock<true>(i, pBlock);
                        pBlock = pNext;
                    }

                    m_pLastCachedBlocks[i]  = &m_cachedBlocks[i];
                    m_cachedBlocks[i].pNext = nullptr;
                }
            }
        }

        m_oldCachedSize += m_cachedSize;
        m_cachedSize = 0;
    }
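    // Aging scheme used above: blocks freed since the last FreeOldBlocks()
    // call sit in m_cachedBlocks. Each call demotes them to m_oldCachedBlocks,
    // and once the old generation exceeds MAX_UNUSED_SIZE the next call
    // returns those long-unused blocks to the system allocator.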
    CachingAllocatorT()
    {
        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            m_pLastCachedBlocks[i]    = &m_cachedBlocks[i];
            m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
        }
    }
    ~CachingAllocatorT()
    {
        // Free all cached blocks
        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
            while (pBlock)
            {
                ArenaBlock* pNext = pBlock->pNext;
                this->DefaultAllocator::Free(pBlock);
                pBlock = pNext;
            }

            pBlock = m_oldCachedBlocks[i].pNext;
            while (pBlock)
            {
                ArenaBlock* pNext = pBlock->pNext;
                this->DefaultAllocator::Free(pBlock);
                pBlock = pNext;
            }
        }
    }

private:
    static uint32_t GetBucketId(size_t blockSize)
    {
        uint32_t bucketId = 0;

#if defined(BitScanReverseSizeT)
        BitScanReverseSizeT((unsigned long*)&bucketId, (blockSize - 1) >> CACHE_START_BUCKET_BIT);
        bucketId = std::min<uint32_t>(bucketId, CACHE_NUM_BUCKETS - 1);
#endif

        return bucketId;
    }
    template <bool OldBlockT = false>
    void InsertCachedBlock(uint32_t bucketId, ArenaBlock* pNewBlock)
    {
        SWR_ASSUME_ASSERT(bucketId < CACHE_NUM_BUCKETS);

        ArenaBlock* pPrevBlock =
            OldBlockT ? &m_oldCachedBlocks[bucketId] : &m_cachedBlocks[bucketId];
        ArenaBlock* pBlock = pPrevBlock->pNext;

        // Keep each list sorted by size, biggest first.
        while (pBlock)
        {
            if (pNewBlock->blockSize >= pBlock->blockSize)
            {
                // Insert here
                break;
            }
            pPrevBlock = pBlock;
            pBlock     = pBlock->pNext;
        }

        // Insert into list
        SWR_ASSUME_ASSERT(pPrevBlock);
        pPrevBlock->pNext = pNewBlock;
        pNewBlock->pNext  = pBlock;

        if (OldBlockT)
        {
            if (m_pOldLastCachedBlocks[bucketId] == pPrevBlock)
            {
                m_pOldLastCachedBlocks[bucketId] = pNewBlock;
            }

            m_oldCachedSize += pNewBlock->blockSize;
        }
        else
        {
            if (m_pLastCachedBlocks[bucketId] == pPrevBlock)
            {
                m_pLastCachedBlocks[bucketId] = pNewBlock;
            }

            m_cachedSize += pNewBlock->blockSize;
        }
    }
    static ArenaBlock* SearchBlocks(ArenaBlock*& pPrevBlock, size_t blockSize, size_t align)
    {
        ArenaBlock* pBlock          = pPrevBlock->pNext;
        ArenaBlock* pPotentialBlock = nullptr;
        ArenaBlock* pPotentialPrev  = nullptr;

        while (pBlock)
        {
            if (pBlock->blockSize >= blockSize)
            {
                if (pBlock == AlignUp(pBlock, align))
                {
                    if (pBlock->blockSize == blockSize)
                    {
                        // Won't find a better match
                        break;
                    }

                    // We could use this as it is larger than we wanted, but
                    // continue to search for a better match
                    pPotentialBlock = pBlock;
                    pPotentialPrev  = pPrevBlock;
                }
            }
            else
            {
                // Blocks are sorted by size (biggest first).
                // So, if we get here, there are no blocks
                // large enough; fall through to allocation.
                pBlock = nullptr;
                break;
            }

            pPrevBlock = pBlock;
            pBlock     = pBlock->pNext;
        }

        if (pBlock == nullptr)
        {
            // Couldn't find an exact match, use next biggest size
            pBlock     = pPotentialBlock;
            pPrevBlock = pPotentialPrev;
        }

        return pBlock;
    }
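    // Example walk (illustrative sizes): searching a list [96K, 80K, 64K] for
    // a 64K block records 96K then 80K as fallbacks and stops at the exact
    // 64K match. A request for 72K runs past 80K, hits the too-small 64K
    // entry, and falls back to the best oversized candidate, 80K.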
    // Buckets, for block sizes < (1 << (start+1)), < (1 << (start+2)), ...
    static const uint32_t CACHE_NUM_BUCKETS      = NumBucketsT;
    static const uint32_t CACHE_START_BUCKET_BIT = StartBucketBitT;
    static const size_t   MAX_UNUSED_SIZE        = sizeof(MEGABYTE);
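    // Worked example of the bucket math (with the defaults NumBucketsT = 8,
    // StartBucketBitT = 12): bucket 0 caches blocks up to 8 KiB, bucket 1 up
    // to 16 KiB, ..., bucket 6 up to 512 KiB, and the last bucket holds
    // anything larger. For instance:
    //   GetBucketId(8192) -> BSR((8192 - 1) >> 12) = BSR(1) = 0  (bucket 0)
    //   GetBucketId(8193) -> BSR((8193 - 1) >> 12) = BSR(2) = 1  (bucket 1)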
    ArenaBlock  m_cachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock* m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock  m_oldCachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock* m_pOldLastCachedBlocks[CACHE_NUM_BUCKETS];

    std::mutex m_mutex;

    size_t m_totalAllocated = 0;

    size_t m_cachedSize    = 0;
    size_t m_oldCachedSize = 0;
};
typedef CachingAllocatorT<> CachingAllocator;

template <typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
class TArena
{
public:
    TArena(T& in_allocator) : m_allocator(in_allocator) {}
    TArena() : m_allocator(m_defAllocator) {}
    ~TArena() { Reset(true); }
    void* AllocAligned(size_t size, size_t align)
    {
        if (0 == size)
        {
            return nullptr;
        }

        SWR_ASSERT(align <= ARENA_BLOCK_ALIGN);

        if (m_pCurBlock)
        {
            ArenaBlock* pCurBlock = m_pCurBlock;
            size_t      offset    = AlignUp(m_offset, align);

            if ((offset + size) <= pCurBlock->blockSize)
            {
                void* pMem = PtrAdd(pCurBlock, offset);
                m_offset   = offset + size;
                return pMem;
            }

            // Not enough memory in this block, fall through to allocate
            // a new block.
        }

        static const size_t ArenaBlockSize = BlockSizeT;
        size_t blockSize = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize);

        // Add in one BLOCK_ALIGN unit to store the ArenaBlock header in.
        blockSize = AlignUp(blockSize, ARENA_BLOCK_ALIGN);

        ArenaBlock* pNewBlock = m_allocator.AllocateAligned(
            blockSize, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
        SWR_ASSERT(pNewBlock != nullptr);

        if (pNewBlock != nullptr)
        {
            m_offset         = ARENA_BLOCK_ALIGN;
            pNewBlock->pNext = m_pCurBlock;

            m_pCurBlock = pNewBlock;
        }

        return AllocAligned(size, align);
    }
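    // Block layout, as implied by the code above: the ArenaBlock header lives
    // in the first ARENA_BLOCK_ALIGN (64) bytes of every block, so m_offset
    // starts at 64 and user data begins on the next 64-byte boundary:
    //
    //   byte 0               byte 64
    //   [ ArenaBlock header ][ user allocations, bump-allocated ... ]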
    void* Alloc(size_t size) { return AllocAligned(size, 1); }

    void* AllocAlignedSync(size_t size, size_t align)
    {
        void* pAlloc = nullptr;

        m_mutex.lock();
        pAlloc = AllocAligned(size, align);
        m_mutex.unlock();

        return pAlloc;
    }

    void* AllocSync(size_t size)
    {
        void* pAlloc = nullptr;

        m_mutex.lock();
        pAlloc = Alloc(size);
        m_mutex.unlock();

        return pAlloc;
    }
    void Reset(bool removeAll = false)
    {
        m_offset = ARENA_BLOCK_ALIGN;

        if (m_pCurBlock)
        {
            // Free every block except the current one, which is kept for reuse.
            ArenaBlock* pUsedBlocks = m_pCurBlock->pNext;
            m_pCurBlock->pNext      = nullptr;
            while (pUsedBlocks)
            {
                ArenaBlock* pBlock = pUsedBlocks;
                pUsedBlocks        = pBlock->pNext;

                m_allocator.Free(pBlock);
            }

            if (removeAll)
            {
                m_allocator.Free(m_pCurBlock);
                m_pCurBlock = nullptr;
            }
        }
    }
    bool IsEmpty()
    {
        return (m_pCurBlock == nullptr) ||
               (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr);
    }

private:
    ArenaBlock* m_pCurBlock = nullptr;
    size_t      m_offset    = ARENA_BLOCK_ALIGN;

    /// @note Mutex is only used by sync allocation functions.
    std::mutex m_mutex;

    DefaultAllocator m_defAllocator;
    T&               m_allocator;
};

using StdArena     = TArena<DefaultAllocator>;
using CachingArena = TArena<CachingAllocator>;
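
// ----------------------------------------------------------------------------
// Usage sketch (illustrative only; this helper is not part of the original
// interface, and the names and sizes below are hypothetical). The intended
// pattern: allocate freely while an operation runs, then release everything
// at once.
// ----------------------------------------------------------------------------
inline void ArenaUsageExample()
{
    // A caching allocator is typically shared so blocks freed by one arena
    // can be reused by the next.
    static CachingAllocator s_blockCache;

    CachingArena arena(s_blockCache);

    // Fast path: each allocation is normally just an offset increment within
    // the current block.
    float*    pVerts   = static_cast<float*>(arena.AllocAligned(64 * sizeof(float), 16));
    uint32_t* pIndices = static_cast<uint32_t*>(arena.Alloc(256 * sizeof(uint32_t)));
    (void)pVerts;
    (void)pIndices;

    // One call releases every allocation made above; surplus blocks return to
    // the caching allocator, and the arena keeps its current block for reuse.
    arena.Reset();
}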