/* Python's malloc wrappers (see pymem.h) */

#ifdef PYMALLOC_DEBUG   /* WITH_PYMALLOC && PYMALLOC_DEBUG */
/* Forward declaration */
static void* _PyMem_DebugMalloc(void *ctx, size_t size);
static void _PyMem_DebugFree(void *ctx, void *p);
static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);

static void _PyObject_DebugDumpAddress(const void *p);
static void _PyMem_DebugCheckAddress(char api_id, const void *p);
#endif

#if defined(__has_feature)  /* Clang */
 #if __has_feature(address_sanitizer)  /* is ASAN enabled? */
  #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS \
        __attribute__((no_address_safety_analysis)) \
        __attribute__ ((noinline))
 #else
  #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
 #endif
#else
 #if defined(__SANITIZE_ADDRESS__)  /* GCC 4.8.x, is ASAN enabled? */
  #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS \
        __attribute__((no_address_safety_analysis)) \
        __attribute__ ((noinline))
 #else
  #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
 #endif
#endif

#elif defined(HAVE_MMAP)
# include <sys/mman.h>
# define ARENAS_USE_MMAP

/* Forward declaration */
static void* _PyObject_Malloc(void *ctx, size_t size);
static void _PyObject_Free(void *ctx, void *p);
static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
static void *
_PyMem_RawMalloc(void *ctx, size_t size)
{
    /* PyMem_Malloc(0) means malloc(1). Some systems would return NULL
       for malloc(0), which would be treated as an error. Some platforms
       would return a pointer with no memory behind it, which would break
       pymalloc. To solve these problems, allocate an extra byte. */
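    /* Sketch of the elided body implied by the comment above (the exact
     * original statements are not shown in this excerpt): */
    if (size == 0)
        size = 1;
    return malloc(size);
}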
static void *
_PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
{
    if (size == 0)
        size = 1;
    return realloc(ptr, size);
}

static void
_PyMem_RawFree(void *ctx, void *ptr)
{
    free(ptr);
}

#ifdef MS_WINDOWS
static void *
_PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
{
    return VirtualAlloc(NULL, size,
                        MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
}

static void
_PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
{
    VirtualFree(ptr, 0, MEM_RELEASE);
}

#elif defined(ARENAS_USE_MMAP)
static void *
_PyObject_ArenaMmap(void *ctx, size_t size)
{
    void *ptr;
    ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    if (ptr == MAP_FAILED)
        return NULL;
    return ptr;
}

static void
_PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
{
    munmap(ptr, size);
}

#else
static void *
_PyObject_ArenaMalloc(void *ctx, size_t size)
{
    return malloc(size);
}

static void
_PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
{
    free(ptr);
}
#endif

#define PYRAW_FUNCS _PyMem_RawMalloc, _PyMem_RawRealloc, _PyMem_RawFree
#ifdef WITH_PYMALLOC
# define PYOBJ_FUNCS _PyObject_Malloc, _PyObject_Realloc, _PyObject_Free
#else
# define PYOBJ_FUNCS PYRAW_FUNCS
#endif
#define PYMEM_FUNCS PYRAW_FUNCS
#ifdef PYMALLOC_DEBUG
typedef struct {
    /* We tag each block with an API ID in order to detect API violations */
    char api_id;
    PyMemAllocator alloc;
} debug_alloc_api_t;

static struct {
    debug_alloc_api_t raw;
    debug_alloc_api_t mem;
    debug_alloc_api_t obj;
} _PyMem_Debug = {
    {'r', {NULL, PYRAW_FUNCS}},
    {'m', {NULL, PYMEM_FUNCS}},
    {'o', {NULL, PYOBJ_FUNCS}}
    };

#define PYDBG_FUNCS _PyMem_DebugMalloc, _PyMem_DebugRealloc, _PyMem_DebugFree
#endif

static PyMemAllocator _PyMem_Raw = {
#ifdef PYMALLOC_DEBUG
    &_PyMem_Debug.raw, PYDBG_FUNCS
#else
    NULL, PYRAW_FUNCS
#endif
    };

static PyMemAllocator _PyMem = {
#ifdef PYMALLOC_DEBUG
    &_PyMem_Debug.mem, PYDBG_FUNCS
#else
    NULL, PYMEM_FUNCS
#endif
    };

static PyMemAllocator _PyObject = {
#ifdef PYMALLOC_DEBUG
    &_PyMem_Debug.obj, PYDBG_FUNCS
#else
    NULL, PYOBJ_FUNCS
#endif
    };

static PyObjectArenaAllocator _PyObject_Arena = {NULL,
#ifdef MS_WINDOWS
    _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
#elif defined(ARENAS_USE_MMAP)
    _PyObject_ArenaMmap, _PyObject_ArenaMunmap
#else
    _PyObject_ArenaMalloc, _PyObject_ArenaFree
#endif
    };
void
PyMem_SetupDebugHooks(void)
{
#ifdef PYMALLOC_DEBUG
    PyMemAllocator alloc;

    alloc.malloc = _PyMem_DebugMalloc;
    alloc.realloc = _PyMem_DebugRealloc;
    alloc.free = _PyMem_DebugFree;

    if (_PyMem_Raw.malloc != _PyMem_DebugMalloc) {
        alloc.ctx = &_PyMem_Debug.raw;
        PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
    }

    if (_PyMem.malloc != _PyMem_DebugMalloc) {
        alloc.ctx = &_PyMem_Debug.mem;
        PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
    }

    if (_PyObject.malloc != _PyMem_DebugMalloc) {
        alloc.ctx = &_PyMem_Debug.obj;
        PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
    }
#endif
}

void
PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocator *allocator)
{
    switch(domain)
    {
    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
    default:
        /* unknown domain */
        allocator->ctx = NULL;
        allocator->malloc = NULL;
        allocator->realloc = NULL;
        allocator->free = NULL;
    }
}

void
PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocator *allocator)
{
    switch(domain)
    {
    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
    /* ignore unknown domain */
    }
}

void
PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
{
    *allocator = _PyObject_Arena;
}

void
PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
{
    _PyObject_Arena = *allocator;
}
void *
PyMem_RawMalloc(size_t size)
{
    /*
     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
     * Most python internals blindly use a signed Py_ssize_t to track
     * things without checking for overflows or negatives.
     * As size_t is unsigned, checking for size < 0 is not required.
     */
    if (size > (size_t)PY_SSIZE_T_MAX)
        return NULL;

    return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
}

void *
PyMem_RawRealloc(void *ptr, size_t new_size)
{
    /* see PyMem_RawMalloc() */
    if (new_size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
}

void PyMem_RawFree(void *ptr)
{
    _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
}

void *
PyMem_Malloc(size_t size)
{
    /* see PyMem_RawMalloc() */
    if (size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyMem.malloc(_PyMem.ctx, size);
}

void *
PyMem_Realloc(void *ptr, size_t new_size)
{
    /* see PyMem_RawMalloc() */
    if (new_size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
}

void
PyMem_Free(void *ptr)
{
    _PyMem.free(_PyMem.ctx, ptr);
}

char *
_PyMem_RawStrdup(const char *str)
{
    size_t size;
    char *copy;

    size = strlen(str) + 1;
    copy = PyMem_RawMalloc(size);
    if (copy == NULL)
        return NULL;
    memcpy(copy, str, size);
    return copy;
}

char *
_PyMem_Strdup(const char *str)
{
    size_t size;
    char *copy;

    size = strlen(str) + 1;
    copy = PyMem_Malloc(size);
    if (copy == NULL)
        return NULL;
    memcpy(copy, str, size);
    return copy;
}

void *
PyObject_Malloc(size_t size)
{
    /* see PyMem_RawMalloc() */
    if (size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyObject.malloc(_PyObject.ctx, size);
}

void *
PyObject_Realloc(void *ptr, size_t new_size)
{
    /* see PyMem_RawMalloc() */
    if (new_size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
}

void
PyObject_Free(void *ptr)
{
    _PyObject.free(_PyObject.ctx, ptr);
}
#ifdef WITH_VALGRIND
#include <valgrind/valgrind.h>

/* If we're using GCC, use __builtin_expect() to reduce overhead of
   the valgrind checks */
#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
# define UNLIKELY(value) __builtin_expect((value), 0)
#else
# define UNLIKELY(value) (value)
#endif

/* -1 indicates that we haven't checked that we're running on valgrind yet. */
static int running_on_valgrind = -1;
#endif
/* An object allocator for Python.

   Here is an introduction to the layers of the Python memory architecture,
   showing where the object allocator is actually used (layer +2). It is
   called for every object allocation and deallocation (PyObject_New/Del),
   unless the object-specific allocators implement a proprietary allocation
   scheme (ex.: ints use a simple free list). This is also the place where
   the cyclic garbage collector operates selectively on container objects.

        Object-specific allocators
    _____   ______   ______       ________
   [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
+3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
    _______________________________       |                           |
   [   Python's object allocator   ]      |                           |
+2 | ####### Object memory ####### | <------ Internal buffers ------> |
    ______________________________________________________________    |
   [          Python's raw memory allocator (PyMem_ API)          ]   |
+1 | <----- Python memory (under PyMem manager's control) ------> |   |
    __________________________________________________________________
   [    Underlying general-purpose allocator (ex: C library malloc)   ]
 0 | <------ Virtual memory allocated for the python process -------> |

   =========================================================================
    _______________________________________________________________________
   [                OS-specific Virtual Memory Manager (VMM)               ]
-1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
    __________________________________   __________________________________
   [                                  ] [                                  ]
-2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |

*/
/*==========================================================================*/

/* A fast, special-purpose memory allocator for small blocks, to be used
   on top of a general-purpose malloc -- heavily based on previous art. */

/* Vladimir Marangozov -- August 2000 */

/*
 * "Memory management is where the rubber meets the road -- if we do the wrong
 * thing at any level, the results will not be good. And if we don't make the
 * levels work well together, we are in serious trouble." (1)
 *
 * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
 *     "Dynamic Storage Allocation: A Survey and Critical Review",
 *     in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
 */

/* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */

/*==========================================================================*/
/*
 * Allocation strategy abstract:
 *
 * For small requests, the allocator sub-allocates <Big> blocks of memory.
 * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
 * system's allocator.
 *
 * Small requests are grouped in size classes spaced 8 bytes apart, due
 * to the required valid alignment of the returned address. Requests of
 * a particular size are serviced from memory pools of 4K (one VMM page).
 * Pools are fragmented on demand and contain free lists of blocks of one
 * particular size class. In other words, there is a fixed-size allocator
 * for each size class. Free pools are shared by the different allocators,
 * thus minimizing the space reserved for a particular size class.
 *
 * This allocation strategy is a variant of what is known as "simple
 * segregated storage based on array of free lists". The main drawback of
 * simple segregated storage is that we might end up with a lot of reserved
 * memory for the different free lists, which can degenerate over time. To
 * avoid this, we partition each free list in pools and we share dynamically
 * the reserved space between all free lists. This technique is quite
 * efficient for memory-intensive programs which allocate mainly small-sized
 * blocks.
 *
 * For small requests we have the following table:
 *
 * Request in bytes     Size of allocated block      Size class idx
 * ----------------------------------------------------------------
 *        1-8                     8                       0
 *        9-16                   16                       1
 *       17-24                   24                       2
 *       ...                    ...                     ...
 *      505-512                 512                      63
 *
 *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
 *      allocator.
 */
/*==========================================================================*/

/*
 * -- Main tunable settings section --
 */

/*
 * Alignment of addresses returned to the user. 8-byte alignment works
 * on most current architectures (with 32-bit or 64-bit address buses).
 * The alignment value is also used for grouping small requests in size
 * classes spaced ALIGNMENT bytes apart.
 *
 * You shouldn't change this unless you know what you are doing.
 */
#define ALIGNMENT               8               /* must be 2^N */
#define ALIGNMENT_SHIFT         3

/* Return the number of bytes in size class I, as a uint. */
#define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)

/*
 * Max size threshold below which malloc requests are considered to be
 * small enough in order to use preallocated memory pools. You can tune
 * this value according to your application behaviour and memory needs.
 *
 * Note: a size threshold of 512 guarantees that newly created dictionaries
 * will be allocated from preallocated memory pools on 64-bit.
 *
 * The following invariants must hold:
 *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
 *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
 *
 * Although not required, for better performance and space efficiency,
 * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
 */
#define SMALL_REQUEST_THRESHOLD 512
#define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
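
/* Illustrative sketch (not part of the original file; example_size_class is
 * a hypothetical helper): how a small request maps to a size class index and
 * back, given ALIGNMENT == 8 and ALIGNMENT_SHIFT == 3. */
static unsigned int
example_size_class(size_t nbytes)       /* assumes 0 < nbytes <= 512 */
{
    return (unsigned int)(nbytes - 1) >> ALIGNMENT_SHIFT;
}
/* example_size_class(1)   == 0,  and INDEX2SIZE(0)  == 8    (1..8 bytes)
 * example_size_class(14)  == 1,  and INDEX2SIZE(1)  == 16   (9..16 bytes)
 * example_size_class(512) == 63, and INDEX2SIZE(63) == 512  (505..512 bytes)
 */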
/*
 * The system's VMM page size can be obtained on most unices with a
 * getpagesize() call or deduced from various header files. To make
 * things simpler, we assume that it is 4K, which is OK for most systems.
 * It is probably better if this is the native page size, but it doesn't
 * have to be. In theory, if SYSTEM_PAGE_SIZE is larger than the native page
 * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
 * violation fault. 4K is apparently OK for all the platforms that python
 * currently targets.
 */
#define SYSTEM_PAGE_SIZE        (4 * 1024)
#define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)

/*
 * Maximum amount of memory managed by the allocator for small requests.
 */
#ifdef WITH_MEMORY_LIMITS
#ifndef SMALL_MEMORY_LIMIT
#define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
#endif
#endif

/*
 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
 * on a page boundary. This is a reserved virtual address space for the
 * current process (obtained through a malloc()/mmap() call). In no way does
 * this mean that the memory arenas will be used entirely. A malloc(<Big>) is
 * usually an address range reservation for <Big> bytes, unless all pages
 * within this space are referenced subsequently. So malloc'ing big blocks
 * and not using them does not mean "wasting memory". It's an addressable
 * range of virtual memory.
 *
 * Arenas are allocated with mmap() on systems supporting anonymous memory
 * mappings to reduce heap fragmentation.
 */
#define ARENA_SIZE              (256 << 10)     /* 256KB */

#ifdef WITH_MEMORY_LIMITS
#define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
#endif

/*
 * Size of the pools used for small blocks. Should be a power of 2,
 * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
 */
#define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
#define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK
/*
 * -- End of tunable settings section --
 */

/*==========================================================================*/

/*
 * Locking
 *
 * To reduce lock contention, it would probably be better to refine the
 * crude function locking with per size class locking. I'm not positive,
 * however, whether it's worth switching to such a locking policy because
 * of the performance penalty it might introduce.
 *
 * The following macros describe the simplest (should also be the fastest)
 * lock object on a particular platform and the init/fini/lock/unlock
 * operations on it. The locks defined here are not expected to be recursive
 * because it is assumed that they will always be called in the order:
 * INIT, [LOCK, UNLOCK]*, FINI.
 */

/*
 * Python's threads are serialized, so object malloc locking is disabled.
 */
#define SIMPLELOCK_DECL(lock)   /* simple lock declaration              */
#define SIMPLELOCK_INIT(lock)   /* allocate (if needed) and initialize  */
#define SIMPLELOCK_FINI(lock)   /* free/destroy an existing lock        */
#define SIMPLELOCK_LOCK(lock)   /* acquire released lock                */
#define SIMPLELOCK_UNLOCK(lock) /* release acquired lock                */
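
/* Illustrative sketch only (not in the original file, and compiled out
 * here): if object malloc locking were ever enabled, the SIMPLELOCK_*
 * operations could be bound to a POSIX mutex roughly like this. */
#if 0
#include <pthread.h>
#define SIMPLELOCK_DECL(lock)   static pthread_mutex_t lock;
#define SIMPLELOCK_INIT(lock)   pthread_mutex_init(&(lock), NULL)
#define SIMPLELOCK_FINI(lock)   pthread_mutex_destroy(&(lock))
#define SIMPLELOCK_LOCK(lock)   pthread_mutex_lock(&(lock))
#define SIMPLELOCK_UNLOCK(lock) pthread_mutex_unlock(&(lock))
#endif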
/*
 * Basic types
 * I don't care if these are defined in <sys/types.h> or elsewhere. Axiom.
 */
#undef  uchar
#define uchar   unsigned char   /* assuming == 8 bits  */

#undef  uint
#define uint    unsigned int    /* assuming >= 16 bits */

#undef  ulong
#define ulong   unsigned long   /* assuming >= 32 bits */

#undef  uptr
#define uptr    Py_uintptr_t

/* When you say memory, my mind reasons in terms of (pointers to) blocks */
typedef uchar block;

/* Pool for small blocks. */
struct pool_header {
    union { block *_padding;
            uint count; } ref;          /* number of allocated blocks    */
    block *freeblock;                   /* pool's free list head         */
    struct pool_header *nextpool;       /* next pool of this size class  */
    struct pool_header *prevpool;       /* previous pool       ""        */
    uint arenaindex;                    /* index into arenas of base adr */
    uint szidx;                         /* block size class index        */
    uint nextoffset;                    /* bytes to virgin block         */
    uint maxnextoffset;                 /* largest valid nextoffset      */
};

typedef struct pool_header *poolp;

/* Record keeping for arenas. */
struct arena_object {
    /* The address of the arena, as returned by malloc. Note that 0
     * will never be returned by a successful malloc, and is used
     * here to mark an arena_object that doesn't correspond to an
     * allocated arena.
     */
    uptr address;

    /* Pool-aligned pointer to the next pool to be carved off. */
    block* pool_address;

    /* The number of available pools in the arena: free pools + never-
     * allocated pools.
     */
    uint nfreepools;

    /* The total number of pools in the arena, whether or not available. */
    uint ntotalpools;

    /* Singly-linked list of available pools. */
    struct pool_header* freepools;

    /* Whenever this arena_object is not associated with an allocated
     * arena, the nextarena member is used to link all unassociated
     * arena_objects in the singly-linked `unused_arena_objects` list.
     * The prevarena member is unused in this case.
     *
     * When this arena_object is associated with an allocated arena
     * with at least one available pool, both members are used in the
     * doubly-linked `usable_arenas` list, which is maintained in
     * increasing order of `nfreepools` values.
     *
     * Else this arena_object is associated with an allocated arena
     * all of whose pools are in use. `nextarena` and `prevarena`
     * are both meaningless in this case.
     */
    struct arena_object* nextarena;
    struct arena_object* prevarena;
};

#define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)

#define DUMMY_SIZE_IDX  0xffff  /* size class of newly cached pools */

/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))

/* Return total number of blocks in pool of size index I, as a uint. */
#define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
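
/* Illustrative sketch (not part of the original): what POOL_ADDR computes
 * when POOL_SIZE is 4K -- clear the low 12 address bits. For instance, on a
 * typical 64-bit build where POOL_OVERHEAD rounds to 48 bytes, NUMBLOCKS(0)
 * is (4096 - 48) / 8 == 506 eight-byte blocks per pool. */
static poolp
example_pool_addr(const void *p)
{
    return (poolp)((uptr)p & ~(uptr)POOL_SIZE_MASK);
}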
/*==========================================================================*/

/*
 * This malloc lock
 */
SIMPLELOCK_DECL(_malloc_lock)
#define LOCK()          SIMPLELOCK_LOCK(_malloc_lock)
#define UNLOCK()        SIMPLELOCK_UNLOCK(_malloc_lock)
#define LOCK_INIT()     SIMPLELOCK_INIT(_malloc_lock)
#define LOCK_FINI()     SIMPLELOCK_FINI(_malloc_lock)

/*
 * Pool table -- headed, circular, doubly-linked lists of partially used pools.

This is involved. For an index i, usedpools[i+i] is the header for a list of
all partially used pools holding small blocks with "size class idx" i. So
usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
16, and so on: index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.

Pools are carved off an arena's highwater mark (an arena_object's pool_address
member) as needed. Once carved off, a pool is in one of three states forever
after:

used == partially used, neither empty nor full
    At least one block in the pool is currently allocated, and at least one
    block in the pool is not currently allocated (note this implies a pool
    has room for at least two blocks).
    This is a pool's initial state, as a pool is created only when malloc
    needs space.
    The pool holds blocks of a fixed size, and is in the circular list headed
    at usedpools[i] (see above). It's linked to the other used pools of the
    same size class via the pool_header's nextpool and prevpool members.
    If all but one block is currently allocated, a malloc can cause a
    transition to the full state. If all but one block is not currently
    allocated, a free can cause a transition to the empty state.

full == all the pool's blocks are currently allocated
    On transition to full, a pool is unlinked from its usedpools[] list.
    It's not linked to from anything then anymore, and its nextpool and
    prevpool members are meaningless until it transitions back to used.
    A free of a block in a full pool puts the pool back in the used state.
    Then it's linked in at the front of the appropriate usedpools[] list, so
    that the next allocation for its size class will reuse the freed block.

empty == all the pool's blocks are currently available for allocation
    On transition to empty, a pool is unlinked from its usedpools[] list,
    and linked to the front of its arena_object's singly-linked freepools
    list, via its nextpool member. The prevpool member has no meaning in
    this case. Empty pools have no inherent size class: the next time a
    malloc finds an empty list in usedpools[], it takes the first pool off
    of freepools. If the size class needed happens to be the same as the
    size class the pool last had, some pool initialization can be skipped.
    (A sketch distinguishing these three states follows this comment.)

Block Management

Blocks within pools are again carved out as needed. pool->freeblock points to
the start of a singly-linked list of free blocks within the pool. When a
block is freed, it's inserted at the front of its pool's freeblock list. Note
that the available blocks in a pool are *not* linked all together when a pool
is initialized. Instead only "the first two" (lowest addresses) blocks are
set up, returning the first such block, and setting pool->freeblock to a
one-block list holding the second such block. This is consistent with
pymalloc striving, at all levels (arena, pool, and block), never to touch a
piece of memory until it's actually needed. (A sketch of the resulting
carve-or-pop allocation step also follows this comment.)

So long as a pool is in the used state, we're certain there *is* a block
available for allocating, and pool->freeblock is not NULL. If pool->freeblock
points to the end of the free list before we've carved the entire pool into
blocks, that means we simply haven't yet gotten to one of the higher-address
blocks. The offset from the pool_header to the start of "the next" virgin
block is stored in the pool_header nextoffset member, and the largest value
of nextoffset that makes sense is stored in the maxnextoffset member when a
pool is initialized. All the blocks in a pool have been passed out at least
once when and only when nextoffset > maxnextoffset.

Major obscurity: While the usedpools vector is declared to have poolp
entries, it doesn't really. It really contains two pointers per (conceptual)
poolp entry, the nextpool and prevpool members of a pool_header. The
excruciating initialization code below fools C so that

    usedpools[i+i]

"acts like" a genuine poolp, but only so long as you only reference its
nextpool and prevpool members. The "- 2*sizeof(block *)" gibberish is
compensating for the fact that a pool_header's nextpool and prevpool members
immediately follow a pool_header's first two members:

    union { block *_padding;
            uint count; } ref;
    block *freeblock;

each of which consume sizeof(block *) bytes. So what usedpools[i+i] really
contains is a fudged-up pointer p such that *if* C believes it's a poolp
pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
circular list is empty).

It's unclear why the usedpools setup is so convoluted. It could be to
minimize the amount of cache required to hold this heavily-referenced table
(which only *needs* the two interpool pointer members of a pool_header). OTOH,
referencing code has to remember to "double the index" and doing so isn't
free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
on the fact that C doesn't insert any padding anywhere in a pool_header at or
before the prevpool member.
**************************************************************************** */
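
/* Illustrative sketches (not part of the original allocator; names prefixed
 * with example_ are hypothetical). First: the three pool states described
 * above can be told apart from a pool_header alone -- an empty pool has no
 * allocated blocks, and a full pool has an exhausted free list. */
static const char *
example_pool_state(poolp pool)
{
    if (pool->ref.count == 0)
        return "empty";                 /* on its arena's freepools list */
    if (pool->freeblock == NULL)
        return "full";                  /* linked into no list at all */
    return "used";                      /* on a usedpools[] list */
}

/* Second: the carve-or-pop allocation step implied by the "Block Management"
 * text -- pop the free-list head, and when that empties the list, lazily
 * extend it with the next virgin block if one remains. */
static block *
example_pool_alloc(poolp pool)
{
    block *bp = pool->freeblock;
    if (bp == NULL)
        return NULL;                    /* pool is full */
    ++pool->ref.count;
    if ((pool->freeblock = *(block **)bp) == NULL &&
        pool->nextoffset <= pool->maxnextoffset) {
        /* Free list exhausted: carve the next virgin block. */
        pool->freeblock = (block *)pool + pool->nextoffset;
        pool->nextoffset += INDEX2SIZE(pool->szidx);
        *(block **)(pool->freeblock) = NULL;
    }
    return bp;
}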

#define PTA(x)  ((poolp )((uchar *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
#define PT(x)   PTA(x), PTA(x)

static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
    PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
#if NB_SMALL_SIZE_CLASSES > 8
    , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
#if NB_SMALL_SIZE_CLASSES > 16
    , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
#if NB_SMALL_SIZE_CLASSES > 24
    , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
#if NB_SMALL_SIZE_CLASSES > 32
    , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
#if NB_SMALL_SIZE_CLASSES > 40
    , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
#if NB_SMALL_SIZE_CLASSES > 48
    , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
#if NB_SMALL_SIZE_CLASSES > 56
    , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
#if NB_SMALL_SIZE_CLASSES > 64
#error "NB_SMALL_SIZE_CLASSES should be less than 64"
#endif /* NB_SMALL_SIZE_CLASSES > 64 */
#endif /* NB_SMALL_SIZE_CLASSES > 56 */
#endif /* NB_SMALL_SIZE_CLASSES > 48 */
#endif /* NB_SMALL_SIZE_CLASSES > 40 */
#endif /* NB_SMALL_SIZE_CLASSES > 32 */
#endif /* NB_SMALL_SIZE_CLASSES > 24 */
#endif /* NB_SMALL_SIZE_CLASSES > 16 */
#endif /* NB_SMALL_SIZE_CLASSES >  8 */
};
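
/* Illustrative sketch (not in the original file): because usedpools[i+i] is
 * a fudged pointer whose nextpool points back at itself when the list is
 * empty, "does size class i have a usable pool?" is a single compare, which
 * the allocator's fast path below relies on. */
static int
example_size_class_has_pool(uint szidx)
{
    poolp head = usedpools[szidx + szidx];
    return head != head->nextpool;      /* non-empty iff head links out */
}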

/*==========================================================================
Arena management.

`arenas` is a vector of arena_objects. It contains maxarenas entries, some of
which may not be currently used (== they're arena_objects that aren't
currently associated with an allocated arena). Note that arenas proper are
separately malloc'ed.

Prior to Python 2.5, arenas were never free()'ed. Starting with Python 2.5,
we do try to free() arenas, and use some mild heuristic strategies to increase
the likelihood that arenas eventually can be freed.

unused_arena_objects

    This is a singly-linked list of the arena_objects that are currently not
    being used (no arena is associated with them). Objects are taken off the
    head of the list in new_arena(), and are pushed on the head of the list in
    PyObject_Free() when the arena is empty. Key invariant: an arena_object
    is on this list if and only if its .address member is 0.

usable_arenas

    This is a doubly-linked list of the arena_objects associated with arenas
    that have pools available. These pools are either waiting to be reused,
    or have not been used before. The list is sorted to have the most-
    allocated arenas first (ascending order based on the nfreepools member).
    This means that the next allocation will come from a heavily used arena,
    which gives the nearly empty arenas a chance to be returned to the system.
    In my unscientific tests this dramatically improved the number of arenas
    that could be freed.

Note that an arena_object associated with an arena all of whose pools are
currently in use isn't on either list.
*/

/* Array of objects used to track chunks of memory (arenas). */
static struct arena_object* arenas = NULL;
/* Number of slots currently allocated in the `arenas` vector. */
static uint maxarenas = 0;

/* The head of the singly-linked, NULL-terminated list of available
 * arena_objects.
 */
static struct arena_object* unused_arena_objects = NULL;

/* The head of the doubly-linked, NULL-terminated at each end, list of
 * arena_objects associated with arenas that have pools available.
 */
static struct arena_object* usable_arenas = NULL;

/* How many arena_objects do we initially allocate?
 * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
 * `arenas` vector.
 */
#define INITIAL_ARENA_OBJECTS 16

/* Number of arenas allocated that haven't been free()'d. */
static size_t narenas_currently_allocated = 0;

/* Total number of times malloc() called to allocate an arena. */
static size_t ntimes_arena_allocated = 0;

/* High water mark (max value ever seen) for narenas_currently_allocated. */
static size_t narenas_highwater = 0;

static Py_ssize_t _Py_AllocatedBlocks = 0;

Py_ssize_t
_Py_GetAllocatedBlocks(void)
{
    return _Py_AllocatedBlocks;
}

/* Allocate a new arena. If we run out of memory, return NULL. Else
 * allocate a new arena, and return the address of an arena_object
 * describing the new arena. It's expected that the caller will set
 * `usable_arenas` to the return value.
 */
static struct arena_object*
new_arena(void)
{
    struct arena_object* arenaobj;
    uint excess;        /* number of bytes above pool alignment */
    void *address;

#ifdef PYMALLOC_DEBUG
    if (Py_GETENV("PYTHONMALLOCSTATS"))
        _PyObject_DebugMallocStats(stderr);
#endif
    if (unused_arena_objects == NULL) {
        uint i;
        uint numarenas;
        size_t nbytes;

        /* Double the number of arena objects on each allocation.
         * Note that it's possible for `numarenas` to overflow.
         */
        numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
        if (numarenas <= maxarenas)
            return NULL;                /* overflow */
#if SIZEOF_SIZE_T <= SIZEOF_INT
        if (numarenas > PY_SIZE_MAX / sizeof(*arenas))
            return NULL;                /* overflow */
#endif
        nbytes = numarenas * sizeof(*arenas);
        arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
        if (arenaobj == NULL)
            return NULL;
        arenas = arenaobj;

        /* We might need to fix pointers that were copied. However,
         * new_arena only gets called when all the pages in the
         * previous arenas are full. Thus, there are *no* pointers
         * into the old array. Thus, we don't have to worry about
         * invalid pointers. Just to be sure, some asserts:
         */
        assert(usable_arenas == NULL);
        assert(unused_arena_objects == NULL);

        /* Put the new arenas on the unused_arena_objects list. */
        for (i = maxarenas; i < numarenas; ++i) {
            arenas[i].address = 0;              /* mark as unassociated */
            arenas[i].nextarena = i < numarenas - 1 ?
                                  &arenas[i+1] : NULL;
        }

        /* Update globals. */
        unused_arena_objects = &arenas[maxarenas];
        maxarenas = numarenas;
    }

    /* Take the next available arena object off the head of the list. */
    assert(unused_arena_objects != NULL);
    arenaobj = unused_arena_objects;
    unused_arena_objects = arenaobj->nextarena;
    assert(arenaobj->address == 0);
    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
    if (address == NULL) {
        /* The allocation failed: return NULL after putting the
         * arenaobj back.
         */
        arenaobj->nextarena = unused_arena_objects;
        unused_arena_objects = arenaobj;
        return NULL;
    }
    arenaobj->address = (uptr)address;

    ++narenas_currently_allocated;
    ++ntimes_arena_allocated;
    if (narenas_currently_allocated > narenas_highwater)
        narenas_highwater = narenas_currently_allocated;
    arenaobj->freepools = NULL;
    /* pool_address <- first pool-aligned address in the arena
       nfreepools <- number of whole pools that fit after alignment */
    arenaobj->pool_address = (block*)arenaobj->address;
    arenaobj->nfreepools = ARENA_SIZE / POOL_SIZE;
    assert(POOL_SIZE * arenaobj->nfreepools == ARENA_SIZE);
    excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
    if (excess != 0) {
        --arenaobj->nfreepools;
        arenaobj->pool_address += POOL_SIZE - excess;
    }
    arenaobj->ntotalpools = arenaobj->nfreepools;

    return arenaobj;
}

/*
Py_ADDRESS_IN_RANGE(P, POOL)

Return true if and only if P is an address that was allocated by pymalloc.
POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
(the caller is asked to compute this because the macro expands POOL more than
once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
variable and pass the latter to the macro; because Py_ADDRESS_IN_RANGE is
called on every alloc/realloc/free, micro-efficiency is important here).

Tricky: Let B be the arena base address associated with the pool, B =
arenas[(POOL)->arenaindex].address. Then P belongs to the arena if and only if

    B <= P < B + ARENA_SIZE

Subtracting B throughout, this is true iff

    0 <= P-B < ARENA_SIZE

By using unsigned arithmetic, the "0 <=" half of the test can be skipped.

Obscure: A PyMem "free memory" function can call the pymalloc free or realloc
before the first arena has been allocated. `arenas` is still NULL in that
case. We're relying on the fact that maxarenas is also 0 in that case, so
that (POOL)->arenaindex < maxarenas must be false, saving us from trying to
index into a NULL arenas.

Details: given P and POOL, the arena_object corresponding to P is AO =
arenas[(POOL)->arenaindex]. Suppose obmalloc controls P. Then (barring wild
stores, etc), POOL is the correct address of P's pool, AO.address is the
correct base address of the pool's arena, and P must be within ARENA_SIZE of
AO.address. In addition, AO.address is not 0 (no arena can start at address 0
(NULL)). Therefore Py_ADDRESS_IN_RANGE correctly reports that obmalloc
controls P.

Now suppose obmalloc does not control P (e.g., P was obtained via a direct
call to the system malloc() or realloc()). (POOL)->arenaindex may be anything
in this case -- it may even be uninitialized trash. If the trash arenaindex
is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
control P.

Else arenaindex is < maxarena, and AO is read up. If AO corresponds to an
allocated arena, obmalloc controls all the memory in slice AO.address :
AO.address+ARENA_SIZE. By case assumption, P is not controlled by obmalloc,
so P doesn't lie in that slice, so the macro correctly reports that P is not
controlled by obmalloc.

Finally, if P is not controlled by obmalloc and AO corresponds to an unused
arena_object (one not currently associated with an allocated arena),
AO.address is 0, and the second test in the macro reduces to:

    P < ARENA_SIZE

If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
that P is not controlled by obmalloc. However, if P < ARENA_SIZE, this part
of the test still passes, and the third clause (AO.address != 0) is necessary
to get the correct result: AO.address is 0 in this case, so the macro
correctly reports that P is not controlled by obmalloc (despite that P lies in
slice AO.address : AO.address + ARENA_SIZE).

Note: The third (AO.address != 0) clause was added in Python 2.5. Before
2.5, arenas were never free()'ed, and an arenaindex < maxarena always
corresponded to a currently-allocated arena, so the "P is not controlled by
obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
was impossible.

Note that the logic is excruciating, and reading up possibly uninitialized
memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
creates problems for some memory debuggers. The overwhelming advantage is
that this test determines whether an arbitrary address is controlled by
obmalloc in a small constant time, independent of the number of arenas
obmalloc controls. Since this test is needed at every entry point, it's
extremely desirable that it be this fast.

Since Py_ADDRESS_IN_RANGE may be reading from memory which was not allocated
by Python, it is important that (POOL)->arenaindex is read only once, as
another thread may be concurrently modifying the value without holding the
GIL. To accomplish this, the arenaindex_temp variable is used to store
(POOL)->arenaindex for the duration of the Py_ADDRESS_IN_RANGE macro's
execution. The caller of the macro is responsible for declaring this
variable.
*/
#define Py_ADDRESS_IN_RANGE(P, POOL)                                    \
    ((arenaindex_temp = (POOL)->arenaindex) < maxarenas &&              \
     (uptr)(P) - arenas[arenaindex_temp].address < (uptr)ARENA_SIZE &&  \
     arenas[arenaindex_temp].address != 0)
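
/* Illustrative usage (not in the original file; mirrors the call sites in
 * _PyObject_Free and _PyObject_Realloc below): the caller computes
 * POOL_ADDR(P) once, and must declare arenaindex_temp itself because the
 * macro assigns to it. */
static int
example_is_pymalloc_ptr(void *p)
{
    uint arenaindex_temp;
    poolp pool = POOL_ADDR(p);
    return Py_ADDRESS_IN_RANGE(p, pool);
}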

/* This is only useful when running memory debuggers such as
 * Purify or Valgrind. Uncomment to use.
 *
#define Py_USING_MEMORY_DEBUGGER
 */

#ifdef Py_USING_MEMORY_DEBUGGER

/* Py_ADDRESS_IN_RANGE may access uninitialized memory by design.
 * This leads to thousands of spurious warnings when using
 * Purify or Valgrind. By making a function, we can easily
 * suppress the uninitialized memory reads in this one function.
 * So we won't ignore real errors elsewhere.
 *
 * Disable the macro and use a function.
 */

#undef Py_ADDRESS_IN_RANGE

#if defined(__GNUC__) && ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) || \
                          (__GNUC__ >= 4))
#define Py_NO_INLINE __attribute__((__noinline__))
#else
#define Py_NO_INLINE
#endif

/* Don't make static, to try to ensure this isn't inlined. */
int Py_ADDRESS_IN_RANGE(void *P, poolp pool) Py_NO_INLINE;
#endif

/*==========================================================================*/

/* malloc. Note that nbytes==0 tries to return a non-NULL pointer, distinct
 * from all other currently live pointers. This may not be possible.
 */

/*
 * The basic blocks are ordered by decreasing execution frequency,
 * which minimizes the number of jumps in the most common cases,
 * improves branching prediction and instruction scheduling (small
 * block allocations typically result in a couple of instructions).
 * Unless the optimizer reorders everything, being too smart...
 */

static void *
_PyObject_Malloc(void *ctx, size_t nbytes)
{
    block *bp;
    poolp pool;
    poolp next;
    uint size;

    _Py_AllocatedBlocks++;

#ifdef WITH_VALGRIND
    if (UNLIKELY(running_on_valgrind == -1))
        running_on_valgrind = RUNNING_ON_VALGRIND;
    if (UNLIKELY(running_on_valgrind))
        goto redirect;
#endif

    /*
     * This implicitly redirects malloc(0).
     */
    if ((nbytes - 1) < SMALL_REQUEST_THRESHOLD) {
        LOCK();
        /*
         * Most frequent paths first
         */
        size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
        pool = usedpools[size + size];
        if (pool != pool->nextpool) {
            /*
             * There is a used pool for this size class.
             * Pick up the head block of its free list.
             */
            ++pool->ref.count;
            bp = pool->freeblock;
            assert(bp != NULL);
            if ((pool->freeblock = *(block **)bp) != NULL) {
                UNLOCK();
                return (void *)bp;
            }
            /*
             * Reached the end of the free list, try to extend it.
             */
            if (pool->nextoffset <= pool->maxnextoffset) {
                /* There is room for another block. */
                pool->freeblock = (block*)pool +
                                  pool->nextoffset;
                pool->nextoffset += INDEX2SIZE(size);
                *(block **)(pool->freeblock) = NULL;
                UNLOCK();
                return (void *)bp;
            }
            /* Pool is full, unlink from used pools. */
            next = pool->nextpool;
            pool = pool->prevpool;
            next->prevpool = pool;
            pool->nextpool = next;
            UNLOCK();
            return (void *)bp;
        }

        /* There isn't a pool of the right size class immediately
         * available: use a free pool.
         */
        if (usable_arenas == NULL) {
            /* No arena has a free pool: allocate a new arena. */
#ifdef WITH_MEMORY_LIMITS
            if (narenas_currently_allocated >= MAX_ARENAS) {
                UNLOCK();
                goto redirect;
            }
#endif
            usable_arenas = new_arena();
            if (usable_arenas == NULL) {
                UNLOCK();
                goto redirect;
            }
            usable_arenas->nextarena =
                usable_arenas->prevarena = NULL;
        }
        assert(usable_arenas->address != 0);

        /* Try to get a cached free pool. */
        pool = usable_arenas->freepools;
        if (pool != NULL) {
            /* Unlink from cached pools. */
            usable_arenas->freepools = pool->nextpool;

            /* This arena already had the smallest nfreepools
             * value, so decreasing nfreepools doesn't change
             * that, and we don't need to rearrange the
             * usable_arenas list. However, if the arena has
             * become wholly allocated, we need to remove its
             * arena_object from usable_arenas.
             */
            --usable_arenas->nfreepools;
            if (usable_arenas->nfreepools == 0) {
                /* Wholly allocated: remove. */
                assert(usable_arenas->freepools == NULL);
                assert(usable_arenas->nextarena == NULL ||
                       usable_arenas->nextarena->prevarena ==
                       usable_arenas);

                usable_arenas = usable_arenas->nextarena;
                if (usable_arenas != NULL) {
                    usable_arenas->prevarena = NULL;
                    assert(usable_arenas->address != 0);
                }
            }
            else {
                /* nfreepools > 0: it must be that freepools
                 * isn't NULL, or that we haven't yet carved
                 * off all the arena's pools for the first
                 * time.
                 */
                assert(usable_arenas->freepools != NULL ||
                       usable_arenas->pool_address <=
                       (block*)usable_arenas->address +
                           ARENA_SIZE - POOL_SIZE);
            }
        init_pool:
            /* Frontlink to used pools. */
            next = usedpools[size + size]; /* == prev */
            pool->nextpool = next;
            pool->prevpool = next;
            next->nextpool = pool;
            next->prevpool = pool;
            pool->ref.count = 1;
            if (pool->szidx == size) {
                /* Luckily, this pool last contained blocks
                 * of the same size class, so its header
                 * and free list are already initialized.
                 */
                bp = pool->freeblock;
                assert(bp != NULL);
                pool->freeblock = *(block **)bp;
                UNLOCK();
                return (void *)bp;
            }
            /*
             * Initialize the pool header, set up the free list to
             * contain just the second block, and return the first
             * block.
             */
            pool->szidx = size;
            size = INDEX2SIZE(size);
            bp = (block *)pool + POOL_OVERHEAD;
            pool->nextoffset = POOL_OVERHEAD + (size << 1);
            pool->maxnextoffset = POOL_SIZE - size;
            pool->freeblock = bp + size;
            *(block **)(pool->freeblock) = NULL;
            UNLOCK();
            return (void *)bp;
        }

        /* Carve off a new pool. */
        assert(usable_arenas->nfreepools > 0);
        assert(usable_arenas->freepools == NULL);
        pool = (poolp)usable_arenas->pool_address;
        assert((block*)pool <= (block*)usable_arenas->address +
                               ARENA_SIZE - POOL_SIZE);
        pool->arenaindex = usable_arenas - arenas;
        assert(&arenas[pool->arenaindex] == usable_arenas);
        pool->szidx = DUMMY_SIZE_IDX;
        usable_arenas->pool_address += POOL_SIZE;
        --usable_arenas->nfreepools;

        if (usable_arenas->nfreepools == 0) {
            assert(usable_arenas->nextarena == NULL ||
                   usable_arenas->nextarena->prevarena ==
                   usable_arenas);
            /* Unlink the arena: it is completely allocated. */
            usable_arenas = usable_arenas->nextarena;
            if (usable_arenas != NULL) {
                usable_arenas->prevarena = NULL;
                assert(usable_arenas->address != 0);
            }
        }

        goto init_pool;
    }

    /* The small block allocator ends here. */

redirect:
    /* Redirect the original request to the underlying (libc) allocator.
     * We jump here on bigger requests, on error in the code above (as a
     * last chance to serve the request) or when the max memory limit
     * has been reached.
     */
    {
        void *result = PyMem_RawMalloc(nbytes);
        if (!result)
            _Py_AllocatedBlocks--;
        return result;
    }
}

/* free */

ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
static void
_PyObject_Free(void *ctx, void *p)
{
    poolp pool;
    block *lastfree;
    poolp next, prev;
    uint size;
#ifndef Py_USING_MEMORY_DEBUGGER
    uint arenaindex_temp;
#endif

    if (p == NULL)      /* free(NULL) has no effect */
        return;

    _Py_AllocatedBlocks--;

#ifdef WITH_VALGRIND
    if (UNLIKELY(running_on_valgrind > 0))
        goto redirect;
#endif

    pool = POOL_ADDR(p);
    if (Py_ADDRESS_IN_RANGE(p, pool)) {
        /* We allocated this address. */
        LOCK();
        /* Link p to the start of the pool's freeblock list. Since
         * the pool had at least the p block outstanding, the pool
         * wasn't empty (so it's already in a usedpools[] list, or
         * was full and is in no list -- it's not in the freeblocks
         * list in any case).
         */
        assert(pool->ref.count > 0);            /* else it was empty */
        *(block **)p = lastfree = pool->freeblock;
        pool->freeblock = (block *)p;
        if (lastfree) {
            struct arena_object* ao;
            uint nf;  /* ao->nfreepools */

            /* freeblock wasn't NULL, so the pool wasn't full,
             * and the pool is in a usedpools[] list.
             */
            if (--pool->ref.count != 0) {
                /* pool isn't empty: leave it in usedpools */
                UNLOCK();
                return;
            }
            /* Pool is now empty: unlink from usedpools, and
             * link to the front of freepools. This ensures that
             * previously freed pools will be allocated later
             * (being not referenced, they are perhaps paged out).
             */
            next = pool->nextpool;
            prev = pool->prevpool;
            next->prevpool = prev;
            prev->nextpool = next;

            /* Link the pool to freepools. This is a singly-linked
             * list, and pool->prevpool isn't used there.
             */
            ao = &arenas[pool->arenaindex];
            pool->nextpool = ao->freepools;
            ao->freepools = pool;
            nf = ++ao->nfreepools;

            /* All the rest is arena management. We just freed
             * a pool, and there are 4 cases for arena mgmt:
             * 1. If all the pools are free, return the arena to
             *    the system free().
             * 2. If this is the only free pool in the arena,
             *    add the arena back to the `usable_arenas` list.
             * 3. If the "next" arena has a smaller count of free
             *    pools, we have to "slide this arena right" to
             *    restore the invariant that usable_arenas is sorted
             *    in order of nfreepools.
             * 4. Else there's nothing more to do.
             */
            if (nf == ao->ntotalpools) {
                /* Case 1. First unlink ao from usable_arenas.
                 */
                assert(ao->prevarena == NULL ||
                       ao->prevarena->address != 0);
                assert(ao ->nextarena == NULL ||
                       ao->nextarena->address != 0);

                /* Fix the pointer in the prevarena, or the
                 * usable_arenas pointer.
                 */
                if (ao->prevarena == NULL) {
                    usable_arenas = ao->nextarena;
                    assert(usable_arenas == NULL ||
                           usable_arenas->address != 0);
                }
                else {
                    assert(ao->prevarena->nextarena == ao);
                    ao->prevarena->nextarena =
                        ao->nextarena;
                }
                /* Fix the pointer in the nextarena. */
                if (ao->nextarena != NULL) {
                    assert(ao->nextarena->prevarena == ao);
                    ao->nextarena->prevarena =
                        ao->prevarena;
                }
                /* Record that this arena_object slot is
                 * available to be reused.
                 */
                ao->nextarena = unused_arena_objects;
                unused_arena_objects = ao;

                /* Free the entire arena. */
                _PyObject_Arena.free(_PyObject_Arena.ctx,
                                     (void *)ao->address, ARENA_SIZE);
                ao->address = 0;                /* mark unassociated */
                --narenas_currently_allocated;

                UNLOCK();
                return;
            }
            if (nf == 1) {
                /* Case 2. Put ao at the head of
                 * usable_arenas. Note that because
                 * ao->nfreepools was 0 before, ao isn't
                 * currently on the usable_arenas list.
                 */
                ao->nextarena = usable_arenas;
                ao->prevarena = NULL;
                if (usable_arenas)
                    usable_arenas->prevarena = ao;
                usable_arenas = ao;
                assert(usable_arenas->address != 0);

                UNLOCK();
                return;
            }
            /* If this arena is now out of order, we need to keep
             * the list sorted. The list is kept sorted so that
             * the "most full" arenas are used first, which allows
             * the nearly empty arenas to be completely freed. In
             * a few un-scientific tests, it seems like this
             * approach allowed a lot more memory to be freed.
             */
            if (ao->nextarena == NULL ||
                nf <= ao->nextarena->nfreepools) {
                /* Case 4. Nothing to do. */
                UNLOCK();
                return;
            }
            /* Case 3: We have to move the arena towards the end
             * of the list, because it has more free pools than
             * the arena to its right.
             * First unlink ao from usable_arenas.
             */
            if (ao->prevarena != NULL) {
                /* ao isn't at the head of the list */
                assert(ao->prevarena->nextarena == ao);
                ao->prevarena->nextarena = ao->nextarena;
            }
            else {
                /* ao is at the head of the list */
                assert(usable_arenas == ao);
                usable_arenas = ao->nextarena;
            }
            ao->nextarena->prevarena = ao->prevarena;

            /* Locate the new insertion point by iterating over
             * the list, using our nextarena pointer.
             */
            while (ao->nextarena != NULL &&
                   nf > ao->nextarena->nfreepools) {
                ao->prevarena = ao->nextarena;
                ao->nextarena = ao->nextarena->nextarena;
            }

            /* Insert ao at this point. */
            assert(ao->nextarena == NULL ||
                   ao->prevarena == ao->nextarena->prevarena);
            assert(ao->prevarena->nextarena == ao->nextarena);

            ao->prevarena->nextarena = ao;
            if (ao->nextarena != NULL)
                ao->nextarena->prevarena = ao;

            /* Verify that the swaps worked. */
            assert(ao->nextarena == NULL ||
                   nf <= ao->nextarena->nfreepools);
            assert(ao->prevarena == NULL ||
                   nf > ao->prevarena->nfreepools);
            assert(ao->nextarena == NULL ||
                   ao->nextarena->prevarena == ao);
            assert((usable_arenas == ao &&
                    ao->prevarena == NULL) ||
                   ao->prevarena->nextarena == ao);

            UNLOCK();
            return;
        }

        /* Pool was full, so doesn't currently live in any list:
         * link it to the front of the appropriate usedpools[] list.
         * This mimics LRU pool usage for new allocations and
         * targets optimal filling when several pools contain
         * blocks of the same size class.
         */
        --pool->ref.count;
        assert(pool->ref.count > 0);            /* else the pool is empty */
        size = pool->szidx;
        next = usedpools[size + size];
        prev = next->prevpool;
        /* insert pool before next:   prev <-> pool <-> next */
        pool->nextpool = next;
        pool->prevpool = prev;
        next->prevpool = pool;
        prev->nextpool = pool;
        UNLOCK();
        return;
    }

#ifdef WITH_VALGRIND
redirect:
#endif
    /* We didn't allocate this address. */
    PyMem_RawFree(p);
}

/* realloc. If p is NULL, this acts like malloc(nbytes). Else if nbytes==0,
 * then as the Python docs promise, we do not treat this like free(p), and
 * return a non-NULL result.
 */

ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
static void *
_PyObject_Realloc(void *ctx, void *p, size_t nbytes)
{
    void *bp;
    poolp pool;
    size_t size;
#ifndef Py_USING_MEMORY_DEBUGGER
    uint arenaindex_temp;
#endif

    if (p == NULL)
        return _PyObject_Malloc(ctx, nbytes);

#ifdef WITH_VALGRIND
    /* Treat running_on_valgrind == -1 the same as 0 */
    if (UNLIKELY(running_on_valgrind > 0))
        goto redirect;
#endif

    pool = POOL_ADDR(p);
    if (Py_ADDRESS_IN_RANGE(p, pool)) {
        /* We're in charge of this block */
        size = INDEX2SIZE(pool->szidx);
        if (nbytes <= size) {
            /* The block is staying the same or shrinking. If
             * it's shrinking, there's a tradeoff: it costs
             * cycles to copy the block to a smaller size class,
             * but it wastes memory not to copy it. The
             * compromise here is to copy on shrink only if at
             * least 25% of size can be shaved off.
             */
            if (4 * nbytes > 3 * size) {
                /* It's the same,
                 * or shrinking and new/old > 3/4.
                 */
                return p;
            }
            size = nbytes;
        }
        bp = _PyObject_Malloc(ctx, nbytes);
        if (bp != NULL) {
            memcpy(bp, p, size);
            _PyObject_Free(ctx, p);
        }
        return bp;
    }
#ifdef WITH_VALGRIND
 redirect:
#endif
    /* We're not managing this block. If nbytes <=
     * SMALL_REQUEST_THRESHOLD, it's tempting to try to take over this
     * block. However, if we do, we need to copy the valid data from
     * the C-managed block to one of our blocks, and there's no portable
     * way to know how much of the memory space starting at p is valid.
     * As bug 1185883 pointed out the hard way, it's possible that the
     * C-managed block is "at the end" of allocated VM space, so that
     * a memory fault can occur if we try to copy nbytes bytes starting
     * at p. Instead we punt: let C continue to manage this block.
     */
    if (nbytes)
        return PyMem_RawRealloc(p, nbytes);
    /* C doesn't define the result of realloc(p, 0) (it may or may not
     * return NULL then), but Python's docs promise that nbytes==0 never
     * returns NULL. We don't pass 0 to realloc(), to avoid that endcase
     * to begin with. Even then, we can't be sure that realloc() won't
     * return NULL.
     */
    bp = PyMem_RawRealloc(p, 1);
    return bp;
}
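
/* Worked example of the copy-on-shrink rule above (illustrative, not from
 * the original file): with a 64-byte block (size class idx 7), a realloc
 * down to 49..64 bytes keeps the block in place (4*49 == 196 > 3*64 == 192),
 * while a realloc to 48 bytes or fewer copies it to a smaller size class
 * (4*48 == 192, not > 192) -- i.e. the block only moves when at least 25%
 * of it can be shaved off. */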

#else   /* ! WITH_PYMALLOC */

/*==========================================================================*/
/* pymalloc not enabled: Redirect the entry points to malloc. These will
 * only be used by extensions that are compiled with pymalloc enabled. */

Py_ssize_t
_Py_GetAllocatedBlocks(void)
{
    return 0;
}

#endif /* WITH_PYMALLOC */

#ifdef PYMALLOC_DEBUG
/*==========================================================================*/
/* A cross-platform debugging allocator. This doesn't manage memory directly,
 * it wraps a real allocator, adding extra debugging info to the memory blocks.
 */

/* Special bytes broadcast into debug memory blocks at appropriate times.
 * Strings of these are unlikely to be valid addresses, floats, ints or
 * 7-bit ASCII strings.
 */
#undef CLEANBYTE
#undef DEADBYTE
#undef FORBIDDENBYTE
#define CLEANBYTE      0xCB    /* clean (newly allocated) memory */
#define DEADBYTE       0xDB    /* dead (newly freed) memory */
#define FORBIDDENBYTE  0xFB    /* untouchable bytes at each end of a block */

static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */

/* serialno is always incremented via calling this routine. The point is
 * to supply a single place to set a breakpoint.
 */
static void
bumpserialno(void)
{
    ++serialno;
}

#define SST SIZEOF_SIZE_T

/* Read sizeof(size_t) bytes at p as a big-endian size_t. */
static size_t
read_size_t(const void *p)
{
    const uchar *q = (const uchar *)p;
    size_t result = *q++;
    int i;

    for (i = SST; --i > 0; ++q)
        result = (result << 8) | *q;
    return result;
}

/* Write n as a big-endian size_t, MSB at address p, LSB at
 * p + sizeof(size_t) - 1.
 */
static void
write_size_t(void *p, size_t n)
{
    uchar *q = (uchar *)p + SST - 1;
    int i;

    for (i = SST; --i >= 0; --q) {
        *q = (uchar)(n & 0xff);
        n >>= 8;
    }
}
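
/* Round-trip sketch (illustrative, not part of the original file):
 * write_size_t and read_size_t are inverses, storing the most significant
 * byte first regardless of host endianness. */
static void
example_size_t_roundtrip(void)
{
    uchar buf[SST];
    write_size_t(buf, (size_t)0x1234);
    assert(read_size_t(buf) == (size_t)0x1234);
    assert(buf[SST - 1] == 0x34);   /* LSB lands at p + sizeof(size_t) - 1 */
}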

#ifdef Py_DEBUG
/* Is target in the list? The list is traversed via the nextpool pointers.
 * The list may be NULL-terminated, or circular. Return 1 if target is in
 * list, else 0.
 */
static int
pool_is_in_list(const poolp target, poolp list)
{
    poolp origlist = list;
    assert(target != NULL);
    if (list == NULL)
        return 0;
    do {
        if (target == list)
            return 1;
        list = list->nextpool;
    } while (list != NULL && list != origlist);
    return 0;
}

#else
#define pool_is_in_list(X, Y) 1

#endif  /* Py_DEBUG */

/* Let S = sizeof(size_t). The debug malloc asks for 4*S extra bytes and
   fills them with useful stuff, here calling the underlying malloc's result p:

p[0: S]
    Number of bytes originally asked for. This is a size_t, big-endian (easier
    to read in a memory dump).
p[S]
    API ID. See PEP 445. This is a character, but seems undocumented.
p[S+1: 2*S]
    Copies of FORBIDDENBYTE. Used to catch under-writes and reads.
p[2*S: 2*S+n]
    The requested memory, filled with copies of CLEANBYTE.
    Used to catch reference to uninitialized memory.
    &p[2*S] is returned. Note that this is 8-byte aligned if pymalloc
    handled the request itself.
p[2*S+n: 2*S+n+S]
    Copies of FORBIDDENBYTE. Used to catch over-writes and reads.
p[2*S+n+S: 2*S+n+2*S]
    A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
    and _PyMem_DebugRealloc.
    This is a big-endian size_t.
    If "bad memory" is detected later, the serial number gives an
    excellent way to set a breakpoint on the next run, to capture the
    instant at which this block was passed out.
*/
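
/* Illustrative helper (not in the original): given the pointer p handed out
 * by the debug malloc, locate the raw block base and the tail pad, matching
 * the layout described above. */
static uchar *
example_debug_tail(void *p)
{
    uchar *q = (uchar *)p - 2*SST;      /* base of the underlying block */
    size_t nbytes = read_size_t(q);     /* original request size */
    return (uchar *)p + nbytes;         /* first tail FORBIDDENBYTE; the
                                           serial number follows at +SST */
}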

static void *
_PyMem_DebugMalloc(void *ctx, size_t nbytes)
{
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
    uchar *p;           /* base address of malloc'ed block */
    uchar *tail;        /* p + 2*SST + nbytes == pointer to tail pad bytes */
    size_t total;       /* nbytes + 4*SST */

    bumpserialno();
    total = nbytes + 4*SST;
    if (total < nbytes)
        /* overflow: can't represent total as a size_t */
        return NULL;

    p = (uchar *)api->alloc.malloc(api->alloc.ctx, total);
    if (p == NULL)
        return NULL;

    /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
    write_size_t(p, nbytes);
    p[SST] = (uchar)api->api_id;
    memset(p + SST + 1, FORBIDDENBYTE, SST-1);

    if (nbytes > 0)
        memset(p + 2*SST, CLEANBYTE, nbytes);

    /* at tail, write pad (SST bytes) and serialno (SST bytes) */
    tail = p + 2*SST + nbytes;
    memset(tail, FORBIDDENBYTE, SST);
    write_size_t(tail + SST, serialno);

    return p + 2*SST;
}

/* The debug free first checks the 2*SST bytes on each end for sanity (in
   particular, that the FORBIDDENBYTEs with the api ID are still intact).
   Then fills the original bytes with DEADBYTE.
   Then calls the underlying free.
*/
static void
_PyMem_DebugFree(void *ctx, void *p)
{
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
    uchar *q = (uchar *)p - 2*SST;  /* address returned from malloc */
    size_t nbytes;

    if (p == NULL)
        return;
    _PyMem_DebugCheckAddress(api->api_id, p);
    nbytes = read_size_t(q);
    nbytes += 4*SST;
    memset(q, DEADBYTE, nbytes);
    api->alloc.free(api->alloc.ctx, q);
}

static void *
_PyMem_DebugRealloc(void *ctx, void *p, size_t nbytes)
{
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
    uchar *q = (uchar *)p, *oldq;
    uchar *tail;
    size_t total;       /* nbytes + 4*SST */
    size_t original_nbytes;
    int i;

    if (p == NULL)
        return _PyMem_DebugMalloc(ctx, nbytes);

    _PyMem_DebugCheckAddress(api->api_id, p);
    bumpserialno();
    original_nbytes = read_size_t(q - 2*SST);
    total = nbytes + 4*SST;
    if (total < nbytes)
        /* overflow: can't represent total as a size_t */
        return NULL;

    /* Resize and add decorations. We may get a new pointer here, in which
     * case we didn't get the chance to mark the old memory with DEADBYTE,
     * but we live with that.
     */
    oldq = q;
    q = (uchar *)api->alloc.realloc(api->alloc.ctx, q - 2*SST, total);
    if (q == NULL)
        return NULL;

    if (q == oldq && nbytes < original_nbytes) {
        /* shrinking: mark old extra memory dead */
        memset(q + nbytes, DEADBYTE, original_nbytes - nbytes);
    }

    write_size_t(q, nbytes);
    assert(q[SST] == (uchar)api->api_id);
    for (i = 1; i < SST; ++i)
        assert(q[SST + i] == FORBIDDENBYTE);
    q += 2*SST;

    tail = q + nbytes;
    memset(tail, FORBIDDENBYTE, SST);
    write_size_t(tail + SST, serialno);

    if (nbytes > original_nbytes) {
        /* growing: mark new extra memory clean */
        memset(q + original_nbytes, CLEANBYTE,
               nbytes - original_nbytes);
    }

    return q;
}

/* Check the forbidden bytes on both ends of the memory allocated for p.
 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
 * and call Py_FatalError to kill the program.
 * The API id is also checked.
 */
static void
_PyMem_DebugCheckAddress(char api, const void *p)
{
    const uchar *q = (const uchar *)p;
    char msgbuf[64];
    char *msg;
    size_t nbytes;
    const uchar *tail;
    int i;
    char id;

    if (p == NULL) {
        msg = "didn't expect a NULL pointer";
        goto error;
    }

    /* Check the API id */
    id = (char)q[-SST];
    if (id != api) {
        msg = msgbuf;
        snprintf(msg, sizeof(msgbuf), "bad ID: Allocated using API '%c', verified using API '%c'", id, api);
        msgbuf[sizeof(msgbuf)-1] = 0;
        goto error;
    }

    /* Check the stuff at the start of p first: if there's underwrite
     * corruption, the number-of-bytes field may be nuts, and checking
     * the tail could lead to a segfault then.
     */
    for (i = SST-1; i >= 1; --i) {
        if (*(q-i) != FORBIDDENBYTE) {
            msg = "bad leading pad byte";
            goto error;
        }
    }

    nbytes = read_size_t(q - 2*SST);
    tail = q + nbytes;
    for (i = 0; i < SST; ++i) {
        if (tail[i] != FORBIDDENBYTE) {
            msg = "bad trailing pad byte";
            goto error;
        }
    }

    return;

error:
    _PyObject_DebugDumpAddress(p);
    Py_FatalError(msg);
}

/* Display info to stderr about the memory block at p. */
static void
_PyObject_DebugDumpAddress(const void *p)
{
    const uchar *q = (const uchar *)p;
    const uchar *tail;
    size_t nbytes, serial;
    int i;
    int ok;
    char id;

    fprintf(stderr, "Debug memory block at address p=%p:", p);
    if (p == NULL) {
        fprintf(stderr, "\n");
        return;
    }
    id = (char)q[-SST];
    fprintf(stderr, " API '%c'\n", id);

    nbytes = read_size_t(q - 2*SST);
    fprintf(stderr, "    %" PY_FORMAT_SIZE_T "u bytes originally "
                    "requested\n", nbytes);

    /* In case this is nuts, check the leading pad bytes first. */
    fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
    ok = 1;
    for (i = 1; i <= SST-1; ++i) {
        if (*(q-i) != FORBIDDENBYTE) {
            ok = 0;
            break;
        }
    }
    if (ok)
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
    else {
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
                FORBIDDENBYTE);
        for (i = SST-1; i >= 1; --i) {
            const uchar byte = *(q-i);
            fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
            if (byte != FORBIDDENBYTE)
                fputs(" *** OUCH", stderr);
            fputc('\n', stderr);
        }

        fputs("    Because memory is corrupted at the start, the "
              "count of bytes requested\n"
              "       may be bogus, and checking the trailing pad "
              "bytes may segfault.\n", stderr);
    }

    tail = q + nbytes;
    fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, tail);
    ok = 1;
    for (i = 0; i < SST; ++i) {
        if (tail[i] != FORBIDDENBYTE) {
            ok = 0;
            break;
        }
    }
    if (ok)
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
    else {
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
                FORBIDDENBYTE);
        for (i = 0; i < SST; ++i) {
            const uchar byte = tail[i];
            fprintf(stderr, "        at tail+%d: 0x%02x",
                    i, byte);
            if (byte != FORBIDDENBYTE)
                fputs(" *** OUCH", stderr);
            fputc('\n', stderr);
        }
    }

    serial = read_size_t(tail + SST);
    fprintf(stderr, "    The block was made by call #%" PY_FORMAT_SIZE_T
                    "u to debug malloc/realloc.\n", serial);

    if (nbytes > 0) {
        i = 0;
        fputs("    Data at p:", stderr);
        /* print up to 8 bytes at the start */
        while (q < tail && i < 8) {
            ++i;
            fprintf(stderr, " %02x", *q);
            ++q;
        }
        /* and up to 8 at the end */
        if (q < tail) {
            if (tail - q > 8) {
                fputs(" ...", stderr);
                q = tail - 8;
            }
            while (q < tail) {
                fprintf(stderr, " %02x", *q);
                ++q;
            }
        }
        fputc('\n', stderr);
    }
}
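
/* For reference, a hedged sample of the dump format for the tail-pad
 * overwrite sketched earlier (addresses and serial number invented):
 *
 *   Debug memory block at address p=0x7f0e2cd0: API 'm'
 *       8 bytes originally requested
 *       The 7 pad bytes at p-7 are FORBIDDENBYTE, as expected.
 *       The 8 pad bytes at tail=0x7f0e2cd8 are not all FORBIDDENBYTE (0xfb):
 *           at tail+0: 0x78 *** OUCH
 *           at tail+1: 0xfb
 *           ...
 *       The block was made by call #42 to debug malloc/realloc.
 *       Data at p: cb cb cb cb cb cb cb cb
 */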

#endif  /* PYMALLOC_DEBUG */

static size_t
printone(FILE *out, const char* msg, size_t value)
{
    int i, k;
    char buf[100];
    size_t origvalue = value;

    fputs(msg, out);
    for (i = (int)strlen(msg); i < 35; ++i)
        fputc(' ', out);
    fputc('=', out);

    /* Write the value with commas. */
    i = 22;
    buf[i--] = '\0';
    buf[i--] = '\n';
    k = 3;
    do {
        size_t nextvalue = value / 10;
        unsigned int digit = (unsigned int)(value - nextvalue * 10);
        value = nextvalue;
        buf[i--] = (char)(digit + '0');
        --k;
        if (k == 0 && value && i >= 0) {
            k = 3;
            buf[i--] = ',';
        }
    } while (value && i >= 0);

    while (i >= 0)
        buf[i--] = ' ';
    fputs(buf, out);

    return origvalue;
}
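
/* A hedged illustration (output invented): printone pads the label to 35
 * columns, prints '=', then the value right-justified with thousands
 * separators in a 21-character field, e.g.
 *
 *   # bytes in allocated blocks        =              462,112
 */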

void
_PyDebugAllocatorStats(FILE *out,
                       const char *block_name, int num_blocks, size_t sizeof_block)
{
    char buf1[128];
    char buf2[128];
    PyOS_snprintf(buf1, sizeof(buf1),
                  "%d %ss * %" PY_FORMAT_SIZE_T "d bytes each",
                  num_blocks, block_name, sizeof_block);
    PyOS_snprintf(buf2, sizeof(buf2),
                  "%48s ", buf1);
    (void)printone(out, buf2, num_blocks * sizeof_block);
}
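
/* A hedged usage sketch, modeled on the per-type free-list reports in
 * CPython's Objects/ directory (exact call sites may differ slightly):
 */
#if 0
void
_PyDict_DebugMallocStats(FILE *out)
{
    _PyDebugAllocatorStats(out, "free PyDictObject",
                           numfree, sizeof(PyDictObject));
}
#endif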

#ifdef WITH_PYMALLOC

/* Print summary info to "out" about the state of pymalloc's structures.
 * In Py_DEBUG mode, also perform some expensive internal consistency
 * checks.
 */
void
_PyObject_DebugMallocStats(FILE *out)
{
    uint i;
    const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
    /* # of pools, allocated blocks, and free blocks per class index */
    size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    /* total # of allocated bytes in used and full pools */
    size_t allocated_bytes = 0;
    /* total # of available bytes in used pools */
    size_t available_bytes = 0;
    /* # of free pools + pools not yet carved out of current arena */
    uint numfreepools = 0;
    /* # of bytes for arena alignment padding */
    size_t arena_alignment = 0;
    /* # of bytes in used and full pools used for pool_headers */
    size_t pool_header_bytes = 0;
    /* # of bytes in used and full pools wasted due to quantization,
     * i.e. the necessarily leftover space at the ends of used and
     * full pools.
     */
    size_t quantization = 0;
    /* # of arenas actually allocated. */
    size_t narenas = 0;
    /* running total -- should equal narenas * ARENA_SIZE */
    size_t total;
    char buf[128];

    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
            SMALL_REQUEST_THRESHOLD, numclasses);

    for (i = 0; i < numclasses; ++i)
        numpools[i] = numblocks[i] = numfreeblocks[i] = 0;

    /* Because full pools aren't linked to from anything, it's easiest
     * to march over all the arenas.  If we're lucky, most of the memory
     * will be living in full pools -- would be a shame to miss them.
     */
    for (i = 0; i < maxarenas; ++i) {
        uint j;
        uptr base = arenas[i].address;

        /* Skip arenas which are not allocated. */
        if (arenas[i].address == (uptr)NULL)
            continue;
        narenas += 1;

        numfreepools += arenas[i].nfreepools;

        /* round up to pool alignment */
        if (base & (uptr)POOL_SIZE_MASK) {
            arena_alignment += POOL_SIZE;
            base &= ~(uptr)POOL_SIZE_MASK;
            base += POOL_SIZE;
        }

        /* visit every pool in the arena */
        assert(base <= (uptr) arenas[i].pool_address);
        for (j = 0;
             base < (uptr) arenas[i].pool_address;
             ++j, base += POOL_SIZE) {
            poolp p = (poolp)base;
            const uint sz = p->szidx;
            uint freeblocks;

            if (p->ref.count == 0) {
                /* currently unused */
                assert(pool_is_in_list(p, arenas[i].freepools));
                continue;
            }
            ++numpools[sz];
            numblocks[sz] += p->ref.count;
            freeblocks = NUMBLOCKS(sz) - p->ref.count;
            numfreeblocks[sz] += freeblocks;
#ifdef Py_DEBUG
            if (freeblocks > 0)
                assert(pool_is_in_list(p, usedpools[sz + sz]));
#endif
        }
    }
    assert(narenas == narenas_currently_allocated);

    fputc('\n', out);
    fputs("class   size   num pools   blocks in use  avail blocks\n"
          "-----   ----   ---------   -------------  ------------\n",
          out);

    for (i = 0; i < numclasses; ++i) {
        size_t p = numpools[i];
        size_t b = numblocks[i];
        size_t f = numfreeblocks[i];
        uint size = INDEX2SIZE(i);
        if (p == 0) {
            assert(b == 0 && f == 0);
            continue;
        }
        fprintf(out, "%5u %6u "
                     "%11" PY_FORMAT_SIZE_T "u "
                     "%15" PY_FORMAT_SIZE_T "u "
                     "%13" PY_FORMAT_SIZE_T "u\n",
                i, size, p, b, f);
        allocated_bytes += b * size;
        available_bytes += f * size;
        pool_header_bytes += p * POOL_OVERHEAD;
        quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
    }
    fputc('\n', out);
#ifdef PYMALLOC_DEBUG
    (void)printone(out, "# times object malloc called", serialno);
#endif
    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
    (void)printone(out, "# arenas highwater mark", narenas_highwater);
    (void)printone(out, "# arenas allocated current", narenas);

    PyOS_snprintf(buf, sizeof(buf),
                  "%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena",
                  narenas, ARENA_SIZE);
    (void)printone(out, buf, narenas * ARENA_SIZE);

    fputc('\n', out);

    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
    total += printone(out, "# bytes in available blocks", available_bytes);

    PyOS_snprintf(buf, sizeof(buf),
                  "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);

    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
    total += printone(out, "# bytes lost to quantization", quantization);
    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
    (void)printone(out, "Total", total);
}

#endif /* #ifdef WITH_PYMALLOC */
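
/* Hedged usage note: this report is reachable from Python code as
 * sys._debugmallocstats(), which routes here with out=stderr, e.g.
 *
 *     python -c "import sys; sys._debugmallocstats()"
 */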

#ifdef Py_USING_MEMORY_DEBUGGER
/* Make this function last so gcc won't inline it since the definition is
 * after the reference.
 */
int
Py_ADDRESS_IN_RANGE(void *P, poolp pool)
{
    uint arenaindex_temp = pool->arenaindex;

    return arenaindex_temp < maxarenas &&
           (uptr)P - arenas[arenaindex_temp].address < (uptr)ARENA_SIZE &&
           arenas[arenaindex_temp].address != 0;
}
#endif
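
/* Hedged aside (not part of this file): the single unsigned comparison in
 * Py_ADDRESS_IN_RANGE is the classic wrap-around range check.  A minimal
 * standalone model, with invented names:
 */
#if 0
#include <stdint.h>

static int
in_range(uintptr_t p, uintptr_t base, size_t size)
{
    /* If p < base, the unsigned subtraction wraps to a huge value, so one
     * comparison covers both p >= base and p < base + size. */
    return p - base < size;
}
#endif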