1
# Copyright (C) 2009, 2010 Canonical Ltd
3
# This program is free software: you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License version 3 as
5
# published by the Free Software Foundation.
7
# This program is distributed in the hope that it will be useful, but
8
# WITHOUT ANY WARRANTY; without even the implied warranty of
9
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10
# General Public License for more details.
12
# You should have received a copy of the GNU General Public License
13
# along with this program. If not, see <http://www.gnu.org/licenses/>.
15
"""Routines and objects for loading dump files."""
17
cdef extern from "Python.h":
18
ctypedef unsigned long size_t
19
ctypedef struct PyObject:
22
void *PyMem_Malloc(size_t)
23
void PyMem_Free(void *)
25
long PyObject_Hash(PyObject *) except -1
27
object PyList_New(Py_ssize_t)
28
void PyList_SET_ITEM(object, Py_ssize_t, object)
29
PyObject *PyDict_GetItem(object d, object key)
30
PyObject *PyDict_GetItem_ptr "PyDict_GetItem" (object d, PyObject *key)
31
int PyDict_SetItem(object d, object key, object val) except -1
32
int PyDict_SetItem_ptr "PyDict_SetItem" (object d, PyObject *key,
33
PyObject *val) except -1
34
void Py_INCREF(PyObject*)
35
void Py_XDECREF(PyObject*)
36
void Py_DECREF(PyObject*)
37
object PyTuple_New(Py_ssize_t)
38
object PyTuple_SET_ITEM(object, Py_ssize_t, object)
39
int PyObject_RichCompareBool(PyObject *, PyObject *, int) except -1
41
void memset(void *, int, size_t)
43
# void fprintf(void *, char *, ...)
47
from meliae import warn
50
ctypedef struct RefList:
55
cdef object _set_default(object d, object val):
56
"""Either return the value in the dict, or return 'val'.
58
This is the same as Dict.setdefault, but without the attribute lookups. (It
59
may be slightly worse because it does 2 dict lookups?)
63
# TODO: Note that using _lookup directly would remove the need to ever do a
64
# double-lookup to set the value
65
tmp = PyDict_GetItem(d, val)
67
PyDict_SetItem(d, val, val)
73
cdef int _set_default_ptr(object d, PyObject **val) except -1:
74
"""Similar to _set_default, only it sets the val in place"""
77
tmp = PyDict_GetItem_ptr(d, val[0])
79
# val is unchanged, so we don't change the refcounts
80
PyDict_SetItem_ptr(d, val[0], val[0])
83
# We will be pointing val to something new, so fix up the refcounts
90
cdef int _free_ref_list(RefList *ref_list) except -1:
91
"""Decref and free the list."""
96
for i from 0 <= i < ref_list.size:
97
if ref_list.refs[i] == NULL:
98
raise RuntimeError('Somehow we got a NULL reference.')
99
Py_DECREF(ref_list.refs[i])
104
cdef object _ref_list_to_list(RefList *ref_list):
105
"""Convert the notation of [len, items, ...] into [items].
107
:param ref_list: A pointer to NULL, or to a list of longs. The list should
108
start with the count of items
111
# TODO: Always return a tuple, we already know the width, and this prevents
112
# double malloc(). However, this probably isn't a critical code path
117
refs_append = refs.append
118
for i from 0 <= i < ref_list.size:
119
refs_append(<object>(ref_list.refs[i]))
123
cdef RefList *_list_to_ref_list(object refs) except? NULL:
124
cdef long i, num_refs
125
cdef RefList *ref_list
130
ref_list = <RefList *>PyMem_Malloc(sizeof(RefList) +
131
sizeof(PyObject*)*num_refs)
132
ref_list.size = num_refs
135
ref_list.refs[i] = <PyObject*>ref
136
Py_INCREF(ref_list.refs[i])
141
cdef object _format_list(RefList *ref_list):
142
cdef long i, max_refs
146
max_refs = ref_list.size
150
for i from 0 <= i < max_refs:
152
ref_str.append('%d' % (<object>ref_list.refs[i]))
154
ref_str.append(', %d' % (<object>ref_list.refs[i]))
155
if ref_list.size > 10:
156
ref_str.append(', ...]')
159
return ''.join(ref_str)
162
cdef struct _MemObject:
163
# """The raw C structure, used to minimize memory allocation size."""
166
# Consider making this unsigned long
169
# Removed for now, since it hasn't proven useful
172
# TODO: I don't think I've found an object that has both a value and a
173
# name. As such, I should probably remove the redundancy, as it saves
177
unsigned long total_size
178
# This is an uncounted ref to a _MemObjectProxy. _MemObjectProxy also has a
179
# refreence to this object, so when it disappears it can set the reference
184
cdef _MemObject *_new_mem_object(address, type_str, size, children,
185
value, name, parent_list, total_size) except NULL:
186
cdef _MemObject *new_entry
189
new_entry = <_MemObject *>PyMem_Malloc(sizeof(_MemObject))
190
if new_entry == NULL:
191
raise MemoryError('Failed to allocate %d bytes' % (sizeof(_MemObject),))
192
memset(new_entry, 0, sizeof(_MemObject))
193
addr = <PyObject *>address
195
new_entry.address = addr
196
new_entry.type_str = <PyObject *>type_str
197
Py_INCREF(new_entry.type_str)
198
new_entry.size = size
199
new_entry.child_list = _list_to_ref_list(children)
200
# TODO: Was found wanting and removed
202
# new_entry.length = -1
204
# new_entry.length = length
205
if value is not None and name is not None:
206
raise RuntimeError("We currently only support one of value or name"
208
if value is not None:
209
new_entry.value = <PyObject *>value
211
new_entry.value = <PyObject *>name
212
Py_INCREF(new_entry.value)
213
new_entry.parent_list = _list_to_ref_list(parent_list)
214
new_entry.total_size = total_size
218
cdef int _free_mem_object(_MemObject *cur) except -1:
219
if cur == NULL: # Already cleared
223
if cur.address == NULL:
224
raise RuntimeError('clering something that doesn\'t have address')
225
Py_XDECREF(cur.address)
227
Py_XDECREF(cur.type_str)
229
_free_ref_list(cur.child_list)
230
cur.child_list = NULL
231
Py_XDECREF(cur.value)
233
# Py_XDECREF(cur.name)
235
_free_ref_list(cur.parent_list)
236
cur.parent_list = NULL
242
cdef _MemObject *_dummy
243
_dummy = <_MemObject*>(-1)
246
cdef class MemObjectCollection
247
cdef class _MemObjectProxy
250
def _MemObjectProxy_from_args(address, type_str, size, children=(), length=0,
251
value=None, name=None, parent_list=(),
253
"""Create a standalone _MemObjectProxy instance.
255
Note that things like '__getitem__' won't work, as they query the
256
collection for the actual data.
258
cdef _MemObject *new_entry
259
cdef _MemObjectProxy proxy
261
new_entry = _new_mem_object(address, type_str, size, children,
262
value, name, parent_list, total_size)
263
proxy = _MemObjectProxy(None)
264
proxy._obj = new_entry
265
proxy._managed_obj = new_entry
266
new_entry.proxy = <PyObject *>proxy
270
cdef class _MemObjectProxy:
271
"""The standard interface for understanding memory consumption.
273
MemObjectCollection stores the data as a fairly efficient table, without
274
the overhead of having a regular python object for every data point.
275
However, the rest of python code needs to interact with a real python
276
object, so we generate these on-the-fly.
278
Most attributes are properties, which thunk over to the actual data table
281
:ivar address: The address in memory of the original object. This is used
282
as the 'handle' to this object.
283
:ivar type_str: The type of this object
284
:ivar size: The number of bytes consumed for just this object. So for a
285
dict, this would be the basic_size + the size of the allocated array to
286
store the reference pointers
287
:ivar children: A list of items referenced from this object
288
:ivar num_refs: Count of references, you can also use len()
289
:ivar value: A PyObject representing the Value for this object. (For
290
strings, it is the first 100 bytes, it may be None if we have no value,
291
or it may be an integer, etc.) This is also where the 'name' is stored
292
for objects like 'module'.
296
cdef MemObjectCollection collection
297
# This must be set immediately after construction, before accessing any
299
cdef _MemObject *_obj
300
# If not NULL, this will be freed when this object is deallocated
301
cdef _MemObject *_managed_obj
303
def __init__(self, collection):
304
self.collection = collection
306
self._managed_obj = NULL
308
def __dealloc__(self):
309
if self._obj != NULL:
310
if self._obj.proxy == <PyObject *>self:
311
# This object is going away, remove the reference
312
self._obj.proxy = NULL
314
# fprintf(stderr, "obj at address %x referenced"
315
# " a proxy that was not self\n",
316
# <int><object>self._obj.address)
317
if self._managed_obj != NULL:
318
_free_mem_object(self._managed_obj)
319
self._managed_obj = NULL
322
"""The identifier for the tracked object."""
324
return <object>(self._obj.address)
327
"""The type of this object."""
329
return <object>(self._obj.type_str)
331
def __set__(self, value):
333
ptr = <PyObject *>value
335
Py_DECREF(self._obj.type_str)
336
self._obj.type_str = ptr
339
"""The number of bytes allocated for this object."""
341
return self._obj.size
343
def __set__(self, value):
344
self._obj.size = value
347
# """Name associated with this object."""
349
# return <object>self._obj.name
352
"""Value for this object (for strings and ints)"""
354
return <object>self._obj.value
356
def __set__(self, value):
357
cdef PyObject *new_val
358
new_val = <PyObject *>value
359
# INCREF first, just in case value is self._obj.value
361
Py_DECREF(self._obj.value)
362
self._obj.value = new_val
365
"""Mean to hold the size of this plus size of all referenced objects."""
367
return self._obj.total_size
369
def __set__(self, value):
370
self._obj.total_size = value
373
if self._obj.child_list == NULL:
375
return self._obj.child_list.size
379
warn.deprecated('Attribute .num_refs deprecated.'
380
' Use len() instead.')
381
return self.__len__()
383
def _intern_from_cache(self, cache):
385
_set_default_ptr(cache, &self._obj.address)
386
_set_default_ptr(cache, &self._obj.type_str)
387
if self._obj.child_list != NULL:
388
for i from 0 <= i < self._obj.child_list.size:
389
_set_default_ptr(cache, &self._obj.child_list.refs[i])
390
if self._obj.parent_list != NULL:
391
for i from 0 <= i < self._obj.parent_list.size:
392
_set_default_ptr(cache, &self._obj.parent_list.refs[i])
396
"""The list of objects referenced by this object."""
398
return _ref_list_to_list(self._obj.child_list)
400
def __set__(self, value):
401
_free_ref_list(self._obj.child_list)
402
self._obj.child_list = _list_to_ref_list(value)
405
"""The list of objects referenced by this object.
407
Deprecated, use .children instead.
410
warn.deprecated('Attribute .ref_list deprecated.'
411
' Use .children instead.')
414
def __set__(self, val):
415
warn.deprecated('Attribute .ref_list deprecated.'
416
' Use .children instead.')
420
"""Objects which refer to this object.
422
Deprecated, use .parents instead.
425
warn.deprecated('Attribute .referrers deprecated.'
426
' Use .parents instead.')
429
def __set__(self, value):
430
warn.deprecated('Attribute .referrers deprecated.'
431
' Use .parents instead.')
435
"""The list of objects that reference this object.
437
Original set to None, can be computed on demand.
440
return _ref_list_to_list(self._obj.parent_list)
442
def __set__(self, value):
443
_free_ref_list(self._obj.parent_list)
444
self._obj.parent_list = _list_to_ref_list(value)
446
property num_referrers:
447
"""The length of the parents list."""
449
warn.deprecated('Attribute .num_referrers deprecated.'
450
' Use .num_parents instead.')
451
if self._obj.parent_list == NULL:
453
return self._obj.parent_list.size
455
property num_parents:
456
"""The length of the parents list."""
458
if self._obj.parent_list == NULL:
460
return self._obj.parent_list.size
462
def __getitem__(self, offset):
465
if self._obj.child_list == NULL:
466
raise IndexError('%s has no references' % (self,))
468
if off >= self._obj.child_list.size:
469
raise IndexError('%s has only %d (not %d) references'
470
% (self, self._obj.child_list.size, offset))
471
address = <object>self._obj.child_list.refs[off]
473
return self.collection[address]
475
# TODO: What to do if the object isn't present? I think returning a
476
# 'no-such-object' proxy would be nicer than returning nothing
480
"""The list of children objects as objects (not references)."""
485
if self._obj.child_list == NULL:
487
for pos from 0 <= pos < self._obj.child_list.size:
488
address = <object>self._obj.child_list.refs[pos]
489
obj = self.collection[address]
494
"""The list of parent objects as objects (not references)."""
499
if self._obj.parent_list == NULL:
501
for pos from 0 <= pos < self._obj.parent_list.size:
502
address = <object>self._obj.parent_list.refs[pos]
504
obj = self.collection[address]
506
# We should probably create an "unknown object" type
512
if self._obj.child_list == NULL:
515
refs = ' %drefs' % (self._obj.child_list.size,)
516
if self._obj.parent_list == NULL:
519
parent_str = ' %dpar' % (self._obj.parent_list.size,)
520
if self._obj.value == NULL or self._obj.value == Py_None:
523
val = ' %r' % (<object>self._obj.value,)
524
if self._obj.total_size == 0:
527
total_size = float(self._obj.total_size)
529
if total_size > 800.0:
530
total_size = total_size / 1024.0
532
if total_size > 800.0:
533
total_size = total_size / 1024.0
535
if total_size > 800.0:
536
total_size = total_size / 1024.0
538
total_size_str = ' %.1f%stot' % (total_size, order)
539
return '%s(%d %dB%s%s%s%s)' % (
540
self.type_str, self.address, self.size,
541
refs, parent_str, val, total_size_str)
544
"""Convert this back into json."""
546
for ref in sorted(self.children):
547
refs.append(str(ref))
548
# Note: We've lost the info about whether this was a value or a name
549
# We've also lost the 'length' field.
550
if self.value is not None:
551
if self.type_str == 'int':
552
value = '"value": %s, ' % self.value
554
# TODO: This isn't perfect, as it doesn't do proper json
556
value = '"value": "%s", ' % self.value
559
return '{"address": %d, "type": "%s", "size": %d, %s"refs": [%s]}' % (
560
self.address, self.type_str, self.size, value, ', '.join(refs))
562
def refs_as_dict(self):
563
"""Expand the ref list considering it to be a 'dict' structure.
565
Often we have dicts that point to simple strings and ints, etc. This
566
tries to expand that as much as possible.
569
children = self.children
570
if self.type_str not in ('dict', 'module'):
571
# Instance dicts end with a 'type' reference
572
children = children[:-1]
573
for idx in xrange(0, len(children), 2):
574
key = self.collection[children[idx]]
575
val = self.collection[children[idx+1]]
576
if key.value is not None:
578
# TODO: We should consider recursing if val is a 'known' type, such
580
if val.type_str == 'bool':
581
val = (val.value == 'True')
582
elif val.value is not None:
584
elif val.type_str == 'NoneType':
590
cdef class MemObjectCollection:
591
"""Track a bunch of _MemObject instances."""
593
cdef readonly int _table_mask # N slots = table_mask + 1
594
cdef readonly int _active # How many slots have real data
595
cdef readonly int _filled # How many slots have real or dummy
596
cdef _MemObject** _table # _MemObjects are stored inline
599
self._table_mask = 1024 - 1
600
self._table = <_MemObject**>PyMem_Malloc(sizeof(_MemObject*)*1024)
601
memset(self._table, 0, sizeof(_MemObject*)*1024)
606
cdef _MemObject** _lookup(self, address) except NULL:
608
cdef size_t i, n_lookup
610
cdef _MemObject **table, **slot, **free_slot
611
cdef PyObject *py_addr
613
py_addr = <PyObject *>address
614
the_hash = PyObject_Hash(py_addr)
616
mask = self._table_mask
619
for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
620
slot = &table[i & mask]
623
if free_slot != NULL:
624
# Did we find an earlier _dummy entry?
628
elif slot[0] == _dummy:
629
if free_slot == NULL:
631
elif slot[0].address == py_addr:
632
# Found an exact pointer to the key
634
elif slot[0].address == NULL:
635
raise RuntimeError('Found a non-empty slot with null address')
636
elif PyObject_RichCompareBool(slot[0].address, py_addr, Py_EQ):
637
# Both py_key and cur belong in this slot, return it
640
raise RuntimeError('we failed to find an open slot after %d lookups'
643
cdef int _clear_slot(self, _MemObject **slot) except -1:
644
_free_mem_object(slot[0])
648
def _test_lookup(self, address):
649
cdef _MemObject **slot
651
slot = self._lookup(address)
652
return (slot - self._table)
654
def __contains__(self, address):
655
cdef _MemObject **slot
657
slot = self._lookup(address)
658
if slot[0] == NULL or slot[0] == _dummy:
662
cdef _MemObjectProxy _proxy_for(self, address, _MemObject *val):
663
cdef _MemObjectProxy proxy
665
if val.proxy == NULL:
666
proxy = _MemObjectProxy(self)
668
val.proxy = <PyObject *>proxy
670
proxy = <object>val.proxy
673
def __getitem__(self, at):
674
cdef _MemObject **slot
675
cdef _MemObjectProxy proxy
677
if isinstance(at, _MemObjectProxy):
684
slot = self._lookup(address)
685
if slot[0] == NULL or slot[0] == _dummy:
686
raise KeyError('address %s not present' % (at,))
688
proxy = self._proxy_for(address, slot[0])
690
assert proxy._obj == slot[0]
693
def get(self, at, default=None):
699
def __delitem__(self, at):
700
cdef _MemObject **slot
701
cdef _MemObjectProxy proxy
703
if isinstance(at, _MemObjectProxy):
708
slot = self._lookup(address)
709
if slot[0] == NULL or slot[0] == _dummy:
710
raise KeyError('address %s not present' % (at,))
711
if slot[0].proxy != NULL:
712
# Have the proxy take over the memory lifetime. At the same time,
713
# we break the reference cycle, so that the proxy will get cleaned
715
proxy = <object>slot[0].proxy
716
proxy._managed_obj = proxy._obj
718
# Without a proxy, we just nuke the object
719
self._clear_slot(slot)
724
#def __setitem__(self, address, value):
725
# """moc[address] = value"""
728
cdef int _insert_clean(self, _MemObject *entry) except -1:
729
"""Copy _MemObject into the table.
731
We know that this _MemObject is unique, and we know that self._table
732
contains no _dummy entries. So we can do the lookup cheaply, without
733
any equality checks, etc.
736
cdef size_t i, n_lookup, mask
737
cdef _MemObject **slot
739
assert entry != NULL and entry.address != NULL
740
mask = <size_t>self._table_mask
741
the_hash = <size_t>PyObject_Hash(entry.address)
743
for n_lookup from 0 <= n_lookup < mask:
744
slot = &self._table[i & mask]
751
raise RuntimeError('could not find a free slot after %d lookups'
754
cdef int _resize(self, int min_active) except -1:
755
"""Resize the internal table.
757
We will be big enough to hold at least 'min_active' entries. We will
758
create a copy of all data, leaving out dummy entries.
760
:return: The new table size.
762
cdef int new_size, remaining
764
cdef _MemObject **old_table, **old_slot, **new_table
767
while new_size <= min_active and new_size > 0:
770
raise MemoryError('table size too large for %d entries'
772
n_bytes = sizeof(_MemObject*)*new_size
773
new_table = <_MemObject**>PyMem_Malloc(n_bytes)
774
if new_table == NULL:
775
raise MemoryError('Failed to allocate %d bytes' % (n_bytes,))
776
memset(new_table, 0, n_bytes)
777
old_slot = old_table = self._table
778
self._table = new_table
779
self._table_mask = new_size - 1
780
remaining = self._active
785
if old_slot[0] == NULL:
787
elif old_slot[0] == _dummy:
791
self._insert_clean(old_slot[0])
793
# Moving everything over is refcount neutral, so we just free the old
795
PyMem_Free(old_table)
799
def add(self, address, type_str, size, children=(), length=0,
800
value=None, name=None, parent_list=(), total_size=0):
801
"""Add a new MemObject to this collection."""
802
cdef _MemObject **slot, *new_entry
803
cdef _MemObjectProxy proxy
805
slot = self._lookup(address)
806
if slot[0] != NULL and slot[0] != _dummy:
807
# We are overwriting an existing entry, for now, fail
808
# Probably all we have to do is clear the slot first, then continue
809
assert False, "We don't support overwrite yet."
810
# TODO: These are fairy small and more subject to churn, maybe we
811
# should be using PyObj_Malloc instead...
812
new_entry = _new_mem_object(address, type_str, size, children,
813
value, name, parent_list, total_size)
819
if self._filled * 3 > (self._table_mask + 1) * 2:
821
self._resize(self._active * 2)
822
proxy = self._proxy_for(address, new_entry)
825
def __dealloc__(self):
828
for i from 0 <= i < self._table_mask:
829
self._clear_slot(self._table + i)
830
PyMem_Free(self._table)
834
return self.iterkeys()
837
return iter(self.keys())
842
cdef _MemObjectProxy proxy
844
# TODO: Pre-allocate the full size list
846
for i from 0 <= i < self._table_mask:
848
if cur == NULL or cur == _dummy:
851
address = <object>cur.address
852
values.append(address)
859
"""Iterate over (key, value) tuples."""
862
cdef _MemObjectProxy proxy
864
enabled = gc.isenabled()
866
# We are going to be creating a lot of objects here, but not with
867
# cycles, so we disable gc temporarily
868
# With an object list of ~3M items, this drops the .items() time
869
# from 25s down to 1.3s
872
values = PyList_New(self._active)
874
for i from 0 <= i < self._table_mask:
876
if cur == NULL or cur == _dummy:
879
address = <object>cur.address
880
proxy = self._proxy_for(address, cur)
881
item = (address, proxy)
882
# SET_ITEM steals a reference
883
Py_INCREF(<PyObject *>item)
884
PyList_SET_ITEM(values, out_idx, item)
891
def itervalues(self):
892
"""Return an iterable of values stored in this map."""
893
return _MOCValueIterator(self)
896
# This returns a list, but that is 'close enough' for what we need
899
cdef _MemObjectProxy proxy
902
for i from 0 <= i < self._table_mask:
904
if cur == NULL or cur == _dummy:
907
proxy = self._proxy_for(<object>cur.address, cur)
912
cdef class _MOCValueIterator:
913
"""A simple iterator over the values in a MOC."""
915
cdef MemObjectCollection collection
916
cdef int initial_active
919
def __init__(self, collection):
920
self.collection = collection
921
self.initial_active = self.collection._active
930
if self.collection._active != self.initial_active:
931
raise RuntimeError('MemObjectCollection changed size during'
933
while (self.table_pos <= self.collection._table_mask):
934
cur = self.collection._table[self.table_pos]
935
if cur != NULL and cur != _dummy:
938
if self.table_pos > self.collection._table_mask:
939
raise StopIteration()
940
# This entry is 'consumed', go on to the next
942
if cur == NULL or cur == _dummy:
943
raise RuntimeError('didn\'t run off the end, but got null/dummy'
944
' %d, %d %d' % (<int>cur, self.table_pos,
945
self.collection._table_mask))
946
return self.collection._proxy_for(<object>cur.address, cur)