~ubuntu-branches/debian/sid/meliae/sid : revision 1

1

2

#

3

# This program is free software: you can redistribute it and/or modify

4

# it under the terms of the GNU General Public License version 3 as

5

# published by the Free Software Foundation.

6

#

7

# This program is distributed in the hope that it will be useful, but

8

# WITHOUT ANY WARRANTY; without even the implied warranty of

9

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

10

# General Public License for more details.

11

#

12

# You should have received a copy of the GNU General Public License

13

# along with this program. If not, see <http://www.gnu.org/licenses/>.

14

15

"""Some bits for helping to scan objects looking for referenced memory."""

16

17

import gc

18

import types

19

20

from meliae import (

21

_intset,

22

_scanner,

23

)

24

25

26

# Module-level alias so callers can use scanner.size_of directly instead of
# reaching into the _scanner extension module.
size_of = _scanner.size_of

27

# Module-level alias for _scanner.get_referents; the traversal helpers in this
# module call it to enumerate the objects referenced by a given object.
get_referents = _scanner.get_referents

28

29

30

def dump_all_referenced(outf, obj, is_pending=False):
    """Recursively dump everything that is referenced from obj.

    Every reachable object is written exactly once; already-dumped objects
    are tracked by id() in an IDSet.

    :param outf: Where to write the dump. A string is treated as a path: the
        file is opened in 'wb' mode and closed again before returning (also
        when dumping fails). Anything else is passed to
        _scanner.dump_object_info unchanged and is left open.
    :param obj: The root object to walk from or, when is_pending is True, a
        list of root objects.
    :param is_pending: If True, obj is used directly as the work list. The
        list is modified in place while walking, so the caller should not
        rely on its contents afterwards.
    """
    # Use basestring (like dump_gc_objects/dump_all_objects) so that unicode
    # paths are opened too, and remember whether we own the file handle.
    opened = isinstance(outf, basestring)
    if opened:
        outf = open(outf, 'wb')
    try:
        if is_pending:
            pending = obj
        else:
            pending = [obj]
        # 'pending' is used as a stack whose top is tracked by last_offset.
        # Slots above last_offset are overwritten rather than popped, so the
        # list is never shrunk while walking.
        last_offset = len(pending) - 1
        seen = _intset.IDSet()
        while last_offset >= 0:
            current = pending[last_offset]
            last_offset -= 1
            id_current = id(current)
            if id_current in seen:
                continue
            seen.add(id_current)
            # We will recurse here, so tell dump_object_info to not recurse
            _scanner.dump_object_info(outf, current, recurse_depth=0)
            for ref in get_referents(current):
                if id(ref) not in seen:
                    last_offset += 1
                    if len(pending) > last_offset:
                        pending[last_offset] = ref
                    else:
                        pending.append(ref)
    finally:
        # Only close what we opened ourselves; a caller-supplied file stays
        # under the caller's control.
        if opened:
            outf.close()

56

57

58

def dump_gc_objects(outf, recurse_depth=1):
    """Dump everything that is available via gc.get_objects().

    :param outf: Where to write the dump. A string is treated as a path: the
        file is opened in 'wb' mode and closed before returning (also when
        dumping fails). Any other object is written to and flushed, but left
        open for the caller.
    :param recurse_depth: Passed through to _scanner.dump_object_info for
        every object returned by gc.get_objects().
    """
    opened = isinstance(outf, basestring)
    if opened:
        outf = open(outf, 'wb')
    # Get the list of everything before we start building new objects
    all_objs = gc.get_objects()
    try:
        # Dump out a few specific objects, so they don't get repeated forever
        nodump = [None, True, False]
        # In current versions of python, these are all pre-cached. CPython
        # caches the small ints -5..256 inclusive, hence the 257 upper bound.
        nodump.extend(xrange(-5, 257))
        nodump.extend([chr(c) for c in xrange(256)])
        nodump.extend([t for t in types.__dict__.itervalues()
                       if type(t) is types.TypeType])
        nodump.extend([set, dict])
        # Some very common interned strings
        nodump.extend(('__doc__', 'self', 'operator', '__init__', 'codecs',
                       '__new__', '__builtin__', '__builtins__', 'error',
                       'len', 'errors', 'keys', 'None', '__module__', 'file',
                       'name', '', 'sys', 'True', 'False'))
        nodump.extend((BaseException, Exception, StandardError, ValueError))
        for obj in nodump:
            _scanner.dump_object_info(outf, obj, nodump=None, recurse_depth=0)
        # Avoid dumping this function as well. This helps avoid getting a
        # 'reference everything in existence' problem. (all_objs itself is a
        # list, so it cannot be stored in the frozenset built below.)
        nodump.append(dump_gc_objects)
        # This currently costs us ~16kB during dumping, but means we won't
        # write out those objects multiple times in the log file.
        # TODO: we might want to make nodump a variable-size dict, and add
        #       anything with ob_refcnt > 1000 or so.
        nodump = frozenset(nodump)
        for obj in all_objs:
            _scanner.dump_object_info(outf, obj, nodump=nodump,
                                      recurse_depth=recurse_depth)
    finally:
        # Drop our references to every gc object and release the file we
        # opened, even if dumping failed part way through.
        del all_objs[:]
        if opened:
            outf.close()
    if not opened:
        outf.flush()

100

101

102

def dump_all_objects(outf):
    """Dump everything that is referenced from gc.get_objects()

    This recurses, and tracks dumped objects in an IDSet. Which means it costs
    memory, which is often about 10% of currently active memory. Otherwise,
    this usually results in smaller dump files than dump_gc_objects().

    This also can be faster, because it doesn't dump the same item multiple
    times.

    :param outf: Where to write the dump. A string is treated as a path: the
        file is opened in 'wb' mode and closed before returning (also when
        dumping fails). Any other object is written to and flushed, but left
        open for the caller.
    """
    opened = isinstance(outf, basestring)
    if opened:
        outf = open(outf, 'wb')
    all_objs = gc.get_objects()
    try:
        # all_objs is handed over as the work list (is_pending=True), so it
        # is modified in place by dump_all_referenced.
        dump_all_referenced(outf, all_objs, is_pending=True)
    finally:
        # Drop our references to every gc object and release the file we
        # opened, even if dumping failed part way through.
        del all_objs[:]
        if opened:
            outf.close()
    if not opened:
        outf.flush()

124

125

126

127

def get_recursive_size(obj):
    """Measure obj together with everything reachable from it.

    Each reachable object is visited once (tracked by id() in an IDSet) and
    its _scanner.size_of() value is added to a running total.

    :param obj: The root object to start walking from.
    :return: A tuple (number_of_objects, total_size), in that order.
    """
    measure = _scanner.size_of
    visited = _intset.IDSet()
    # 'stack' is a manual stack: 'top' is the index of the next entry to
    # process, and slots above it are overwritten instead of being popped.
    stack = [obj]
    top = 0
    byte_count = 0
    while top >= 0:
        current = stack[top]
        top -= 1
        current_id = id(current)
        if current_id not in visited:
            visited.add(current_id)
            byte_count += measure(current)
            for referent in get_referents(current):
                if id(referent) in visited:
                    continue
                top += 1
                if top < len(stack):
                    stack[top] = referent
                else:
                    stack.append(referent)
    return len(visited), byte_count

154

155

156

def get_recursive_items(obj):
    """Collect obj and every object reachable from it, each exactly once.

    :param obj: The root object to start walking from.
    :return: A list of the unique objects found, in the order they were
        visited (obj itself comes first).
    """
    collected = []
    visited = _intset.IDSet()
    # 'stack' is a manual stack: 'top' is the index of the next entry to
    # process, and slots above it are overwritten instead of being popped.
    stack = [obj]
    top = 0
    while top >= 0:
        current = stack[top]
        top -= 1
        current_id = id(current)
        if current_id not in visited:
            visited.add(current_id)
            collected.append(current)
            for referent in get_referents(current):
                if id(referent) in visited:
                    continue
                top += 1
                if top < len(stack):
                    stack[top] = referent
                else:
                    stack.append(referent)
    return collected

178

179

180

def find_interned_dict():
    """Search the gc-tracked objects for python's interned-string dict.

    A dict is accepted when it maps both 'find_interned_dict' and
    'get_recursive_items' to the very same string objects this function uses
    as constants.

    :return: The first matching dict, or None if no tracked dict matches.
    """
    # The identity checks below assume that local strings will be interned,
    # which is the standard cpython behavior, but perhaps not the best to
    # require? However, if we used something like a custom string that we
    # intern() we still could have problems with locals(), etc.
    own_name = 'find_interned_dict'
    sibling_name = 'get_recursive_items'
    for candidate in gc.get_objects():
        if type(candidate) is not dict:
            continue
        if own_name not in candidate or candidate[own_name] is not own_name:
            continue
        if (sibling_name not in candidate
                or candidate[sibling_name] is not sibling_name):
            continue
        return candidate
    return None