# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""A simple least-recently-used (LRU) cache."""

_null_key = object()

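# A quick usage sketch (illustrative only, see the class docstrings below):
#
#   cache = LRUCache(max_cache=10)
#   cache['eggs'] = 'spam'            # __setitem__, no cleanup function
#   cache.add('ham', 'bacon', cleanup=None)
#   value = cache['eggs']             # marks 'eggs' as most recently used
#
# Once more than max_cache entries are added, the least recently used
# entries are evicted first.
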
class _LRUNode(object):
    """This maintains the linked-list which is the lru internals."""

    __slots__ = ('prev', 'next_key', 'key', 'value', 'cleanup', 'size')

    def __init__(self, key, value, cleanup=None):
        self.prev = None
        self.next_key = _null_key
        self.key = key
        self.value = value
        self.cleanup = cleanup
        # TODO: We could compute this 'on-the-fly' like we used to, and remove
        #       one pointer from this object, we just need to decide if it
        #       actually costs us much of anything in normal usage
        self.size = None

    def __repr__(self):
        if self.prev is None:
            prev_key = None
        else:
            prev_key = self.prev.key
        return '%s(%r n:%r p:%r)' % (self.__class__.__name__, self.key,
                                     self.next_key, prev_key)

    def run_cleanup(self):
        if self.cleanup is not None:
            self.cleanup(self.key, self.value)
        self.cleanup = None
        # Just make sure to break any refcycles, etc
        self.value = None


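# An illustrative sketch of the layout (comments only, not executable code):
# after accessing keys 'a', then 'b', then 'c', the list looks like
#
#   _most_recently_used -> c   b   a <- _least_recently_used
#
# where c.next_key == 'b' (resolved through the cache dict), b.prev is c,
# and a.next_key is _null_key, which marks the tail.
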
class LRUCache(object):
    """A class which manages a cache of entries, removing unused ones."""

    def __init__(self, max_cache=100, after_cleanup_count=None):
        self._cache = {}
        # The "HEAD" of the lru linked list
        self._most_recently_used = None
        # The "TAIL" of the lru linked list
        self._least_recently_used = None
        self._update_max_cache(max_cache, after_cleanup_count)

    def __contains__(self, key):
        return key in self._cache

    def __getitem__(self, key):
        cache = self._cache
        node = cache[key]
        # Inlined from _record_access to decrease the overhead of __getitem__
        # We also have more knowledge about structure if __getitem__ is
        # succeeding, then we know that self._most_recently_used must not be
        # None, etc.
        mru = self._most_recently_used
        if node is mru:
            # Nothing to do, this node is already at the head of the queue
            return node.value
        # Remove this node from the old location
        node_prev = node.prev
        next_key = node.next_key
        # benchmarking shows that the lookup of _null_key in globals is faster
        # than the attribute lookup for (node is self._least_recently_used)
        if next_key is _null_key:
            # 'node' is the _least_recently_used, because it doesn't have a
            # 'next' item. So move the current lru to the previous node.
            self._least_recently_used = node_prev
        else:
            node_next = cache[next_key]
            node_next.prev = node_prev
        node_prev.next_key = next_key
        # Insert this node at the front of the list
        node.next_key = mru.key
        mru.prev = node
        self._most_recently_used = node
        node.prev = None
        return node.value
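
    # The promotion above is the inlined form of _record_access. An
    # equivalent, un-inlined version (illustrative) would simply be:
    #
    #   def __getitem__(self, key):
    #       node = self._cache[key]
    #       self._record_access(node)
    #       return node.value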

    def __len__(self):
        return len(self._cache)

    def _walk_lru(self):
        """Walk the LRU list, only meant to be used in tests."""
        node = self._most_recently_used
        if node is not None:
            if node.prev is not None:
                raise AssertionError('the _most_recently_used entry is not'
                                     ' supposed to have a previous entry'
                                     ' %s' % (node,))
        while node is not None:
            if node.next_key is _null_key:
                if node is not self._least_recently_used:
                    raise AssertionError('only the last node should have'
                                         ' no next value: %s' % (node,))
                node_next = None
            else:
                node_next = self._cache[node.next_key]
                if node_next.prev is not node:
                    raise AssertionError('inconsistency found, node.next.prev'
                                         ' != node: %s' % (node,))
            if node.prev is None:
                if node is not self._most_recently_used:
                    raise AssertionError('only the _most_recently_used should'
                                         ' not have a previous node: %s'
                                         % (node,))
            else:
                if node.prev.next_key != node.key:
                    raise AssertionError('inconsistency found, node.prev.next'
                                         ' != node: %s' % (node,))
            yield node
            node = node_next

    def add(self, key, value, cleanup=None):
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        :param key: The key to store it under
        :param value: The object to store
        :param cleanup: None or a function taking (key, value) to indicate
                        'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError('cannot use _null_key as a key')
        if key in self._cache:
            node = self._cache[key]
            node.run_cleanup()
            node.value = value
            node.cleanup = cleanup
        else:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        self._record_access(node)

        if len(self._cache) > self._max_cache:
            # Trigger the cleanup
            self.cleanup()
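
    # A usage sketch for the cleanup hook (illustrative; close_handle and the
    # cached file object are hypothetical, not part of this module):
    #
    #   def close_handle(key, fileobj):
    #       fileobj.close()
    #
    #   cache = LRUCache(max_cache=10)
    #   cache.add('readme', open('README'), cleanup=close_handle)
    #
    # close_handle fires when the entry is evicted or replaced.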

    def cache_size(self):
        """Get the number of entries we will cache."""
        return self._max_cache

    def get(self, key, default=None):
        node = self._cache.get(key, None)
        if node is None:
            return default
        self._record_access(node)
        return node.value
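
    # Note the difference from __getitem__ (illustrative): cache['missing']
    # raises KeyError, while cache.get('missing', default) returns the
    # default; both promote the entry to most-recently-used when present.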

    def keys(self):
        """Get the list of keys currently cached."""
        return self._cache.keys()

    def cleanup(self):
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_count.
        """
        # Make sure the cache is shrunk to the correct size
        while len(self._cache) > self._after_cleanup_count:
            self._remove_lru()

    def __setitem__(self, key, value):
        """Add a value to the cache, there will be no cleanup function."""
        self.add(key, value, cleanup=None)

    def _record_access(self, node):
        """Record that key was accessed."""
        # Move 'node' to the front of the queue
        if self._most_recently_used is None:
            self._most_recently_used = node
            self._least_recently_used = node
            return
        elif node is self._most_recently_used:
            # Nothing to do, this node is already at the head of the queue
            return
        # We've taken care of the tail pointer, remove the node, and insert it
        # at the front of the list
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[node.next_key]
            node_next.prev = node.prev
        node.next_key = self._most_recently_used.key
        self._most_recently_used.prev = node
        self._most_recently_used = node
        node.prev = None

    def _remove_node(self, node):
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        self._cache.pop(node.key)
        # If we have removed all entries, remove the head pointer as well
        if self._least_recently_used is None:
            self._most_recently_used = None
        node.run_cleanup()
        # Now remove this node from the linked list
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[node.next_key]
            node_next.prev = node.prev
        # And remove this node's pointers
        node.prev = None
        node.next_key = _null_key

    def _remove_lru(self):
        """Remove one entry from the lru, and handle consequences."""
        self._remove_node(self._least_recently_used)


class LRUSizeCache(LRUCache):
    """An LRUCache that removes things based on the size of the values."""

    def __init__(self, max_size=1024*1024, after_cleanup_size=None,
                 compute_size=None):
        self._value_size = 0
        self._compute_size = compute_size
        if compute_size is None:
            self._compute_size = len
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        # This approximates that texts are > 0.5k in size. It only really
        # matters when we clean up the cache, so we don't want it to be too
        # large.
        LRUCache.__init__(self, max_cache=max(int(max_size/512), 1))
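
    # A usage sketch (illustrative; the lambda is an example compute_size,
    # not part of this module):
    #
    #   cache = LRUSizeCache(max_size=1024*1024,
    #                        compute_size=lambda v: len(v.encode('utf-8')))
    #   cache['key'] = u'some text value'
    #
    # With compute_size=None, plain len() is used, which suits byte strings.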

    def add(self, key, value, cleanup=None):
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        :param key: The key to store it under
        :param value: The object to store
        :param cleanup: None or a function taking (key, value) to indicate
                        'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError('cannot use _null_key as a key')
        node = self._cache.get(key, None)
        value_len = self._compute_size(value)
        if value_len >= self._after_cleanup_size:
            # The new value is 'too big to fit', as it would fill up/overflow
            # the cache all by itself
            if node is not None:
                # We won't be replacing the old node, so just remove it
                self._remove_node(node)
            if cleanup is not None:
                cleanup(key, value)
            return
        if node is None:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        else:
            self._value_size -= node.size
        node.size = value_len
        self._value_size += value_len
        self._record_access(node)

        if self._value_size > self._max_size:
            # Time to cleanup
            self.cleanup()
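
    # Behaviour note (illustrative): a value whose computed size is at least
    # after_cleanup_size is never cached; it is cleaned up immediately and
    # any existing entry under the same key is dropped:
    #
    #   cache = LRUSizeCache(max_size=10, after_cleanup_size=8)
    #   cache.add('big', 'x' * 100)   # too big, not stored
    #   'big' in cache                # -> False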