2
# Copyright 2004 Apache Software Foundation
4
# Licensed under the Apache License, Version 2.0 (the "License"); you
5
# may not use this file except in compliance with the License. You
6
# may obtain a copy of the License at
8
# http://www.apache.org/licenses/LICENSE-2.0
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
# implied. See the License for the specific language governing
14
# permissions and limitations under the License.
16
# Originally developed by Gregory Trubetskoy.
18
# This was donated by Nicolas Lehuen, and also posted to the Python Cookbook
19
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/302997
21
# $Id: cache.py 374268 2006-02-02 05:31:45Z nlehuen $
24
from time import time, mktime
25
from rfc822 import parsedate
26
from calendar import timegm
33
from threading import Lock
35
from dummy_threading import Lock
37
NOT_INITIALIZED = object()
40
""" A cache entry, mostly an internal object. """
41
def __init__(self, key):
44
self._value=NOT_INITIALIZED
48
""" An abstract, multi-threaded cache object. """
50
def __init__(self, max_size=0):
51
""" Builds a cache with a limit of max_size entries.
52
If this limit is exceeded, the Least Recently Used entry is discarded.
53
if max_size==0, the cache is unbounded (no LRU rule is applied).
56
self._maxsize=max_size
60
# Header of the access list
62
self._head=Entry(None)
63
self._head._previous=self._head
64
self._head._next=self._head
66
def __setitem__(self, name, value):
67
""" Populates the cache with a given name and value. """
70
entry = self._get_entry(key)
74
self._pack(entry,value)
79
def __getitem__(self, name):
    """ Gets a value from the cache, builds it if required.

        name is the object passed to the [] operator. The lookup is
        delegated to _checkitem(), which is documented in this file as
        returning the tuple (is_new, key, value, entry); only the value
        (index 2) is handed back to the caller.
    """
    return self._checkitem(name)[2]
84
def __delitem__(self, name):
92
def _get_entry(self,key):
95
entry = self._dict.get(key)
100
entry._next = entry._previous = None
109
def _checkitem(self, name):
110
""" Gets a value from the cache, builds it if required.
111
Returns a tuple is_new, key, value, entry.
112
If is_new is True, the result had to be rebuilt.
116
entry = self._get_entry(key)
118
entry._lock.acquire()
120
value = self._unpack(entry)
122
if value is NOT_INITIALIZED:
123
opened = self.check(key, name, entry)
124
value = self.build(key, name, opened, entry)
126
self._pack(entry, value)
129
opened = self.check(key, name, entry)
130
if opened is not None:
131
value = self.build(key, name, opened, entry)
133
self._pack(entry, value)
135
return is_new, key, value, entry
137
entry._lock.release()
140
""" Returns the Most Recently Used key """
144
return self._head._previous._key
151
""" Returns the Least Recently Used key """
155
return self._head._next._key
162
""" Override this method to extract a key from the name passed to the [] operator """
166
""" Override this method if you want to do something each time the underlying dictionary is modified (e.g. make it persistent). """
170
""" Clears the cache """
175
self._head._next=self._head
176
self._head._previous=self._head
180
def check(self, key, name, entry):
181
""" Override this method to check whether the entry with the given name is stale. Return None if it is fresh
182
or an opened resource if it is stale. The object returned will be passed to the 'build' method as the 'opened' parameter.
183
Use the 'entry' parameter to store meta-data if required. Don't worry about multiple threads accessing the same name,
184
as this method is properly isolated.
188
def build(self, key, name, opened, entry):
    """ Build the cached value with the given name from the given opened resource. Use entry to obtain or store meta-data if needed.
        Don't worry about multiple threads accessing the same name, as this method is properly isolated.

        key    -- the cache key the value is stored under
        name   -- the name that was passed to the [] operator
        opened -- the object returned by check() for a stale or new entry
        entry  -- the cache entry, usable to store meta-data

        This base implementation is abstract: it always raises
        NotImplementedError, so it has to be overridden.
    """
    raise NotImplementedError()
194
def _access(self, entry):
    """ Internal use only, must be invoked within a cache lock. Updates the access list.

        The access list is a circular doubly linked list whose sentinel is
        self._head: self._head._next is the least recently used entry and
        self._head._previous the most recently used one. This moves (or
        inserts) entry to the most-recently-used end.

        entry -- the entry being accessed; a brand new entry is expected to
                 have both _next and _previous set to None.
    """
    # Nothing to do when the entry is already the most recently used one.
    if entry._next is not self._head:
        if entry._previous is not None:
            # remove the entry from the access list
            entry._previous._next = entry._next
            entry._next._previous = entry._previous
        # insert the entry at the end of the access list
        entry._previous = self._head._previous
        entry._previous._next = entry
        entry._next = self._head
        entry._next._previous = entry
        # Defensive: with an empty list the splice above has already made
        # the head point at the entry, so this only guards against a head
        # that still points at itself.
        if self._head._next is self._head:
            self._head._next = entry
210
" Internal use only, must be invoked within a cache lock. Removes the LRU entry if needed. """
211
if len(self._dict)>self._maxsize:
213
lru._previous._next=lru._next
214
lru._next._previous=lru._previous
215
del self._dict[lru._key]
217
def _pack(self, entry, value):
218
""" Store the value in the entry. """
221
def _unpack(self, entry):
222
""" Recover the value from the entry, returns NOT_INITIALIZED if it is not OK. """
225
class WeakCache(Cache):
226
""" This cache holds weak references to the values it stores. Whenever a value is not longer
227
normally referenced, it is removed from the cache. Useful for sharing the result of long
228
computations but letting them go as soon as they are not needed by anybody.
231
def _pack(self, entry, value):
232
entry._value=weakref.ref(value, lambda ref: self.__delitem__(entry._key))
234
def _unpack(self, entry):
235
if entry._value is NOT_INITIALIZED:
236
return NOT_INITIALIZED
238
value = entry._value()
240
return NOT_INITIALIZED
244
class FileCache(Cache):
245
""" A file cache. Returns the content of the files as a string, given their filename.
246
Whenever the files are modified (according to their modification time) the cache is updated.
247
Override the build method to obtain more interesting behaviour.
249
def __init__(self, max_size=0, mode='rb'):
250
Cache.__init__(self, max_size)
253
def check(self, key, name, entry):
254
timestamp = stat(key).st_mtime
256
if entry._value is NOT_INITIALIZED:
257
entry._timestamp = timestamp
258
return file(key, self.mode)
260
if entry._timestamp != timestamp:
261
entry._timestamp = timestamp
262
return file(key, self.mode)
266
def build(self, key, name, opened, entry):
267
""" Return the content of the file as a string. Override this for better behaviour. """
273
def parseRFC822Time(t):
    """ Converts an RFC 822 date string (as found in the HTTP Date and
        Expires headers) into a number of seconds since the epoch.

        t -- the date string, e.g. 'Sun, 06 Nov 1994 08:49:37 GMT'

        Returns the timestamp as a float. An unparseable string makes
        parsedate() return None, which surfaces as a TypeError.
    """
    # parsedate() yields a time tuple without applying any zone offset, and
    # HTTP dates are expressed in GMT, so the tuple must be read as UTC
    # (timegm) rather than as local time (mktime); otherwise the result is
    # shifted by the local UTC offset and depends on DST.
    return float(timegm(parsedate(t)))
276
# Captures the number of seconds of a "max-age=N" Cache-Control directive,
# ignoring case. Written as a raw string so that \s and \d reach the regex
# engine untouched instead of being read as (invalid) string escapes.
re_max_age = re.compile(r'max-age\s*=\s*(\d+)', re.I)
278
class HTTPEntity(object):
279
def __init__(self, entity, metadata):
281
self.metadata=metadata
284
return 'HTTPEntity(%s, %s)'%(repr(self.entity), self.metadata)
289
class HTTPCache(Cache):
290
""" An HTTP cache. Returns the entity found at the given URL.
291
Uses Expires, ETag and Last-Modified headers to minimize bandwidth usage.
292
Partial Cache-Control support (only max-age is supported).
294
def check(self, key, name, entry):
295
request = urllib2.Request(key)
298
if time()<entry._expires:
300
except AttributeError:
303
header, value = entry._validator
304
request.headers[header]=value
305
except AttributeError:
309
opened = urllib2.urlopen(request)
310
headers = opened.info()
312
# expiration handling
315
match = re_max_age.match(headers['cache-control'])
317
entry._expires=time()+int(match.group(1))
319
except (KeyError, ValueError):
323
date = parseRFC822Time(headers['date'])
324
expires = parseRFC822Time(headers['expires'])
325
entry._expires = time()+(expires-date)
333
entry._validator='If-None-Match', headers['etag']
339
entry._validator='If-Modified-Since', headers['last-modified']
344
except urllib2.HTTPError, error:
345
if opened: opened.close()
351
def build(self, key, name, opened, entry):
353
return HTTPEntity(opened.read(), dict(opened.info()))
357
# Matches runs of non-word characters. The module caches in this file use it
# to derive a module name from a key by replacing each such run with '_'.
re_not_word = re.compile(r'\W+')
359
class ModuleCache(FileCache):
360
""" A module cache. Give it a file name, it returns a module
361
which results from the execution of the Python script it contains.
362
This module is not inserted into sys.modules.
364
def __init__(self, max_size=0):
365
FileCache.__init__(self, max_size, 'r')
367
def build(self, key, name, opened, entry):
369
module = new.module(re_not_word.sub('_',key))
370
module.__file__ = key
371
exec opened in module.__dict__
376
class HttpModuleCache(HTTPCache):
377
""" A module cache. Give it an HTTP URL, it returns a module
378
which results from the execution of the Python script it contains.
379
This module is not inserted into sys.modules.
381
def __init__(self, max_size=0):
382
HTTPCache.__init__(self, max_size)
384
def build(self, key, name, opened, entry):
386
module = new.module(re_not_word.sub('_',key))
387
module.__file__ = key
388
text = opened.read().replace('\r\n', '\n')
389
code = compile(text, name, 'exec')
390
exec code in module.__dict__
395
class FunctionCache(Cache):
396
def __init__(self, function, max_size=0):
397
Cache.__init__(self, max_size)
398
self.function=function
400
def __call__(self, *args, **kw):
402
# a dict is not hashable so we build a tuple of (key, value) pairs
403
kw = tuple(kw.iteritems())
404
return self[args, kw]
406
return self[args, ()]
408
def build(self, key, name, opened, entry):
410
return self.function(*args, **dict(kw))