1
# -*- coding: utf-8 -*-
3
# Copyright (C) 2006-2007 Edgewall Software
6
# This software is licensed as described in the file COPYING, which
7
# you should have received as part of this distribution. The terms
8
# are also available at http://genshi.edgewall.org/wiki/License.
10
# This software consists of voluntary contributions made by many
11
# individuals. For the exact contribution history, see the revision
12
# history and logs, available at http://genshi.edgewall.org/log/.
14
"""Various utility classes and functions."""
21
from sets import ImmutableSet as frozenset
22
from sets import Set as set
24
# Declares the docstring markup for documentation tools: reStructuredText,
# written in English.
__docformat__ = 'restructuredtext en'
28
"""A dictionary-like object that stores only a certain number of items, and
29
discards its least recently used item when full.
31
>>> cache = LRUCache(3)
41
Adding new items to the cache does not increase its size. Instead, the least
42
recently used item is dropped:
50
Iterating over the cache returns the keys, starting with the most recently
59
This code is based on the LRUCache class from ``myghtyutils.util``, written
60
by Mike Bayer and released under the MIT license. See:
62
http://svn.myghty.org/myghtyutils/trunk/lib/myghtyutils/util.py
66
def __init__(self, key, value):
67
self.previous = self.next = None
71
return repr(self.value)
73
def __init__(self, capacity):
75
self.capacity = capacity
79
def __contains__(self, key):
    """Return whether `key` is currently stored in the cache.

    Only the backing ``_dict`` is consulted; the ordering of the cached
    items is not touched by a membership test.

    :param key: the key to look up
    :return: ``True`` if an item is stored under `key`, ``False`` otherwise
    """
    return key in self._dict
89
return len(self._dict)
91
def __getitem__(self, key):
92
item = self._dict[key]
93
self._update_item(item)
96
def __setitem__(self, key, value):
97
item = self._dict.get(key)
99
item = self._Item(key, value)
100
self._dict[key] = item
101
self._insert_item(item)
104
self._update_item(item)
108
return repr(self._dict)
110
def _insert_item(self, item):
112
item.next = self.head
113
if self.head is not None:
114
self.head.previous = item
120
def _manage_size(self):
121
while len(self._dict) > self.capacity:
122
olditem = self._dict[self.tail.key]
123
del self._dict[self.tail.key]
124
if self.tail != self.head:
125
self.tail = self.tail.previous
126
self.tail.next = None
128
self.head = self.tail = None
130
def _update_item(self, item):
131
if self.head == item:
134
previous = item.previous
135
previous.next = item.next
136
if item.next is not None:
137
item.next.previous = previous
142
item.next = self.head
143
self.head.previous = self.head = item
147
"""Flattens a potentially nested sequence into a flat list.
149
:param items: the sequence to flatten
153
>>> flatten([1, (2, 3), 4])
155
>>> flatten([1, (2, [3, 4]), 5])
160
if isinstance(item, (frozenset, list, set, tuple)):
161
retval += flatten(item)
166
def plaintext(text, keeplinebreaks=True):
167
"""Returns the text as a `unicode` string with all entities and tags
170
>>> plaintext('<b>1 &lt; 2</b>')
173
The `keeplinebreaks` parameter can be set to ``False`` to replace any line
174
breaks by simple spaces:
176
>>> plaintext('''<b>1
178
... 2</b>''', keeplinebreaks=False)
181
:param text: the text to convert to plain text
182
:param keeplinebreaks: whether line breaks in the text should be kept intact
183
:return: the text with tags and entities removed
185
text = stripentities(striptags(text))
186
if not keeplinebreaks:
187
text = text.replace(u'\n', u' ')
190
# Matches a single entity reference.
#   group 1: body of a numeric reference -- either decimal digits, or an
#            ``x``/``X`` prefix followed by hex digits; the trailing
#            semicolon is optional for numeric references.
#   group 2: name of a named entity, which must be terminated by a semicolon.
# NOTE(review): the pattern accepts an uppercase ``X`` hex prefix, while the
# ``ref.startswith('x')`` check in ``_replace_entity`` is case-sensitive --
# confirm that references such as ``&#X2026;`` are handled as intended.
_STRIPENTITIES_RE = re.compile(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)')
191
def stripentities(text, keepxmlentities=False):
192
"""Return a copy of the given text with any character or numeric entities
193
replaced by the equivalent Unicode characters.
195
>>> stripentities('1 &lt; 2')
197
>>> stripentities('more &hellip;')
199
>>> stripentities('&#8230;')
201
>>> stripentities('&#x2026;')
204
If the `keepxmlentities` parameter is provided and is a truth value, the
205
core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are left intact.
207
>>> stripentities('1 &lt; 2 &hellip;', keepxmlentities=True)
210
def _replace_entity(match):
211
if match.group(1): # numeric entity
213
if ref.startswith('x'):
214
ref = int(ref[1:], 16)
218
else: # character entity
220
if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'):
223
return unichr(htmlentitydefs.name2codepoint[ref])
226
return u'&%s;' % ref
229
return _STRIPENTITIES_RE.sub(_replace_entity, text)
231
# Matches either an HTML/XML comment (``<!-- ... -->``, shortest match) or any
# ``<...>`` span running up to the next ``>``. No flags are passed, so the
# ``.`` in the comment alternative does not match newline characters.
_STRIPTAGS_RE = re.compile(r'(<!--.*?-->|<[^>]*>)')
233
"""Return a copy of the text with any XML/HTML tags removed.
235
>>> striptags('<span>Foo</span> bar')
237
>>> striptags('<span class="bar">Foo</span>')
239
>>> striptags('Foo<br />')
242
HTML/XML comments are stripped, too:
244
>>> striptags('<!-- <blub>hehe</blah> -->test')
247
:param text: the string to remove tags from
248
:return: the text with tags removed
250
return _STRIPTAGS_RE.sub('', text)