# -*- coding: utf-8 -*-
"""
    Implements a Markup string.

    :copyright: (c) 2010 by Armin Ronacher.
    :license: BSD, see LICENSE for more details.
"""
13
import re
import string
try:
    from collections.abc import Mapping
except ImportError:  # Python 2 has no ``collections.abc``
    from collections import Mapping

from markupsafe._compat import text_type, string_types, int_types, \
     unichr, iteritems, PY2
18
__all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent']
21
_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
22
_entity_re = re.compile(r'&([^;]+);')
25
class Markup(text_type):
    r"""Marks a string as being safe for inclusion in HTML/XML output without
    needing to be escaped.  This implements the `__html__` interface a couple
    of frameworks and web applications use.  :class:`Markup` is a direct
    subclass of `unicode` and provides all the methods of `unicode` just that
    it escapes arguments passed and always returns `Markup`.

    The `escape` function returns markup objects so that double escaping can't
    happen.

    The constructor of the :class:`Markup` class can be used for three
    different things:  When passed an unicode object it's assumed to be safe,
    when passed an object with an HTML representation (has an `__html__`
    method) that representation is used, otherwise the object passed is
    converted into a unicode string and then assumed to be safe:

    >>> Markup("Hello <em>World</em>!")
    Markup(u'Hello <em>World</em>!')
    >>> class Foo(object):
    ...  def __html__(self):
    ...   return '<a href="#">foo</a>'
    ...
    >>> Markup(Foo())
    Markup(u'<a href="#">foo</a>')

    If you want object passed being always treated as unsafe you can use the
    :meth:`escape` classmethod to create a :class:`Markup` object:

    >>> Markup.escape("Hello <em>World</em>!")
    Markup(u'Hello &lt;em&gt;World&lt;/em&gt;!')

    Operations on a markup string are markup aware which means that all
    arguments are passed through the :func:`escape` function:

    >>> em = Markup("<em>%s</em>")
    >>> em % "foo & bar"
    Markup(u'<em>foo &amp; bar</em>')
    >>> strong = Markup("<strong>%(text)s</strong>")
    >>> strong % {'text': '<blink>hacker here</blink>'}
    Markup(u'<strong>&lt;blink&gt;hacker here&lt;/blink&gt;</strong>')
    >>> Markup("<em>Hello</em> ") + "<foo>"
    Markup(u'<em>Hello</em> &lt;foo&gt;')
    """
    __slots__ = ()

    def __new__(cls, base=u'', encoding=None, errors='strict'):
        # Objects exposing ``__html__`` provide their own safe representation.
        if hasattr(base, '__html__'):
            base = base.__html__()
        # Only forward encoding/errors when decoding was actually requested.
        if encoding is None:
            return text_type.__new__(cls, base)
        return text_type.__new__(cls, base, encoding, errors)

    def __html__(self):
        return self

    def __add__(self, other):
        # The right operand is escaped before concatenation.
        if isinstance(other, string_types) or hasattr(other, '__html__'):
            return self.__class__(super(Markup, self).__add__(self.escape(other)))
        return NotImplemented

    def __radd__(self, other):
        # The left operand is escaped, then this markup is appended to it.
        if hasattr(other, '__html__') or isinstance(other, string_types):
            return self.escape(other).__add__(self)
        return NotImplemented

    def __mul__(self, num):
        if isinstance(num, int_types):
            return self.__class__(text_type.__mul__(self, num))
        return NotImplemented
    __rmul__ = __mul__

    def __mod__(self, arg):
        # Wrap every argument so it is escaped lazily when ``%`` converts it.
        if isinstance(arg, tuple):
            arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg)
        else:
            arg = _MarkupEscapeHelper(arg, self.escape)
        return self.__class__(text_type.__mod__(self, arg))

    def __repr__(self):
        return '%s(%s)' % (
            self.__class__.__name__,
            text_type.__repr__(self)
        )

    # The following methods reuse the docstrings of the text type methods
    # they wrap.
    def join(self, seq):
        return self.__class__(text_type.join(self, map(self.escape, seq)))
    join.__doc__ = text_type.join.__doc__

    def split(self, *args, **kwargs):
        return list(map(self.__class__, text_type.split(self, *args, **kwargs)))
    split.__doc__ = text_type.split.__doc__

    def rsplit(self, *args, **kwargs):
        return list(map(self.__class__, text_type.rsplit(self, *args, **kwargs)))
    rsplit.__doc__ = text_type.rsplit.__doc__

    def splitlines(self, *args, **kwargs):
        return list(map(self.__class__, text_type.splitlines(
            self, *args, **kwargs)))
    splitlines.__doc__ = text_type.splitlines.__doc__

    def unescape(self):
        r"""Unescape markup again into an text_type string.  This also resolves
        known HTML4 and XHTML entities:

        >>> Markup("Main &raquo; <em>About</em>").unescape()
        u'Main \xbb <em>About</em>'
        """
        from markupsafe._constants import HTML_ENTITIES

        def handle_match(m):
            name = m.group(1)
            if name in HTML_ENTITIES:
                return unichr(HTML_ENTITIES[name])
            try:
                # Numeric references: hexadecimal (``#x..``) or decimal.
                if name[:2] in ('#x', '#X'):
                    return unichr(int(name[2:], 16))
                elif name.startswith('#'):
                    return unichr(int(name[1:]))
            except ValueError:
                pass
            # Unknown or malformed references are dropped.
            return u''
        return _entity_re.sub(handle_match, text_type(self))

    def striptags(self):
        r"""Unescape markup into an text_type string and strip all tags.  This
        also resolves known HTML4 and XHTML entities.  Whitespace is
        normalized to one:

        >>> Markup("Main &raquo;  <em>About</em>").striptags()
        u'Main \xbb About'
        """
        stripped = u' '.join(_striptags_re.sub('', self).split())
        return Markup(stripped).unescape()

    @classmethod
    def escape(cls, s):
        """Escape the string.  Works like :func:`escape` with the difference
        that for subclasses of :class:`Markup` this function would return the
        correct subclass.
        """
        rv = escape(s)
        if rv.__class__ is not cls:
            return cls(rv)
        return rv

    def make_simple_escaping_wrapper(name):
        # Class-body helper (deleted below): builds a method that escapes
        # its arguments, calls the text type method ``name`` and re-wraps
        # the result in the current class.
        orig = getattr(text_type, name)

        def func(self, *args, **kwargs):
            args = _escape_argspec(list(args), enumerate(args), self.escape)
            _escape_argspec(kwargs, iteritems(kwargs), self.escape)
            return self.__class__(orig(self, *args, **kwargs))
        func.__name__ = orig.__name__
        func.__doc__ = orig.__doc__
        return func

    for method in '__getitem__', 'capitalize', \
                  'title', 'lower', 'upper', 'replace', 'ljust', \
                  'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
                  'translate', 'expandtabs', 'swapcase', 'zfill':
        locals()[method] = make_simple_escaping_wrapper(method)

    # new in python 2.5
    if hasattr(text_type, 'partition'):
        def partition(self, sep):
            return tuple(map(self.__class__,
                             text_type.partition(self, self.escape(sep))))

        def rpartition(self, sep):
            return tuple(map(self.__class__,
                             text_type.rpartition(self, self.escape(sep))))

    # new in python 2.6
    if hasattr(text_type, 'format'):
        def format(*args, **kwargs):
            # ``self`` is taken from ``args`` so that a keyword argument
            # named ``self`` can still be used as a format field.
            self, args = args[0], args[1:]
            formatter = EscapeFormatter(self.escape)
            kwargs = _MagicFormatMapping(args, kwargs)
            return self.__class__(formatter.vformat(self, args, kwargs))

        def __html_format__(self, format_spec):
            if format_spec:
                raise ValueError('Unsupported format specification '
                                 'for Markup.')
            return self

    # not in python 3
    if hasattr(text_type, '__getslice__'):
        __getslice__ = make_simple_escaping_wrapper('__getslice__')

    # Keep the class namespace free of the construction helpers.
    del method, make_simple_escaping_wrapper
216
class _MagicFormatMapping(Mapping):
217
"""This class implements a dummy wrapper to fix a bug in the Python
218
standard library for string formatting.
220
See http://bugs.python.org/issue13598 for information about why
224
def __init__(self, args, kwargs):
226
self._kwargs = kwargs
229
def __getitem__(self, key):
231
idx = self._last_index
232
self._last_index += 1
234
return self._args[idx]
238
return self._kwargs[key]
241
return iter(self._kwargs)
244
return len(self._kwargs)
247
if hasattr(text_type, 'format'):
    class EscapeFormatter(string.Formatter):
        """Formatter that passes every formatted field through ``escape``."""

        def __init__(self, escape):
            # Callable applied to each formatted field value.
            self.escape = escape

        def format_field(self, value, format_spec):
            """Format ``value`` and return the escaped result as text.

            Objects with ``__html_format__`` format themselves; objects
            with only ``__html__`` are rendered through that method and
            may not be combined with a format specification.

            :raises ValueError: if a format specification is given for an
                object that only provides ``__html__``.
            """
            if hasattr(value, '__html_format__'):
                rv = value.__html_format__(format_spec)
            elif hasattr(value, '__html__'):
                if format_spec:
                    raise ValueError('No format specification allowed '
                                     'when formatting an object with '
                                     'its __html__ method.')
                rv = value.__html__()
            else:
                rv = string.Formatter.format_field(self, value, format_spec)
            return text_type(self.escape(rv))
267
def _escape_argspec(obj, iterable, escape):
268
"""Helper for various string-wrapped functions."""
269
for key, value in iterable:
270
if hasattr(value, '__html__') or isinstance(value, string_types):
271
obj[key] = escape(value)
275
class _MarkupEscapeHelper(object):
276
"""Helper for Markup.__mod__"""
278
def __init__(self, obj, escape):
282
__getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x], s.escape)
283
__unicode__ = __str__ = lambda s: text_type(s.escape(s.obj))
284
__repr__ = lambda s: str(s.escape(repr(s.obj)))
285
__int__ = lambda s: int(s.obj)
286
__float__ = lambda s: float(s.obj)
289
# We have to import these down here because the speedups and native
# modules import the Markup type, which is defined above.
try:
    from markupsafe._speedups import escape, escape_silent, soft_unicode
except ImportError:
    # Fall back to the native module when the speedups cannot be imported.
    from markupsafe._native import escape, escape_silent, soft_unicode

if not PY2:
    soft_str = soft_unicode
    __all__.append('soft_str')