5
* This module implements functions for automatic escaping in C for better
8
* :copyright: (c) 2010 by Armin Ronacher.
14
/* Number of entries in the per-character lookup tables below.  Characters
 * are used directly as indices, so 63 entries cover code points 0..62;
 * '>' (62) is the highest character that is given a replacement. */
#define ESCAPED_CHARS_TABLE_SIZE 63
15
/* Decode the ASCII C string x into a unicode object and yield a pointer to
 * that object's Py_UNICODE buffer.  Only the buffer pointer is kept; the
 * object reference itself is discarded and therefore never released.
 * NOTE(review): a NULL result from PyUnicode_DecodeASCII is not checked.
 * The macro is a plain expression (no trailing semicolon), so the caller
 * supplies the statement terminator. */
#define UNICHR(x) (PyUnicode_AS_UNICODE((PyUnicodeObject*)PyUnicode_DecodeASCII((x), strlen(x), NULL)))
17
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
18
typedef int Py_ssize_t;
19
#define PY_SSIZE_T_MAX INT_MAX
20
#define PY_SSIZE_T_MIN INT_MIN
24
/* The "Markup" attribute fetched from the "markupsafe" module; it is called
 * to wrap the values returned by the escape functions. */
static PyObject* markup;
25
/* Indexed by character code: how many extra characters the replacement
 * needs compared to the single input character (replacement length - 1).
 * Zero means the character is copied through unchanged. */
static Py_ssize_t escaped_chars_delta_len[ESCAPED_CHARS_TABLE_SIZE];
26
/* Indexed by character code: pointer to the replacement text for each
 * character that has to be escaped. */
static Py_UNICODE *escaped_chars_repl[ESCAPED_CHARS_TABLE_SIZE];
32
/* mapping of characters to replace */
33
/* '"' becomes a 5-character numeric entity, matching its delta length of 4 */
escaped_chars_repl['"'] = UNICHR("&#34;");
34
/* '\'' becomes a 5-character numeric entity, matching its delta length of 4 */
escaped_chars_repl['\''] = UNICHR("&#39;");
35
/* '&' becomes the 5-character entity, matching its delta length of 4 */
escaped_chars_repl['&'] = UNICHR("&amp;");
36
/* '<' becomes the 4-character entity, matching its delta length of 3 */
escaped_chars_repl['<'] = UNICHR("&lt;");
37
/* '>' becomes the 4-character entity, matching its delta length of 3 */
escaped_chars_repl['>'] = UNICHR("&gt;");
39
/* lengths of those characters when replaced - 1 */
40
memset(escaped_chars_delta_len, 0, sizeof (escaped_chars_delta_len));
41
escaped_chars_delta_len['"'] = escaped_chars_delta_len['\''] = \
42
escaped_chars_delta_len['&'] = 4;
43
escaped_chars_delta_len['<'] = escaped_chars_delta_len['>'] = 3;
45
/* import markup type so that we can mark the return value */
46
module = PyImport_ImportModule("markupsafe");
49
markup = PyObject_GetAttrString(module, "Markup");
56
escape_unicode(PyUnicodeObject *in)
59
Py_UNICODE *inp = PyUnicode_AS_UNICODE(in);
60
const Py_UNICODE *inp_end = PyUnicode_AS_UNICODE(in) + PyUnicode_GET_SIZE(in);
61
Py_UNICODE *next_escp;
63
Py_ssize_t delta=0, erepl=0, delta_len=0;
65
/* First we need to figure out how long the escaped string will be */
66
while (*(inp) || inp < inp_end) {
67
if (*inp < ESCAPED_CHARS_TABLE_SIZE) {
68
delta += escaped_chars_delta_len[*inp];
69
erepl += !!escaped_chars_delta_len[*inp];
74
/* Do we need to escape anything at all? */
80
out = (PyUnicodeObject*)PyUnicode_FromUnicode(NULL, PyUnicode_GET_SIZE(in) + delta);
84
outp = PyUnicode_AS_UNICODE(out);
85
inp = PyUnicode_AS_UNICODE(in);
87
/* look for the next substitution */
89
while (next_escp < inp_end) {
90
if (*next_escp < ESCAPED_CHARS_TABLE_SIZE &&
91
(delta_len = escaped_chars_delta_len[*next_escp])) {
98
if (next_escp > inp) {
99
/* copy unescaped chars between inp and next_escp */
100
Py_UNICODE_COPY(outp, inp, next_escp-inp);
101
outp += next_escp - inp;
104
/* escape 'next_escp' */
105
Py_UNICODE_COPY(outp, escaped_chars_repl[*next_escp], delta_len);
111
Py_UNICODE_COPY(outp, inp, PyUnicode_GET_SIZE(in) - (inp - PyUnicode_AS_UNICODE(in)));
113
return (PyObject*)out;
118
escape(PyObject *self, PyObject *text)
120
PyObject *s = NULL, *rv = NULL, *html;
122
/* we don't have to escape integers, bools or floats */
123
if (PyLong_CheckExact(text) ||
124
#if PY_MAJOR_VERSION < 3
125
PyInt_CheckExact(text) ||
127
PyFloat_CheckExact(text) || PyBool_Check(text) ||
129
return PyObject_CallFunctionObjArgs(markup, text, NULL);
131
/* if the object has an __html__ method that performs the escaping */
132
html = PyObject_GetAttrString(text, "__html__");
134
rv = PyObject_CallObject(html, NULL);
139
/* otherwise make the object unicode if it isn't, then escape */
141
if (!PyUnicode_Check(text)) {
142
#if PY_MAJOR_VERSION < 3
143
PyObject *unicode = PyObject_Unicode(text);
145
PyObject *unicode = PyObject_Str(text);
149
s = escape_unicode((PyUnicodeObject*)unicode);
153
s = escape_unicode((PyUnicodeObject*)text);
155
/* convert the unicode string into a markup object. */
156
rv = PyObject_CallFunctionObjArgs(markup, (PyObject*)s, NULL);
163
escape_silent(PyObject *self, PyObject *text)
166
return escape(self, text);
167
return PyObject_CallFunctionObjArgs(markup, NULL);
172
soft_unicode(PyObject *self, PyObject *s)
174
if (!PyUnicode_Check(s))
175
#if PY_MAJOR_VERSION < 3
176
return PyObject_Unicode(s);
178
return PyObject_Str(s);
185
static PyMethodDef module_methods[] = {
186
{"escape", (PyCFunction)escape, METH_O,
187
"escape(s) -> markup\n\n"
188
"Convert the characters &, <, >, ', and \" in string s to HTML-safe\n"
189
"sequences. Use this if you need to display text that might contain\n"
190
"such characters in HTML. Marks return value as markup string."},
191
{"escape_silent", (PyCFunction)escape_silent, METH_O,
192
"escape_silent(s) -> markup\n\n"
193
"Like escape but converts None to an empty string."},
194
{"soft_unicode", (PyCFunction)soft_unicode, METH_O,
195
"soft_unicode(object) -> string\n\n"
196
"Make a string unicode if it isn't already. That way a markup\n"
197
"string is not converted back to unicode."},
198
{NULL, NULL, 0, NULL} /* Sentinel */
202
#if PY_MAJOR_VERSION < 3
204
#ifndef PyMODINIT_FUNC /* declarations for DLL import/export */
205
#define PyMODINIT_FUNC void
210
if (!init_constants())
213
Py_InitModule3("markupsafe._speedups", module_methods, "");
216
#else /* Python 3.x module initialization */
218
static struct PyModuleDef module_definition = {
219
PyModuleDef_HEAD_INIT,
220
"markupsafe._speedups",
231
PyInit__speedups(void)
233
if (!init_constants())
236
return PyModule_Create(&module_definition);