=== modified file 'NEWS' --- NEWS 2011-03-19 08:35:57 +0000 +++ NEWS 2011-03-24 23:02:29 +0000 @@ -39,6 +39,9 @@ Improves performance on projects with long histories like emacs. (John Arbash Meinel) + - Fix escaping of filenames in revision views. + (William Grant, #740142) + 1.18 [10Nov2010] ---------------- === modified file 'loggerhead/controllers/view_ui.py' --- loggerhead/controllers/view_ui.py 2011-03-12 17:15:08 +0000 +++ loggerhead/controllers/view_ui.py 2011-03-23 02:37:07 +0000 @@ -17,7 +17,6 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # -import cgi import os import time @@ -65,7 +64,7 @@ extra_lines = len(file_lines) - len(hl_lines) hl_lines.extend([u''] * extra_lines) else: - hl_lines = map(cgi.escape, file_lines) + hl_lines = map(util.html_escape, file_lines) return hl_lines; === modified file 'loggerhead/templatefunctions.py' --- loggerhead/templatefunctions.py 2011-03-02 14:07:21 +0000 +++ loggerhead/templatefunctions.py 2011-03-23 04:04:44 +0000 @@ -14,8 +14,8 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # -import cgi import os +import urllib import pkg_resources @@ -23,6 +23,7 @@ import loggerhead from loggerhead.zptsupport import zpt +from loggerhead.util import html_format templatefunctions = {} @@ -49,16 +50,21 @@ if style == 'fragment': def file_link(filename): if currently_showing and filename == currently_showing: - return '%s' % ( - cgi.escape(filename), cgi.escape(filename)) + return html_format( + '%s', + urllib.quote(filename.encode('utf-8')), filename) else: return revision_link( - url, entry.revno, filename, '#' + filename) + url, entry.revno, filename, + '#' + urllib.quote(filename.encode('utf-8'))) else: def file_link(filename): - return '%s' % ( - url(['/revision', entry.revno]), '#' + filename, cgi.escape(filename), - cgi.escape(entry.revno), cgi.escape(filename)) + return html_format( + '' + '%s', + url(['/revision', entry.revno]), + '#' + 
urllib.quote(filename.encode('utf-8')), + filename, entry.revno, filename) return _pt('revisionfilechanges').expand( entry=entry, file_changes=file_changes, file_link=file_link, **templatefunctions) @@ -122,14 +128,16 @@ @templatefunc def view_link(url, revno, path): - return '%s' % ( - url(['/view', revno, path]), cgi.escape(path), cgi.escape(path)) + return html_format( + '%s', + url(['/view', revno, path]), path, path) + @templatefunc def revision_link(url, revno, path, frag=''): - return '%s' % ( - url(['/revision', revno, path]), frag, cgi.escape(path), - cgi.escape(revno), cgi.escape(path)) + return html_format( + '%s', + url(['/revision', revno, path]), frag, path, revno, path) @templatefunc === modified file 'loggerhead/tests/__init__.py' --- loggerhead/tests/__init__.py 2011-03-19 08:35:57 +0000 +++ loggerhead/tests/__init__.py 2011-03-23 03:59:38 +0000 @@ -26,5 +26,6 @@ 'test_simple', 'test_revision_ui', 'test_templating', + 'test_util', ]])) return standard_tests === modified file 'loggerhead/tests/test_simple.py' --- loggerhead/tests/test_simple.py 2011-03-19 08:35:57 +0000 +++ loggerhead/tests/test_simple.py 2011-03-23 03:36:23 +0000 @@ -56,9 +56,11 @@ self.filecontents = ('some\nmultiline\ndata\n' 'with&")) + + def test_html_format(self): + self.assertEqual( + '<baz>&', + html_format( + '%s', "baz\"'", "&")) === modified file 'loggerhead/util.py' --- loggerhead/util.py 2010-04-28 21:41:32 +0000 +++ loggerhead/util.py 2011-03-23 05:21:34 +0000 @@ -20,7 +20,6 @@ # import base64 -import cgi import datetime import logging import re @@ -214,16 +213,47 @@ # only do this if unicode turns out to be a problem #_BADCHARS_RE = re.compile(ur'[\u007f-\uffff]') +# Can't be a dict; & needs to be done first. +html_entity_subs = [ + ("&", "&amp;"), + ('"', "&quot;"), + ("'", "&#39;"), # &apos; is defined in XML, but not HTML. + (">", "&gt;"), + ("<", "&lt;"), + ] + + +def html_escape(s): + """Transform dangerous (X)HTML characters into entities. 
+ + Like cgi.escape, except also escaping " and '. This makes it safe to use + in both attribute and element content. + + If you want to safely fill a format string with escaped values, use + html_format instead + """ + for char, repl in html_entity_subs: + s = s.replace(char, repl) + return s + + +def html_format(template, *args): + """Safely format an HTML template string, escaping the arguments. + + The template string must not be user-controlled; it will not be escaped. + """ + return template % tuple(html_escape(arg) for arg in args) + + # FIXME: get rid of this method; use fixed_width() and avoid XML(). - def html_clean(s): """ clean up a string for html display. expand any tabs, encode any html entities, and replace spaces with '&nbsp;'. this is primarily for use in displaying monospace text. """ - s = cgi.escape(s.expandtabs()) + s = html_escape(s.expandtabs()) s = s.replace(' ', '&nbsp;') return s @@ -269,7 +299,7 @@ except UnicodeDecodeError: s = s.decode('iso-8859-15') - s = cgi.escape(s).expandtabs().replace(' ', NONBREAKING_SPACE) + s = html_escape(s).expandtabs().replace(' ', NONBREAKING_SPACE) return HSC.clean(s).replace('\n', '<br/>')