~ubuntu-branches/ubuntu/utopic/gozerbot/utopic

« back to all changes in this revision

Viewing changes to gozerbot/utils/textutils.py

  • Committer: Bazaar Package Importer
  • Author(s): Jeremy Malcolm
  • Date: 2009-09-14 09:00:29 UTC
  • mfrom: (1.1.4 upstream) (3.1.5 sid)
  • Revision ID: james.westby@ubuntu.com-20090914090029-uval0ekt72kmklxw
Tags: 0.9.1.3-3
Changed dependency on python-setuptools to python-pkg-resources
(Closes: #546435) 

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
import cgi
 
2
import re
 
3
import htmlentitydefs
 
4
 
 
5
def unescape_charref(ref):
    '''
    Convert a numeric character reference to the character it names.

    ref is the complete reference text, including the leading "&#" and
    the trailing ";", e.g. "&#65;" (decimal) or "&#x41;" (hexadecimal).
    The hexadecimal marker is accepted in either case ("x" or "X").

    Errors raised by int() or unichr() (for example ValueError for
    digits that are not valid in the selected base) propagate to the
    caller.
    '''
    digits = ref[2:-1]
    base = 10
    # HTML permits both "&#x..." and "&#X..." for hexadecimal references;
    # treating "X41" as decimal would make int() raise ValueError.
    if digits[:1] in ("x", "X"):
        digits = digits[1:]
        base = 16
    return unichr(int(digits, base))
 
12
def replace_entities(match):
    '''
    Return the replacement text for one matched HTML entity.

    match is a regular-expression match object whose full matched text
    is an entity such as "&amp;", "&#65;" or "&#x41;".  Numeric
    references (second character "#") are decoded by unescape_charref();
    named entities are looked up in htmlentitydefs.name2codepoint.

    Anything that cannot be decoded -- an unknown entity name, or a
    numeric reference for which unescape_charref() raises ValueError or
    OverflowError -- is returned unchanged.
    '''
    ent = match.group()
    if ent[1] == "#":
        try:
            return unescape_charref(ent)
        except (ValueError, OverflowError):
            # e.g. "&#abc;" or "&#99999999;": keep the original text, the
            # same way unknown named entities are kept below, rather than
            # letting one bad reference raise out of the substitution.
            return ent
    codepoint = htmlentitydefs.name2codepoint.get(ent[1:-1])
    if codepoint is None:
        # Unknown entity name: leave the text as it was.
        return ent
    return unichr(codepoint)
 
22
 
 
23
def html_unescape(data):
    '''
    Replace HTML entities in data with the characters they stand for.

    Every substring matching the entity pattern (named, decimal or
    hexadecimal form) is handed to replace_entities() and substituted
    with whatever that function returns.
    '''
    entity_pattern = re.compile(r"&#?[A-Za-z0-9]+?;")
    return entity_pattern.sub(replace_entities, data)
 
28
 
 
29
def html_escape(data):
    '''
    Return data escaped for HTML by cgi.escape() with its default
    arguments.
    '''
    escaped = cgi.escape(data)
    return escaped
 
34