~grischa/gwibber/bugfix-lp-554205

« back to all changes in this revision

Viewing changes to gwibber/microblog/plugins/twitter/__init__.py

  • Committer: grischa
  • Date: 2010-12-01 12:51:02 UTC
  • Revision ID: grischa@gmail.com-20101201125102-5dk578fgctaktw2s
Changed the HTML entity unescaping to a more robust method.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
from gwibber.microblog import network, util
2
 
import htmllib, re
 
2
import htmlentitydefs, re
3
3
import gnomekeyring
4
4
from oauth import oauth
5
5
from gwibber.microblog.util import log, exceptions
53
53
URL_PREFIX = "https://twitter.com"
54
54
API_PREFIX = "https://api.twitter.com/1"
55
55
 
56
 
def unescape(s):
  """Run s through htmllib's HTMLParser and return the text it saved.

  The parser is constructed with None as its formatter, so nothing is
  rendered; the result is whatever the parser collected between the
  save_bgn() and save_end() calls.
  """
  parser = htmllib.HTMLParser(None)
  # Start capturing character data before any input is parsed.
  parser.save_bgn()
  parser.feed(s)
  # save_end() stops the capture and hands back the collected text.
  return parser.save_end()
 
56
# from http://effbot.org/zone/re-sub.htm#unescape-html
 
57
def unescape(text):
  """Replace HTML character references and named entities in text.

  Handles decimal references (&#65;), hexadecimal references (&#x41; or
  &#X41;) and named entities listed in htmlentitydefs.name2codepoint
  (&amp;, &lt;, ...).  Anything that cannot be decoded -- an unknown
  entity name, malformed digits, or a code point that unichr() rejects --
  is left in the output exactly as it appeared in the input.

  Returns the text with every decodable reference substituted.
  """
  def fixup(m):
    entity = m.group(0)
    if entity[:2] == "&#":
      # character reference
      try:
        # the hex marker may be written as either "x" or "X"
        if entity[:3].lower() == "&#x":
          return unichr(int(entity[3:-1], 16))
        return unichr(int(entity[2:-1]))
      except (ValueError, OverflowError):
        # ValueError: malformed digits or code point out of range.
        # OverflowError: number too large for unichr() to accept at all.
        pass
    else:
      # named entity
      try:
        return unichr(htmlentitydefs.name2codepoint[entity[1:-1]])
      except KeyError:
        pass
    return entity # leave as is
  return re.sub(r"&#?\w+;", fixup, text)
 
77
 
61
78
 
62
79
class Client:
63
80
  def __init__(self, acct):