#!/usr/bin/env python -tt
# encoding: utf-8
#
# File: mainpage/templatetags/wl_markdown.py
#
# Created by Holger Rapp on 2009-02-27.
# Copyright (c) 2009 HolgerRapp@gmx.net. All rights reserved.
#
# Last Modified: $Date$
#

from django import template
from django.conf import settings
from django.utils.encoding import smart_str, force_unicode
from django.utils.safestring import mark_safe
from settings import BLEACH_ALLOWED_TAGS, BLEACH_ALLOWED_ATTRIBUTES

# Try to get a not so fully broken markdown module
import markdown
if markdown.version_info[0] < 2:
    raise ImportError, 'Markdown library too old!'
from markdown import markdown
import re
import bleach

from BeautifulSoup import BeautifulSoup, NavigableString

# If we can import a Wiki module with Articles, we
# will check all internal links starting with /wiki/
# for missing wiki pages
try:
    from wiki.models import Article, ChangeSet
    check_for_missing_wikipages = True
except ImportError:
    check_for_missing_wikipages = False

# We will also need the site domain
from django.contrib.sites.models import Site
from settings import SITE_ID, SMILEYS, SMILEY_DIR, \
    SMILEY_PREESCAPING, BZR_URL

try:
    _domain = Site.objects.get(pk=SITE_ID).domain
except Exception:
    _domain = ''

# Getting local domain lists
try:
    from settings import LOCAL_DOMAINS as _LOCAL_DOMAINS
    LOCAL_DOMAINS = [_domain] + _LOCAL_DOMAINS
except ImportError:
    LOCAL_DOMAINS = [_domain]


register = template.Library()


def _insert_smileys(text):
    """Search for smiley symbols in the given text and replace them with the
    corresponding images.

    A symbol is only replaced when it stands alone as a word, so smileys
    embedded in longer tokens (e.g. 'http://...') are left untouched.

    """
    words = text.split(' ')
    for i, word in enumerate(words):
        for sc, img in SMILEYS:
            if word == sc:
                words[i] = "<img src='%s%s' alt='%s' />" % (
                    SMILEY_DIR, img, img)
    return ' '.join(words)
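
# An illustrative call (the SMILEYS/SMILEY_DIR values here are assumed for
# the example, not taken from the actual settings):
#
#   With SMILEYS = [(':)', 'smile.png')] and SMILEY_DIR = '/smileys/':
#   _insert_smileys('hello :) world')
#   -> "hello <img src='/smileys/smile.png' alt='smile.png' /> world"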


def _insert_smiley_preescaping(text):
    """Apply the SMILEY_PREESCAPING replacements to the raw text, so that
    smiley symbols which markdown would otherwise mangle (currently only
    ">:-)") survive the markdown pass intact."""
    for before, after in SMILEY_PREESCAPING:
        text = text.replace(before, after)
    return text
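
# A sketch of what a SMILEY_PREESCAPING entry might look like (assumed, not
# taken from the actual settings): ('>:-)', '\\>:-)') would escape the
# leading '>' so that markdown does not treat the smiley as a blockquote.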


revisions_re = [
    re.compile(r'bzr:r(\d+)'),
]


def _insert_revision(text):
    """Replace 'bzr:rNNNN' markers with links to the bzr revision."""
    for r in revisions_re:
        text = r.sub(lambda m: '<a href="%s">r%s</a>' % (
            settings.BZR_URL % m.group(1), m.group(1)), text)
    return text
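
# An illustrative call (the BZR_URL value is assumed for the example; the
# real one comes from settings):
#
#   With BZR_URL = 'http://bazaar.launchpad.net/~widelands-dev/widelands/trunk/revision/%s':
#   _insert_revision('fixed in bzr:r6532')
#   -> 'fixed in <a href="...widelands/trunk/revision/6532">r6532</a>'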


def _classify_link(tag):
    """Return a dict of attributes to set on this link if it is in any way
    special (external, user page, or missing wiki page), otherwise None.

    tag -- the <a> tag to classify

    """
    # No class change for image links
    if tag.findChild('img') is not None:
        return None

    href = tag['href'].lower()

    # Check for external links: external means no local domain matches
    if href.startswith('http'):
        external = all(href.find(domain) == -1 for domain in LOCAL_DOMAINS)
        if external:
            return {'class': 'externalLink', 'title': 'This link refers to outer space'}

    if '/profile/' in tag['href']:
        return {'class': 'userLink', 'title': 'This link refers to a userpage'}

    if check_for_missing_wikipages and href.startswith('/wiki/'):

        # Check for a missing wikilink /wiki/PageName[/additional/stuff]
        # Using the raw href here because the page name is case sensitive
        pn = tag['href'][6:].split('/', 1)[0]

        if not pn:  # Wiki root link is not a page
            return {'class': 'wrongLink', 'title': 'This link is missing an article name'}

        # Wiki special pages are also not counted
        if pn in ['list', 'search', 'history', 'feeds', 'observe', 'edit']:
            return {'class': 'specialLink'}

        # Check for a redirect
        try:
            # try to get the article id; if this fails an IndexError is raised
            a_id = ChangeSet.objects.filter(
                old_title=pn).values_list('article_id')[0]

            # get actual title of article
            act_t = Article.objects.get(id=a_id[0]).title
            if pn != act_t:
                return {'title': "This is a redirect and points to \"" + act_t + "\""}
            else:
                return None
        except IndexError:
            pass

        # Article is missing (or misspelled)
        if Article.objects.filter(title=pn).count() == 0:
            return {'class': 'missingLink', 'title': 'This link is misspelled or missing. Click to create it anyway.'}

    return None
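
# Illustrative classifications (assuming the site domain is
# 'wl.widelands.org' and 'NoSuchPage' is not an existing article):
#
#   <a href="http://example.com/">  -> class='externalLink'
#   <a href="/profile/SomeUser">    -> class='userLink'
#   <a href="/wiki/NoSuchPage">     -> class='missingLink'
#   <a href="/wiki/search">         -> class='specialLink'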


def _clickable_image(tag):
    """Wrap externally hosted images in a link pointing to the image itself,
    unless the image is already part of a link."""
    # Is this an external image?
    if tag['src'].startswith('http'):
        # Is it already wrapped in a link?
        if tag.parent.name != 'a':
            # Add a link to the image itself
            return '<a href="%s"><img src="%s" /></a>' % (
                tag['src'], tag['src'])
    return None
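
# An illustrative call:
#
#   For <img src="http://example.com/shot.png"> outside of any link:
#   -> '<a href="http://example.com/shot.png"><img src="http://example.com/shot.png" /></a>'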


custom_filters = {
    # Wikiwordification
    # Match a wiki page link LikeThis. Any !WikiWord (with a '!' in front)
    # is left unlinked, and only the '!' is stripped.
    'wikiwords': (re.compile(r"(!?)(\b[A-Z][a-z]+[A-Z]\w+\b)"), lambda m:
                  m.group(2) if m.group(1) == '!' else
                  u'<a href="/wiki/%(match)s">%(match)s</a>' %
                  {'match': m.group(2)}),
}
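
# Illustrative replacements ('SampleArticle' is a made-up page name):
#
#   'see SampleArticle'  -> 'see <a href="/wiki/SampleArticle">SampleArticle</a>'
#   'see !SampleArticle' -> 'see SampleArticle' (the '!' suppresses the link)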


def do_wl_markdown(value, *args, **keyw):
    # Do preescaping for markdown, so that some things stay intact.
    # This is currently only needed for the smiley ">:-)"
    value = _insert_smiley_preescaping(value)
    custom = keyw.pop('custom', True)
    # Keep our own 'bleachit' flag out of the arguments forwarded to markdown
    bleachit = 'bleachit' in args
    args = tuple(a for a in args if a != 'bleachit')
    html = smart_str(markdown(value, extensions=[
                     'extra', 'toc'], *args, **keyw))

    # Sanitize posts from potentially untrusted users (Forum/Wiki/Maps)
    if bleachit:
        html = mark_safe(bleach.clean(
            html, tags=BLEACH_ALLOWED_TAGS, attributes=BLEACH_ALLOWED_ATTRIBUTES))

    # Since we only want to do replacements outside of tags (in general) and
    # not between <a> and </a>, we partition our site accordingly.
    # BeautifulSoup does all the heavy lifting.
    soup = BeautifulSoup(html)
    if len(soup.contents) == 0:
        # well, empty soup. Return it
        return unicode(soup)

    for text in soup.findAll(text=True):
        # Do not replace inside a link
        if text.parent.name == 'a':
            continue

        # We do our own small postprocessing of the stuff we got after
        # markdown went over it. General consensus is to avoid replacing
        # anything inside links [text](url)
        if custom:
            # Replace bzr revisions
            rv = _insert_revision(text)
            # Replace smileys; only outside "code-tags"
            if text.parent.name != 'code':
                rv = _insert_smileys(rv)

            for name, (pattern, replacement) in custom_filters.iteritems():
                if not len(text.strip()) or not keyw.get(name, True):
                    continue

                rv = pattern.sub(replacement, rv)
            text.replaceWith(rv)

    # This call slows the whole function down: unicode -> reparsing.
    # It takes the function from ~0.5 ms to ~1.5 ms on my system.
    # For our site with its little traffic that is maybe not so important,
    # but still a waste of cycles :(
    soup = BeautifulSoup(unicode(soup))
    # We have to go over this to classify links
    for tag in soup.findAll('a'):
        rv = _classify_link(tag)
        if rv:
            for attribute, value in rv.iteritems():
                tag[attribute] = value

    # All external images get made clickable.
    # This currently only applies in the forum.
    for tag in soup.findAll('img'):
        link = _clickable_image(tag)
        if link:
            tag.replaceWith(link)

    return unicode(soup)


@register.filter
def wl_markdown(content, arg=''):
    """A Filter which decides when to 'bleach' the content."""
    if arg == 'bleachit':
        return mark_safe(do_wl_markdown(content, 'bleachit'))
    else:
        return mark_safe(do_wl_markdown(content))
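
# Template usage (a sketch; the variable names are made up):
#
#   {% load wl_markdown %}
#   {{ article.text|wl_markdown }}            trusted content, no bleaching
#   {{ post.body|wl_markdown:"bleachit" }}    untrusted content gets bleached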