~ntt-pf-lab/nova/monkey_patch_notification

« back to all changes in this revision

Viewing changes to vendor/tornado/website/markdown/preprocessors.py

  • Committer: Jesse Andrews
  • Date: 2010-05-28 06:05:26 UTC
  • Revision ID: git-v1:bf6e6e718cdc7488e2da87b21e258ccc065fe499
initial commit

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
 
 
2
"""
 
3
PRE-PROCESSORS
 
4
=============================================================================
 
5
 
 
6
Preprocessors work on source text before we start doing anything too
 
7
complicated. 
 
8
"""
 
9
 
 
10
import re
 
11
import markdown
 
12
 
 
13
# Raw HTML that is pulled out of the document is replaced by a numbered
# place-holder.  The place-holder text is wrapped in markdown.STX ... 
# markdown.ETX (defined in the markdown package, not visible here).
HTML_PLACEHOLDER_PREFIX = markdown.STX + "wzxhzdk:"

# "%d" is filled in with the index of the stashed segment.
HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + "%d" + markdown.ETX
 
15
 
 
16
class Processor:
    """
    Common base for processors; optionally bound to a Markdown instance.

    Keyword arguments:

    * markdown_instance: object to expose as `self.markdown`.  When it is
      omitted (None) the attribute is left unset, exactly as before.

    """

    def __init__(self, markdown_instance=None):
        # Compare against None instead of relying on truthiness: an
        # instance that happens to be falsy (e.g. one defining __len__,
        # __nonzero__ or __bool__) must still be attached.
        if markdown_instance is not None:
            self.markdown = markdown_instance
 
20
 
 
21
class Preprocessor(Processor):
    """
    Base class for processors that run once the text has been split
    into lines.

    A preprocessor provides a "run" method.  It receives the document as
    a list of lines, changes it where needed, and hands back either that
    same list or a new one.

    Preprocessors must extend markdown.Preprocessor.

    """

    def run(self, lines):
        """
        Hook to be overridden by every subclass.

        `lines` is the document as a list of strings split on newlines;
        an override returns the (possibly modified) list of lines.  This
        base implementation does nothing and returns None.

        """
        return None
 
40
 
 
41
class HtmlStash:
    """
    Container for HTML objects that are pulled out of the document at
    the start and swapped for place-holders.
    """

    def __init__(self):
        """ Create an empty HtmlStash. """
        self.html_counter = 0  # number of html segments stored so far
        self.rawHtmlBlocks = []

    def store(self, html, safe=False):
        """
        Keep an HTML segment so it can be put back later and return the
        place-holder string to insert into the document in its place.

        Keyword arguments:

        * html: an html segment
        * safe: label an html segment as safe for safemode

        Returns : a placeholder string

        """
        index = self.html_counter
        self.rawHtmlBlocks.append((html, safe))
        placeholder = HTML_PLACEHOLDER % index
        self.html_counter = index + 1
        return placeholder

    def reset(self):
        """ Forget every stored segment and restart the numbering. """
        self.html_counter = 0
        self.rawHtmlBlocks = []
 
74
 
 
75
 
 
76
class HtmlBlockPreprocessor(Preprocessor):
    """Remove html blocks from the text and store them for later retrieval."""

    # Templates used to look for the tag that closes a block; each is
    # filled in with the name of the opening tag.
    right_tag_patterns = ["</%s>", "%s>"]

    def _get_left_tag(self, block):
        """
        Return the lower-cased first word after the leading "<" of `block`
        (the first ">" counts as a separator).

        Returns "" when nothing but whitespace follows the "<" (for
        example "<", "<>" or "< >") instead of raising IndexError.
        """
        words = block[1:].replace(">", " ", 1).split()
        if not words:
            return ""
        return words[0].lower()

    def _get_right_tag(self, left_tag, block):
        """
        Locate the tag that closes `block`.

        Returns a tuple (right_tag, data_index): the closing tag with its
        angle brackets stripped and the index just past it.  When none of
        `right_tag_patterns` is found beyond index 2, the tail of the
        block is returned as the tag and data_index is len(block).
        """
        for pattern in self.right_tag_patterns:
            tag = pattern % left_tag
            i = block.rfind(tag)
            if i > 2:
                # i + len(tag) is the position right after the closing tag.
                return tag.lstrip("<").rstrip(">"), i + len(tag)
        return block.rstrip()[-len(left_tag)-2:-1].lower(), len(block)

    def _equal_tags(self, left_tag, right_tag):
        """Return True if `right_tag` is accepted as closing `left_tag`."""
        if not left_tag:
            # No opening tag name at all: nothing can close it.
            return False
        if left_tag == 'div' or left_tag[0] in ['?', '@', '%']: # handle PHP, etc.
            return True
        if ("/" + left_tag) == right_tag:
            return True
        if right_tag == "--" and left_tag == "--":
            return True
        # Slicing keeps this safe for an empty right_tag.
        return left_tag == right_tag[1:] and right_tag[:1] != "<"

    def _is_oneliner(self, tag):
        """Return True for tags that never have a separate closing tag."""
        return tag in ['hr', 'hr/']

    def run(self, lines):
        """
        Replace raw html blocks in the document by place-holders.

        `lines` is the document as a list of lines.  The text is split
        into blocks on blank lines; blocks recognised as html are handed
        to `self.markdown.htmlStash.store` and replaced by the returned
        place-holder, all other blocks are kept as they are.  Returns the
        resulting document as a new list of lines.

        Whether a tag is block level is decided by markdown.isBlockLevel
        (defined in the markdown package, not visible here).
        """
        # Blocks still to be processed, kept in reverse order: the next
        # block is popped from the end and a split-off remainder is pushed
        # back there, instead of re-slicing / inserting at the front.
        pending = "\n".join(lines).split("\n\n")
        pending.reverse()

        new_blocks = []
        items = []      # pieces of an html block spanning several blocks
        left_tag = ''
        right_tag = ''
        in_tag = False  # True while collecting an unfinished html block

        while pending:
            block = pending.pop()
            # Drop at most two leading newlines.
            for _ in range(2):
                if block.startswith("\n"):
                    block = block[1:]

            if in_tag:
                items.append(block.strip())
                right_tag = self._get_right_tag(left_tag, block)[0]
                if self._equal_tags(left_tag, right_tag):
                    # found the closing tag: stash everything collected
                    in_tag = False
                    new_blocks.append(
                        self.markdown.htmlStash.store('\n\n'.join(items)))
                    items = []
                continue

            if not block.startswith("<"):
                new_blocks.append(block)
                continue

            left_tag = self._get_left_tag(block)
            if not left_tag:
                # A "<" with no tag name behind it is ordinary text.
                new_blocks.append(block)
                continue

            right_tag, data_index = self._get_right_tag(left_tag, block)

            if data_index < len(block):
                # Text after the closing tag becomes the next block.
                pending.append(block[data_index:])
                block = block[:data_index]

            if not (markdown.isBlockLevel(left_tag)
                    or block[1] in ["!", "?", "@", "%"]):
                new_blocks.append(block)
                continue

            if self._is_oneliner(left_tag):
                new_blocks.append(block.strip())
                continue

            if block[1] == "!":
                # is a comment block
                left_tag = "--"
                right_tag, data_index = self._get_right_tag(left_tag, block)
                # keep checking conditions below and maybe just append

            if block.rstrip().endswith(">") \
                    and self._equal_tags(left_tag, right_tag):
                new_blocks.append(
                    self.markdown.htmlStash.store(block.strip()))
                continue

            # Parentheses spell out the precedence the condition always
            # had: a block level tag that was not closed above keeps
            # collecting; a comment does so only if it does not end in ">".
            if markdown.isBlockLevel(left_tag) \
                    or (left_tag == "--"
                        and not block.rstrip().endswith(">")):
                items.append(block.strip())
                in_tag = True
            else:
                new_blocks.append(
                    self.markdown.htmlStash.store(block.strip()))

        if items:
            # The document ended inside an unfinished html block.
            new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items)))
            new_blocks.append('\n')

        new_text = "\n\n".join(new_blocks)
        return new_text.split("\n")
 
188
 
 
189
 
 
190
class ReferencePreprocessor(Preprocessor):
    """ Strip reference definitions out of the text and keep them for later. """

    RE = re.compile(r'^(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)$', re.DOTALL)

    def run(self, lines):
        """
        Record every reference definition found in `lines` in
        `self.markdown.references` and return the remaining lines.

        A matching line whose title is neither empty nor wrapped in
        "...", '...' or (...) is left in the text untouched.
        """
        # (first char, last char) pairs that may enclose a title
        delimiters = (("\"", "\""), ("\'", "\'"), ("(", ")"))
        kept = []
        for line in lines:
            match = self.RE.match(line)
            if match is None:
                kept.append(line)
                continue

            ref_id = match.group(2).strip().lower()
            title = match.group(4).strip()  # potential title
            if not title:
                self.markdown.references[ref_id] = (match.group(3), title)
            elif len(title) >= 2 and (title[0], title[-1]) in delimiters:
                self.markdown.references[ref_id] = (match.group(3), title[1:-1])
            else:
                kept.append(line)

        return kept