4
=============================================================================
6
Preprocessors work on source text before we start doing anything too
13
# Common prefix of every stashed-HTML placeholder: markdown.STX followed by
# the literal marker "wzxhzdk:".
HTML_PLACEHOLDER_PREFIX = markdown.STX+"wzxhzdk:"
14
# Full placeholder template: prefix + "%d" + markdown.ETX.  The "%d" slot is
# filled with the running stash counter (see `HTML_PLACEHOLDER % self.html_counter`
# in the `store` method below).
HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + "%d" + markdown.ETX
17
def __init__(self, markdown_instance=None):
19
self.markdown = markdown_instance
21
class Preprocessor (Processor):
23
Preprocessors are run after the text is broken into lines.
25
Each preprocessor implements a "run" method that takes a reference to a
26
list of lines of the document, modifies it as necessary and returns
27
either the same list object or a new list.
29
Preprocessors must extend markdown.Preprocessor.
34
Each subclass of Preprocessor should override the `run` method, which
35
takes the document as a list of strings split by newlines and returns
36
the (possibly modified) list of lines.
43
This class is used for stashing HTML objects that we extract
44
in the beginning and replace with place-holders.
48
""" Create a HtmlStash. """
49
self.html_counter = 0 # for counting inline html segments
52
def store(self, html, safe=False):
54
Saves an HTML segment for later reinsertion. Returns a
55
placeholder string that needs to be inserted into the
60
* html: an html segment
61
* safe: label an html segment as safe for safemode
63
Returns : a placeholder string
66
self.rawHtmlBlocks.append((html, safe))
67
placeholder = HTML_PLACEHOLDER % self.html_counter
68
self.html_counter += 1
73
self.rawHtmlBlocks = []
76
class HtmlBlockPreprocessor(Preprocessor):
77
"""Remove html blocks from the text and store them for later retrieval."""
79
# Closing-tag templates that _get_right_tag iterates over, in this order.
# NOTE(review): "%s" is presumably filled with the left tag name (the index
# arithmetic in _get_right_tag uses len(p)-2 + len(left_tag)) -- confirm.
right_tag_patterns = ["</%s>", "%s>"]
81
def _get_left_tag(self, block):
82
return block[1:].replace(">", " ", 1).split()[0].lower()
84
def _get_right_tag(self, left_tag, block):
85
for p in self.right_tag_patterns:
89
return tag.lstrip("<").rstrip(">"), i + len(p)-2 + len(left_tag)
90
return block.rstrip()[-len(left_tag)-2:-1].lower(), len(block)
92
def _equal_tags(self, left_tag, right_tag):
93
if left_tag == 'div' or left_tag[0] in ['?', '@', '%']: # handle PHP, etc.
95
if ("/" + left_tag) == right_tag:
97
if (right_tag == "--" and left_tag == "--"):
99
elif left_tag == right_tag[1:] \
100
and right_tag[0] != "<":
105
def _is_oneliner(self, tag):
106
return (tag in ['hr', 'hr/'])
108
def run(self, lines):
109
text = "\n".join(lines)
111
text = text.split("\n\n")
115
in_tag = False # flag
119
if block.startswith("\n"):
123
if block.startswith("\n"):
127
if block.startswith("<"):
128
left_tag = self._get_left_tag(block)
129
right_tag, data_index = self._get_right_tag(left_tag, block)
131
if data_index < len(block):
132
text.insert(0, block[data_index:])
133
block = block[:data_index]
135
if not (markdown.isBlockLevel(left_tag) \
136
or block[1] in ["!", "?", "@", "%"]):
137
new_blocks.append(block)
140
if self._is_oneliner(left_tag):
141
new_blocks.append(block.strip())
147
right_tag, data_index = self._get_right_tag(left_tag, block)
148
# keep checking conditions below and maybe just append
150
if block.rstrip().endswith(">") \
151
and self._equal_tags(left_tag, right_tag):
153
self.markdown.htmlStash.store(block.strip()))
155
else: #if not block[1] == "!":
156
# if is block level tag and is not complete
158
if markdown.isBlockLevel(left_tag) or left_tag == "--" \
159
and not block.rstrip().endswith(">"):
160
items.append(block.strip())
164
self.markdown.htmlStash.store(block.strip()))
168
new_blocks.append(block)
171
items.append(block.strip())
173
right_tag, data_index = self._get_right_tag(left_tag, block)
175
if self._equal_tags(left_tag, right_tag):
176
# if find closing tag
179
self.markdown.htmlStash.store('\n\n'.join(items)))
183
new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items)))
184
new_blocks.append('\n')
186
new_text = "\n\n".join(new_blocks)
187
return new_text.split("\n")
190
class ReferencePreprocessor(Preprocessor):
191
""" Remove reference definitions from text and store for later use. """
193
# Matches a single reference-definition line.  Groups, as consumed by run():
#   1 - up to three leading spaces
#   2 - the text between "[" and "]"; stripped and lower-cased to form the id
#   3 - the run of non-space characters after "]:" and optional whitespace;
#       stored as the first element of the reference tuple
#   4 - the rest of the line; treated as a potential title
RE = re.compile(r'^(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)$', re.DOTALL)
195
def run (self, lines):
198
m = self.RE.match(line)
200
id = m.group(2).strip().lower()
201
t = m.group(4).strip() # potential title
203
self.markdown.references[id] = (m.group(3), t)
205
and (t[0] == t[-1] == "\""
206
or t[0] == t[-1] == "\'"
207
or (t[0] == "(" and t[-1] == ")") ) ):
208
self.markdown.references[id] = (m.group(3), t[1:-1])
210
new_text.append(line)
212
new_text.append(line)
214
return new_text #+ "\n"