1
from __future__ import unicode_literals
2
from __future__ import absolute_import
5
from . import inlinepatterns
8
def build_treeprocessors(md_instance, **kwargs):
    """Build the default treeprocessors for Markdown.

    Args:
        md_instance: Markdown instance handed to each treeprocessor's
            constructor.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        An ``odict.OrderedDict`` in which the ``"inline"`` processor is
        registered first and the ``"prettify"`` processor second.
    """
    treeprocessors = odict.OrderedDict()
    treeprocessors["inline"] = InlineProcessor(md_instance)
    treeprocessors["prettify"] = PrettifyTreeprocessor(md_instance)
    # Without an explicit return the caller would receive None instead of
    # the registry that was just populated.
    return treeprocessors
# NOTE(review): the header was restored from the call sites in this file
# (``isString(node)``, ``isString(element.text)``) and from the body's use
# of ``s`` -- confirm the signature against the complete file.
def isString(s):
    """Check whether *s* is a plain string that is not atomic.

    Args:
        s: Any object.

    Returns:
        True when ``s`` is an instance of ``util.string_type`` and is not a
        ``util.AtomicString``; False otherwise.
    """
    if isinstance(s, util.AtomicString):
        # Always answer with a bool rather than falling through to None.
        return False
    return isinstance(s, util.string_type)
23
class Treeprocessor(util.Processor):
    """Base class for processors that work on the parsed ElementTree.

    Treeprocessors are run on the ElementTree object before serialization.

    Each Treeprocessor implements a "run" method that takes a pointer to an
    ElementTree, modifies it as necessary and returns an ElementTree.

    Treeprocessors must extend markdown.Treeprocessor.
    """

    # NOTE(review): the method header was restored from the docstring below
    # ("a `run` method, which takes a root ElementTree") -- confirm.
    def run(self, root):
        """Process the tree rooted at *root*.

        Subclasses of Treeprocessor should implement a `run` method, which
        takes a root ElementTree. This method can return another ElementTree
        object, and the existing root ElementTree will be replaced, or it can
        modify the current tree and return None.
        """
        pass  # pragma: no cover
44
class InlineProcessor(Treeprocessor):
46
A Treeprocessor that traverses a tree, applying inline patterns.
49
def __init__(self, md):
    """Cache the placeholder settings and the Markdown instance.

    Args:
        md: Markdown instance. Its ``inlinePatterns`` collection supplies
            the patterns this processor applies.
    """
    prefix = util.INLINE_PLACEHOLDER_PREFIX
    suffix = util.ETX
    self.__placeholder_prefix = prefix
    self.__placeholder_suffix = suffix
    # Presumably the 4 is the width of the "%04d" id used when
    # placeholders are generated in this class -- confirm.
    self.__placeholder_length = 4 + len(prefix) + len(suffix)
    self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
    # Attribute handling later in this class reads
    # ``self.markdown.enable_attributes``, so the instance must be bound.
    self.markdown = md
    self.inlinePatterns = md.inlinePatterns
58
def __makePlaceholder(self, type):
    """Generate a placeholder for the next node to be stashed.

    Args:
        type: Pattern type supplied by the caller; it does not influence
            the generated placeholder.

    Returns:
        A ``(placeholder, id)`` tuple: the placeholder string built from
        ``util.INLINE_PLACEHOLDER`` and the zero-padded four-digit id,
        which is the current number of stashed nodes.
    """
    # Local names avoid shadowing the builtins ``id`` and ``hash``.
    node_id = "%04d" % len(self.stashed_nodes)
    placeholder = util.INLINE_PLACEHOLDER % node_id
    # The caller unpacks two values, so both must be handed back.
    return placeholder, node_id
64
def __findPlaceholder(self, data, index):
    """Extract a placeholder id from *data*, searching from *index*.

    Args:
        data: String to search.
        index: Index from which the search starts.

    Returns:
        ``(id, end)`` where ``end`` is the string index just after the
        found placeholder, or ``(None, index + 1)`` when the placeholder
        pattern does not match at or after ``index``.
    """
    m = self.__placeholder_re.search(data, index)
    if m is None:
        # No placeholder here: let the caller step past this position.
        return None, index + 1
    return m.group(1), m.end()
82
def __stashNode(self, node, type):
    """Add *node* to the stash.

    Args:
        node: Object to keep until placeholders are resolved.
        type: Pattern type, forwarded to the placeholder generator.

    Returns:
        The placeholder string under which ``node`` was stashed.
    """
    placeholder, node_id = self.__makePlaceholder(type)
    self.stashed_nodes[node_id] = node
    # The caller substitutes this string into the text being processed.
    return placeholder
88
def __handleInline(self, data, patternIndex=0):
    """Process a string with inline patterns and replace it with placeholders.

    Args:
        data: A line of Markdown text.
        patternIndex: The index of the inlinePattern to start with.

    Returns:
        String with placeholders. A ``util.AtomicString`` is returned
        untouched.
    """
    if isinstance(data, util.AtomicString):
        return data

    # NOTE(review): the initial offset, the advance step and the final
    # return were restored from how the surrounding code uses them
    # (``startIndex=0`` default and ``(data, matched, startIndex)`` results
    # of __applyPattern) -- confirm against the complete file.
    startIndex = 0
    while patternIndex < len(self.inlinePatterns):
        data, matched, startIndex = self.__applyPattern(
            self.inlinePatterns.value_for_index(patternIndex),
            data, patternIndex, startIndex)
        if not matched:
            # Move on only once the current pattern finds nothing more;
            # otherwise the loop could never terminate.
            patternIndex += 1
    return data
111
# Re-attach the elements produced from an element's text or tail.
#
# Arguments (from the signature and the surviving docstring lines):
# * node: element that receives the resulting children via node.insert()
# * subnode: element whose text or tail is being processed
# * isText: True - it's text, False - it's tail
#
# NOTE(review): this listing has lost its indentation, its docstring
# delimiters and several short lines (the embedded counters jump from 120
# to 132 and from 135 to 139). The assignment of `text` and the value of
# `pos` for the non-tail case are therefore not visible here -- confirm
# against the complete file before changing this method.
def __processElementText(self, node, subnode, isText=True):
113
Process placeholders in Element.text or Element.tail
114
of Elements popped from self.stashed_nodes.
119
* subnode: processing node
120
* isText: bool variable, True - it's text, False - it's tail
132
# Expand the placeholders; the result is a list that is reversed and
# inserted into `node` below.
childResult = self.__processPlaceholders(text, subnode, isText)
134
# Tail content belongs right after `subnode` among `node`'s children.
if not isText and node is not subnode:
135
pos = list(node).index(subnode) + 1
139
# Inserting the reversed list at one fixed position keeps the original
# order of the new children.
childResult.reverse()
140
for newChild in childResult:
141
node.insert(pos, newChild)
143
# Turn a string containing placeholders back into elements.
#
# Arguments (from the signature and the surviving docstring lines):
# * data: string with placeholders instead of ElementTree elements
# * parent: Element which contains the inline data being processed
# * isText: defaults to True
#
# Returns (per the docstring): list of ElementTree elements.
#
# NOTE(review): most of this method's short lines are missing from the
# listing (the embedded counters jump 152 -> 159, 161 -> 175, 198 -> 201
# and 206 -> 209), including the definition of the `linkText` helper
# called below, the initialisation of `result` and `strartIndex`, and the
# loop header. Nesting cannot be recovered from this view; confirm against
# the complete file.
def __processPlaceholders(self, data, parent, isText=True):
145
Process string with placeholders and generate ElementTree tree.
149
* data: string with placeholders instead of ElementTree elements.
150
* parent: Element, which contains processing inline data
152
Returns: list with ElementTree elements with applied inline patterns.
159
# These two assignments extend or set the tail of the last collected
# element; the conditions that select between them are not visible.
result[-1].tail += text
161
result[-1].tail = text
175
# Locate the next placeholder prefix at or after the current offset.
index = data.find(self.__placeholder_prefix, strartIndex)
177
# Pull the id out of the placeholder and get the offset just past it.
id, phEndIndex = self.__findPlaceholder(data, index)
179
if id in self.stashed_nodes:
180
node = self.stashed_nodes.get(id)
183
# Literal text between the current offset and this placeholder.
text = data[strartIndex:index]
186
if not isString(node): # it's Element
187
# Visit the stashed element itself, then each of its direct children.
for child in [node] + list(node):
189
if child.tail.strip():
190
self.__processElementText(
194
if child.text.strip():
195
self.__processElementText(child, child)
196
else: # it's just a string
198
# Resume scanning just past the placeholder.
strartIndex = phEndIndex
201
strartIndex = phEndIndex
204
else: # wrong placeholder
205
# Unknown id: the prefix itself is passed on as literal text.
end = index + len(self.__placeholder_prefix)
206
linkText(data[strartIndex:end])
209
# Everything from the current offset to the end of the string.
text = data[strartIndex:]
210
if isinstance(data, util.AtomicString):
211
# We don't want to lose the AtomicString
212
text = util.AtomicString(text)
218
# Apply one inline pattern to `data` and stash the node it produces.
#
# Arguments (from the surviving docstring lines):
# * data: the text to be processed
# * pattern: the pattern to be checked
# * patternIndex: index of current pattern
# * startIndex: string index, from which we start searching
#
# Returns: every visible return statement yields a 3-tuple
# (string, matched flag, start index), although the docstring only
# mentions the string.
#
# NOTE(review): indentation and several short lines are missing here (the
# embedded counters jump 234 -> 237, 239 -> 242, 248 -> 250, 251 -> 254,
# 255 -> 258 and 260 -> 262), including the conditions guarding the two
# early returns, the closing parentheses of both __handleInline calls and
# one operand of the final "%s%s%s%s" format (four slots, three visible
# operands). Confirm against the complete file.
def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
220
Check if the line fits the pattern, create the necessary
221
elements, add it to stashed_nodes.
225
* data: the text to be processed
226
* pattern: the pattern to be checked
227
* patternIndex: index of current pattern
228
* startIndex: string index, from which we start searching
230
Returns: String with placeholders instead of ElementTree elements.
233
# The pattern is matched against the part of `data` from startIndex on;
# the part before it is kept aside unchanged.
match = pattern.getCompiledRegExp().match(data[startIndex:])
234
leftData = data[:startIndex]
237
# Reported with matched=False (the guarding condition is not visible).
return data, False, 0
239
# Ask the pattern to turn the match into a node.
node = pattern.handleMatch(match)
242
# Reported as matched without stashing anything; the returned index is
# the start of the match's last group, offset by len(leftData).
return data, True, len(leftData)+match.span(len(match.groups()))[0]
244
if not isString(node):
245
if not isinstance(node.text, util.AtomicString):
246
# We need to process current node too
247
for child in [node] + list(node):
248
# NOTE(review): this tests `node`, not `child`, on every iteration --
# confirm that is intended.
if not isString(node):
250
# Text inside the new node is only offered to later patterns
# (patternIndex + 1); its tail starts again at the current pattern.
child.text = self.__handleInline(
251
child.text, patternIndex + 1
254
child.tail = self.__handleInline(
255
child.tail, patternIndex
258
# Swap the node for a placeholder string.
placeholder = self.__stashNode(node, pattern.type())
260
# Matched: the rebuilt string is returned together with start index 0.
return "%s%s%s%s" % (leftData,
262
placeholder, match.groups()[-1]), True, 0
265
"""Apply inline patterns to a parsed Markdown tree.
267
Iterate over ElementTree, find elements with inline tag, apply inline
268
patterns and append newly created Elements to tree. If you don't
269
want to process your data with inline patterns, instead of normal
270
string, use subclass AtomicString:
272
node.text = markdown.AtomicString("This will not be processed.")
276
* tree: ElementTree object, representing Markdown tree.
278
Returns: ElementTree object with applied inline patterns.
281
# Body of the tree-walking method whose docstring lines precede this
# point: inline patterns are applied to the text and tail of each child
# element, and the elements this produces are inserted into the tree.
#
# NOTE(review): the `def` header, the initialisation of `stack` and
# `insertQueue`, the loop headers, the closing parentheses of several
# calls and any return statement are not part of this listing (the
# embedded counters jump, e.g. 281 -> 286, 290 -> 294, 309 -> 313 and
# 317 -> 321). Nesting cannot be recovered from this view; confirm against
# the complete file.
#
# Start with an empty stash mapping placeholder ids to nodes.
self.stashed_nodes = {}
286
# Take the next element to process off the work stack.
currElement = stack.pop()
288
for child in currElement:
289
# Only non-empty, non-atomic text is run through the inline patterns.
if child.text and not isinstance(
290
child.text, util.AtomicString
294
lst = self.__processPlaceholders(
295
self.__handleInline(text), child
298
# Children produced from text are queued here and inserted by the
# `insertQueue` loop further down.
insertQueue.append((child, lst))
300
# The tail is processed against a throwaway element; whatever ends up in
# dumby.tail is copied back onto the child.
tail = self.__handleInline(child.tail)
301
dumby = util.etree.Element('d')
303
tailResult = self.__processPlaceholders(tail, dumby, False)
305
child.tail = dumby.tail
306
# Elements produced from the tail become siblings right after `child`.
pos = list(currElement).index(child) + 1
308
for newChild in tailResult:
309
currElement.insert(pos, newChild)
313
# Second phase: insert the queued children; attribute handling is applied
# only when the Markdown instance has enable_attributes set.
for element, lst in insertQueue:
314
if self.markdown.enable_attributes:
315
if element.text and isString(element.text):
316
element.text = inlinepatterns.handleAttributes(
317
element.text, element
321
if self.markdown.enable_attributes:
322
# Processing attributes
323
# Attributes found in a new child's tail are applied to the parent
# `element`; those found in its text are applied to the child itself.
if newChild.tail and isString(newChild.tail):
324
newChild.tail = inlinepatterns.handleAttributes(
325
newChild.tail, element
327
if newChild.text and isString(newChild.text):
328
newChild.text = inlinepatterns.handleAttributes(
329
newChild.text, newChild
331
element.insert(i, newChild)
336
class PrettifyTreeprocessor(Treeprocessor):
    """Add linebreaks to the html document."""

    # NOTE(review): the assignments under the blank-text/blank-tail checks,
    # the loop headers and the ``run`` header were restored from the
    # surrounding conditions and docstrings -- confirm against the
    # complete file.

    def _prettifyETree(self, elem):
        """Recursively add linebreaks to ElementTree children.

        Args:
            elem: Element to pad. Block-level elements other than ``code``
                and ``pre`` also have their block-level children padded.
        """
        newline = "\n"
        if util.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
            text_is_blank = not elem.text or not elem.text.strip()
            if text_is_blank and len(elem) and util.isBlockLevel(elem[0].tag):
                elem.text = newline
            for e in elem:
                if util.isBlockLevel(e.tag):
                    self._prettifyETree(e)
        # One check covers both the block-level and the general case: a
        # tail that is already a lone newline is still blank, so setting it
        # again changes nothing.
        if not elem.tail or not elem.tail.strip():
            elem.tail = newline

    def run(self, root):
        """Add linebreaks to ElementTree root object.

        Args:
            root: Root element of the tree; it is modified in place.
        """
        self._prettifyETree(root)
        # Do <br />'s separately as they are often in the middle of
        # inline content and missed by _prettifyETree.
        # Element.iter() replaces Element.getiterator(), which was removed
        # in Python 3.9.
        for br in root.iter('br'):
            if not br.tail or not br.tail.strip():
                # A missing tail must not be formatted into the string
                # (that would yield the text "None").
                br.tail = '\n'
            else:
                br.tail = '\n%s' % br.tail
        # Clean up extra empty lines at end of code blocks.
        for pre in root.iter('pre'):
            if len(pre) and pre[0].tag == 'code':
                pre[0].text = util.AtomicString(pre[0].text.rstrip() + '\n')