119
111
''' % '|'.join([link, macro, image, code])
113
def __init__(self, bloglike_lines=False, url_protocols=None,
116
# For pre escaping, in creole 1.0 done with ~:
117
self.pre_escape_re = c(self.pre_escape, re.M | re.X)
118
# for link descriptions
119
self.link_re = c('|'.join([self.image, self.linebreak,
120
self.char]), re.X | re.U)
122
self.item_re = c(self.item, re.X | re.U | re.M)
124
self.cell_re = c(self.cell, re.X | re.U)
126
# For block elements:
128
self.text = r'(?P<text> .+ ) (?P<break> (?<!\\)$\n(?!\s*$) )?'
129
self.block_re = c('|'.join([self.line, self.head, self.separator,
130
self.pre, self.list, self.table,
131
self.text]), re.X | re.U | re.M)
133
# For inline elements:
134
if url_protocols is not None:
135
self.proto = '|'.join(re.escape(p) for p in url_protocols)
136
self.url = r'''(?P<url>
137
(^ | (?<=\s | [.,:;!?()/=]))
139
(?P<url_target> (?P<url_proto> %s ):\S+? )
140
($ | (?=\s | [,.:;!?()] (\s | $))))''' % self.proto
141
inline_elements = [self.link, self.url, self.macro,
142
self.code, self.image, self.strong,
143
self.emph, self.linebreak,
144
self.escape, self.char]
147
up_case = u''.join(unichr(i) for i in xrange(sys.maxunicode)
148
if unicodedata.category(unichr(i))=='Lu')
149
self.wiki = ur'''(?P<wiki>[%s]\w+[%s]\w+)''' % (up_case, up_case)
150
inline_elements.insert(3, self.wiki)
151
self.inline_re = c('|'.join(inline_elements), re.X | re.U)
123
155
Parse the raw text and create a document object
124
156
that can be converted into output using Emitter.
158
A separate instance should be created for parsing a new document.
159
The first parameter is the raw text to be parsed. An optional second
160
argument is the Rules object to use. You can customize the parsing
161
rules to enable optional features or extend the parser.
127
# For pre escaping, in creole 1.0 done with ~:
128
pre_escape_re = re.compile(Rules.pre_escape, re.M | re.X)
129
link_re = re.compile('|'.join([Rules.image, Rules.linebreak, Rules.char]), re.X | re.U) # for link descriptions
130
item_re = re.compile(Rules.item, re.X | re.U | re.M) # for list items
131
cell_re = re.compile(Rules.cell, re.X | re.U) # for table cells
132
# For block elements:
133
block_re = re.compile('|'.join([Rules.line, Rules.head, Rules.separator,
134
Rules.pre, Rules.list, Rules.table, Rules.text]), re.X | re.U | re.M)
135
# For inline elements:
136
inline_re = re.compile('|'.join([Rules.link, Rules.url, Rules.macro,
137
Rules.code, Rules.image, Rules.strong, Rules.emph, Rules.linebreak,
138
Rules.escape, Rules.char]), re.X | re.U)
140
def __init__(self, raw):
164
def __init__(self, raw, rules=None):
165
self.rules = rules or Rules()
142
167
self.root = DocNode('document', None)
143
168
self.cur = self.root # The most recent document node
184
209
self.cur = DocNode('link', self.cur)
185
210
self.cur.content = target
187
re.sub(self.link_re, self._replace, text)
212
re.sub(self.rules.link_re, self._replace, text)
188
213
self.cur = parent
190
215
_link_target_repl = _link_repl
191
216
_link_text_repl = _link_repl
218
def _wiki_repl(self, groups):
219
"""Handle WikiWord links, if enabled."""
221
text = groups.get('wiki', '')
222
node = DocNode('link', self.cur)
224
DocNode('text', node, node.content)
193
227
def _macro_repl(self, groups):
194
228
"""Handles macros using the placeholder syntax."""
363
397
def parse_inline(self, raw):
364
398
"""Recognize inline elements inside blocks."""
366
re.sub(self.inline_re, self._replace, raw)
400
re.sub(self.rules.inline_re, self._replace, raw)
368
402
def parse_block(self, raw):
369
403
"""Recognize block elements."""
371
re.sub(self.block_re, self._replace, raw)
405
re.sub(self.rules.block_re, self._replace, raw)
374
408
"""Parse the text given as self.raw and return DOM tree."""