1
# -*- coding: iso-8859-1 -*-
3
MoinMoin - convert content in 1.6.0alpha (rev 1844: 58ebb64243cc) wiki markup to 1.6.0 style
4
by using a modified 1.6.0alpha parser as translator.
6
Assuming we have this "renames" map:
7
-------------------------------------------------------
8
'PAGE', 'some_page' -> 'some page'
9
'FILE', 'with%20blank.txt' -> 'with blank.txt'
11
Markup transformations needed:
12
-------------------------------------------------------
13
["some_page"] -> [[some page]] # renamed
14
[:some_page:some text] -> [[some page|some text]]
15
[:page:text] -> [[page|text]]
16
(with a page not being renamed)
18
attachment:with%20blank.txt -> [[attachment:with blank.txt]]
19
attachment:some_page/with%20blank.txt -> [[attachment:some page/with blank.txt]]
20
The attachment processing should also urllib.unquote the filename (or at
21
least replace %20 by space) and put it into "quotes" if it contains spaces.
23
@copyright: 2007 MoinMoin:JohannesBerg,
24
2007 MoinMoin:ThomasWaldmann
25
@license: GNU GPL, see COPYING for details.
30
from MoinMoin import i18n
31
i18n.wikiLanguages = lambda: {}
33
from MoinMoin import config, macro, wikiutil
34
from MoinMoin.action import AttachFile
35
from MoinMoin.Page import Page
36
from MoinMoin.support.python_compatibility import rsplit
39
from text_moin160a_wiki import Parser
43
def convert_wiki(request, pagename, intext, renames):
    """ Convert content written in wiki markup

    @param request: request object; its redirectedOutput() captures
                    everything the converter writes
    @param pagename: name of the page the content belongs to
    @param intext: page content in the old markup
    @param renames: rename map, passed on to Converter unchanged
    @return: converted content; it ends with CRLF only if intext did
    """
    # The converter works on CRLF-terminated lines. If we have to add the
    # terminator ourselves, remember that so it can be taken off again.
    noeol = not intext.endswith('\r\n')
    if noeol:
        intext += '\r\n'
    c = Converter(request, pagename, intext, renames)
    result = request.redirectedOutput(c.convert, request)
    if noeol and result.endswith('\r\n'):
        # remove the terminator that was not part of the input
        result = result[:-2]
    return result
56
STONEAGE_IMAGELINK = False # True for ImageLink(target,image), False for ImageLink(image,target)

# copied from moin 1.6.0 macro/ImageLink.py (to be safe in case we remove ImageLink some day)
# ... and slightly modified/refactored for our needs here.
# hint: using parse_quoted_separated from wikiutil does NOT work here, because we do not have
#       quoted urls when they contain a '=' char in the 1.5 data input.
def explore_args(args):
    """ explore args for positional and keyword parameters

    @param args: raw, comma separated macro argument string (may be None or empty)
    @return: tuple (pp, kw) - list of positional parameters and dict of the
             recognized keyword parameters (keys: 'width', 'height', 'alt')
    """
    if args:
        args = [arg.strip() for arg in args.split(',')]
    else:
        # a macro call without parentheses gives us None
        args = []

    kw_count = 0
    kw = {} # keyword args
    pp = [] # positional parameters

    kwAllowed = ('width', 'height', 'alt')

    for arg in args:
        if '=' in arg:
            key, value = arg.split('=', 1)
            key_lower = key.lower()
            # avoid that urls with "=" are interpreted as keyword
            if key_lower in kwAllowed:
                kw_count += 1
                # convert only after the membership test, so a non-ascii
                # "key" can not blow up in str()
                kw[str(key_lower)] = value
            elif not kw_count and '://' in arg:
                # assuming that this is the image
                pp.append(arg)
            # anything else containing '=' is neither a known keyword nor
            # a URL and is dropped
        else:
            pp.append(arg)

    if STONEAGE_IMAGELINK and len(pp) >= 2:
        # old argument order was (target, image)
        pp[0], pp[1] = pp[1], pp[0]

    return pp, kw
96
class Converter(Parser):
97
def __init__(self, request, pagename, raw, renames):
    """ Set up a converter for the content of one page.

    @param request: request object (its cfg is used to look up macro names)
    @param pagename: name of the page being converted
    @param raw: page content in the old markup
    @param renames: rename map consulted when link targets are translated
    """
    self.pagename = pagename
    self.raw = raw # keep the page text for the conversion run
    self.renames = renames
    self.request = request
    self.in_pre = False # True while we are inside a {{{ ... }}} section

    # fill the macro name placeholder of the rule template; ImageLink is
    # listed explicitly so it is recognized even if the macro is not installed
    self.formatting_rules = self.formatting_rules % {'macronames': u'|'.join(['ImageLink', ] + macro.getNames(self.request.cfg))}
108
def return_word(self, word):
110
_emph_repl = return_word
111
_emph_ibb_repl = return_word
112
_emph_ibi_repl = return_word
113
_emph_ib_or_bi_repl = return_word
114
_u_repl = return_word
115
_strike_repl = return_word
116
_sup_repl = return_word
117
_sub_repl = return_word
118
_small_repl = return_word
119
_big_repl = return_word
120
_tt_repl = return_word
121
_tt_bt_repl = return_word
122
_remark_repl = return_word
123
_table_repl = return_word
124
_tableZ_repl = return_word
125
_rule_repl = return_word
126
_smiley_repl = return_word
127
_smileyA_repl = return_word
128
_ent_repl = return_word
129
_ent_numeric_repl = return_word
130
_ent_symbolic_repl = return_word
131
_heading_repl = return_word
132
_email_repl = return_word
133
_notword_repl = return_word
134
_indent_repl = return_word
135
_li_none_repl = return_word
136
_li_repl = return_word
137
_ol_repl = return_word
138
_dl_repl = return_word
139
_comment_repl = return_word
141
# translate pagenames using pagename translation map
143
def _replace(self, key):
    """ replace an item_name if it is in the renames dict

    key is either a 2-tuple ('PAGE', pagename)
    or a 3-tuple ('FILE', pagename, filename)

    @return: the new page name (for 'PAGE') or the new file name (for
             'FILE'); the name given in key if the rename map has no entry
    @raise ValueError: if the item type is neither 'PAGE' nor 'FILE'
    """
    current_page = self.pagename
    item_type, page_name, file_name = (key + (None, ))[:3]
    abs_page_name = wikiutil.AbsPageName(current_page, page_name)
    # if we don't have an entry in the rename map, we apply the same magic
    # to the page name as 1.5 did (" " -> "_") and try again:
    abs_magic_name = abs_page_name.replace(u' ', u'_')

    if item_type == 'PAGE':
        new_name = self.renames.get((item_type, abs_page_name))
        if new_name is None:
            new_name = self.renames.get((item_type, abs_magic_name))
        if new_name is None:
            # we didn't find it under the magic name either -
            # that means we do not rename it!
            new_name = page_name
        if new_name != page_name and abs_page_name != page_name:
            # we have to fix the (absolute) new_name to be a relative name (as it was before)
            new_name = wikiutil.RelPageName(current_page, new_name)
    elif item_type == 'FILE':
        new_name = self.renames.get((item_type, abs_page_name, file_name))
        if new_name is None:
            new_name = self.renames.get((item_type, abs_magic_name, file_name))
        if new_name is None:
            # not found under the magic page name either - keep the file name
            new_name = file_name
    else:
        raise ValueError("unsupported item type %r" % (item_type, ))
    return new_name
182
def _replace_target(self, target):
    """ Translate the page name of a link target, keeping an optional #anchor.

    @param target: 'pagename' or 'pagename#anchor'
    @return: target with the page name part run through _replace()
    """
    # split at the last '#', the anchor must not take part in the lookup
    target_and_anchor = rsplit(target, '#', 1)
    if len(target_and_anchor) > 1:
        target, anchor = target_and_anchor
        target = self._replace(('PAGE', target))
        return '%s#%s' % (target, anchor)
    return self._replace(('PAGE', target))
194
def _macro_repl(self, word):
    """ Convert a macro call from [[Name(args)]] to <<Name(args)>>.

    ImageLink calls are not kept as macro, they get converted to a link
    with an embedded image: [[target|{{image|alt|attrs}}]]

    @param word: the matched old-style macro markup
    @return: new markup; word is returned unchanged if it can not be parsed
    """
    # we use [[...]] for links now, macros will be <<...>>
    macro_rule = r"""
        \[\[
        (?P<macro_name>\w+)
        (\((?P<macro_args>.*?)\))?
        \]\]
    """
    word = unicode(word) # XXX why is word not unicode before???
    m = re.match(macro_rule, word, re.X|re.U)
    if m is None:
        # not something we understand, better leave it alone
        return word
    macro_name = m.group('macro_name')
    macro_args = m.group('macro_args')
    if macro_name == 'ImageLink':
        fixed, kw = explore_args(macro_args)
        #print "macro_args=%r" % macro_args
        #print "fixed=%r, kw=%r" % (fixed, kw)
        image, target = (fixed + ['', ''])[:2]
        if '://' not in image:
            # if it is not a URL, it is meant as attachment
            image = u'attachment:%s' % image
        if not target:
            # without a target, the link goes to the image itself
            target = image
        elif target.startswith('inline:'):
            target = 'attachment:' + target[7:] # we don't support inline:
        elif target.startswith('wiki:'):
            target = target[5:] # drop wiki:
        image_attrs = []
        alt = kw.get('alt') or ''
        width = kw.get('width')
        if width is not None:
            image_attrs.append(u"width=%s" % width)
        height = kw.get('height')
        if height is not None:
            image_attrs.append(u"height=%s" % height)
        image_attrs = u", ".join(image_attrs)
        if image_attrs:
            image_attrs = u'|' + image_attrs
        if alt or image_attrs:
            # attrs are the 3rd field, so the alt separator is needed
            # even if alt itself is empty
            alt = u'|' + alt
        result = u'[[%s|{{%s%s%s}}]]' % (target, image, alt, image_attrs)
    else:
        if macro_args:
            macro_args = u"(%s)" % macro_args
        else:
            macro_args = u''
        result = u"<<%s%s>>" % (macro_name, macro_args)
    # XXX later check whether some to be renamed pagename is used as macro param
    return result
247
def _word_repl(self, word, text=None):
248
"""Handle WikiNames."""
250
if wikiutil.isStrictWikiname(word):
253
return '[[%s]]' % word
254
else: # internal use:
255
return '[[%s|%s]]' % (word, text)
257
def _wikiname_bracket_repl(self, text):
    """Handle special-char wikinames with link text, like:
       ["Jim O'Brian" Jim's home page] or ['Hello "world"!' a page with doublequotes]

    @param text: the matched markup including the brackets
    @return: [[target]] or [[target|linktext]] with the target run through
             the rename map; text unchanged if there is no target
    """
    word = text[1:-1] # strip brackets
    if not word:
        return text
    first_char = word[0]
    if first_char in QUOTE_CHARS:
        # split on closing quote
        parts = word[1:].split(first_char, 1)
    else: # not quoted
        # split on whitespace
        parts = word.split(None, 1)
    # the link text is optional, do not fail on unpacking if it is missing
    target, linktext = (parts + ['', ''])[:2]
    if not target:
        return text
    target = self._replace(('PAGE', target))
    linktext = linktext.strip()
    if linktext and linktext != target:
        return '[[%s|%s]]' % (target, linktext)
    return '[[%s]]' % target
280
# NOTE(review): `_interwiki_repl` is defined a second time directly below
# this definition; if both are live in the class body, the later one wins
# and this one is never called. This variant builds the new markup itself
# instead of delegating to self.interwiki() - presumably an older
# implementation, confirm whether it is meant to be active at all.
# NOTE(review): the embedded line numbers jump (282 -> 286, 290 -> 292,
# 292 -> 294, 294 -> 296, 297 -> 299), so the if/else lines that select
# between the return statements are not visible here.
def _interwiki_repl(self, word):
281
"""Handle InterWiki links."""
282
# only wikitag_bad can matter below; the branch using it is not visible
wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, word)
286
# word has the form WikiName:PageName
wikiname, pagename = word.split(':', 1)
287
pagename = wikiutil.url_unquote(pagename) # maybe someone has used %20 for blanks in pagename
288
# computed before the rename below, so it describes the old page name
camelcase = wikiutil.isStrictWikiname(pagename)
289
# links into our own wiki become plain page links (after renaming)
if wikiname in ('Self', self.request.cfg.interwikiname):
290
pagename = self._replace(('PAGE', pagename))
292
return '%s' % pagename # optimize special case
294
return '[[%s]]' % pagename # optimize special case
296
# a blank in the page name needs the bracketed form
if ' ' in pagename: # we could get a ' ' by urlunquoting
297
return '[[%s:%s]]' % (wikiname, pagename)
299
return '%s:%s' % (wikiname, pagename)
302
def _interwiki_repl(self, word):
    """Handle InterWiki links.

    @param word: free-standing WikiName:PageName markup
    @return: new markup; word unchanged if the wiki name can not be resolved
    """
    wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, word)
    if wikitag_bad:
        # not a known interwiki name, so this is just text with a colon
        return word
    return self.interwiki("wiki:" + word)
310
def interwiki(self, target_and_text, **kw):
    """ Convert a wiki: link to new markup.

    @param target_and_text: 'wiki:' + interwiki target, optionally followed
                            by a link text
    @param kw: accepted for call compatibility, not evaluated here
    @return: a bare WikiName:PageName, a [[...]] link or, for pictures,
             a {{...}} transclusion
    """
    scheme, rest = target_and_text.split(':', 1)
    wikiname, pagename, text = wikiutil160a.split_wiki(rest)
    if text:
        # the format strings below expect text to carry its own separator
        text = '|' + text

    if (pagename.startswith(wikiutil.CHILD_PREFIX) or # fancy link to subpage [wiki:/SubPage text]
        Page(self.request, pagename).exists()): # fancy link to local page [wiki:LocalPage text]
        pagename = wikiutil.url_unquote(pagename)
        pagename = self._replace_target(pagename)
        return '[[%s%s]]' % (pagename, text)

    if wikiname in ('Self', self.request.cfg.interwikiname, ''): # [wiki:Self:LocalPage text] or [:LocalPage:text]
        pagename = wikiutil.url_unquote(pagename)
        pagename = self._replace_target(pagename)
        camelcase = wikiutil.isStrictWikiname(pagename)
        # NOTE(review): text has the '|' separator prepended by now, so
        # this comparison looks like it can hardly ever be true - confirm
        # what the optimization was meant to cover
        if camelcase and text == pagename:
            return '%s' % pagename # optimize special case
        return '[[%s%s]]' % (pagename, text)

    wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, wikiname+':')
    if wikitag_bad: # likely we got some /InterWiki as wikitail, we don't want that!
        pagename = wikiutil.url_unquote(pagename)
        pagename = self._replace_target(pagename)
        wikitail = pagename
    else: # good
        wikitail = wikiutil.url_unquote(pagename)

    if wikiutil.isPicture(wikitail):
        return '{{%s:%s%s}}' % (wikitag, wikitail, text)
    if ' ' not in wikitail and not text:
        # simple enough to work as free-standing interwiki link
        return '%s:%s' % (wikitag, wikitail)
    return '[[%s:%s%s]]' % (wikitag, wikitail, text)
349
# NOTE(review): second definition of `interwiki` in this class; if both are
# live, this one replaces the (target_and_text, **kw) variant defined
# earlier, although callers such as _url_repl pass a single string and
# _url_bracket_repl passes pretty_url=1, which this signature rejects.
# NOTE(review): only a fragment is visible: `url` is not assigned in the
# visible lines (the parameter is called url_and_text) and nothing is
# returned. Presumably a leftover of an older implementation - confirm
# whether it should be removed or disabled.
def interwiki(self, url_and_text):
350
# keep track of whether this is a self-reference, so links
351
# are always shown even the page doesn't exist.
352
wikiname, pagename = wikiutil.split_wiki(url)
355
# NOTE(review): `attachment` is defined again below with the signature
# (target_and_text, **kw); if both definitions are live, the later one wins.
# This variant expects a list [url] or [url, text] - presumably the older
# calling convention, confirm.
# NOTE(review): the embedded line numbers jump (358 -> 361, 361 -> 364,
# 372 -> 376), so the else branch and the assignment of `text` for the
# one-element case are not visible, nor is whatever uses from_this_page.
def attachment(self, url_and_text):
356
""" This gets called on attachment URLs. """
357
# one element: just the url, no link text
if len(url_and_text) == 1:
358
url = url_and_text[0]
361
url, text = url_and_text
364
# url has the form scheme:filename
scheme, fname = url.split(":", 1)
365
#scheme, fname, text = wikiutil.split_wiki(target_and_text)
367
# resolve "page/file" style names relative to the current page
pagename, fname = AttachFile.absoluteName(fname, self.pagename)
368
from_this_page = pagename == self.pagename
369
# the rename map is tried with the name as written and, after
# unquoting, once more with the unquoted name
fname = self._replace(('FILE', pagename, fname))
370
fname = wikiutil.url_unquote(fname, want_unicode=True)
371
fname = self._replace(('FILE', pagename, fname))
372
pagename = self._replace(('PAGE', pagename))
376
name = "%s/%s" % (pagename, fname)
378
if scheme == 'drawing':
379
return "{{drawing:%s%s}}" % (name, text)
381
# check for image URL, and possibly return IMG tag
382
# (images are always inlined, just like for other URLs)
383
if wikiutil.isPicture(name):
384
return "{{attachment:%s%s}}" % (name, text)
386
# inline the attachment
387
if scheme == 'inline':
388
return '{{attachment:%s%s}}' % (name, text)
390
# everything else becomes a link to the attachment
return '[[attachment:%s%s]]' % (name, text)
393
def attachment(self, target_and_text, **kw):
    """ This gets called on attachment URLs

    @param target_and_text: 'scheme:filename', optionally followed by a
                            link text (scheme is attachment, inline or drawing)
    @keyword pretty_url: if true, a picture is linked instead of embedded
    @return: {{...}} markup for drawings, pictures and inline:, else a
             [[attachment:...]] link
    """
    scheme, fname, text = wikiutil160a.split_wiki(target_and_text)
    # file name plus the optional text, separated as the new markup wants it
    fn_txt = fname
    if text:
        fn_txt += '|' + text

    if scheme == 'drawing':
        return "{{drawing:%s}}" % fn_txt

    # check for image, and possibly return IMG tag (images are always inlined)
    if not kw.get('pretty_url', 0) and wikiutil.isPicture(fname):
        return "{{attachment:%s}}" % fn_txt

    # inline the attachment
    if scheme == 'inline':
        return '{{attachment:%s}}' % fn_txt

    return '[[attachment:%s]]' % fn_txt
414
def _url_repl(self, word):
415
"""Handle literal URLs including inline images."""
416
scheme = word.split(":", 1)[0]
419
return self.interwiki(word)
420
if scheme in self.attachment_schemas:
421
return '%s' % self.attachment(word)
423
if wikiutil.isPicture(word): # magic will go away in 1.6!
424
return '{{%s}}' % word # new markup for inline images
429
def _url_bracket_repl(self, word):
430
"""Handle bracketed URLs."""
431
word = word[1:-1] # strip brackets
433
# Local extended link? [:page name:link text] XXX DEPRECATED
435
words = word[1:].split(':', 1)
436
pagename = self._replace(('PAGE', words[0]))
437
if len(words) == 1 or len(words) == 2 and not words[1]:
438
return '[[%s]]' % (pagename, )
440
return '[[%s|%s]]' % (pagename, words[1])
442
scheme_and_rest = word.split(":", 1)
443
if len(scheme_and_rest) == 1: # no scheme
444
# Traditional split on space
445
words = word.split(None, 1)
449
if words[0].startswith('#'): # anchor link
450
if words[0] == words[1]:
451
return '[[%s]]' % words[0]
453
return '[[%s|%s]]' % tuple(words)
455
scheme, rest = scheme_and_rest
457
return self.interwiki(word, pretty_url=1)
458
if scheme in self.attachment_schemas:
459
return self.attachment(word)
461
words = word.split(None, 1)
466
if wikiutil.isPicture(text) and re.match(self.url_rule, text):
467
return '[[%s|{{%s}}]]' % (target, text)
470
return '[[%s]]' % target
472
return '[[%s|%s]]' % (target, text)
476
# NOTE(review): second definition of `_url_bracket_repl` in this class; if
# both are live, this one replaces the definition above it. It hands the
# split word *list* to self.interwiki() / self.attachment(), which only fits
# the list-based (url_and_text) variants of those methods - presumably this
# is the older implementation, confirm whether it is meant to be active.
# NOTE(review): the embedded line numbers jump in several places (480 -> 482,
# 484 -> 486, 486 -> 489, 493 -> 495, 503 -> 505, 520 -> 523, 526 -> 530), so
# the conditions guarding most branches below are not visible.
def _url_bracket_repl(self, word):
477
"""Handle bracketed URLs."""
478
word = word[1:-1] # strip brackets
480
# Local extended link?
482
# [:link:text] - the part after the leading ':' is split at the next ':'
words = word[1:].split(':', 1)
483
# pad, so a missing text unpacks as ''
link, text = (words + ['', ''])[:2]
484
if link.strip() == text.strip():
486
link = self._replace_target(link)
489
return '[[%s%s]]' % (link, text)
491
# Traditional split on space
492
words = word.split(None, 1)
493
# first word starting with '#': anchor link
if words[0][0] == '#':
495
link, text = (words + ['', ''])[:2]
496
if link.strip() == text.strip():
498
#link = self._replace_target(link)
501
return '[[%s%s]]' % (link, text)
503
scheme = words[0].split(":", 1)[0]
505
return self.interwiki(words)
506
#scheme, wikiname, pagename, text = self.interwiki(word)
507
#print "%r %r %r %r" % (scheme, wikiname, pagename, text)
508
#if wikiname in ('Self', self.request.cfg.interwikiname, ''):
511
#    return '[[%s%s]]' % (pagename, text)
515
#    return "[[%s:%s%s]]" % (wikiname, pagename, text)
516
if scheme in self.attachment_schemas:
517
m = self.attachment(words)
518
# turn an embedding {{...}} result into a link [[...]]
if m.startswith('{{') and m.endswith('}}'):
519
# with url_bracket markup, 1.5.8 parser does not embed, but link!
520
m = '[[%s]]' % m[2:-2]
523
target, desc = (words + ['', ''])[:2]
524
# a description that is an image URL becomes an embedded image in the link
if wikiutil.isPicture(desc) and re.match(self.url_rule, desc):
525
#return '[[%s|{{%s|%s}}]]' % (words[0], words[1], words[0])
526
return '[[%s|{{%s}}]]' % (target, desc)
530
return '[[%s%s]]' % (target, desc)
533
def _pre_repl(self, word):
535
if w == '{{{' and not self.in_pre:
537
elif w == '}}}' and self.in_pre:
541
# Handler for matches of the "processor" rule (called by replace() with the
# matched text).
# NOTE(review): the body of this method is not visible here - the embedded
# line numbers jump from 541 to 545. Presumably it has to mark the start of
# a preformatted section like _pre_repl() does for '{{{' and hand the markup
# back unchanged; confirm against the real file before relying on that.
def _processor_repl(self, word):
545
def scan(self, scan_re, line):
    """ Scans one line - append text before match, invoke replace() with match, and add text after match.

    @param scan_re: compiled regular expression with the markup rules
    @param line: one line of page content
    @return: the line with every match replaced by the result of replace()
    """
    result = []
    lastpos = 0

    for match in scan_re.finditer(line):
        # Add text before the match
        if lastpos < match.start():
            result.append(line[lastpos:match.start()])
        # Replace match with markup
        result.append(self.replace(match))
        lastpos = match.end()

    # Add remainder of the line
    result.append(line[lastpos:])
    return u''.join(result)
563
def replace(self, match):
    """ Replace match using type name

    The name of the regex group that matched selects the handler method
    _<name>_repl, which gets the matched text.

    @param match: match object of the rules regex
    @return: whatever the handler returns for the matched text
    @raise Exception: if no group (other than hmarker) has matched
    """
    for _type, hit in match.groupdict().items():
        if hit is None or _type in ("hmarker", ):
            continue
        # Get replace method and replace hit
        handler = getattr(self, '_' + _type + '_repl')
        return handler(hit)

    # We should never get here
    import pprint
    raise Exception("Can't handle match %r\n%s\n%s" % (
        match,
        pprint.pformat(match.groupdict()),
        pprint.pformat(match.groups()),
    ))
584
def convert(self, request):
    """ For each line, scan through looking for magic
        strings, outputting verbatim any intervening text.

    The page text is taken from self.raw and has to end with a line
    terminator. The result is written line by line to request.write(),
    each line terminated with CRLF.

    @param request: request object, used for its cfg and for output
    """
    self.request = request
    # prepare regex patterns
    rules = self.formatting_rules.replace('\n', '|')
    if self.request.cfg.bang_meta:
        rules = u'(?P<notword>!%(word_rule)s)|%(rules)s' % {
            'word_rule': self.word_rule,
            'rules': rules,
        }
    # inside of a preformatted section, only its end is of interest
    pre_rules = r'''(?P<pre>\}\}\})'''
    pre_scan_re = re.compile(pre_rules, re.UNICODE)
    scan_re = re.compile(rules, re.UNICODE)
    eol_re = re.compile(r'\r?\n', re.UNICODE)

    rawtext = self.raw

    # remove last item because it's guaranteed to be empty
    self.lines = eol_re.split(rawtext)[:-1]
    self.in_processing_instructions = True

    pi_prefixes = ("##", "#format", "#refresh", "#redirect", "#deprecated",
                   "#pragma", "#form", "#acl", "#language")

    for line in self.lines:
        # ignore processing instructions
        if self.in_processing_instructions:
            lowered = line.lower()
            found = False
            for pi in pi_prefixes:
                if lowered.startswith(pi):
                    found = True
                    break
            if found:
                self.request.write(line + '\r\n')
                continue # do not parse this line
            # first line that is not a PI ends the PI section
            self.in_processing_instructions = False

        if not line.strip():
            # nothing to convert, keep the line as it is
            self.request.write(line + '\r\n')
            continue

        # Scan line, format and write
        if self.in_pre:
            scanning_re = pre_scan_re
        else:
            scanning_re = scan_re
        formatted_line = self.scan(scanning_re, line)
        self.request.write(formatted_line + '\r\n')