1
# -*- coding: iso-8859-1 -*-
3
MoinMoin - migration from 1.6.0alpha (rev 1844: 58ebb64243cc - used a similar markup as 1.5.8, but with quotes for linking stuff with blanks) to 1.6.0 (creole link style)
7
a) revert the "underscore == blank" handling in pagenames (introducing it was a mistake)
9
pagename quoted pagename
10
-----------------------------------------------------
11
old MainPage/Sub_Page MainPage(2f)Sub_Page
12
new MainPage/Sub Page MainPage(2f)Sub(20)Page or
13
new MainPage/Sub_Page MainPage(2f)Sub_Page (user has to decide by editing rename1.txt)
17
----------------------------------------------------
18
old MoinMoin:MainPage/Sub_Page ../Sub_Page2
19
new [[MoinMoin:MainPage/Sub Page]] [[../Sub Page2]]
22
b) decode url encoded chars in attachment names (and quote the whole fname):
25
----------------------------------------------------
26
old attachment:file%20with%20blanks.txt
27
new [[attachment:file with blanks.txt]]
29
c) users: move bookmarks from separate files into user profile
30
d) users: generate new name[] for lists and name{} for dicts
32
e) kill all */MoinEditorBackup pages (replaced by drafts functionality)
34
@copyright: 2007 by Thomas Waldmann
35
@license: GNU GPL, see COPYING for details.
41
import codecs, urllib, glob
43
from MoinMoin import config, wikiutil
44
from MoinMoin.script.migration.migutil import opj, listdir, copy_file, move_file, copy_dir
46
import mimetypes # this MUST be after wikiutil import!
48
from _conv160a_wiki import convert_wiki
50
# Migration switch, checked together with the page's deleted state by the
# edit-log writer and the page writer further down: when it is true and the
# page is not deleted, an additional edit-log entry with revision max_rev + 1
# and the comment "converted to 1.6 markup" is prepared, and the revision is
# copied once more with convert=True as new_rev=rev+1.
create_rev = True # create a <new> rev with the converted content of <new-1> rev?
52
def markup_converter(request, pagename, text, renames):
53
""" Convert the <text> content of page <pagename>, using <renames> dict
54
to rename links correctly. Additionally, convert some changed markup.
56
if text.startswith('<?xml'):
57
# would be done with xslt processor
60
pis, body = wikiutil.get_processing_instructions(text)
62
if pi == 'format' and val != 'wiki':
66
text = convert_wiki(request, pagename, text, renames)
71
def __init__(self, request, fname):
72
self.request = request
78
""" read complete event-log from disk """
82
f = file(self.fname, 'r')
85
line = line.replace('\r', '').replace('\n', '')
86
if not line.strip(): # skip empty lines
88
fields = line.split('\t')
90
timestamp, action, kvpairs = fields[:3]
91
timestamp = int(timestamp)
92
kvdict = wikiutil.parseQueryString(kvpairs)
93
data.append((timestamp, action, kvdict))
94
except ValueError, err:
95
# corrupt event log line, log error and skip it
96
print "Error: invalid event log (%s) line %d, err: %s, SKIPPING THIS LINE!" % (self.fname, lineno, str(err))
103
def write(self, fname):
104
""" write complete event-log to disk """
107
for timestamp, action, kvdict in self.data:
108
pagename = kvdict.get('pagename')
109
if pagename and ('PAGE', pagename) in self.renames:
110
kvdict['pagename'] = self.renames[('PAGE', pagename)]
111
kvpairs = wikiutil.makeQueryString(kvdict)
112
fields = str(timestamp), action, kvpairs
113
line = '\t'.join(fields) + '\n'
117
def copy(self, destfname, renames):
118
self.renames = renames
120
self.write(destfname)
124
def __init__(self, request, fname):
125
self.request = request
131
""" read complete edit-log from disk """
135
f = file(self.fname, 'r')
138
line = line.replace('\r', '').replace('\n', '')
139
if not line.strip(): # skip empty lines
141
fields = line.split('\t') + [''] * 9
142
timestamp, rev, action, pagename, ip, hostname, userid, extra, comment = fields[:9]
144
timestamp = int(timestamp)
146
except ValueError, err:
147
print "Error: %r has a damaged timestamp or revision number in log line %d [%s] - skipping this entry" % (
148
self.fname, lineno, str(err))
149
continue # ignore this line, do not terminate - to find all those errors in one go
150
pagename = wikiutil.unquoteWikiname(pagename)
151
data[(timestamp, rev, pagename)] = (timestamp, rev, action, pagename, ip, hostname, userid, extra, comment)
158
def write(self, fname, deleted=False):
159
""" write complete edit-log to disk """
161
editlog = self.data.items()
165
for key, fields in editlog:
166
timestamp, rev, action, pagename, ip, hostname, userid, extra, comment = fields
167
if action.startswith('ATT'):
169
fname = urllib.unquote(extra).decode('utf-8')
170
except UnicodeDecodeError:
171
fname = urllib.unquote(extra).decode('iso-8859-1')
172
if ('FILE', pagename, fname) in self.renames:
173
fname = self.renames[('FILE', pagename, fname)]
174
extra = urllib.quote(fname.encode('utf-8'))
175
if ('PAGE', pagename) in self.renames:
176
pagename = self.renames[('PAGE', pagename)]
177
timestamp = str(timestamp)
179
max_rev = max(rev, max_rev)
180
revstr = '%08d' % rev
181
pagename = wikiutil.quoteWikinameFS(pagename)
182
fields = timestamp, revstr, action, pagename, ip, hostname, userid, extra, comment
183
log_str = '\t'.join(fields) + '\n'
185
if create_rev and not deleted:
186
timestamp = str(wikiutil.timestamp2version(time.time()))
187
revstr = '%08d' % (max_rev + 1)
190
hostname = 'localhost'
193
comment = "converted to 1.6 markup"
194
fields = timestamp, revstr, action, pagename, ip, hostname, userid, extra, comment
195
log_str = '\t'.join(fields) + '\n'
199
def copy(self, destfname, renames, deleted=False):
200
self.renames = renames
202
self.write(destfname, deleted)
206
""" a single revision of a page """
207
def __init__(self, request, pagename, rev_dir, rev):
208
self.request = request
209
self.pagename = pagename
210
self.rev_dir = rev_dir
214
fname = opj(self.rev_dir, '%08d' % self.rev)
215
f = file(fname, "rb")
218
data = data.decode(config.charset)
221
def write(self, data, rev_dir, convert, rev=None):
225
data = markup_converter(self.request, self.pagename, data, self.renames)
226
fname = opj(rev_dir, '%08d' % rev)
227
data = data.encode(config.charset)
228
f = file(fname, "wb")
232
def copy(self, rev_dir, renames, convert=False, new_rev=None):
233
self.renames = renames
235
self.write(data, rev_dir, convert, new_rev)
239
""" a single attachment """
240
def __init__(self, request, attach_dir, attfile):
    """ remember request, source path and decoded name of one attachment

        <attfile> is the file name as found inside <attach_dir>. It is kept
        untouched for building the source path and decoded as utf-8 (bad
        bytes get replaced instead of raising) for the attachment name.
    """
    self.request = request
    raw_name = attfile
    # full path of the existing attachment file
    self.path = opj(attach_dir, raw_name)
    # decoded name, lossy for names that are not valid utf-8
    self.name = raw_name.decode('utf-8', 'replace')
245
def copy(self, attach_dir):
    """ copy the attachment file from its original path into <attach_dir>

        The destination file name is the attachment name encoded as utf-8.
    """
    target = opj(attach_dir, self.name.encode('utf-8'))
    copy_file(self.path, target)
253
""" represents a page with all related data """
254
def __init__(self, request, pages_dir, qpagename):
    """ set up a still empty in-memory page for the quoted name <qpagename>

        Only the page names and the page directory path are derived here.
        All content attributes start out as None and the rename info
        starts out empty.
    """
    self.request = request
    unquoted = wikiutil.unquoteWikiname(qpagename)
    self.name = unquoted
    # renaming: name_old still has the original name when self.name has the new name
    self.name_old = unquoted
    self.page_dir = opj(pages_dir, qpagename)
    # content attributes, all unset for now:
    #   current     - int current
    #   editlog     - dict (see read_editlog)
    #   revlist     - list of ints (page text revisions)
    #   revisions   - dict int: pagerev obj
    #   attachments - dict of unicode fname: full path
    self.current = self.editlog = self.revlist = self.revisions = self.attachments = None
    # info for renaming pages/attachments
    self.renames = {}
267
""" read a page, including revisions, log, attachments from disk """
268
page_dir = self.page_dir
270
current_fname = opj(page_dir, 'current')
271
if os.path.exists(current_fname):
272
current_file = file(current_fname, "r")
273
current_rev = current_file.read()
276
self.current = int(current_rev)
278
print "Error: invalid current file %s, SKIPPING THIS PAGE!" % current_fname
281
editlog_fname = opj(page_dir, 'edit-log')
282
if os.path.exists(editlog_fname):
283
self.editlog = EditLog(self.request, editlog_fname)
284
# read page revisions
285
rev_dir = opj(page_dir, 'revisions')
286
if os.path.exists(rev_dir):
287
revlist = listdir(rev_dir)
288
revlist = [int(rev) for rev in revlist]
290
self.revlist = revlist
293
self.revisions[rev] = PageRev(self.request, self.name_old, rev_dir, rev)
295
self.is_deleted = not self.revisions or self.current not in self.revisions
296
# read attachment filenames
297
attach_dir = opj(page_dir, 'attachments')
298
if os.path.exists(attach_dir):
299
self.attachments = {}
300
attlist = listdir(attach_dir)
301
for attfile in attlist:
302
a = Attachment(self.request, attach_dir, attfile)
303
self.attachments[a.name] = a
305
def write(self, pages_dir):
306
""" write a page, including revisions, log, attachments to disk """
307
if ('PAGE', self.name) in self.renames:
308
name_new = self.renames[('PAGE', self.name)]
309
if name_new != self.name:
310
print "Renaming page %r -> %r" % (self.name, name_new)
311
self.name_old = self.name
313
qpagename = wikiutil.quoteWikinameFS(self.name)
314
page_dir = opj(pages_dir, qpagename)
315
os.makedirs(page_dir)
317
current = self.current
318
if current is not None:
319
if create_rev and not self.is_deleted:
321
current_fname = opj(page_dir, 'current')
322
current_file = file(current_fname, "w")
323
current_str = '%08d\n' % current
324
current_file.write(current_str)
327
if self.editlog is not None:
328
editlog_fname = opj(page_dir, 'edit-log')
329
self.editlog.copy(editlog_fname, self.renames, deleted=self.is_deleted)
330
# copy page revisions
331
if self.revisions is not None:
332
rev_dir = opj(page_dir, 'revisions')
334
for rev in self.revlist:
336
self.revisions[rev].copy(rev_dir, self.renames)
338
if int(rev) == self.current:
339
self.revisions[rev].copy(rev_dir, self.renames, convert=True)
341
self.revisions[rev].copy(rev_dir, self.renames)
342
if create_rev and not self.is_deleted:
343
self.revisions[rev].copy(rev_dir, self.renames, convert=True, new_rev=rev+1)
346
if self.attachments is not None:
347
attach_dir = opj(page_dir, 'attachments')
348
os.makedirs(attach_dir)
349
for fn, att in self.attachments.items():
350
# we have to check for renames here because we need the (old) pagename, too:
351
if ('FILE', self.name_old, fn) in self.renames:
352
fn_new = self.renames[('FILE', self.name_old, fn)]
354
print "Renaming file %r %r -> %r" % (self.name_old, fn, fn_new)
358
def copy(self, pages_dir, renames):
359
self.renames = renames
361
self.write(pages_dir)
365
""" represents a user with all related data """
366
def __init__(self, request, users_dir, uid):
367
self.request = request
369
self.users_dir = users_dir
371
self.bookmarks = None
374
""" read profile and bookmarks data from disk """
376
fname = opj(self.users_dir, self.uid)
378
f = codecs.open(fname, 'r', config.charset)
380
line = line.replace(u'\r', '').replace(u'\n', '')
381
if not line.strip() or line.startswith(u'#'): # skip empty or comment lines
384
key, value = line.split(u'=', 1)
385
except Exception, err:
386
print "Error: User reader can not parse line %r from profile %r (%s)" % (line, fname, str(err))
388
self.profile[key] = value
392
fname_pattern = opj(self.users_dir, "%s.*.bookmark" % self.uid)
393
for fname in glob.glob(fname_pattern):
397
wiki = fname.replace('.bookmark', '').replace(opj(self.users_dir, self.uid+'.'), '')
398
self.bookmarks[wiki] = int(bookmark)
399
# don't care about trail
401
def write(self, users_dir):
402
""" write profile and bookmarks data to disk """
403
fname = opj(users_dir, self.uid)
404
f = codecs.open(fname, 'w', config.charset)
405
for key, value in self.profile.items():
406
if key in (u'subscribed_pages', u'quicklinks'):
407
pages = value.split(u'\t')
408
for i in range(len(pages)):
411
interwiki, pagename = pagename.split(u':', 1)
413
interwiki, pagename = u'Self', pagename
414
if interwiki == u'Self' or interwiki == self.request.cfg.interwikiname:
415
if ('PAGE', pagename) in self.renames:
416
pagename = self.renames[('PAGE', pagename)]
417
pages[i] = u'%s:%s' % (interwiki, pagename)
418
key += '[]' # we have lists here
419
value = u'\t'.join(pages)
420
f.write(u"%s=%s\n" % (key, value))
422
f.write(u"%s=%s\n" % (key, value))
423
bookmark_entries = [u'%s:%s' % item for item in self.bookmarks.items()]
425
value = u'\t'.join(bookmark_entries)
426
f.write(u"%s=%s\n" % (key, value))
428
# don't care about trail
430
def copy(self, users_dir, renames):
431
self.renames = renames
433
self.write(users_dir)
436
class DataConverter(object):
437
def __init__(self, request, src_data_dir, dest_data_dir):
438
self.request = request
439
self.sdata = src_data_dir
440
self.ddata = dest_data_dir
445
self.complete_fname = opj(self.sdata, 'complete.txt')
446
self.rename_fname1 = opj(self.sdata, 'rename1.txt')
447
self.rename_fname2 = opj(self.sdata, 'rename2.txt')
450
""" First create the rename list - the user has to review/edit it as
451
we can't decide about page/attachment names automatically.
455
for pn, p in self.pages.items():
458
continue # we don't care for pages with no revisions (trash)
459
if pn.endswith('/MoinEditorBackup'):
460
continue # we don't care for old editor backups
461
self.complete[('PAGE', pn)] = None
463
# log all pagenames with underscores
464
self.renames[('PAGE', pn)] = None
465
if p.attachments is not None:
466
for fn in p.attachments:
468
fn_str = fn.encode('ascii')
469
log = False # pure ascii filenames are no problem
470
except UnicodeEncodeError:
471
log = True # this file maybe has a strange representation in wiki markup
473
if ' ' in fn_str or '%' in fn_str: # files with blanks need quoting
475
self.complete[('FILE', pn, fn)] = None
477
# log all strange attachment filenames
478
fn_str = fn.encode('utf-8')
479
self.renames[('FILE', pn, fn)] = None
480
self.save_list(self.complete_fname, self.complete)
481
self.save_list(self.rename_fname1, self.renames)
483
LIST_FIELDSEP = u'|' # in case | makes trouble, one can use \t tab char
485
def save_list(self, fname, what):
486
what_sorted = what.keys()
487
# make sure we have 3-tuples:
488
what_sorted = [(k + (None, ))[:3] for k in what_sorted]
489
# we only have python 2.3, thus no cmp keyword for the sort() call,
490
# thus we need to do it the more complicated way:
491
what_sorted = [(pn, fn, rtype) for rtype, pn, fn in what_sorted] # shuffle
492
what_sorted.sort() # sort
493
what_sorted = [(rtype, pn, fn) for pn, fn, rtype in what_sorted] # shuffle
494
f = codecs.open(fname, 'w', 'utf-8')
495
for rtype, pn, fn in what_sorted:
497
line = (rtype, pn, pn)
498
elif rtype == 'FILE':
499
line = (rtype, pn, fn, fn)
500
line = self.LIST_FIELDSEP.join(line)
501
f.write(line + u'\n')
504
def load_list(self, fname, what):
505
f = codecs.open(fname, 'r', 'utf-8')
510
t = line.split(self.LIST_FIELDSEP)
511
rtype, p1, p2, p3 = (t + [None]*3)[:4]
513
what[(str(rtype), p1)] = p2
514
elif rtype == u'FILE':
515
what[(str(rtype), p1, p2)] = p3
519
""" Second, read the (user edited) rename list and do the renamings everywhere. """
521
#self.load_list(self.complete_fname, self.complete)
522
self.load_list(self.rename_fname2, self.renames)
526
# create Page objects in memory
527
pages_dir = opj(self.sdata, 'pages')
528
pagelist = listdir(pages_dir)
529
for qpagename in pagelist:
530
p = Page(self.request, pages_dir, qpagename)
531
self.pages[p.name] = p
533
# create User objects in memory
534
users_dir = opj(self.sdata, 'user')
535
user_re = re.compile(r'^\d+\.\d+(\.\d+)?$')
536
userlist = listdir(users_dir)
537
userlist = [f for f in userlist if user_re.match(f)]
538
for userid in userlist:
539
u = User(self.request, users_dir, userid)
540
self.users[u.uid] = u
542
# create log objects in memory
543
self.editlog = EditLog(self.request, opj(self.sdata, 'edit-log'))
544
self.eventlog = EventLog(self.request, opj(self.sdata, 'event-log'))
546
def write_dest(self):
549
pages_dir = opj(self.ddata, 'pages')
550
for pn, page in self.pages.items():
551
if pn.endswith('/MoinEditorBackup'):
552
continue # we don't care for old editor backups
553
page.copy(pages_dir, self.renames)
556
users_dir = opj(self.ddata, 'user')
557
for user in self.users.values():
558
user.copy(users_dir, self.renames)
561
self.editlog.copy(opj(self.ddata, 'edit-log'), self.renames)
562
self.eventlog.copy(opj(self.ddata, 'event-log'), self.renames)
566
os.makedirs(self.ddata)
569
os.makedirs(opj(self.ddata, 'pages'))
570
os.makedirs(opj(self.ddata, 'user'))
571
copy_dir(opj(self.sdata, 'plugin'), opj(self.ddata, 'plugin'))
572
copy_file(opj(self.sdata, 'intermap.txt'), opj(self.ddata, 'intermap.txt'))