1
# Copyright (C) 2001-2012 by the Free Software Foundation, Inc.
3
# This file is part of GNU Mailman.
5
# GNU Mailman is free software: you can redistribute it and/or modify it under
6
# the terms of the GNU General Public License as published by the Free
7
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
10
# GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
11
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
15
# You should have received a copy of the GNU General Public License along with
16
# GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
18
"""Clean up an .mbox archive file."""
25
from mailman.core.i18n import _
26
from mailman.version import MAILMAN_VERSION
29
# Pattern for a strict Unix-From separator line, taken from the standard
# library's mailbox module rather than being re-implemented here.
# NOTE(review): UnixMailbox and its private _fromlinepattern attribute belong
# to the Python 2 mailbox module -- confirm availability before porting.
cre = re.compile(mailbox.UnixMailbox._fromlinepattern)
30
# From RFC 2822, a header field name must contain only characters from 33-126
31
# inclusive, excluding colon. I.e. from oct 41 to oct 176 less oct 072. Must
32
# use re.match() so that it's anchored at the beginning of the line.
# The pattern itself only requires one or more field-name characters; it does
# not check for the colon that terminates the field name.
33
fre = re.compile(r'[\041-\071\073-\176]+')
38
def parseargs():
    """Build the command-line parser and parse the program arguments.

    The script takes options only; its input and output are standard input
    and standard output.  Any positional argument is therefore rejected
    through ``parser.error()``, which prints the usage text plus the message
    to standard error and exits.

    :return: A 3-tuple ``(parser, opts, args)`` holding the option parser,
        the parsed option values and the (empty) list of positional
        arguments.
    """
    # NOTE(review): the keyword introducing the text below and the end of its
    # last sentence were not visible in the reviewed source; `usage=` is
    # assumed because of the %prog placeholder -- confirm.
    parser = optparse.OptionParser(version=MAILMAN_VERSION,
                                   usage=_("""\
%prog [options] < inputfile > outputfile

The archiver looks for Unix-From lines separating messages in an mbox archive
file. For compatibility, it specifically looks for lines that start with
'From ' -- i.e. the letters capital-F, lowercase-r, o, m, space, ignoring
everything else on the line.

Normally, any lines that start 'From ' in the body of a message should be
escaped such that a > character is actually the first on a line. It is
possible though that body lines are not actually escaped. This script
attempts to fix these by doing a stricter test of the Unix-From lines. Any
lines that start 'From ' but do not pass this stricter test are escaped with a
> character."""))
    parser.add_option('-q', '--quiet',
                      default=False, action='store_true', help=_("""\
Don't print changed line information to standard error."""))
    parser.add_option('-s', '--status',
                      default=-1, type='int', help=_("""\
Print a '#' character for every n lines processed. With a number less than or
equal to zero, suppress the '#' characters."""))
    parser.add_option('-n', '--dry-run',
                      default=False, action='store_true', help=_("""\
Don't actually output anything."""))
    # optparse spells these parse_args() and error(); the previous
    # parser_args() / print_error() spellings raise AttributeError.
    opts, args = parser.parse_args()
    if args:
        parser.error(_('Unexpected arguments'))
    return parser, opts, args
70
def escape_line(line, lineno, quiet, output):
    """Escape a bogus Unix-From line and optionally report the change.

    :param line: The offending line; its last character (expected to be the
        newline) is dropped when the line is echoed to standard error.
    :param lineno: Line number of `line` in the input.  Presumably picked up
        by `_` for the `$lineno` placeholder in the diagnostic -- confirm.
    :param quiet: When true, nothing is reported on standard error.
    :param output: When true, the escaped line is written to standard
        output; when false (a dry run) nothing is written there.
    """
    if output:
        # A leading '>' keeps the line from starting with 'From '.
        sys.stdout.write('>' + line)
    if not quiet:
        print >> sys.stderr, _('Unix-From line changed: $lineno')
        # print appends its own newline, so strip the one on the line.
        print >> sys.stderr, line[:-1]
80
# Body of the main routine: read the mbox from standard input line by line
# and copy it to standard output, escaping 'From ' lines that are not real
# message separators.
# NOTE(review): the enclosing `def` line, the read loop, the bookkeeping
# variables (lineno, messages, prevline) and several branch headers are not
# present in this text; the gaps in the stray line numbers suggest lines were
# lost.  Restore them from version control before relying on this code.
# NOTE(review): `quiet`, `output` and `status` are used below but never
# assigned in the visible code.  Presumably they should be derived from
# `opts` (--quiet, --dry-run, --status) -- confirm.
parser, opts, args = parseargs()
88
line = sys.stdin.readline()
91
# Only lines that begin with the literal 'From ' are candidates.
if line.startswith('From '):
93
# NOTE(review): `cre` is compiled at module level but never used in the
# visible code; presumably a stricter `cre.match(line)` test guards the
# branch described below -- confirm.
# This is a real Unix-From line. But it could be a message
94
# /about/ Unix-From lines, so as a second order test, make
95
# sure there's at least one RFC 2822 header following
96
nextline = sys.stdin.readline()
99
# It was the last line of the mbox, so it couldn't have
# been a real Unix-From line; escape it.
101
escape_line(line, lineno, quiet, output)
103
# A header line needs a colon (the split then yields two parts) and must
# begin with at least one valid field-name character (fre.match).
fieldname = nextline.split(':', 1)
104
if len(fieldname) < 2 or not fre.match(nextline):
105
# The following line was not a header, so this wasn't a
# valid Unix-From line; escape it and pass the next line through.
107
escape_line(line, lineno, quiet, output)
109
sys.stdout.write(nextline)
111
# It's a valid Unix-From line
114
# Before we spit out the From_ line, make sure the
115
# previous line was blank.
116
if prevline is not None and prevline != '\n':
117
sys.stdout.write('\n')
118
sys.stdout.write(line)
119
sys.stdout.write(nextline)
121
# This is a bogus Unix-From line
122
escape_line(line, lineno, quiet, output)
125
sys.stdout.write(line)
# Progress indicator: one '#' on standard error every `status` lines; a
# value of zero or less disables it.
126
if status > 0 and (lineno % status) == 0:
127
sys.stderr.write('#')
133
# NOTE(review): this message uses a %(messages)d placeholder while
# escape_line uses the $lineno style; confirm that `_` interpolates it.
print >> sys.stderr, _('%(messages)d messages found')