13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
17
18
"""HyperArch: Pipermail archiving for Mailman
42
43
from email.Header import decode_header, make_header
44
from email.Errors import HeaderParseError
45
from email.Charset import Charset
44
47
from Mailman import mm_cfg
45
48
from Mailman import Utils
49
from Mailman import Errors
46
50
from Mailman import LockFile
47
51
from Mailman import MailList
48
52
from Mailman import i18n
276
280
otrans = i18n.get_translation()
278
282
i18n.set_language(lang)
279
self.email = re.sub('@', _(' at '), self.email)
283
if self.author == self.email:
284
self.author = self.email = re.sub('@', _(' at '),
287
self.email = re.sub('@', _(' at '), self.email)
281
289
i18n.set_translation(otrans)
287
295
self.ctype = ctype.lower()
288
296
self.cenc = cenc.lower()
289
297
self.decoded = {}
290
charset = message.get_param('charset', 'us-ascii')
291
if isinstance(charset, types.TupleType):
292
# An RFC 2231 charset
293
charset = unicode(charset[2], charset[0])
298
cset = Utils.GetCharSet(mlist.preferred_language)
299
cset_out = Charset(cset).output_charset or cset
300
charset = message.get_content_charset(cset_out)
295
302
charset = charset.lower().strip()
296
303
if charset[0]=='"' and charset[-1]=='"':
402
409
self.decoded['email'] = email
411
if mm_cfg.ARCHIVER_OBSCURES_EMAILADDRS:
412
otrans = i18n.get_translation()
414
i18n.set_language(self._lang)
415
atmark = unicode(_(' at '), Utils.GetCharSet(self._lang))
416
subject = re.sub(r'([-+,.\w]+)@([-+.\w]+)',
417
'\g<1>' + atmark + '\g<2>', subject)
419
i18n.set_translation(otrans)
404
420
self.decoded['subject'] = subject
421
self.decoded['stripped'] = self.strip_subject(subject or self.subject)
423
def strip_subject(self, subject):
424
# Strip subject_prefix and Re: for subject sorting
425
# This part was taken from CookHeaders.py (TK)
426
prefix = self._mlist.subject_prefix.strip()
428
prefix_pat = re.escape(prefix)
429
prefix_pat = '%'.join(prefix_pat.split(r'\%'))
430
prefix_pat = re.sub(r'%\d*d', r'\s*\d+\s*', prefix_pat)
431
subject = re.sub(prefix_pat, '', subject)
432
subject = subject.lstrip()
433
strip_pat = re.compile('^((RE|AW|SV|VS)(\[\d+\])?:\s*)+', re.I)
434
stripped = strip_pat.sub('', subject)
406
437
def decode_charset(self, field):
407
if field.find("=?") == -1:
409
# Get the decoded header as a list of (s, charset) tuples
410
pairs = decode_header(field)
411
# Use __unicode__() until we can guarantee Python 2.2
438
# TK: This function was rewritten for unifying to Unicode.
439
# Convert 'field' into Unicode one line string.
413
# Use a large number for maxlinelen so it won't get wrapped
414
h = make_header(pairs, 99999)
415
return h.__unicode__()
416
except (UnicodeError, LookupError):
419
# The last value for c will have the proper charset in it
420
return EMPTYSTRING.join([s for s, c in pairs])
441
pairs = decode_header(field)
442
ustr = make_header(pairs).__unicode__()
443
except (LookupError, UnicodeError, ValueError, HeaderParseError):
444
# assume list's language
445
cset = Utils.GetCharSet(self._mlist.preferred_language)
446
if cset == 'us-ascii':
447
cset = 'iso-8859-1' # assume this for English list
448
ustr = unicode(field, cset, 'replace')
449
return u''.join(ustr.splitlines())
422
451
def as_html(self):
423
452
d = self.__dict__.copy()
536
565
if d['_message_id']:
537
566
headers.append('Message-ID: %(_message_id)s')
538
567
body = EMPTYSTRING.join(self.body)
539
if isinstance(body, types.UnicodeType):
540
body = body.encode(Utils.GetCharSet(self._lang), 'replace')
541
return NL.join(headers) % d + '\n\n' + body
568
cset = Utils.GetCharSet(self._lang)
569
# Coerce the body to Unicode and replace any invalid characters.
570
if not isinstance(body, types.UnicodeType):
571
body = unicode(body, cset, 'replace')
572
if mm_cfg.ARCHIVER_OBSCURES_EMAILADDRS:
573
otrans = i18n.get_translation()
575
atmark = unicode(_(' at '), cset)
576
i18n.set_language(self._lang)
577
body = re.sub(r'([-+,.\w]+)@([-+.\w]+)',
578
'\g<1>' + atmark + '\g<2>', body)
580
i18n.set_translation(otrans)
581
# Return body to character set of article.
582
body = body.encode(cset, 'replace')
583
return NL.join(headers) % d + '\n\n' + body + '\n'
543
585
def _set_date(self, message):
544
586
self.__super_set_date(message)
1003
1045
subject = self.get_header("subject", article)
1004
1046
author = self.get_header("author", article)
1005
1047
if mm_cfg.ARCHIVER_OBSCURES_EMAILADDRS:
1006
author = re.sub('@', _(' at '), author)
1049
author = re.sub('@', _(' at '), author)
1050
except UnicodeError:
1051
# Non-ASCII author contains '@' ... no valid email anyway
1007
1053
subject = CGIescape(subject, self.lang)
1008
1054
author = CGIescape(author, self.lang)