1
# Copyright (C) 2001-2006 Python Software Foundation
3
# Contact: email-sig@python.org
5
"""Miscellaneous utilities."""
8
'collapse_rfc2231_value',
31
from email._parseaddr import quote
32
from email._parseaddr import AddressList as _AddressList
33
from email._parseaddr import mktime_tz
35
# We need workarounds for bugs in these methods in older Pythons (see below)
36
from email._parseaddr import parsedate as _parsedate
37
from email._parseaddr import parsedate_tz as _parsedate_tz
39
from quopri import decodestring as _qdecode
41
# Intrapackage imports
42
from email.encoders import _bencode, _qencode
50
specialsre = re.compile(r'[][\\()<>@,:;".]')
51
escapesre = re.compile(r'[][\\()"]')
62
# We can't quite use base64.encodestring() since it tacks on a "courtesy
66
value = base64.decodestring(s)
67
if not s.endswith('\n') and value.endswith('\n'):
74
"""Replace all line-ending characters with \r\n."""
75
# Fix newlines with no preceding carriage return
76
s = re.sub(r'(?<!\r)\n', CRLF, s)
77
# Fix carriage returns with no following newline
78
s = re.sub(r'\r(?!\n)', CRLF, s)
84
"""The inverse of parseaddr(), this takes a 2-tuple of the form
85
(realname, email_address) and returns the string value suitable
86
for an RFC 2822 From, To or Cc header.
88
If the first element of pair is false, then the second element is
94
if specialsre.search(name):
96
name = escapesre.sub(r'\\\g<0>', name)
97
return '%s%s%s <%s>' % (quotes, name, quotes, address)
102
def getaddresses(fieldvalues):
103
"""Return a list of (REALNAME, EMAIL) for each fieldvalue."""
104
all = COMMASPACE.join(fieldvalues)
105
a = _AddressList(all)
110
ecre = re.compile(r'''
112
(?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
114
(?P<encoding>[qb]) # either a "q" or a "b", case insensitive
116
(?P<atom>.*?) # non-greedy up to the next ?= is the atom
118
''', re.VERBOSE | re.IGNORECASE)
122
def formatdate(timeval=None, localtime=False, usegmt=False):
123
"""Returns a date string as specified by RFC 2822, e.g.:
125
Fri, 09 Nov 2001 01:08:47 -0000
127
Optional timeval if given is a floating point time value as accepted by
128
gmtime() and localtime(), otherwise the current time is used.
130
Optional localtime is a flag that when True, interprets timeval, and
131
returns a date relative to the local timezone instead of UTC, properly
132
taking daylight savings time into account.
134
Optional argument usegmt means that the timezone is written out as
135
an ascii string, not numeric one (so "GMT" instead of "+0000"). This
136
is needed for HTTP, and is only used when localtime==False.
138
# Note: we cannot use strftime() because that honors the locale and RFC
139
# 2822 requires that day and month names be the English abbreviations.
141
timeval = time.time()
143
now = time.localtime(timeval)
144
# Calculate timezone offset, based on whether the local zone has
145
# daylight savings time, and whether DST is in effect.
146
if time.daylight and now[-1]:
147
offset = time.altzone
149
offset = time.timezone
150
hours, minutes = divmod(abs(offset), 3600)
151
# Remember offset is in seconds west of UTC, but the timezone is in
152
# minutes east of UTC, so the signs differ.
157
zone = '%s%02d%02d' % (sign, hours, minutes // 60)
159
now = time.gmtime(timeval)
160
# Timezone offset is always -0000
165
return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
166
['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]],
168
['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
169
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
170
now[0], now[3], now[4], now[5],
175
def make_msgid(idstring=None):
176
"""Returns a string suitable for RFC 2822 compliant Message-ID, e.g:
178
<20020201195627.33539.96671@nightshade.la.mastaler.com>
180
Optional idstring if given is a string used to strengthen the
181
uniqueness of the message id.
183
timeval = time.time()
184
utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
186
randint = random.randrange(100000)
190
idstring = '.' + idstring
191
idhost = socket.getfqdn()
192
msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, idhost)
197
# These functions are in the standalone mimelib version only because they've
198
# subsequently been fixed in the latest Python versions. We use this to work
199
# around broken older Pythons.
203
return _parsedate(data)
206
def parsedate_tz(data):
209
return _parsedate_tz(data)
213
addrs = _AddressList(addr).addresslist
219
# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
221
"""Remove quotes from a string."""
223
if str.startswith('"') and str.endswith('"'):
224
return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
225
if str.startswith('<') and str.endswith('>'):
231
# RFC2231-related functions - parameter encoding and decoding
232
def decode_rfc2231(s):
233
"""Decode string according to RFC 2231"""
234
parts = s.split(TICK, 2)
240
def encode_rfc2231(s, charset=None, language=None):
241
"""Encode string according to RFC 2231.
243
If neither charset nor language is given, then s is returned as-is. If
244
charset is given but not language, the string is encoded using the empty
248
s = urllib.quote(s, safe='')
249
if charset is None and language is None:
253
return "%s'%s'%s" % (charset, language, s)
256
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
258
def decode_params(params):
259
"""Decode parameters list according to RFC 2231.
261
params is a sequence of 2-tuples containing (param name, string value).
263
# Copy params so we don't mess with the original
266
# Map parameter's name to a list of continuations. The values are a
267
# 3-tuple of the continuation number, the string value, and a flag
268
# specifying whether a particular segment is %-encoded.
270
name, value = params.pop(0)
271
new_params.append((name, value))
273
name, value = params.pop(0)
274
if name.endswith('*'):
278
value = unquote(value)
279
mo = rfc2231_continuation.match(name)
281
name, num = mo.group('name', 'num')
284
rfc2231_params.setdefault(name, []).append((num, value, encoded))
286
new_params.append((name, '"%s"' % quote(value)))
288
for name, continuations in rfc2231_params.items():
293
# And now append all values in numerical order, converting
294
# %-encodings for the encoded segments. If any of the
295
# continuation names ends in a *, then the entire string, after
296
# decoding segments and concatenating, must have the charset and
297
# language specifiers at the beginning of the string.
298
for num, s, encoded in continuations:
300
s = urllib.unquote(s)
303
value = quote(EMPTYSTRING.join(value))
305
charset, language, value = decode_rfc2231(value)
306
new_params.append((name, (charset, language, '"%s"' % value)))
308
new_params.append((name, '"%s"' % value))
311
def collapse_rfc2231_value(value, errors='replace',
312
fallback_charset='us-ascii'):
313
if isinstance(value, tuple):
314
rawval = unquote(value[2])
315
charset = value[0] or 'us-ascii'
317
return unicode(rawval, charset, errors)
319
# XXX charset is unknown to Python.
320
return unicode(rawval, fallback_charset, errors)
322
return unquote(value)