~mailman-coders/mailman/2.1

1182 by Mark Sapiro
Backported several bug fixes from the 2.2 branch.
1
# Copyright (C) 2002-2009 by the Free Software Foundation, Inc.
1 by
This commit was manufactured by cvs2svn to create branch
2
#
3
# This program is free software; you can redistribute it and/or
4
# modify it under the terms of the GNU General Public License
5
# as published by the Free Software Foundation; either version 2
6
# of the License, or (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
830 by bwarsaw
A cleansing pass, almost entirely cosmetic. Such things as whitespace
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
16
# USA.
1 by
This commit was manufactured by cvs2svn to create branch
17
18
"""MIME-stripping filter for Mailman.
19
20
This module scans a message for MIME content, removing those sections whose
21
MIME types match one of a list of matches.  multipart/alternative sections are
22
replaced by the first non-empty component, and multipart/mixed sections
23
wrapping only single sections after other processing are replaced by their
24
contents.
25
"""
26
27
import os
28
import errno
29
import tempfile
463 by tkikuchi
Merging SF patches:
30
from os.path import splitext
1 by
This commit was manufactured by cvs2svn to create branch
31
32
from email.Iterators import typed_subpart_iterator
33
34
from Mailman import mm_cfg
35
from Mailman import Errors
36
from Mailman.Message import UserNotification
37
from Mailman.Queue.sbcache import get_switchboard
38
from Mailman.Logging.Syslog import syslog
39
from Mailman.Version import VERSION
40
from Mailman.i18n import _
463 by tkikuchi
Merging SF patches:
41
from Mailman.Utils import oneline
1 by
This commit was manufactured by cvs2svn to create branch
42
43
44

45
def process(mlist, msg, msgdata):
    """Apply the list's content filtering rules to msg, modifying it in place.

    Nothing happens unless mlist.filter_content is set, and messages flagged
    'isdigest' in msgdata are left alone.  Otherwise the outer content type
    and filename extension are checked against the list's filter/pass
    settings, matching subparts are removed, multipart/alternative sections
    are optionally collapsed, and text/html parts are optionally converted to
    text/plain.  If anything was changed, an X-Content-Filtered-By: header is
    added.

    Every call to dispose() below ends in an exception
    (Errors.RejectMessage or Errors.DiscardMessage), so processing stops at
    the first rule that rejects the whole message.
    """
    # Short-circuits: filtering is disabled, or this is a digest
    if not mlist.filter_content:
        return
    if msgdata.get('isdigest'):
        return
    ctype = msg.get_content_type()
    mtype = msg.get_content_maintype()
    # Check to see if the outer type matches one of the filter types
    filtertypes = mlist.filter_mime_types
    passtypes = mlist.pass_mime_types
    if ctype in filtertypes or mtype in filtertypes:
        dispose(mlist, msg, msgdata,
                _("The message's content type was explicitly disallowed"))
    # Check to see if there are pass types and the outer type doesn't match
    # any of them
    if passtypes and not (ctype in passtypes or mtype in passtypes):
        dispose(mlist, msg, msgdata,
                _("The message's content type was not explicitly allowed"))
    # Filter by file extensions
    filterexts = mlist.filter_filename_extensions
    passexts = mlist.pass_filename_extensions
    fext = get_file_ext(msg)
    if fext:
        if fext in filterexts:
            dispose(mlist, msg, msgdata,
                 _("The message's file extension was explicitly disallowed"))
        if passexts and not (fext in passexts):
            dispose(mlist, msg, msgdata,
                 _("The message's file extension was not explicitly allowed"))
    # Remember how many parts there were so that removals can be detected
    numparts = len(list(msg.walk()))
    # If the message is a multipart, filter out matching subparts
    if msg.is_multipart():
        # Recursively filter out any subparts that match the filter list
        prelen = len(msg.get_payload())
        filter_parts(msg, filtertypes, passtypes, filterexts, passexts)
        # If the outer message is now an empty multipart (and it wasn't
        # before!) then, again it gets discarded.
        postlen = len(msg.get_payload())
        if postlen == 0 and prelen > 0:
            dispose(mlist, msg, msgdata,
                    _("After content filtering, the message was empty"))
    # Now replace all multipart/alternatives with just the first non-empty
    # alternative.  BAW: We have to special case when the outer part is a
    # multipart/alternative because we need to retain most of the outer part's
    # headers.  For now we'll move the subpart's payload into the outer part,
    # and then copy over its Content-Type: and Content-Transfer-Encoding:
    # headers (any others?).
    if mlist.collapse_alternatives:
        collapse_multipart_alternatives(msg)
        if ctype == 'multipart/alternative':
            try:
                firstalt = msg.get_payload(0)
            except (IndexError, TypeError):
                # There is no alternative left to promote (empty payload), or
                # the payload is not a list despite the declared type.  Leave
                # the message as it is, the same way
                # collapse_multipart_alternatives() treats nested parts.
                pass
            else:
                reset_payload(msg, firstalt)
    # If we removed some parts, make note of this
    changedp = 0
    if numparts != len(list(msg.walk())):
        changedp = 1
    # Now perhaps convert all text/html to text/plain
    if mlist.convert_html_to_plaintext and mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND:
        changedp += to_plaintext(msg)
    # If we're left with only two parts, an empty body and one attachment,
    # recast the message to one of just that part
    if msg.is_multipart() and len(msg.get_payload()) == 2:
        if msg.get_payload(0).get_payload() == '':
            useful = msg.get_payload(1)
            reset_payload(msg, useful)
            changedp = 1
    if changedp:
        msg['X-Content-Filtered-By'] = 'Mailman/MimeDel %s' % VERSION
115
116
117

118
def reset_payload(msg, subpart):
    """Make msg carry subpart's payload and content headers.

    The payload of msg is replaced by that of subpart.  The Content-Type:,
    Content-Transfer-Encoding:, Content-Disposition: and Content-Description:
    headers of msg are dropped and re-created from subpart; Content-Type:
    falls back to text/plain, the other three are only added when subpart
    has a non-empty value for them.  All other headers of msg are untouched.
    """
    msg.set_payload(subpart.get_payload())
    # Remove every stale content header before adding any replacement
    for stale in ('content-type', 'content-transfer-encoding',
                  'content-disposition', 'content-description'):
        del msg[stale]
    msg['Content-Type'] = subpart.get('content-type', 'text/plain')
    # The remaining headers are optional, copy only those that are present
    for header in ('Content-Transfer-Encoding', 'Content-Disposition',
                   'Content-Description'):
        value = subpart.get(header)
        if value:
            msg[header] = value
136
137
138

463 by tkikuchi
Merging SF patches:
139
def filter_parts(msg, filtertypes, passtypes, filterexts, passexts):
    """Recursively drop the subparts of msg that the filter rules reject.

    A subpart is dropped when filtering its own children left it empty, when
    its content type or main type is in filtertypes, when passtypes is
    non-empty and names neither its content type nor its main type, or when
    it has a filename extension that is in filterexts or is missing from a
    non-empty passexts.  The payload of msg is replaced by the survivors.

    Returns 0 if msg had subparts and all of them were dropped, 1 otherwise
    (including when msg is not a multipart at all).
    """
    if not msg.is_multipart():
        return 1
    candidates = msg.get_payload()
    survivors = []
    for part in candidates:
        # Depth first: a nested multipart that ends up empty goes away too
        if not filter_parts(part, filtertypes, passtypes,
                            filterexts, passexts):
            continue
        fulltype = part.get_content_type()
        maintype = part.get_content_maintype()
        if fulltype in filtertypes or maintype in filtertypes:
            # Explicitly filtered type
            continue
        if (passtypes and fulltype not in passtypes
                and maintype not in passtypes):
            # A pass list exists and this type is not on it
            continue
        # Same two rules again, this time for the filename extension
        extension = get_file_ext(part)
        if extension and (extension in filterexts or
                          (passexts and extension not in passexts)):
            continue
        survivors.append(part)
    msg.set_payload(survivors)
    # Tell the caller when everything that used to be here was thrown away
    if candidates and not survivors:
        return 0
    return 1
174
175
176

177
def collapse_multipart_alternatives(msg):
    """Replace each multipart/alternative below msg by its first subpart.

    Works on the direct subparts of msg and recurses into any other kind of
    multipart subpart.  A multipart/alternative whose first subpart cannot
    be fetched (empty payload, or a payload that is not a list) is removed
    altogether.  msg itself is never replaced, only its payload list.
    Does nothing when msg is not a multipart.
    """
    if not msg.is_multipart():
        return
    kept = []
    for part in msg.get_payload():
        if part.get_content_type() != 'multipart/alternative':
            # Not an alternative: keep it, collapsing inside it first when
            # it is a container itself
            if part.is_multipart():
                collapse_multipart_alternatives(part)
            kept.append(part)
            continue
        try:
            first = part.get_payload(0)
        except (IndexError, TypeError):
            # Nothing usable inside, so the whole alternative disappears
            continue
        kept.append(first)
    msg.set_payload(kept)
194
195
196

197
def to_plaintext(msg):
    """Convert every text/html part of msg to text/plain, in place.

    The decoded payload of each text/html part is written to a temporary
    file, mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND is run with %(filename)s
    replaced by the name of that file, and whatever the command prints
    becomes the new payload of the part.  A non-zero exit status of the
    command is logged but the output is used anyway.

    Returns 1 if at least one part was converted, 0 otherwise.
    """
    import sys
    changedp = 0
    for subpart in typed_subpart_iterator(msg, 'text', 'html'):
        # mkstemp() creates and opens the file in one step; mktemp() only
        # picks a name, which leaves a window for somebody else to create
        # that file first.
        fd, filename = tempfile.mkstemp('.html')
        try:
            fp = os.fdopen(fd, 'w')
            try:
                fp.write(subpart.get_payload(decode=1))
            finally:
                # Close even when the write fails so the handle is not leaked
                fp.close()
            cmd = os.popen(mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND %
                           {'filename': filename})
            plaintext = cmd.read()
            rtn = cmd.close()
            if rtn:
                syslog('error', 'HTML->text/plain error: %s', rtn)
        finally:
            try:
                os.unlink(filename)
            except OSError:
                # A file that is already gone is fine, anything else is not.
                # sys.exc_info() is used to get at the exception because it
                # is spelled the same on old and new Python versions.
                if sys.exc_info()[1].errno != errno.ENOENT:
                    raise
        # Now replace the payload of the subpart and twiddle the Content-Type:
        # The payload was decoded above, so the old transfer encoding no
        # longer describes it.
        del subpart['content-transfer-encoding']
        subpart.set_payload(plaintext)
        subpart.set_type('text/plain')
        changedp = 1
    return changedp
222
223
224

225
def dispose(mlist, msg, msgdata, why):
    """Get rid of a message that failed content filtering; always raises.

    What happens depends on mlist.filter_action:

    1 -- raise Errors.RejectMessage with why as its argument
    2 -- forward the message through mlist.ForwardMessage(), then discard
    3 -- if mm_cfg.OWNERS_CAN_PRESERVE_FILTERED_MESSAGES is true, put the
         message into the queue at mm_cfg.BADQUEUE_DIR, then discard

    Any other value (and the tail end of 2 and 3) raises
    Errors.DiscardMessage.
    """
    action = mlist.filter_action
    if action == 1:
        # Bounce the message to the original author
        raise Errors.RejectMessage(why)
    if action == 2:
        # Forward it on to the list owner.
        # NOTE(review): listname is not referenced again in this function;
        # presumably _() picks it up from the caller's local variables to
        # fill in %(listname)s -- confirm in Mailman.i18n before renaming
        # or moving it.
        listname = mlist.internal_name()
        notice = _("""\
The attached message matched the %(listname)s mailing list's content filtering
rules and was prevented from being forwarded on to the list membership.  You
are receiving the only remaining copy of the discarded message.

""")
        mlist.ForwardMessage(
            msg,
            text=notice,
            subject=_('Content filtered message notification'))
    elif action == 3 and mm_cfg.OWNERS_CAN_PRESERVE_FILTERED_MESSAGES:
        # Keep a copy in the bad queue
        get_switchboard(mm_cfg.BADQUEUE_DIR).enqueue(msg, msgdata)
    # Whatever was done above, the message does not go on to the list
    raise Errors.DiscardMessage
463 by tkikuchi
Merging SF patches:
248
249
def get_file_ext(m):
    """Return the lower-cased filename extension of message part m.

    The filename is taken from m.get_filename() or, failing that, from the
    'name' parameter returned by m.get_param().  The extension is returned
    without its leading dot; the result is '' when there is no filename or
    the filename has no extension.

    Caution: some viruses don't put a filename in the Content-Disposition:
    header.
    """
    filename = m.get_filename('') or m.get_param('name', '')
    if not filename:
        return ''
    suffix = splitext(oneline(filename, 'utf-8'))[1]
    # splitext() gives '' or a lone '.' when there is nothing useful
    if len(suffix) <= 1:
        return ''
    return suffix[1:].lower()