1
from __future__ import generators
4
from zope.interface import implements
8
from twisted.internet import defer
9
from twisted.web2.stream import IStream, FileStream, BufferedStream, readStream
10
from twisted.web2.stream import generatorToStream, readAndDiscard
11
from twisted.web2 import http_headers
12
from cStringIO import StringIO
14
###################################
15
##### Multipart MIME Reader #####
16
###################################
18
class MimeFormatError(Exception):
21
# parseContentDispositionFormData is absolutely horrible, but as
22
# browsers don't seem to believe in sensible quoting rules, it's
23
# really the only way to handle the header. (Quotes can be in the
24
# filename, unescaped)
25
cd_regexp = re.compile(
26
' *form-data; *name="([^"]*)"(?:; *filename="(.*)")?$',
29
def parseContentDispositionFormData(value):
30
match = cd_regexp.match(value)
33
raise ValueError("Unknown content-disposition format.")
35
filename=match.group(2)
39
#@defer.deferredGenerator
40
def _readHeaders(stream):
41
"""Read the MIME headers. Assumes we've just finished reading in the
44
ctype = fieldname = filename = None
49
line = stream.readline(size=1024)
50
if isinstance(line, defer.Deferred):
51
line = defer.waitForDeferred(line)
53
line = line.getResult()
55
if not line.endswith('\r\n'):
57
raise MimeFormatError("Unexpected end of stream.")
59
raise MimeFormatError("Header line too long")
61
line = line[:-2] # strip \r\n
63
break # End of headers
65
parts = line.split(':', 1)
67
raise MimeFormatError("Header did not have a :")
70
headers.append((name, value))
72
if name == "content-type":
73
ctype = http_headers.parseContentType(http_headers.tokenize((value,), foldCase=False))
74
elif name == "content-disposition":
75
fieldname, filename = parseContentDispositionFormData(value)
78
# Store the fallback content type. The original used `==`, which only
# compared and discarded the result, so ctype was left unchanged.
ctype = http_headers.MimeType('application', 'octet-stream')
80
raise MimeFormatError('Content-disposition invalid or omitted.')
82
# End of headers, return (field name, content-type, filename)
83
yield fieldname, filename, ctype
85
_readHeaders = defer.deferredGenerator(_readHeaders)
88
class _BoundaryWatchingStream(object):
89
def __init__(self, stream, boundary):
91
self.boundary = boundary
93
self.deferred = defer.Deferred()
95
length = None # unknown
97
if self.stream is None:
98
if self.deferred is not None:
99
deferred = self.deferred
101
deferred.callback(None)
103
newdata = self.stream.read()
104
if isinstance(newdata, defer.Deferred):
105
return newdata.addCallbacks(self._gotRead, self._gotError)
106
return self._gotRead(newdata)
108
def _gotRead(self, newdata):
110
raise MimeFormatError("Unexpected EOF")
111
# BLECH, converting buffer back into string.
112
self.data += str(newdata)
114
boundary = self.boundary
115
off = data.find(boundary)
118
# No full boundary, check for the first character
119
off = data.rfind(boundary[0], max(0, len(data)-len(boundary)))
121
# We could have a partial boundary, store it for next time
122
self.data = data[off:]
128
self.stream.pushback(data[off+len(boundary):])
132
def _gotError(self, err):
133
# Propagate error back to MultipartMimeStream also
134
if self.deferred is not None:
135
deferred = self.deferred
137
deferred.errback(err)
141
# Assume error will be raised again and handled by MMS?
142
readAndDiscard(self).addErrback(lambda _: None)
144
class MultipartMimeStream(object):
146
def __init__(self, stream, boundary):
147
self.stream = BufferedStream(stream)
148
self.boundary = "--"+boundary
153
Return a deferred which will fire with a tuple of:
154
(fieldname, filename, ctype, dataStream)
155
or None when all done.
157
Format errors will be sent to the errback.
159
Returns None when all done.
161
IMPORTANT: you *must* exhaust dataStream returned by this call
162
before calling .read() again!
166
d = self._readFirstBoundary()
168
d = self._readBoundaryLine()
169
d.addCallback(self._doReadHeaders)
170
d.addCallback(self._gotHeaders)
173
def _readFirstBoundary(self):
174
#print "_readFirstBoundary"
175
line = self.stream.readline(size=1024)
176
if isinstance(line, defer.Deferred):
177
line = defer.waitForDeferred(line)
179
line = line.getResult()
180
if line != self.boundary + '\r\n':
181
raise MimeFormatError("Extra data before first boundary: %r looking for: %r" % (line, self.boundary + '\r\n'))
183
self.boundary = "\r\n"+self.boundary
186
_readFirstBoundary = defer.deferredGenerator(_readFirstBoundary)
188
def _readBoundaryLine(self):
189
#print "_readBoundaryLine"
190
line = self.stream.readline(size=1024)
191
if isinstance(line, defer.Deferred):
192
line = defer.waitForDeferred(line)
194
line = line.getResult()
201
raise MimeFormatError("Unexpected data on same line as boundary: %r" % (line,))
204
_readBoundaryLine = defer.deferredGenerator(_readBoundaryLine)
206
def _doReadHeaders(self, morefields):
207
#print "_doReadHeaders", morefields
210
return _readHeaders(self.stream)
212
def _gotHeaders(self, headers):
215
bws = _BoundaryWatchingStream(self.stream, self.boundary)
216
self.deferred = bws.deferred
222
def readIntoFile(stream, outFile, maxlen):
223
"""Read the stream into a file, but not if it's longer than maxlen.
224
Returns Deferred which will be triggered on finish.
230
curlen[0] += len(data)
231
if curlen[0] > maxlen:
232
raise MimeFormatError("Maximum length of %d bytes exceeded." %
236
return readStream(stream, write).addBoth(done)
238
#@defer.deferredGenerator
239
def parseMultipartFormData(stream, boundary,
240
maxMem=100*1024, maxFields=1024, maxSize=10*1024*1024):
241
# If the stream length is known to be too large upfront, abort immediately
243
if stream.length is not None and stream.length > maxSize:
244
raise MimeFormatError("Maximum length of %d bytes exceeded." %
247
mms = MultipartMimeStream(stream, boundary)
254
if isinstance(datas, defer.Deferred):
255
datas = defer.waitForDeferred(datas)
257
datas = datas.getResult()
262
if numFields == maxFields:
263
raise MimeFormatError("Maximum number of fields %d exceeded"%maxFields)
266
fieldname, filename, ctype, stream = datas
270
maxBuf = min(maxSize, maxMem)
272
outfile = tempfile.NamedTemporaryFile()
274
x = readIntoFile(stream, outfile, maxBuf)
275
if isinstance(x, defer.Deferred):
276
x = defer.waitForDeferred(x)
280
# Is a normal form field
282
data = outfile.read()
283
args.setdefault(fieldname, []).append(data)
288
maxSize -= outfile.tell()
290
files.setdefault(fieldname, []).append((filename, ctype, outfile))
295
parseMultipartFormData = defer.deferredGenerator(parseMultipartFormData)
297
###################################
298
##### x-www-urlencoded reader #####
299
###################################
302
def parse_urlencoded_stream(input, maxMem=100*1024,
303
keep_blank_values=False, strict_parsing=False):
311
except StopIteration:
317
raise MimeFormatError("Maximum length of %d bytes exceeded." %
319
pairs = str(data).split('&')
320
pairs[0] = lastdata + pairs[0]
323
for name_value in pairs:
324
nv = name_value.split('=', 1)
327
# Format the message before constructing the exception. The original
# applied `%` to the exception instance, which raises TypeError instead.
raise MimeFormatError("bad query field: %r" % (name_value,))
329
if len(nv[1]) or keep_blank_values:
330
name = urllib.unquote(nv[0].replace('+', ' '))
331
value = urllib.unquote(nv[1].replace('+', ' '))
333
parse_urlencoded_stream = generatorToStream(parse_urlencoded_stream)
335
def parse_urlencoded(stream, maxMem=100*1024, maxFields=1024,
336
keep_blank_values=False, strict_parsing=False):
340
s=parse_urlencoded_stream(stream, maxMem, keep_blank_values, strict_parsing)
344
if isinstance(datas, defer.Deferred):
345
datas = defer.waitForDeferred(datas)
347
datas = datas.getResult()
353
if numFields == maxFields:
354
raise MimeFormatError("Maximum number of fields %d exceeded"%maxFields)
357
d[name].append(value)
362
parse_urlencoded = defer.deferredGenerator(parse_urlencoded)
365
if __name__ == '__main__':
366
d = parseMultipartFormData(
367
FileStream(open("upload.txt")), "----------0xKhTmLbOuNdArY")
368
from twisted.python import log
369
d.addErrback(log.err)
374
__all__ = ['parseMultipartFormData', 'parse_urlencoded', 'parse_urlencoded_stream', 'MultipartMimeStream', 'MimeFormatError']