1
# Copyright (c) 2005 Allan Saddi <allan@saddi.com>
4
# Redistribution and use in source and binary forms, with or without
5
# modification, are permitted provided that the following conditions
7
# 1. Redistributions of source code must retain the above copyright
8
# notice, this list of conditions and the following disclaimer.
9
# 2. Redistributions in binary form must reproduce the above copyright
10
# notice, this list of conditions and the following disclaimer in the
11
# documentation and/or other materials provided with the distribution.
13
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25
# $Id: gzip.py 2106 2006-11-19 17:34:30Z asaddi $
27
__author__ = 'Allan Saddi <allan@saddi.com>'
28
__version__ = '$Revision: 2106 $'
35
__all__ = ['GzipMiddleware']
37
# This gzip middleware component differentiates itself from others in that
38
# it (hopefully) follows the spec more closely. Namely with regard to the
39
# application iterator and buffering. (It doesn't buffer.)
40
# See <http://www.python.org/peps/pep-0333.html#middleware-handling-of-block-boundaries>
42
# Of course this all comes with a price... just LOOK at this mess! :)
44
# The inner workings of gzip and the gzip file format were gleaned from gzip.py
47
"""Returns a gzip header (with no filename)."""
48
# See GzipFile._write_gzip_header in gzip.py
52
struct.pack('<L', long(time.time())) + \
56
class _iterWrapper(object):
58
gzip iterator wrapper. It ensures that: the application iterator's close()
59
method (if any) is called by the parent server; and at least one value
60
is yielded each time the application's iterator yields a value.
62
If the application's iterator yields N values, this iterator will yield
63
N+1 values. This is to account for the gzip trailer.
65
def __init__(self, appIter, gzipMiddleware):
66
self._g = gzipMiddleware
67
self._next = iter(appIter).next
69
self._last = False # True if appIter has yielded last value.
70
self._trailerSent = False
72
if hasattr(appIter, 'close'):
73
self.close = appIter.close
78
# This would've been a lot easier had I used a generator. But then I'd have
79
# to wrap the generator anyway to ensure that any existing close() method
80
# was called. (Calling it within the generator is not the same thing,
81
# namely it does not ensure that it will be called no matter what!)
84
# Need to catch StopIteration here so we can append trailer.
92
return self._g.gzipData(data)
96
# See if trailer needs to be sent.
97
if self._g.headerSent and not self._trailerSent:
98
self._trailerSent = True
99
return self._g.gzipTrailer()
100
# Otherwise, that's the end of this iterator.
103
class _gzipMiddleware(object):
105
The actual gzip middleware component. Holds compression state as well as
106
implementations of start_response and write. Instantiated before each
107
call to the underlying application.
109
This class is private. See GzipMiddleware for the public interface.
111
def __init__(self, start_response, mimeTypes, compresslevel):
112
self._start_response = start_response
113
self._mimeTypes = mimeTypes
116
self.headerSent = False
118
# See GzipFile.__init__ and GzipFile._init_write in gzip.py
119
self._crc = zlib.crc32('')
121
self._compress = zlib.compressobj(compresslevel,
127
def gzipData(self, data):
129
Compresses the given data, prepending the gzip header if necessary.
130
Returns the result as a string.
132
if not self.headerSent:
133
self.headerSent = True
138
# See GzipFile.write in gzip.py
142
self._crc = zlib.crc32(data, self._crc)
143
out += self._compress.compress(data)
146
def gzipTrailer(self):
147
# See GzipFile.close in gzip.py
148
return self._compress.flush() + \
149
struct.pack('<l', self._crc) + \
150
struct.pack('<L', self._size & 0xffffffffL)
152
def start_response(self, status, headers, exc_info=None):
155
# Scan the headers. Only allow gzip compression if the Content-Type
156
# is one that we're flagged to compress AND the headers do not
157
# already contain Content-Encoding.
158
for name,value in headers:
160
if name == 'content-type':
161
value = value.split(';')[0].strip()
162
for p in self._mimeTypes:
163
if p.match(value) is not None:
166
elif name == 'content-encoding':
171
# Remove Content-Length, if present, because compression will
172
# most surely change it. (And unfortunately, we can't predict
174
headers = [(name,value) for name,value in headers
175
if name.lower() != 'content-length']
176
headers.append(('Content-Encoding', 'gzip'))
178
_write = self._start_response(status, headers, exc_info)
181
def write_gzip(data):
182
_write(self.gzipData(data))
187
class GzipMiddleware(object):
189
WSGI middleware component that gzip compresses the application's output
190
(if the client supports gzip compression - gleaned from the
191
Accept-Encoding request header).
193
mimeTypes should be a list of Content-Types that are OK to compress.
195
compresslevel is the gzip compression level, an integer from 1 to 9; 1
196
is the fastest and produces the least compression, and 9 is the slowest,
197
producing the most compression.
199
def __init__(self, application, mimeTypes=None, compresslevel=9):
200
if mimeTypes is None:
201
mimeTypes = ['text/.*']
203
self._application = application
204
self._mimeTypes = [re.compile(m) for m in mimeTypes]
205
self._compresslevel = compresslevel
207
def __call__(self, environ, start_response):
208
"""WSGI application interface."""
209
# If the client doesn't support gzip encoding, just pass through
210
# directly to the application.
211
if 'gzip' not in environ.get('HTTP_ACCEPT_ENCODING', ''):
212
return self._application(environ, start_response)
214
# All of the work is done in _gzipMiddleware and _iterWrapper.
215
g = _gzipMiddleware(start_response, self._mimeTypes,
218
result = self._application(environ, g.start_response)
220
# See if it's a length 1 iterable...
222
shortcut = len(result) == 1
227
# Special handling if application returns a length 1 iterable:
228
# also return a length 1 iterable!
231
# Hmmm, if we get a StopIteration here, the application's
232
# broken (__len__ lied!)
235
return [g.gzipData(data) + g.gzipTrailer()]
239
if hasattr(result, 'close'):
242
return _iterWrapper(result, g)
244
if __name__ == '__main__':
245
def myapp(environ, start_response):
246
start_response('200 OK', [('Content-Type', 'text/html')])
247
return ['Hello World!\n']
248
app = GzipMiddleware(myapp)
250
from ajp import WSGIServer
252
WSGIServer(app, loggingLevel=logging.DEBUG).run()