1
# -*- test-case-name: twisted.web.test.test_http -*-
2
# Copyright (c) 2001-2010 Twisted Matrix Laboratories.
3
# See LICENSE for details.
6
HyperText Transfer Protocol implementation.
8
This is used by twisted.web.
11
- HTTP client support will at some point be refactored to support HTTP/1.1.
12
- Accept chunked data from clients in server.
13
- Other missing HTTP features from the RFC.
15
Maintainer: Itamar Shtull-Trauring
19
from cStringIO import StringIO
21
import base64, binascii
29
from urlparse import urlparse as _urlparse
31
from zope.interface import implements
34
from twisted.internet import interfaces, reactor, protocol, address
35
from twisted.internet.defer import Deferred
36
from twisted.protocols import policies, basic
37
from twisted.python import log
38
try: # try importing the fast, C version
39
from twisted.protocols._c_urlarg import unquote
41
from urllib import unquote
43
from twisted.web.http_headers import _DictHeaders, Headers
45
protocol_version = "HTTP/1.1"
53
NON_AUTHORITATIVE_INFORMATION = 203
60
MOVED_PERMANENTLY = 301
65
TEMPORARY_REDIRECT = 307
69
PAYMENT_REQUIRED = 402
74
PROXY_AUTH_REQUIRED = 407
79
PRECONDITION_FAILED = 412
80
REQUEST_ENTITY_TOO_LARGE = 413
81
REQUEST_URI_TOO_LONG = 414
82
UNSUPPORTED_MEDIA_TYPE = 415
83
REQUESTED_RANGE_NOT_SATISFIABLE = 416
84
EXPECTATION_FAILED = 417
86
INTERNAL_SERVER_ERROR = 500
89
SERVICE_UNAVAILABLE = 503
91
HTTP_VERSION_NOT_SUPPORTED = 505
92
INSUFFICIENT_STORAGE_SPACE = 507
97
_CONTINUE: "Continue",
98
SWITCHING: "Switching Protocols",
103
ACCEPTED: "Accepted",
104
NON_AUTHORITATIVE_INFORMATION: "Non-Authoritative Information",
105
NO_CONTENT: "No Content",
106
RESET_CONTENT: "Reset Content.",
107
PARTIAL_CONTENT: "Partial Content",
108
MULTI_STATUS: "Multi-Status",
111
MULTIPLE_CHOICE: "Multiple Choices",
112
MOVED_PERMANENTLY: "Moved Permanently",
114
SEE_OTHER: "See Other",
115
NOT_MODIFIED: "Not Modified",
116
USE_PROXY: "Use Proxy",
118
TEMPORARY_REDIRECT: "Temporary Redirect",
121
BAD_REQUEST: "Bad Request",
122
UNAUTHORIZED: "Unauthorized",
123
PAYMENT_REQUIRED: "Payment Required",
124
FORBIDDEN: "Forbidden",
125
NOT_FOUND: "Not Found",
126
NOT_ALLOWED: "Method Not Allowed",
127
NOT_ACCEPTABLE: "Not Acceptable",
128
PROXY_AUTH_REQUIRED: "Proxy Authentication Required",
129
REQUEST_TIMEOUT: "Request Time-out",
130
CONFLICT: "Conflict",
132
LENGTH_REQUIRED: "Length Required",
133
PRECONDITION_FAILED: "Precondition Failed",
134
REQUEST_ENTITY_TOO_LARGE: "Request Entity Too Large",
135
REQUEST_URI_TOO_LONG: "Request-URI Too Long",
136
UNSUPPORTED_MEDIA_TYPE: "Unsupported Media Type",
137
REQUESTED_RANGE_NOT_SATISFIABLE: "Requested Range not satisfiable",
138
EXPECTATION_FAILED: "Expectation Failed",
141
INTERNAL_SERVER_ERROR: "Internal Server Error",
142
NOT_IMPLEMENTED: "Not Implemented",
143
BAD_GATEWAY: "Bad Gateway",
144
SERVICE_UNAVAILABLE: "Service Unavailable",
145
GATEWAY_TIMEOUT: "Gateway Time-out",
146
HTTP_VERSION_NOT_SUPPORTED: "HTTP Version not supported",
147
INSUFFICIENT_STORAGE_SPACE: "Insufficient Storage Space",
148
NOT_EXTENDED: "Not Extended"
151
CACHED = """Magic constant returned by http.Request methods to set cache
152
validation headers when the request is conditional and the value fails
155
# backwards compatibility
156
responses = RESPONSES
159
# datetime parsing and formatting
160
weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
162
'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
163
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
164
weekdayname_lower = [name.lower() for name in weekdayname]
165
monthname_lower = [name and name.lower() for name in monthname]
169
Parse a URL into six components.
171
This is similar to L{urlparse.urlparse}, but rejects C{unicode} input
172
and always produces C{str} output.
176
@raise TypeError: The given url was a C{unicode} string instead of a
179
@rtype: six-tuple of str
180
@return: The scheme, net location, path, params, query string, and fragment
183
if isinstance(url, unicode):
184
raise TypeError("url must be str, not unicode")
185
scheme, netloc, path, params, query, fragment = _urlparse(url)
186
if isinstance(scheme, unicode):
187
scheme = scheme.encode('ascii')
188
netloc = netloc.encode('ascii')
189
path = path.encode('ascii')
190
query = query.encode('ascii')
191
fragment = fragment.encode('ascii')
192
return scheme, netloc, path, params, query, fragment
195
def parse_qs(qs, keep_blank_values=0, strict_parsing=0, unquote=unquote):
197
like cgi.parse_qs, only with custom unquote function
200
items = [s2 for s1 in qs.split("&") for s2 in s1.split(";")]
203
k, v = item.split("=", 1)
208
if v or keep_blank_values:
209
k = unquote(k.replace("+", " "))
210
v = unquote(v.replace("+", " "))
217
def datetimeToString(msSinceEpoch=None):
219
Convert seconds since epoch to HTTP datetime string.
221
if msSinceEpoch == None:
222
msSinceEpoch = time.time()
223
year, month, day, hh, mm, ss, wd, y, z = time.gmtime(msSinceEpoch)
224
s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
226
day, monthname[month], year,
230
def datetimeToLogString(msSinceEpoch=None):
232
Convert seconds since epoch to log datetime string.
234
if msSinceEpoch == None:
235
msSinceEpoch = time.time()
236
year, month, day, hh, mm, ss, wd, y, z = time.gmtime(msSinceEpoch)
237
s = "[%02d/%3s/%4d:%02d:%02d:%02d +0000]" % (
238
day, monthname[month], year,
243
# a hack so we don't need to recalculate log datetime every hit,
244
# at the price of a small, unimportant, inaccuracy.
246
_logDateTimeUsers = 0
247
_resetLogDateTimeID = None
249
def _resetLogDateTime():
251
global _resetLogDateTime
252
global _resetLogDateTimeID
253
_logDateTime = datetimeToLogString()
254
_resetLogDateTimeID = reactor.callLater(1, _resetLogDateTime)
256
def _logDateTimeStart():
257
global _logDateTimeUsers
258
if not _logDateTimeUsers:
260
_logDateTimeUsers += 1
262
def _logDateTimeStop():
    """
    Release one user of the cached log date/time string.

    When the user count reaches zero and a refresh call is still scheduled
    and active, that call is cancelled.
    """
    global _logDateTimeUsers
    _logDateTimeUsers = _logDateTimeUsers - 1
    if _logDateTimeUsers:
        # Somebody still relies on the periodic refresh; leave it running.
        return
    pendingRefresh = _resetLogDateTimeID
    if pendingRefresh and pendingRefresh.active():
        pendingRefresh.cancel()
269
def timegm(year, month, day, hour, minute, second):
271
Convert time tuple in GMT to seconds since epoch, GMT
275
raise ValueError("Years prior to %d not supported" % (EPOCH,))
276
assert 1 <= month <= 12
277
days = 365*(year-EPOCH) + calendar.leapdays(EPOCH, year)
278
for i in range(1, month):
279
days = days + calendar.mdays[i]
280
if month > 2 and calendar.isleap(year):
282
days = days + day - 1
283
hours = days*24 + hour
284
minutes = hours*60 + minute
285
seconds = minutes*60 + second
288
def stringToDatetime(dateString):
290
Convert an HTTP date string (one of three formats) to seconds since epoch.
292
parts = dateString.split()
294
if not parts[0][0:3].lower() in weekdayname_lower:
295
# Weekday is stupid. Might have been omitted.
297
return stringToDatetime("Sun, "+dateString)
303
if (partlen == 5 or partlen == 6) and parts[1].isdigit():
304
# 1st date format: Sun, 06 Nov 1994 08:49:37 GMT
305
# (Note: "GMT" is literal, not a variable timezone)
306
# (also handles without "GMT")
307
# This is the normal format
312
elif (partlen == 3 or partlen == 4) and parts[1].find('-') != -1:
313
# 2nd date format: Sunday, 06-Nov-94 08:49:37 GMT
314
# (Note: "GMT" is literal, not a variable timezone)
315
# (also handles without "GMT")
316
# Two digit year, yucko.
317
day, month, year = parts[1].split('-')
324
elif len(parts) == 5:
325
# 3rd date format: Sun Nov 6 08:49:37 1994
326
# ANSI C asctime() format.
332
raise ValueError("Unknown datetime format %r" % dateString)
335
month = int(monthname_lower.index(month.lower()))
337
hour, min, sec = map(int, time.split(':'))
338
return int(timegm(year, month, day, hour, min, sec))
342
Convert string to a chunk.
344
@returns: a tuple of strings representing the chunked encoding of data
346
return ("%x\r\n" % len(data), data, "\r\n")
350
Convert chunk to string.
352
@returns: tuple (result, remaining), may raise ValueError.
354
prefix, rest = data.split('\r\n', 1)
355
length = int(prefix, 16)
357
raise ValueError("Chunk length must be >= 0, not %d" % (length,))
358
if not rest[length:length + 2] == '\r\n':
359
raise ValueError, "chunk must end with CRLF"
360
return rest[:length], rest[length + 2:]
363
def parseContentRange(header):
365
Parse a content-range header into (start, end, realLength).
367
realLength might be None if real length is not known ('*').
369
kind, other = header.strip().split()
370
if kind.lower() != "bytes":
371
raise ValueError, "a range of type %r is not supported"
372
startend, realLength = other.split("/")
373
start, end = map(int, startend.split("-"))
374
if realLength == "*":
377
realLength = int(realLength)
378
return (start, end, realLength)
382
class StringTransport:
384
I am a StringIO wrapper that conforms to the transport API. I support
385
the `writeSequence' method.
389
def writeSequence(self, seq):
    """
    Write all the strings in C{seq} to the wrapped buffer in a single call.

    @param seq: An iterable of strings; they are concatenated in order.
    """
    joined = ''.join(seq)
    self.s.write(joined)
391
def __getattr__(self, attr):
392
return getattr(self.__dict__['s'], attr)
395
class HTTPClient(basic.LineReceiver):
397
A client for HTTP 1.0.
400
You probably want to send a 'Host' header with the name of the site you're
401
connecting to, in order to not break name based virtual hosting.
403
@ivar length: The length of the request body in bytes.
406
@ivar firstLine: Are we waiting for the first header line?
407
@type firstLine: C{bool}
409
@ivar __buffer: The buffer that stores the response to the HTTP request.
410
@type __buffer: A C{StringIO} object.
412
@ivar _header: Part or all of an HTTP request header.
413
@type _header: C{str}
420
def sendCommand(self, command, path):
    """
    Write the HTTP/1.0 request line to the transport.

    @param command: The first token of the request line (the HTTP method).
    @param path: The second token of the request line (the request target).
    """
    requestLine = '%s %s HTTP/1.0\r\n' % (command, path)
    self.transport.write(requestLine)
423
def sendHeader(self, name, value):
    """
    Write a single C{name: value} header line to the transport.

    @param name: The header field name.
    @param value: The header field value; formatted with C{%s}.
    """
    headerLine = '%s: %s\r\n' % (name, value)
    self.transport.write(headerLine)
426
def endHeaders(self):
    """
    Write the empty line that terminates the header section.
    """
    blankLine = '\r\n'
    self.transport.write(blankLine)
430
def extractHeader(self, header):
432
Given a complete HTTP header, extract the field name and value and
435
@param header: a complete HTTP request header of the form
439
key, val = header.split(':', 1)
441
self.handleHeader(key, val)
442
if key.lower() == 'content-length':
443
self.length = int(val)
446
def lineReceived(self, line):
448
Parse the status line and headers for an HTTP request.
450
@param line: Part of an HTTP request header. Request bodies are parsed
451
in L{rawDataReceived}.
455
self.firstLine = False
456
l = line.split(None, 2)
462
# sometimes there is no message
464
self.handleStatus(version, status, message)
467
if self._header != "":
468
# Only extract headers if there are any
469
self.extractHeader(self._header)
470
self.__buffer = StringIO()
471
self.handleEndHeaders()
475
if line.startswith('\t') or line.startswith(' '):
476
# This line is part of a multiline header. According to RFC 822, in
477
# "unfolding" multiline headers you do not strip the leading
478
# whitespace on the continuing line.
479
self._header = self._header + line
481
# This line starts a new header, so process the previous one.
482
self.extractHeader(self._header)
488
def connectionLost(self, reason):
    """
    The connection has gone away; signal the end of the response.

    @param reason: Why the connection was lost. It is not inspected here.
    """
    finishResponse = self.handleResponseEnd
    finishResponse()
491
def handleResponseEnd(self):
493
The response has been completely received.
495
This callback may be invoked more than once per request.
497
if self.__buffer is not None:
498
b = self.__buffer.getvalue()
500
self.handleResponse(b)
502
def handleResponsePart(self, data):
    """
    Append a piece of the response to the internal buffer.

    @param data: The bytes to add to the buffer.
    """
    responseBuffer = self.__buffer
    responseBuffer.write(data)
505
def connectionMade(self):
508
def handleStatus(self, version, status, message):
    """
    Hook invoked once the status line has been received.

    The default implementation does nothing.

    @param version: The protocol version, e.g. 'HTTP/1.0'
    @param status: The status code, e.g. '200'
    @param message: The status message, e.g. 'OK'
    """
518
def handleHeader(self, key, val):
    """
    Hook invoked for each header that is received.

    The default implementation does nothing.

    @param key: The header name.
    @param val: The header value.
    """
523
def handleEndHeaders(self):
    """
    Hook invoked once every header has been received.

    The default implementation does nothing.
    """
529
def rawDataReceived(self, data):
530
if self.length is not None:
531
data, rest = data[:self.length], data[self.length:]
532
self.length -= len(data)
535
self.handleResponsePart(data)
537
self.handleResponseEnd()
538
self.setLineMode(rest)
542
# response codes that must have empty bodies
543
NO_BODY_CODES = (204, 304)
549
Subclasses should override the process() method to determine how
550
the request will be processed.
552
@ivar method: The HTTP method that was used.
553
@ivar uri: The full URI that was requested (includes arguments).
554
@ivar path: The path only (arguments not included).
555
@ivar args: All of the arguments, including URL and POST arguments.
556
@type args: A mapping of strings (the argument names) to lists of values.
557
i.e., ?foo=bar&foo=baz&quux=spam results in
558
{'foo': ['bar', 'baz'], 'quux': ['spam']}.
560
@type requestHeaders: L{http_headers.Headers}
561
@ivar requestHeaders: All received HTTP request headers.
563
@ivar received_headers: Backwards-compatibility access to
564
C{requestHeaders}. Use C{requestHeaders} instead. C{received_headers}
565
behaves mostly like a C{dict} and does not provide access to all header
568
@type responseHeaders: L{http_headers.Headers}
569
@ivar responseHeaders: All HTTP response headers to be sent.
571
@ivar headers: Backwards-compatibility access to C{responseHeaders}. Use
572
C{responseHeaders} instead. C{headers} behaves mostly like a C{dict}
573
and does not provide access to all header values nor does it allow
574
multiple values for one header to be set.
576
@ivar notifications: A C{list} of L{Deferred}s which are waiting for
577
notification that the response to this request has been finished
578
(successfully or with an error). Don't use this attribute directly,
579
instead use the L{Request.notifyFinish} method.
581
@ivar _disconnected: A flag which is C{False} until the connection over
582
which this request was received is closed and which is C{True} after
584
@type _disconnected: C{bool}
586
implements(interfaces.IConsumer)
591
code_message = RESPONSES[OK]
592
method = "(no method yet)"
593
clientproto = "(no clientproto yet)"
597
sentLength = 0 # content-length of response, or total bytes sent via chunking
604
_disconnected = False
606
def __init__(self, channel, queued):
608
@param channel: the channel we're connected to.
609
@param queued: are we in the request queue, or can we start writing to
612
self.notifications = []
613
self.channel = channel
615
self.requestHeaders = Headers()
616
self.received_cookies = {}
617
self.responseHeaders = Headers()
618
self.cookies = [] # outgoing cookies
621
self.transport = StringTransport()
623
self.transport = self.channel.transport
626
def __setattr__(self, name, value):
628
Support assignment of C{dict} instances to C{received_headers} for
629
backwards-compatibility.
631
if name == 'received_headers':
632
# A property would be nice, but Request is classic.
633
self.requestHeaders = headers = Headers()
634
for k, v in value.iteritems():
635
headers.setRawHeaders(k, [v])
636
elif name == 'requestHeaders':
637
self.__dict__[name] = value
638
self.__dict__['received_headers'] = _DictHeaders(value)
639
elif name == 'headers':
640
self.responseHeaders = headers = Headers()
641
for k, v in value.iteritems():
642
headers.setRawHeaders(k, [v])
643
elif name == 'responseHeaders':
644
self.__dict__[name] = value
645
self.__dict__['headers'] = _DictHeaders(value)
647
self.__dict__[name] = value
652
Called when we have finished responding and are no longer queued.
655
log.err(RuntimeError("Producer was not unregistered for %s" % self.uri))
656
self.unregisterProducer()
657
self.channel.requestDone(self)
662
# win32 suckiness, no idea why it does this
665
for d in self.notifications:
667
self.notifications = []
669
# methods for channel - end users should not use these
671
def noLongerQueued(self):
673
Notify the object that it is no longer queued.
675
We start writing whatever data we have to the transport, etc.
677
This method is not intended for users.
680
raise RuntimeError, "noLongerQueued() got called unnecessarily."
684
# set transport to real one and send any buffer data
685
data = self.transport.getvalue()
686
self.transport = self.channel.transport
688
self.transport.write(data)
690
# if we have producer, register it with transport
691
if (self.producer is not None) and not self.finished:
692
self.transport.registerProducer(self.producer, self.streamingProducer)
694
# if we're finished, clean up
698
def gotLength(self, length):
700
Called when HTTP channel got length of content in this request.
702
This method is not intended for users.
704
@param length: The length of the request body, as indicated by the
705
request headers. C{None} if the request headers do not indicate a
708
if length is not None and length < 100000:
709
self.content = StringIO()
711
self.content = tempfile.TemporaryFile()
714
def parseCookies(self):
716
Parse cookie headers.
718
This method is not intended for users.
720
cookieheaders = self.requestHeaders.getRawHeaders("cookie")
722
if cookieheaders is None:
725
for cookietxt in cookieheaders:
727
for cook in cookietxt.split(';'):
730
k, v = cook.split('=', 1)
731
self.received_cookies[k] = v
736
def handleContentChunk(self, data):
    """
    Append a chunk of received data to C{self.content}.

    This method is not intended for users.

    @param data: The chunk to write.
    """
    body = self.content
    body.write(data)
745
def requestReceived(self, command, path, version):
747
Called by channel when all data has been received.
749
This method is not intended for users.
751
@type command: C{str}
752
@param command: The HTTP verb of this request. This has the case
753
supplied by the client (e.g., it may be "get" rather than "GET").
756
@param path: The URI of this request.
758
@type version: C{str}
759
@param version: The HTTP version of this request.
761
self.content.seek(0,0)
765
self.method, self.uri = command, path
766
self.clientproto = version
767
x = self.uri.split('?', 1)
772
self.path, argstring = x
773
self.args = parse_qs(argstring, 1)
775
# cache the client and server information, we'll need this later to be
776
# serialized and sent with the request so CGIs will work remotely
777
self.client = self.channel.transport.getPeer()
778
self.host = self.channel.transport.getHost()
780
# Argument processing
782
ctype = self.requestHeaders.getRawHeaders('content-type')
783
if ctype is not None:
786
if self.method == "POST" and ctype:
787
mfd = 'multipart/form-data'
788
key, pdict = cgi.parse_header(ctype)
789
if key == 'application/x-www-form-urlencoded':
790
args.update(parse_qs(self.content.read(), 1))
793
args.update(cgi.parse_multipart(self.content, pdict))
795
if e.args[0] == 'content-disposition':
796
# Parse_multipart can't cope with missing
797
# content-disposition headers in multipart/form-data
798
# parts, so we catch the exception and tell the client
799
# it was a bad request.
800
self.channel.transport.write(
801
"HTTP/1.1 400 Bad Request\r\n\r\n")
802
self.channel.transport.loseConnection()
805
self.content.seek(0, 0)
811
return '<%s %s %s>'% (self.method, self.uri, self.clientproto)
815
Override in subclasses.
817
This method is not intended for users.
824
def registerProducer(self, producer, streaming):
829
raise ValueError, "registering producer %s before previous one (%s) was unregistered" % (producer, self.producer)
831
self.streamingProducer = streaming
832
self.producer = producer
836
producer.pauseProducing()
838
self.transport.registerProducer(producer, streaming)
840
def unregisterProducer(self):
842
Unregister the producer.
845
self.transport.unregisterProducer()
848
# private http response methods
850
def _sendError(self, code, resp=''):
    """
    Write a bare status line, followed by an empty line, to the transport.

    The line is built from C{self.clientproto}, C{code} and C{resp}.

    @param code: The status code to put on the status line.
    @param resp: The text that follows the status code; empty by default.
    """
    statusLine = '%s %s %s\r\n\r\n' % (self.clientproto, code, resp)
    self.transport.write(statusLine)
854
# The following is the public interface that people should be
856
def getHeader(self, key):
858
Get an HTTP request header.
861
@param key: The name of the header to get the value of.
863
@rtype: C{str} or C{NoneType}
864
@return: The value of the specified header, or C{None} if that header
865
was not present in the request.
867
value = self.requestHeaders.getRawHeaders(key)
868
if value is not None:
872
def getCookie(self, key):
    """
    Look up a cookie that was received with this request.

    @param key: The name of the cookie.

    @return: The value stored for C{key} in C{self.received_cookies}, or
        C{None} if there is none.
    """
    received = self.received_cookies
    return received.get(key)
879
def notifyFinish(self):
    """
    Ask to be told when the response to this request has finished.

    @rtype: L{Deferred}
    @return: A new L{Deferred}, also recorded in C{self.notifications}. It
        is triggered with a C{None} value if the request finishes
        successfully, or with an error if the request is interrupted by an
        error (for example, the client closing the connection prematurely).
    """
    finished = Deferred()
    self.notifications.append(finished)
    return finished
896
Indicate that all response data has been written to this L{Request}.
898
if self._disconnected:
900
"Request.finish called on a request after its connection was lost; "
901
"use Request.notifyFinish to keep track of this.")
903
warnings.warn("Warning! request.finish called twice.", stacklevel=2)
906
if not self.startedWriting:
911
# write last chunk and closing CRLF
912
self.transport.write("0\r\n\r\n")
915
if hasattr(self.channel, "factory"):
916
self.channel.factory.log(self)
922
def write(self, data):
924
Write some data as a result of an HTTP request. The first
925
time this is called, it writes out response data.
928
@param data: Some bytes to be sent as part of the response body.
930
if not self.startedWriting:
931
self.startedWriting = 1
932
version = self.clientproto
934
l.append('%s %s %s\r\n' % (version, self.code,
936
# if we don't have a content length, we send data in
937
# chunked mode, so that we can support pipelining in
938
# persistent connections.
939
if ((version == "HTTP/1.1") and
940
(self.responseHeaders.getRawHeaders('content-length') is None) and
941
self.method != "HEAD" and self.code not in NO_BODY_CODES):
942
l.append("%s: %s\r\n" % ('Transfer-Encoding', 'chunked'))
945
if self.lastModified is not None:
946
if self.responseHeaders.hasHeader('last-modified'):
947
log.msg("Warning: last-modified specified both in"
948
" header list and lastModified attribute.")
950
self.responseHeaders.setRawHeaders(
952
[datetimeToString(self.lastModified)])
954
if self.etag is not None:
955
self.responseHeaders.setRawHeaders('ETag', [self.etag])
957
for name, values in self.responseHeaders.getAllRawHeaders():
959
l.append("%s: %s\r\n" % (name, value))
961
for cookie in self.cookies:
962
l.append('%s: %s\r\n' % ("Set-Cookie", cookie))
966
self.transport.writeSequence(l)
968
# if this is a "HEAD" request, we shouldn't return any data
969
if self.method == "HEAD":
970
self.write = lambda data: None
973
# for certain result codes, we should never return any data
974
if self.code in NO_BODY_CODES:
975
self.write = lambda data: None
978
self.sentLength = self.sentLength + len(data)
981
self.transport.writeSequence(toChunk(data))
983
self.transport.write(data)
985
def addCookie(self, k, v, expires=None, domain=None, path=None, max_age=None, comment=None, secure=None):
987
Set an outgoing HTTP cookie.
989
In general, you should consider using sessions instead of cookies, see
990
L{twisted.web.server.Request.getSession} and the
991
L{twisted.web.server.Session} class for details.
993
cookie = '%s=%s' % (k, v)
994
if expires is not None:
995
cookie = cookie +"; Expires=%s" % expires
996
if domain is not None:
997
cookie = cookie +"; Domain=%s" % domain
999
cookie = cookie +"; Path=%s" % path
1000
if max_age is not None:
1001
cookie = cookie +"; Max-Age=%s" % max_age
1002
if comment is not None:
1003
cookie = cookie +"; Comment=%s" % comment
1005
cookie = cookie +"; Secure"
1006
self.cookies.append(cookie)
1008
def setResponseCode(self, code, message=None):
1010
Set the HTTP response code.
1012
if not isinstance(code, (int, long)):
1013
raise TypeError("HTTP response code must be int or long")
1016
self.code_message = message
1018
self.code_message = RESPONSES.get(code, "Unknown Status")
1021
def setHeader(self, name, value):
    """
    Set an HTTP response header, overriding any values set for it before.

    @param name: The name of the header for which to set the value.

    @param value: The value to set for the named header. It is stored as
        the only value of that header.
    """
    singleValue = [value]
    self.responseHeaders.setRawHeaders(name, singleValue)
1035
def redirect(self, url):
    """
    Turn this response into a redirect to C{url}.

    The response code is set to L{FOUND} and the C{location} header is set
    to C{url}. The request should have finish() called after this.

    @param url: The value for the C{location} response header.
    """
    status = FOUND
    self.setResponseCode(status)
    self.setHeader("location", url)
1045
def setLastModified(self, when):
1047
Set the C{Last-Modified} time for the response to this request.
1049
If I am called more than once, I ignore attempts to set
1050
Last-Modified earlier, only replacing the Last-Modified time
1051
if it is to a later value.
1053
If I am a conditional request, I may modify my response code
1054
to L{NOT_MODIFIED} if appropriate for the time given.
1056
@param when: The last time the resource being returned was
1057
modified, in seconds since the epoch.
1059
@return: If I am a C{If-Modified-Since} conditional request and
1060
the time given is not newer than the condition, I return
1061
L{http.CACHED<CACHED>} to indicate that you should write no
1062
body. Otherwise, I return a false value.
1064
# time.time() may be a float, but the HTTP-date strings are
1065
# only good for whole seconds.
1066
when = long(math.ceil(when))
1067
if (not self.lastModified) or (self.lastModified < when):
1068
self.lastModified = when
1070
modifiedSince = self.getHeader('if-modified-since')
1072
firstPart = modifiedSince.split(';', 1)[0]
1074
modifiedSince = stringToDatetime(firstPart)
1077
if modifiedSince >= when:
1078
self.setResponseCode(NOT_MODIFIED)
1082
def setETag(self, etag):
1084
Set an C{entity tag} for the outgoing response.
1086
That's \"entity tag\" as in the HTTP/1.1 C{ETag} header, \"used
1087
for comparing two or more entities from the same requested
1090
If I am a conditional request, I may modify my response code
1091
to L{NOT_MODIFIED} or L{PRECONDITION_FAILED}, if appropriate
1094
@param etag: The entity tag for the resource being returned.
1096
@return: If I am a C{If-None-Match} conditional request and
1097
the tag matches one in the request, I return
1098
L{http.CACHED<CACHED>} to indicate that you should write
1099
no body. Otherwise, I return a false value.
1104
tags = self.getHeader("if-none-match")
1107
if (etag in tags) or ('*' in tags):
1108
self.setResponseCode(((self.method in ("HEAD", "GET"))
1110
or PRECONDITION_FAILED)
1115
def getAllHeaders(self):
1117
Return dictionary mapping the names of all received headers to the last
1118
value received for each.
1120
Since this method does not return all header information,
1121
C{self.requestHeaders.getAllRawHeaders()} may be preferred.
1124
for k, v in self.requestHeaders.getAllRawHeaders():
1125
headers[k.lower()] = v[-1]
1129
def getRequestHostname(self):
1131
Get the hostname that the user passed in to the request.
1133
This will either use the Host: header (if it is available) or the
1134
host we are listening on if the header is unavailable.
1136
@returns: the requested hostname
1139
# XXX This method probably has no unit tests. I changed it a ton and
1141
host = self.getHeader('host')
1143
return host.split(':', 1)[0]
1144
return self.getHost().host
1149
Get my originally requesting transport's host.
1151
Don't rely on the 'transport' attribute, since Request objects may be
1152
copied remotely. For information on this method's return value, see
1153
twisted.internet.tcp.Port.
1157
def setHost(self, host, port, ssl=0):
    """
    Override the host and port this request believes it was made to.

    This is meant for use behind reverse HTTP proxies (e.g. Squid or
    Apache's mod_proxy), where the address the HTTP client is using differs
    from the one we are listening on.

    For example, Apache may be listening on https://www.example.com and
    forwarding requests to http://localhost:8080. HTML produced by Twisted
    should then say 'https://www.example.com' rather than
    'http://localhost:8080', which is arranged with::

        request.setHost('www.example.com', 443, ssl=1)

    @param host: The value to which to change the host header. It is also
        used as the host of the address stored in C{self.host}.

    @param port: The port of the address stored in C{self.host}.

    @param ssl: A flag, stored as C{self._forceSSL}, which, if C{True},
        indicates that the request is considered secure (if C{True},
        L{isSecure} will return C{True}).
    """
    self._forceSSL = ssl
    hostHeader = [host]
    self.requestHeaders.setRawHeaders("host", hostHeader)
    reportedAddress = address.IPv4Address("TCP", host, port)
    self.host = reportedAddress
1184
def getClientIP(self):
1186
Return the IP address of the client who submitted this request.
1188
@returns: the client IP address
1191
if isinstance(self.client, address.IPv4Address):
1192
return self.client.host
1198
Return True if this request is using a secure transport.
1200
Normally this method returns True if this request's HTTPChannel
1201
instance is using a transport that implements ISSLTransport.
1203
This will also return True if setHost() has been called
1206
@returns: True if this request is secure
1211
transport = getattr(getattr(self, 'channel', None), 'transport', None)
1212
if interfaces.ISSLTransport(transport, None) is not None:
1216
def _authorize(self):
1217
# Authorization, (mostly) per the RFC
1219
authh = self.getHeader("Authorization")
1221
self.user = self.password = ''
1223
bas, upw = authh.split()
1224
if bas.lower() != "basic":
1226
upw = base64.decodestring(upw)
1227
self.user, self.password = upw.split(':', 1)
1228
except (binascii.Error, ValueError):
1229
self.user = self.password = ""
1232
self.user = self.password = ""
1236
Return the HTTP user sent with this request, if any.
1238
If no user was supplied, return the empty string.
1240
@returns: the HTTP user, if any
1250
def getPassword(self):
1252
Return the HTTP password sent with this request, if any.
1254
If no password was supplied, return the empty string.
1256
@returns: the HTTP password, if any
1260
return self.password
1264
return self.password
1266
def getClient(self):
1267
if self.client.type != 'TCP':
1269
host = self.client.host
1271
name, names, addresses = socket.gethostbyaddr(host)
1272
except socket.error:
1274
names.insert(0, name)
1281
def connectionLost(self, reason):
1283
There is no longer a connection for this request to respond over.
1284
Clean up anything which can't be useful anymore.
1286
self._disconnected = True
1288
if self.content is not None:
1289
self.content.close()
1290
for d in self.notifications:
1292
self.notifications = []
1296
class _DataLoss(Exception):
1298
L{_DataLoss} indicates that not all of a message body was received. This
1299
is only one of several possible exceptions which may indicate that data
1300
was lost.  Because of this, it should not be checked for
1301
specifically; any unexpected exception should be treated as having
1307
class PotentialDataLoss(Exception):
    """
    May be raised by the C{noMoreData} method of a transfer encoding decoder
    when there is no way to tell whether the entire response body has been
    delivered.

    This only happens for requests to HTTP servers which set neither
    I{Content-Length} nor a I{Transfer-Encoding} in the response. The end of
    such a response is indicated by the connection being closed, and that
    event may equally be caused by a transient network problem or some other
    error.
    """
1320
class _IdentityTransferDecoder(object):
    """
    Protocol for accumulating bytes up to a specified length.  This handles the
    case where no I{Transfer-Encoding} is specified.

    @ivar contentLength: Counter keeping track of how many more bytes there are
        to receive.  C{None} means the length is not known; in that case every
        byte received is passed to C{dataCallback} and C{noMoreData} raises
        L{PotentialDataLoss}.

    @ivar dataCallback: A one-argument callable which will be invoked each
        time application data is received.  Set to C{None} once decoding has
        finished.

    @ivar finishCallback: A one-argument callable which will be invoked when
        C{contentLength} bytes have been received.  It will be invoked with
        all bytes which were delivered to this protocol beyond that length.
        Set to C{None} once decoding has finished.
    """
    def __init__(self, contentLength, dataCallback, finishCallback):
        self.contentLength = contentLength
        self.dataCallback = dataCallback
        self.finishCallback = finishCallback


    def dataReceived(self, data):
        """
        Interpret the next chunk of bytes received.  Either deliver them to the
        data callback or invoke the finish callback if enough bytes have been
        received.

        @param data: The bytes which were just received.

        @raise RuntimeError: If the finish callback has already been invoked
            during a previous call to this method.
        """
        if self.dataCallback is None:
            raise RuntimeError(
                "_IdentityTransferDecoder cannot decode data after finishing")

        if self.contentLength is None:
            # Unknown length: everything is body until the caller says
            # otherwise via noMoreData.
            self.dataCallback(data)
        elif len(data) < self.contentLength:
            self.contentLength -= len(data)
            self.dataCallback(data)
        else:
            # Make the state consistent before invoking any code belonging to
            # anyone else in case noMoreData ends up being called beneath this
            # stack frame.
            contentLength = self.contentLength
            dataCallback = self.dataCallback
            finishCallback = self.finishCallback
            self.dataCallback = self.finishCallback = None
            self.contentLength = 0

            # The first contentLength bytes complete the body; whatever is
            # left over is handed to the finish callback.
            dataCallback(data[:contentLength])
            finishCallback(data[contentLength:])


    def noMoreData(self):
        """
        All data which will be delivered to this decoder has been.  Check to
        make sure as much data as was expected has been received.

        @raise PotentialDataLoss: If the content length is unknown.
        @raise _DataLoss: If the content length is known and fewer than that
            many bytes have been delivered.

        @return: C{None}
        """
        finishCallback = self.finishCallback
        self.dataCallback = self.finishCallback = None
        if self.contentLength is None:
            # With no declared length, the end of input is the only end
            # marker there is: finish with no extra bytes, then report that
            # completeness could not be verified.
            finishCallback('')
            raise PotentialDataLoss()
        elif self.contentLength != 0:
            raise _DataLoss()
1395
class _ChunkedTransferDecoder(object):
    """
    Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 2616,
    section 3.6.1.  This protocol can interpret the contents of a request or
    response body which uses the I{chunked} Transfer-Encoding.  It cannot
    interpret any of the rest of the HTTP protocol.

    It may make sense for _ChunkedTransferDecoder to be an actual IProtocol
    implementation.  Currently, the only user of this class will only ever
    call dataReceived on it.  However, it might be an improvement if the
    user could connect this to a transport and deliver connection lost
    notification.  This way, `dataCallback` becomes `self.transport.write`
    and perhaps `finishCallback` becomes `self.transport.loseConnection()`
    (although I'm not sure where the extra data goes in that case).  This
    could also allow this object to indicate to the receiver of data that
    the stream was not completely received, an error case which should be
    noticed.

    @ivar dataCallback: A one-argument callable which will be invoked each
        time application data is received.

    @ivar finishCallback: A one-argument callable which will be invoked when
        the terminal chunk is received.  It will be invoked with all bytes
        which were delivered to this protocol which came after the terminal
        chunk.

    @ivar length: Counter keeping track of how many more bytes in a chunk there
        are to receive.

    @ivar state: One of C{'chunk-length'}, C{'trailer'}, C{'body'}, or
        C{'finished'}.  For C{'chunk-length'}, data for the chunk length line
        is currently being read.  For C{'trailer'}, the CR LF pair which
        follows each chunk is being read.  For C{'body'}, the contents of a
        chunk are being read.  For C{'finished'}, the last chunk has been
        completely read and no more input is valid.

    @ivar finish: A flag indicating that the last chunk has been started.  When
        it finishes, the state will change to C{'finished'} and no more data
        will be accepted.

    @ivar _buffer: Bytes received but not yet interpretable because the
        chunk length line or the CR LF after a chunk is still incomplete.
    """
    state = 'chunk-length'
    finish = False

    def __init__(self, dataCallback, finishCallback):
        self.dataCallback = dataCallback
        self.finishCallback = finishCallback
        self._buffer = ''


    def dataReceived(self, data):
        """
        Interpret data from a request or response body which uses the
        I{chunked} Transfer-Encoding.

        @param data: The bytes which were just received.

        @raise ValueError: If a chunk length line does not start with a
            non-negative hexadecimal number.
        @raise RuntimeError: If called after the last chunk was processed.
        """
        data = self._buffer + data
        self._buffer = ''
        while data:
            if self.state == 'chunk-length':
                if '\r\n' in data:
                    line, rest = data.split('\r\n', 1)
                    # Anything after ';' is a chunk extension and is ignored.
                    parts = line.split(';')
                    length = int(parts[0], 16)
                    if length < 0:
                        # int() accepts a sign; a negative size would be used
                        # as a slice bound below and silently corrupt the
                        # decoded body.
                        raise ValueError(
                            "Chunk length must be >= 0, not %d" % (length,))
                    self.length = length
                    if self.length == 0:
                        # Zero-length chunk: this is the last one.
                        self.state = 'trailer'
                        self.finish = True
                    else:
                        self.state = 'body'
                    data = rest
                else:
                    # The length line is not complete yet; wait for more.
                    self._buffer = data
                    data = ''
            elif self.state == 'trailer':
                if data.startswith('\r\n'):
                    data = data[2:]
                    if self.finish:
                        self.state = 'finished'
                        # Everything after the terminal chunk belongs to
                        # whoever comes next.
                        self.finishCallback(data)
                        data = ''
                    else:
                        self.state = 'chunk-length'
                else:
                    self._buffer = data
                    data = ''
            elif self.state == 'body':
                if len(data) >= self.length:
                    chunk, data = data[:self.length], data[self.length:]
                    self.dataCallback(chunk)
                    self.state = 'trailer'
                else:
                    # Only part of the chunk has arrived; deliver it now and
                    # remember how much is still outstanding.
                    self.length -= len(data)
                    self.dataCallback(data)
                    data = ''
            elif self.state == 'finished':
                raise RuntimeError(
                    "_ChunkedTransferDecoder.dataReceived called after last "
                    "chunk was processed")


    def noMoreData(self):
        """
        Verify that all data has been received.  If it has not been, raise
        L{_DataLoss}.

        @raise _DataLoss: If the terminal chunk has not been completely
            received.
        """
        if self.state != 'finished':
            raise _DataLoss(
                "Chunked decoder in %r state, still expecting more data to "
                "get to finished state." % (self.state,))
1505
class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin):
    """
    A receiver for HTTP requests.

    @ivar _transferDecoder: C{None}, or the decoder for the body of the
        request currently being received: an L{_IdentityTransferDecoder} if
        the request has a I{Content-Length} header, or a
        L{_ChunkedTransferDecoder} if the request body uses the I{chunked}
        Transfer-Encoding.

    @ivar requests: The queue of requests received on this channel which have
        not yet been completed.
    """

    maxHeaders = 500 # max number of headers allowed per request

    length = 0
    persistent = 1
    __header = ''
    # 1: the next line is a request line; 2: same, but one stray blank line
    # has already been skipped; 0: header lines are being read.
    __first_line = 1
    __content = None

    # set in instances or subclasses
    requestFactory = Request

    _savedTimeOut = None
    _receivedHeaderCount = 0

    def __init__(self):
        # the request queue
        self.requests = []
        self._transferDecoder = None


    def connectionMade(self):
        """
        Start the idle timeout for the new connection.
        """
        self.setTimeout(self.timeOut)


    def lineReceived(self, line):
        """
        Handle one line of the head of a request: the request line, a header
        line, a header continuation line, or the blank line which ends the
        headers.

        @param line: The line received, without its delimiter.
        """
        self.resetTimeout()

        if self.__first_line:
            # if this connection is not persistent, drop any data which
            # the client (illegally) sent after the last request.
            if not self.persistent:
                self.dataReceived = self.lineReceived = lambda *args: None
                return

            # IE sends an extraneous empty line (\r\n) after a POST request;
            # eat up such a line, but only ONCE
            if not line and self.__first_line == 1:
                self.__first_line = 2
                return

            # create a new Request object
            request = self.requestFactory(self, len(self.requests))
            self.requests.append(request)

            self.__first_line = 0
            parts = line.split()
            if len(parts) != 3:
                self.transport.write("HTTP/1.1 400 Bad Request\r\n\r\n")
                self.transport.loseConnection()
                return
            command, request, version = parts
            self._command = command
            self._path = request
            self._version = version
        elif line == '':
            # Blank line: the headers are complete.  Flush the header which
            # was still being accumulated, then move on to the body.
            if self.__header:
                self.headerReceived(self.__header)
            self.__header = ''
            self.allHeadersReceived()
            if self.length == 0:
                self.allContentReceived()
            else:
                self.setRawMode()
        elif line[0] in ' \t':
            # Continuation of the previous header line.
            self.__header = self.__header+'\n'+line
        else:
            # A new header starts, so the previous one is now complete.
            if self.__header:
                self.headerReceived(self.__header)
            self.__header = line


    def _finishRequestBody(self, data):
        """
        Called by the transfer decoder when the request body is complete.

        @param data: Bytes received after the end of the body; they are fed
            back through line mode.
        """
        self.allContentReceived()
        self.setLineMode(data)


    def headerReceived(self, line):
        """
        Do pre-processing (for content-length) and store this header away.
        Enforce the per-request header limit.

        @type line: C{str}
        @param line: A line from the header section of a request, excluding the
            line delimiter.
        """
        header, data = line.split(':', 1)
        header = header.lower()
        data = data.strip()
        if header == 'content-length':
            self.length = int(data)
            self._transferDecoder = _IdentityTransferDecoder(
                self.length, self.requests[-1].handleContentChunk, self._finishRequestBody)
        elif header == 'transfer-encoding' and data.lower() == 'chunked':
            # The body length is not known in advance for chunked bodies.
            self.length = None
            self._transferDecoder = _ChunkedTransferDecoder(
                self.requests[-1].handleContentChunk, self._finishRequestBody)

        reqHeaders = self.requests[-1].requestHeaders
        values = reqHeaders.getRawHeaders(header)
        if values is not None:
            values.append(data)
        else:
            reqHeaders.setRawHeaders(header, [data])

        self._receivedHeaderCount += 1
        if self._receivedHeaderCount > self.maxHeaders:
            self.transport.write("HTTP/1.1 400 Bad Request\r\n\r\n")
            self.transport.loseConnection()


    def allContentReceived(self):
        """
        Called when the whole of the current request has been received.
        Reset the per-request parsing state and hand the request line to the
        most recently queued request object.
        """
        command = self._command
        path = self._path
        version = self._version

        # reset ALL state variables, so we don't interfere with next request
        self.length = 0
        self._receivedHeaderCount = 0
        self.__first_line = 1
        self._transferDecoder = None
        del self._command, self._path, self._version

        # Disable the idle timeout, in case this request takes a long
        # time to finish generating output.
        if self.timeOut:
            # Keep what setTimeout returns so requestDone can restore it.
            self._savedTimeOut = self.setTimeout(None)

        req = self.requests[-1]
        req.requestReceived(command, path, version)


    def rawDataReceived(self, data):
        """
        Pass request body bytes to the current transfer decoder.

        @param data: The bytes received while in raw mode.
        """
        self.resetTimeout()
        self._transferDecoder.dataReceived(data)


    def allHeadersReceived(self):
        """
        Called when the blank line ending the headers has been received.
        Decide whether the connection is persistent and tell the request how
        long its body will be.
        """
        req = self.requests[-1]
        req.parseCookies()
        self.persistent = self.checkPersistence(req, self._version)
        req.gotLength(self.length)


    def checkPersistence(self, request, version):
        """
        Check if the channel should close or not.

        @param request: The request most recently received over this channel
            against which checks will be made to determine if this connection
            can remain open after a matching response is returned.

        @type version: C{str}
        @param version: The version of the request.

        @rtype: C{bool}
        @return: A flag which, if C{True}, indicates that this connection may
            remain open to receive another request; if C{False}, the connection
            must be closed in order to indicate the completion of the response
            to C{request}.
        """
        connection = request.requestHeaders.getRawHeaders('connection')
        tokens = []
        if connection:
            # The Connection header is a comma separated list of tokens
            # (RFC 2616, section 14.10) and repeated headers are equivalent
            # to one comma separated header (section 4.2).  Whitespace is
            # accepted as a separator too, as it always has been here.
            for value in connection:
                tokens.extend(
                    [token.lower()
                     for token in value.replace(',', ' ').split()])

        # HTTP 1.0 persistent connection support is currently disabled,
        # since we need a way to disable pipelining. HTTP 1.0 can't do
        # pipelining since we can't know in advance if we'll have a
        # content-length header, if we don't have the header we need to close the
        # connection. In HTTP 1.1 this is not an issue since we use chunked
        # encoding if content-length is not available.

        #if version == "HTTP/1.0":
        #    if 'keep-alive' in tokens:
        #        request.setHeader('connection', 'Keep-Alive')
        #        return 1
        #    else:
        #        return 0
        if version == "HTTP/1.1":
            if 'close' in tokens:
                request.responseHeaders.setRawHeaders('connection', ['close'])
                return False
            else:
                return True
        else:
            return False


    def requestDone(self, request):
        """
        Called by first request in queue when it is done.

        @param request: The request which has finished; it must be the first
            one in the queue.

        @raise TypeError: If C{request} is not the first request in the
            queue.
        """
        if request != self.requests[0]:
            raise TypeError
        del self.requests[0]

        if self.persistent:
            # notify next request it can start writing
            if self.requests:
                self.requests[0].noLongerQueued()
            else:
                # Nothing left to answer: re-arm the idle timeout which
                # allContentReceived disabled.
                if self._savedTimeOut:
                    self.setTimeout(self._savedTimeOut)
        else:
            self.transport.loseConnection()


    def timeoutConnection(self):
        """
        Log the peer which is being timed out, then let L{policies.TimeoutMixin}
        handle the timeout.
        """
        log.msg("Timing out client: %s" % str(self.transport.getPeer()))
        policies.TimeoutMixin.timeoutConnection(self)


    def connectionLost(self, reason):
        """
        Cancel the idle timeout and tell every queued request that the
        connection is gone.

        @param reason: The reason the connection was lost; it is passed on to
            each request.
        """
        self.setTimeout(None)
        for request in self.requests:
            request.connectionLost(reason)
1728
class HTTPFactory(protocol.ServerFactory):
    """
    Factory for HTTP server.

    @ivar logPath: The absolute path of the access log file, or C{None} to
        use C{log.logfile}.

    @ivar timeOut: The value copied onto the C{timeOut} attribute of every
        protocol this factory builds.
    """

    protocol = HTTPChannel

    logPath = None

    timeOut = 60 * 60 * 12

    def __init__(self, logPath=None, timeout=60*60*12):
        """
        @param logPath: The path of the access log file, or C{None}.  It is
            made absolute before being stored.

        @param timeout: The value to store as C{timeOut}.
        """
        if logPath is not None:
            logPath = os.path.abspath(logPath)
        self.logPath = logPath
        self.timeOut = timeout


    def buildProtocol(self, addr):
        """
        Build a protocol as L{protocol.ServerFactory} does and give it this
        factory's timeout.
        """
        p = protocol.ServerFactory.buildProtocol(self, addr)
        # timeOut needs to be on the Protocol instance cause
        # TimeoutMixin expects it there
        p.timeOut = self.timeOut
        return p


    def startFactory(self):
        """
        Open the access log: the file at C{logPath} if one was given,
        C{log.logfile} otherwise.
        """
        # Module-level helper defined elsewhere in this file.
        _logDateTimeStart()
        if self.logPath:
            self.logFile = self._openLogFile(self.logPath)
        else:
            self.logFile = log.logfile


    def stopFactory(self):
        """
        Close the access log if this factory opened it and forget about it.
        """
        if hasattr(self, "logFile"):
            # log.logfile is shared, so only close a file opened here.
            if self.logFile != log.logfile:
                self.logFile.close()
            del self.logFile
        # Module-level helper defined elsewhere in this file.
        _logDateTimeStop()


    def _openLogFile(self, path):
        """
        Override in subclasses, e.g. to use twisted.python.logfile.

        @param path: The path of the file to open for appending.
        @return: The opened file object.
        """
        f = open(path, "a", 1)
        return f


    def _escape(self, s):
        """
        Return C{s} escaped as C{repr} would escape it, without the
        surrounding quotes, and always as if those quotes were double quotes.
        """
        # pain in the ass. Return a string like python repr, but always
        # escaped as if surrounding quotes were "".
        r = repr(s)
        if r[0] == "'":
            return r[1:-1].replace('"', '\\"').replace("\\'", "'")
        return r[1:-1]


    def log(self, request):
        """
        Log a request's result to the logfile, by default in combined log format.

        Nothing is written if the factory has no open log file.

        @param request: The request whose result is to be logged.
        """
        if hasattr(self, "logFile"):
            line = '%s - - %s "%s" %d %s "%s" "%s"\n' % (
                request.getClientIP(),
                # request.getUser() or "-", # the remote user is almost never important
                _logDateTime,
                '%s %s %s' % (self._escape(request.method),
                              self._escape(request.uri),
                              self._escape(request.clientproto)),
                request.code,
                request.sentLength or "-",
                self._escape(request.getHeader("referer") or "-"),
                self._escape(request.getHeader("user-agent") or "-"))
            self.logFile.write(line)