1
# -*- test-case-name: twisted.web.test.test_http -*-
3
# Copyright (c) 2001-2004 Twisted Matrix Laboratories.
4
# See LICENSE for details.
7
"""HyperText Transfer Protocol implementation.
9
This is used by twisted.web.
14
- HTTP client support will at some point be refactored to support HTTP/1.1.
15
- Accept chunked data from clients in server.
16
- Other missing HTTP features from the RFC.
18
Maintainer: U{Itamar Shtull-Trauring<mailto:twisted@itamarst.org>}
22
from cStringIO import StringIO
24
import base64, binascii
33
from zope.interface import implements
36
from twisted.internet import interfaces, reactor, protocol, address, task
37
from twisted.protocols import policies, basic
38
from twisted.python import log
39
try: # try importing the fast, C version
40
from twisted.protocols._c_urlarg import unquote
42
from urllib import unquote
45
protocol_version = "HTTP/1.1"
53
NON_AUTHORITATIVE_INFORMATION = 203
60
MOVED_PERMANENTLY = 301
65
TEMPORARY_REDIRECT = 307
69
PAYMENT_REQUIRED = 402
74
PROXY_AUTH_REQUIRED = 407
79
PRECONDITION_FAILED = 412
80
REQUEST_ENTITY_TOO_LARGE = 413
81
REQUEST_URI_TOO_LONG = 414
82
UNSUPPORTED_MEDIA_TYPE = 415
83
REQUESTED_RANGE_NOT_SATISFIABLE = 416
84
EXPECTATION_FAILED = 417
86
INTERNAL_SERVER_ERROR = 500
89
SERVICE_UNAVAILABLE = 503
91
HTTP_VERSION_NOT_SUPPORTED = 505
92
INSUFFICIENT_STORAGE_SPACE = 507
97
_CONTINUE: "Continue",
98
SWITCHING: "Switching Protocols",
103
ACCEPTED: "Accepted",
104
NON_AUTHORITATIVE_INFORMATION: "Non-Authoritative Information",
105
NO_CONTENT: "No Content",
106
RESET_CONTENT: "Reset Content.",
107
PARTIAL_CONTENT: "Partial Content",
108
MULTI_STATUS: "Multi-Status",
111
MULTIPLE_CHOICE: "Multiple Choices",
112
MOVED_PERMANENTLY: "Moved Permanently",
114
SEE_OTHER: "See Other",
115
NOT_MODIFIED: "Not Modified",
116
USE_PROXY: "Use Proxy",
118
TEMPORARY_REDIRECT: "Temporary Redirect",
121
BAD_REQUEST: "Bad Request",
122
UNAUTHORIZED: "Unauthorized",
123
PAYMENT_REQUIRED: "Payment Required",
124
FORBIDDEN: "Forbidden",
125
NOT_FOUND: "Not Found",
126
NOT_ALLOWED: "Method Not Allowed",
127
NOT_ACCEPTABLE: "Not Acceptable",
128
PROXY_AUTH_REQUIRED: "Proxy Authentication Required",
129
REQUEST_TIMEOUT: "Request Time-out",
130
CONFLICT: "Conflict",
132
LENGTH_REQUIRED: "Length Required",
133
PRECONDITION_FAILED: "Precondition Failed",
134
REQUEST_ENTITY_TOO_LARGE: "Request Entity Too Large",
135
REQUEST_URI_TOO_LONG: "Request-URI Too Long",
136
UNSUPPORTED_MEDIA_TYPE: "Unsupported Media Type",
137
REQUESTED_RANGE_NOT_SATISFIABLE: "Requested Range not satisfiable",
138
EXPECTATION_FAILED: "Expectation Failed",
141
INTERNAL_SERVER_ERROR: "Internal Server Error",
142
NOT_IMPLEMENTED: "Not Implemented",
143
BAD_GATEWAY: "Bad Gateway",
144
SERVICE_UNAVAILABLE: "Service Unavailable",
145
GATEWAY_TIMEOUT: "Gateway Time-out",
146
HTTP_VERSION_NOT_SUPPORTED: "HTTP Version not supported",
147
INSUFFICIENT_STORAGE_SPACE: "Insufficient Storage Space",
148
NOT_EXTENDED: "Not Extended"
151
CACHED = """Magic constant returned by http.Request methods to set cache
152
validation headers when the request is conditional and the value fails
155
# backwards compatibility
156
responses = RESPONSES
159
# datetime parsing and formatting
160
weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
162
'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
163
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
164
weekdayname_lower = [name.lower() for name in weekdayname]
165
monthname_lower = [name and name.lower() for name in monthname]
167
def parse_qs(qs, keep_blank_values=0, strict_parsing=0, unquote=unquote):
168
"""like cgi.parse_qs, only with custom unquote function"""
170
items = [s2 for s1 in qs.split("&") for s2 in s1.split(";")]
173
k, v = item.split("=", 1)
178
if v or keep_blank_values:
179
k = unquote(k.replace("+", " "))
180
v = unquote(v.replace("+", " "))
187
def datetimeToString(msSinceEpoch=None):
188
"""Convert seconds since epoch to HTTP datetime string."""
189
if msSinceEpoch == None:
190
msSinceEpoch = time.time()
191
year, month, day, hh, mm, ss, wd, y, z = time.gmtime(msSinceEpoch)
192
s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
194
day, monthname[month], year,
198
def datetimeToLogString(msSinceEpoch=None):
199
"""Convert seconds since epoch to log datetime string."""
200
if msSinceEpoch == None:
201
msSinceEpoch = time.time()
202
year, month, day, hh, mm, ss, wd, y, z = time.gmtime(msSinceEpoch)
203
s = "[%02d/%3s/%4d:%02d:%02d:%02d +0000]" % (
204
day, monthname[month], year,
209
# a hack so we don't need to recalculate log datetime every hit,
210
# at the price of a small, unimportant, inaccuracy.
212
_logDateTimeUsers = 0
213
_resetLogDateTimeID = None
215
def _resetLogDateTime():
217
global _resetLogDateTime
218
global _resetLogDateTimeID
219
_logDateTime = datetimeToLogString()
220
_resetLogDateTimeID = reactor.callLater(1, _resetLogDateTime)
222
def _logDateTimeStart():
223
global _logDateTimeUsers
224
if not _logDateTimeUsers:
226
_logDateTimeUsers += 1
228
def _logDateTimeStop():
    """Drop one user of the cached log timestamp.

    Once the user count falls to zero, a refresh call that is still
    pending is cancelled.
    """
    global _logDateTimeUsers
    _logDateTimeUsers = _logDateTimeUsers - 1
    if _logDateTimeUsers:
        # somebody else still relies on the cached timestamp
        return
    pending = _resetLogDateTimeID
    if pending and pending.active():
        pending.cancel()
235
def timegm(year, month, day, hour, minute, second):
236
"""Convert time tuple in GMT to seconds since epoch, GMT"""
239
assert 1 <= month <= 12
240
days = 365*(year-EPOCH) + calendar.leapdays(EPOCH, year)
241
for i in range(1, month):
242
days = days + calendar.mdays[i]
243
if month > 2 and calendar.isleap(year):
245
days = days + day - 1
246
hours = days*24 + hour
247
minutes = hours*60 + minute
248
seconds = minutes*60 + second
251
def stringToDatetime(dateString):
252
"""Convert an HTTP date string (one of three formats) to seconds since epoch."""
253
parts = dateString.split()
255
if not parts[0][0:3].lower() in weekdayname_lower:
256
# Weekday is stupid. Might have been omitted.
258
return stringToDatetime("Sun, "+dateString)
264
if (partlen == 5 or partlen == 6) and parts[1].isdigit():
265
# 1st date format: Sun, 06 Nov 1994 08:49:37 GMT
266
# (Note: "GMT" is literal, not a variable timezone)
267
# (also handles without "GMT")
268
# This is the normal format
273
elif (partlen == 3 or partlen == 4) and parts[1].find('-') != -1:
274
# 2nd date format: Sunday, 06-Nov-94 08:49:37 GMT
275
# (Note: "GMT" is literal, not a variable timezone)
276
# (also handles without "GMT")
277
# Two digit year, yucko.
278
day, month, year = parts[1].split('-')
285
elif len(parts) == 5:
286
# 3rd date format: Sun Nov 6 08:49:37 1994
287
# ANSI C asctime() format.
293
raise ValueError("Unknown datetime format %r" % dateString)
296
month = int(monthname_lower.index(month.lower()))
298
hour, min, sec = map(int, time.split(':'))
299
return int(timegm(year, month, day, hour, min, sec))
302
"""Convert string to a chunk.
304
@returns: a tuple of strings representing the chunked encoding of data"""
305
return ("%x\r\n" % len(data), data, "\r\n")
308
"""Convert chunk to string.
310
@returns: tuple (result, remaining), may raise ValueError.
312
prefix, rest = data.split('\r\n', 1)
313
length = int(prefix, 16)
315
raise ValueError("Chunk length must be >= 0, not %d" % (length,))
316
if not rest[length:length + 2] == '\r\n':
317
raise ValueError, "chunk must end with CRLF"
318
return rest[:length], rest[length + 2:]
321
def parseContentRange(header):
    """Parse a content-range header into (start, end, realLength).

    realLength might be None if real length is not known ('*').

    @param header: the header value, e.g. C{"bytes 0-499/1234"} or
        C{"bytes 0-499/*"}.
    @return: a C{(start, end, realLength)} tuple; C{start} and C{end} are
        ints, C{realLength} is an int or C{None}.
    @raise ValueError: if the range unit is not C{bytes}, or the header is
        not of the form C{"<unit> <start>-<end>/<length>"}.
    """
    kind, other = header.strip().split()
    if kind.lower() != "bytes":
        # Interpolate the offending unit: the message used to be raised
        # with a literal, unformatted "%r" in it.
        raise ValueError("a range of type %r is not supported" % (kind,))
    startend, realLength = other.split("/")
    start, end = map(int, startend.split("-"))
    if realLength == "*":
        # total length unknown
        realLength = None
    else:
        realLength = int(realLength)
    return (start, end, realLength)
338
class StringTransport:
    """
    I am a StringIO wrapper that conforms to the transport API. I support
    the `writeSequence' method.

    Any attribute I do not define myself is looked up on the wrapped
    StringIO, so its C{write} and C{getvalue} are available on me directly.
    """
    def __init__(self):
        self.s = StringIO()

    def writeSequence(self, seq):
        """Write all the strings in C{seq} to the buffer, in order."""
        self.s.write(''.join(seq))

    def __getattr__(self, attr):
        """Delegate unknown attributes to the wrapped StringIO.

        @raise AttributeError: if the wrapped object lacks C{attr}, or if
            there is no wrapped object yet.
        """
        try:
            # go through __dict__ so a missing 's' cannot recurse into
            # __getattr__ again
            wrapped = self.__dict__['s']
        except KeyError:
            # An instance that has not been through __init__ must report a
            # missing attribute, not leak KeyError: hasattr() and friends
            # only understand AttributeError.
            raise AttributeError(attr)
        return getattr(wrapped, attr)
351
class HTTPClient(basic.LineReceiver):
352
"""A client for HTTP 1.0
355
You probably want to send a 'Host' header with the name of
356
the site you're connecting to, in order to not break name
357
based virtual hosting.
363
def sendCommand(self, command, path):
    """Write the HTTP/1.0 request line to the transport.

    @param command: the request method, e.g. C{"GET"}.
    @param path: the request URI to put on the request line.
    """
    requestLine = '%s %s HTTP/1.0\r\n' % (command, path)
    self.transport.write(requestLine)
366
def sendHeader(self, name, value):
    """Write a single C{name: value} header line to the transport.

    @param name: the header name.
    @param value: the header value.
    """
    headerLine = '%s: %s\r\n' % (name, value)
    self.transport.write(headerLine)
369
def endHeaders(self):
    """Finish the header section by writing the empty line."""
    blankLine = '\r\n'
    self.transport.write(blankLine)
372
def lineReceived(self, line):
375
l = line.split(None, 2)
381
# sometimes there is no message
383
self.handleStatus(version, status, message)
386
key, val = line.split(':', 1)
388
self.handleHeader(key, val)
389
if key.lower() == 'content-length':
390
self.length = int(val)
392
self.__buffer = StringIO()
393
self.handleEndHeaders()
396
def connectionLost(self, reason):
    """Treat the end of the connection as the end of the response.

    @param reason: why the connection went away; not inspected here.
    """
    finishResponse = self.handleResponseEnd
    finishResponse()
399
def handleResponseEnd(self):
400
if self.__buffer is not None:
401
b = self.__buffer.getvalue()
403
self.handleResponse(b)
405
def handleResponsePart(self, data):
    """Append a piece of the response body to the internal buffer.

    @param data: the part of the body that has just arrived.
    """
    buf = self.__buffer
    buf.write(data)
408
def connectionMade(self):
411
handleStatus = handleHeader = handleEndHeaders = lambda *args: None
413
def rawDataReceived(self, data):
414
if self.length is not None:
415
data, rest = data[:self.length], data[self.length:]
416
self.length -= len(data)
419
self.handleResponsePart(data)
421
self.handleResponseEnd()
422
self.setLineMode(rest)
425
# response codes that must have empty bodies
426
NO_BODY_CODES = (204, 304)
431
Subclasses should override the process() method to determine how
432
the request will be processed.
434
@ivar method: The HTTP method that was used.
435
@ivar uri: The full URI that was requested (includes arguments).
436
@ivar path: The path only (arguments not included).
437
@ivar args: All of the arguments, including URL and POST arguments.
438
@type args: A mapping of strings (the argument names) to lists of values.
439
i.e., ?foo=bar&foo=baz&quux=spam results in
440
{'foo': ['bar', 'baz'], 'quux': ['spam']}.
441
@ivar received_headers: All received headers
444
implements(interfaces.IConsumer)
449
code_message = RESPONSES[OK]
450
method = "(no method yet)"
451
clientproto = "(no clientproto yet)"
455
sentLength = 0 # content-length of response, or total bytes sent via chunking
460
def __init__(self, channel, queued):
462
@param channel: the channel we're connected to.
463
@param queued: are we in the request queue, or can we start writing to
466
self.channel = channel
468
self.received_headers = {}
469
self.received_cookies = {}
470
self.headers = {} # outgoing headers
471
self.cookies = [] # outgoing cookies
474
self.transport = StringTransport()
476
self.transport = self.channel.transport
479
"""Called when have finished responding and are no longer queued."""
481
log.err(RuntimeError("Producer was not unregistered for %s" % self.uri))
482
self.unregisterProducer()
483
self.channel.requestDone(self)
488
# win32 suckiness, no idea why it does this
492
# methods for channel - end users should not use these
494
def noLongerQueued(self):
495
"""Notify the object that it is no longer queued.
497
We start writing whatever data we have to the transport, etc.
499
This method is not intended for users.
502
raise RuntimeError, "noLongerQueued() got called unnecessarily."
506
# set transport to real one and send any buffer data
507
data = self.transport.getvalue()
508
self.transport = self.channel.transport
510
self.transport.write(data)
512
# if we have producer, register it with transport
513
if (self.producer is not None) and not self.finished:
514
self.transport.registerProducer(self.producer, self.streamingProducer)
516
# if we're finished, clean up
520
def gotLength(self, length):
521
"""Called when HTTP channel got length of content in this request.
523
This method is not intended for users.
526
self.content = StringIO()
528
self.content = tempfile.TemporaryFile()
530
def parseCookies(self):
531
"""Parse cookie headers.
533
This method is not intended for users."""
534
cookietxt = self.getHeader("cookie")
536
for cook in cookietxt.split(';'):
539
k, v = cook.split('=', 1)
540
self.received_cookies[k] = v
544
def handleContentChunk(self, data):
    """Write a chunk of data.

    This method is not intended for users.

    @param data: the piece of the request body to append to
        C{self.content}.
    """
    sink = self.content
    sink.write(data)
551
def requestReceived(self, command, path, version):
552
"""Called by channel when all data has been received.
554
This method is not intended for users.
556
self.content.seek(0,0)
560
self.method, self.uri = command, path
561
self.clientproto = version
562
x = self.uri.split('?', 1)
567
self.path, argstring = x
568
self.args = parse_qs(argstring, 1)
570
# cache the client and server information, we'll need this later to be
571
# serialized and sent with the request so CGIs will work remotely
572
self.client = self.channel.transport.getPeer()
573
self.host = self.channel.transport.getHost()
575
# Argument processing
577
ctype = self.getHeader('content-type')
578
if self.method == "POST" and ctype:
579
mfd = 'multipart/form-data'
580
key, pdict = cgi.parse_header(ctype)
581
if key == 'application/x-www-form-urlencoded':
582
args.update(parse_qs(self.content.read(), 1))
585
args.update(cgi.parse_multipart(self.content, pdict))
587
if e.args[0] == 'content-disposition':
588
# Parse_multipart can't cope with missing
589
# content-disposition headers in multipart/form-data
590
# parts, so we catch the exception and tell the client
591
# it was a bad request.
592
self.channel.transport.write(
593
"HTTP/1.1 400 Bad Request\r\n\r\n")
594
self.channel.transport.loseConnection()
601
return '<%s %s %s>'% (self.method, self.uri, self.clientproto)
604
"""Override in subclasses.
606
This method is not intended for users.
613
def registerProducer(self, producer, streaming):
614
"""Register a producer."""
616
raise ValueError, "registering producer %s before previous one (%s) was unregistered" % (producer, self.producer)
618
self.streamingProducer = streaming
619
self.producer = producer
622
producer.pauseProducing()
624
self.transport.registerProducer(producer, streaming)
626
def unregisterProducer(self):
627
"""Unregister the producer."""
629
self.transport.unregisterProducer()
632
# private http response methods
634
def _sendError(self, code, resp=''):
    """Write a bare status line plus the terminating empty line.

    @param code: the numeric status code.
    @param resp: the text to send after the code.
    """
    statusLine = '%s %s %s\r\n\r\n' % (self.clientproto, code, resp)
    self.transport.write(statusLine)
638
# The following is the public interface that people should be
641
def getHeader(self, key):
    """Get a header that was sent from the network.

    @param key: the header name; it is lower-cased before the lookup.
    @return: the stored value, or C{None} if no such header was received.
    """
    wanted = key.lower()
    return self.received_headers.get(wanted)
646
def getCookie(self, key):
    """Get a cookie that was sent from the network.

    @param key: the cookie name, looked up exactly as given.
    @return: the stored value, or C{None} if no such cookie was received.
    """
    cookies = self.received_cookies
    return cookies.get(key)
652
"""We are finished writing data."""
654
warnings.warn("Warning! request.finish called twice.", stacklevel=2)
657
if not self.startedWriting:
662
# write last chunk and closing CRLF
663
self.transport.write("0\r\n\r\n")
666
if hasattr(self.channel, "factory"):
667
self.channel.factory.log(self)
673
def write(self, data):
675
Write some data as a result of an HTTP request. The first
676
time this is called, it writes out response data.
678
if not self.startedWriting:
679
self.startedWriting = 1
680
version = self.clientproto
682
l.append('%s %s %s\r\n' % (version, self.code,
684
# if we don't have a content length, we send data in
685
# chunked mode, so that we can support pipelining in
686
# persistent connections.
687
if ((version == "HTTP/1.1") and
688
(self.headers.get('content-length', None) is None) and
689
self.method != "HEAD" and self.code not in NO_BODY_CODES):
690
l.append("%s: %s\r\n" % ('Transfer-encoding', 'chunked'))
692
if self.lastModified is not None:
693
if self.headers.has_key('last-modified'):
694
log.msg("Warning: last-modified specified both in"
695
" header list and lastModified attribute.")
697
self.setHeader('last-modified',
698
datetimeToString(self.lastModified))
699
if self.etag is not None:
700
self.setHeader('ETag', self.etag)
701
for name, value in self.headers.items():
702
l.append("%s: %s\r\n" % (name.capitalize(), value))
703
for cookie in self.cookies:
704
l.append('%s: %s\r\n' % ("Set-Cookie", cookie))
707
self.transport.writeSequence(l)
709
# if this is a "HEAD" request, we shouldn't return any data
710
if self.method == "HEAD":
711
self.write = lambda data: None
714
# for certain result codes, we should never return any data
715
if self.code in NO_BODY_CODES:
716
self.write = lambda data: None
719
self.sentLength = self.sentLength + len(data)
722
self.transport.writeSequence(toChunk(data))
724
self.transport.write(data)
726
def addCookie(self, k, v, expires=None, domain=None, path=None, max_age=None, comment=None, secure=None):
727
"""Set an outgoing HTTP cookie.
729
In general, you should consider using sessions instead of cookies, see
730
twisted.web.server.Request.getSession and the
731
twisted.web.server.Session class for details.
733
cookie = '%s=%s' % (k, v)
734
if expires is not None:
735
cookie = cookie +"; Expires=%s" % expires
736
if domain is not None:
737
cookie = cookie +"; Domain=%s" % domain
739
cookie = cookie +"; Path=%s" % path
740
if max_age is not None:
741
cookie = cookie +"; Max-Age=%s" % max_age
742
if comment is not None:
743
cookie = cookie +"; Comment=%s" % comment
745
cookie = cookie +"; Secure"
746
self.cookies.append(cookie)
748
def setResponseCode(self, code, message=None):
749
"""Set the HTTP response code.
753
self.code_message = message
755
self.code_message = RESPONSES.get(code, "Unknown Status")
757
def setHeader(self, k, v):
    """Set an outgoing HTTP header.

    @param k: the header name; it is stored lower-cased.
    @param v: the header value; replaces any value already stored for
        the same name.
    """
    name = k.lower()
    self.headers[name] = v
762
def redirect(self, url):
    """Utility function that does a redirect.

    The response code is set to L{FOUND} and the C{location} header is
    pointed at C{url}.

    The request should have finish() called after this.
    """
    status, target = FOUND, url
    self.setResponseCode(status)
    self.setHeader("location", target)
770
def setLastModified(self, when):
771
"""Set the X{Last-Modified} time for the response to this request.
773
If I am called more than once, I ignore attempts to set
774
Last-Modified earlier, only replacing the Last-Modified time
775
if it is to a later value.
777
If I am a conditional request, I may modify my response code
778
to L{NOT_MODIFIED} if appropriate for the time given.
780
@param when: The last time the resource being returned was
781
modified, in seconds since the epoch.
783
@return: If I am a X{If-Modified-Since} conditional request and
784
the time given is not newer than the condition, I return
785
L{http.CACHED<CACHED>} to indicate that you should write no
786
body. Otherwise, I return a false value.
788
# time.time() may be a float, but the HTTP-date strings are
789
# only good for whole seconds.
790
when = long(math.ceil(when))
791
if (not self.lastModified) or (self.lastModified < when):
792
self.lastModified = when
794
modified_since = self.getHeader('if-modified-since')
796
modified_since = stringToDatetime(modified_since.split(';', 1)[0])
797
if modified_since >= when:
798
self.setResponseCode(NOT_MODIFIED)
802
def setETag(self, etag):
803
"""Set an X{entity tag} for the outgoing response.
805
That's \"entity tag\" as in the HTTP/1.1 X{ETag} header, \"used
806
for comparing two or more entities from the same requested
809
If I am a conditional request, I may modify my response code
810
to L{NOT_MODIFIED} or L{PRECONDITION_FAILED}, if appropriate
813
@param etag: The entity tag for the resource being returned.
815
@return: If I am a X{If-None-Match} conditional request and
816
the tag matches one in the request, I return
817
L{http.CACHED<CACHED>} to indicate that you should write
818
no body. Otherwise, I return a false value.
823
tags = self.getHeader("if-none-match")
826
if (etag in tags) or ('*' in tags):
827
self.setResponseCode(((self.method in ("HEAD", "GET"))
829
or PRECONDITION_FAILED)
833
def getAllHeaders(self):
    """Return dictionary of all headers the request received.

    @return: C{self.received_headers} itself, not a copy.
    """
    received = self.received_headers
    return received
837
def getRequestHostname(self):
838
"""Get the hostname that the user passed in to the request.
840
This will either use the Host: header (if it is available) or the
841
host we are listening on if the header is unavailable.
843
return (self.getHeader('host') or
844
socket.gethostbyaddr(self.getHost()[1])[0]
848
"""Get my originally requesting transport's host.
850
Don't rely on the 'transport' attribute, since Request objects may be
851
copied remotely. For information on this method's return value, see
852
twisted.internet.tcp.Port.
856
def setHost(self, host, port, ssl=0):
857
"""Change the host and port the request thinks it's using.
859
This method is useful for working with reverse HTTP proxies (e.g.
860
both Squid and Apache's mod_proxy can do this), when the address
861
the HTTP client is using is different than the one we're listening on.
863
For example, Apache may be listening on https://www.example.com, and then
864
forwarding requests to http://localhost:8080, but we don't want HTML produced
865
by Twisted to say 'http://localhost:8080', they should say 'https://www.example.com',
868
request.setHost('www.example.com', 443, ssl=1)
870
This method is experimental.
873
self.received_headers["host"] = host
874
self.host = address.IPv4Address("TCP", host, port)
876
def getClientIP(self):
877
if isinstance(self.client, address.IPv4Address):
878
return self.client.host
885
transport = getattr(getattr(self, 'channel', None), 'transport', None)
886
if interfaces.ISSLTransport(transport, None) is not None:
890
def _authorize(self):
891
# Authorization, (mostly) per the RFC
893
authh = self.getHeader("Authorization")
895
self.user = self.password = ''
897
bas, upw = authh.split()
898
if bas.lower() != "basic":
900
upw = base64.decodestring(upw)
901
self.user, self.password = upw.split(':', 1)
902
except (binascii.Error, ValueError):
903
self.user = self.password = ""
906
self.user = self.password = ""
916
def getPassword(self):
925
if self.client.type != 'TCP':
927
host = self.client.host
929
name, names, addresses = socket.gethostbyaddr(host)
932
names.insert(0, name)
938
def connectionLost(self, reason):
939
"""connection was lost"""
942
class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin):
943
"""A receiver for HTTP requests."""
945
maxHeaders = 500 # max number of headers allowed per request
953
# set in instances or subclasses
954
requestFactory = Request
962
def connectionMade(self):
    """Arm the timeout with C{self.timeOut} once the connection is up."""
    limit = self.timeOut
    self.setTimeout(limit)
965
def lineReceived(self, line):
968
if self.__first_line:
969
# if this connection is not persistent, drop any data which
970
# the client (illegally) sent after the last request.
971
if not self.persistent:
972
self.dataReceived = self.lineReceived = lambda *args: None
975
# IE sends an extraneous empty line (\r\n) after a POST request;
976
# eat up such a line, but only ONCE
977
if not line and self.__first_line == 1:
978
self.__first_line = 2
981
# create a new Request object
982
request = self.requestFactory(self, len(self.requests))
983
self.requests.append(request)
985
self.__first_line = 0
988
self.transport.write("HTTP/1.1 400 Bad Request\r\n\r\n")
989
self.transport.loseConnection()
991
command, request, version = parts
992
self._command = command
994
self._version = version
997
self.headerReceived(self.__header)
999
self.allHeadersReceived()
1000
if self.length == 0:
1001
self.allContentReceived()
1004
elif line[0] in ' \t':
1005
self.__header = self.__header+'\n'+line
1008
self.headerReceived(self.__header)
1009
self.__header = line
1011
def headerReceived(self, line):
1012
"""Do pre-processing (for content-length) and store this header away.
1014
header, data = line.split(':', 1)
1015
header = header.lower()
1017
if header == 'content-length':
1018
self.length = int(data)
1019
reqHeaders = self.requests[-1].received_headers
1020
reqHeaders[header] = data
1021
if len(reqHeaders) > self.maxHeaders:
1022
self.transport.write("HTTP/1.1 400 Bad Request\r\n\r\n")
1023
self.transport.loseConnection()
1025
def allContentReceived(self):
1026
command = self._command
1028
version = self._version
1030
# reset ALL state variables, so we don't interfere with next request
1033
self.__first_line = 1
1034
del self._command, self._path, self._version
1036
# Disable the idle timeout, in case this request takes a long
1037
# time to finish generating output.
1039
self._savedTimeOut = self.setTimeout(None)
1041
req = self.requests[-1]
1042
req.requestReceived(command, path, version)
1044
def rawDataReceived(self, data):
1045
if len(data) < self.length:
1046
self.requests[-1].handleContentChunk(data)
1047
self.length = self.length - len(data)
1049
self.requests[-1].handleContentChunk(data[:self.length])
1050
extraneous = data[self.length:]
1051
self.allContentReceived()
1052
self.setLineMode(extraneous)
1054
def allHeadersReceived(self):
1055
req = self.requests[-1]
1057
self.persistent = self.checkPersistence(req, self._version)
1058
req.gotLength(self.length)
1060
def checkPersistence(self, request, version):
1061
"""Check if the channel should close or not."""
1062
connection = request.getHeader('connection')
1064
tokens = map(str.lower, connection.split(' '))
1068
# HTTP 1.0 persistent connection support is currently disabled,
1069
# since we need a way to disable pipelining. HTTP 1.0 can't do
1070
# pipelining since we can't know in advance if we'll have a
1071
# content-length header, if we don't have the header we need to close the
1072
# connection. In HTTP 1.1 this is not an issue since we use chunked
1073
# encoding if content-length is not available.
1075
#if version == "HTTP/1.0":
1076
# if 'keep-alive' in tokens:
1077
# request.setHeader('connection', 'Keep-Alive')
1081
if version == "HTTP/1.1":
1082
if 'close' in tokens:
1083
request.setHeader('connection', 'close')
1090
def requestDone(self, request):
1091
"""Called by first request in queue when it is done."""
1092
if request != self.requests[0]: raise TypeError
1093
del self.requests[0]
1096
# notify next request it can start writing
1098
self.requests[0].noLongerQueued()
1100
if self._savedTimeOut:
1101
self.setTimeout(self._savedTimeOut)
1103
self.transport.loseConnection()
1105
def timeoutConnection(self):
    """Log which peer is being timed out, then defer to TimeoutMixin."""
    peer = self.transport.getPeer()
    log.msg("Timing out client: %s" % str(peer))
    policies.TimeoutMixin.timeoutConnection(self)
1109
def connectionLost(self, reason):
    """Disable the timeout and tell every queued request the link is gone.

    @param reason: passed through unchanged to each request's own
        C{connectionLost}.
    """
    self.setTimeout(None)
    for pending in self.requests:
        pending.connectionLost(reason)
1115
class HTTPFactory(protocol.ServerFactory):
1116
"""Factory for HTTP server."""
1118
protocol = HTTPChannel
1122
timeOut = 60 * 60 * 12
1124
def __init__(self, logPath=None, timeout=60*60*12):
    """Remember where to log and how long a connection may stay idle.

    @param logPath: path of the log file, stored as an absolute path;
        C{None} is stored unchanged.
    @param timeout: value stored as C{self.timeOut}.
    """
    self.timeOut = timeout
    if logPath is None:
        self.logPath = None
    else:
        self.logPath = os.path.abspath(logPath)
1130
def buildProtocol(self, addr):
1131
p = protocol.ServerFactory.buildProtocol(self, addr)
1132
# timeOut needs to be on the Protocol instance because
1133
# TimeoutMixin expects it there
1134
p.timeOut = self.timeOut
1137
def startFactory(self):
1140
self.logFile = self._openLogFile(self.logPath)
1142
self.logFile = log.logfile
1144
def stopFactory(self):
1145
if hasattr(self, "logFile"):
1146
if self.logFile != log.logfile:
1147
self.logFile.close()
1151
def _openLogFile(self, path):
1152
"""Override in subclasses, e.g. to use twisted.python.logfile."""
1153
f = open(path, "a", 1)
1156
def _escape(self, s):
1157
# pain in the ass. Return a string like python repr, but always
1158
# escaped as if surrounding quotes were "".
1161
return r[1:-1].replace('"', '\\"').replace("\\'", "'")
1164
def log(self, request):
1165
"""Log a request's result to the logfile, by default in combined log format."""
1166
if hasattr(self, "logFile"):
1167
line = '%s - - %s "%s" %d %s "%s" "%s"\n' % (
1168
request.getClientIP(),
1169
# request.getUser() or "-", # the remote user is almost never important
1171
'%s %s %s' % (self._escape(request.method),
1172
self._escape(request.uri),
1173
self._escape(request.clientproto)),
1175
request.sentLength or "-",
1176
self._escape(request.getHeader("referer") or "-"),
1177
self._escape(request.getHeader("user-agent") or "-"))
1178
self.logFile.write(line)