1
"""HTTP library functions.
3
This module contains functions for building an HTTP application
4
framework: any one, not just one whose name starts with "Ch". ;) If you
5
reference any modules from some popular framework inside *this* module,
6
FuManChu will personally hang you up by your thumbs and submit you
10
from binascii import b2a_base64
11
from cherrypy._cpcompat import BaseHTTPRequestHandler, HTTPDate, ntob, ntou, reversed, sorted
12
from cherrypy._cpcompat import basestring, bytestr, iteritems, nativestr, unicodestr, unquote_qs
13
# Status code -> (reason phrase, long message). Start from the table shipped
# with BaseHTTPRequestHandler and work on a copy, so that the entries
# overridden below do not modify the handler class itself.
response_codes = BaseHTTPRequestHandler.responses.copy()

# From http://www.cherrypy.org/ticket/361
response_codes[500] = ('Internal Server Error',
                       'The server encountered an unexpected condition '
                       'which prevented it from fulfilling the request.')
response_codes[503] = ('Service Unavailable',
                       'The server is currently unable to handle the '
                       'request due to a temporary overloading or '
                       'maintenance of the server.')
30
"""Return the given path \*atoms, joined into a single URL.
32
This will correctly join a SCRIPT_NAME and PATH_INFO into the
33
original URL, even if either atom is blank.
35
url = "/".join([x for x in atoms if x])
37
url = url.replace("//", "/")
38
# Special-case the final url of "", and return "/" instead.
41
def urljoin_bytes(*atoms):
    """Return the given path *atoms, joined into a single URL.

    This will correctly join a SCRIPT_NAME and PATH_INFO into the
    original URL, even if either atom is blank.
    """
    # Convert the separators once instead of on every loop iteration.
    slash = ntob("/")
    double_slash = ntob("//")

    # Blank atoms are dropped so they do not contribute extra separators.
    url = slash.join([x for x in atoms if x])
    # A single replace() pass can leave a doubled slash behind, so repeat
    # until none remains.
    while double_slash in url:
        url = url.replace(double_slash, slash)
    # Special-case the final url of "", and return "/" instead.
    return url or slash
53
def protocol_from_http(protocol_str):
    """Return a protocol tuple from the given 'HTTP/x.y' string.

    Both version components may have more than one digit, so
    'HTTP/1.10' yields (1, 10) rather than (1, 1).
    """
    # Everything after the 5-character "HTTP/" prefix is "major.minor".
    major, dot, minor = protocol_str[5:].partition(".")
    if dot and major.isdigit() and minor.isdigit():
        return int(major), int(minor)
    # Anything else (e.g. trailing text after the version) falls back to
    # the fixed single-digit positions, including the errors that raises.
    return int(protocol_str[5]), int(protocol_str[7])
57
def get_ranges(headervalue, content_length):
    """Return a list of (start, stop) indices from a Range header, or None.

    Each (start, stop) tuple will be composed of two ints, which are suitable
    for use in a slicing operation. That is, the header "Range: bytes=3-6",
    if applied against a Python string, is requesting resource[3:7]. This
    function will return the list [(3, 7)].

    If this function returns an empty list, you should return HTTP 416.

    None is returned when there is no header value or when a range-spec is
    syntactically invalid; the header should then be ignored.
    """
    if not headervalue:
        return None

    result = []
    # The unit before "=" is split off but not otherwise inspected.
    bytesunit, byteranges = headervalue.split("=", 1)
    for brange in byteranges.split(","):
        start, stop = [x.strip() for x in brange.split("-", 1)]
        if start:
            if not stop:
                # "N-" means from byte N through the last byte.
                stop = content_length - 1
            start, stop = int(start), int(stop)
            if start >= content_length:
                # From rfc 2616 sec 14.16:
                # "If the server receives a request (other than one
                # including an If-Range request-header field) with an
                # unsatisfiable Range request-header field (that is,
                # all of whose byte-range-spec values have a first-byte-pos
                # value greater than the current length of the selected
                # resource), it SHOULD return a response code of 416
                # (Requested range not satisfiable)."
                continue
            if stop < start:
                # From rfc 2616 sec 14.16:
                # "If the server ignores a byte-range-spec because it
                # is syntactically invalid, the server SHOULD treat
                # the request as if the invalid Range header field
                # did not exist. (Normally, this means return a 200
                # response containing the full entity)."
                return None
            # The header's last-byte-pos is inclusive; slices are not.
            result.append((start, stop + 1))
        else:
            if not stop:
                # See rfc quote above.
                return None
            # Negative subscript (last N bytes). A suffix longer than the
            # entity selects the whole entity (rfc 2616 sec 14.35.1), so
            # clamp the start at 0 instead of letting it go negative.
            result.append((max(content_length - int(stop), 0),
                           content_length))

    return result
108
class HeaderElement(object):
    """An element (with parameters) from an HTTP header's element list."""

    def __init__(self, value, params=None):
        """Store the element value and its parameter dict.

        A fresh dict is created when params is None so that instances
        never share one mutable default.
        """
        self.value = value
        if params is None:
            params = {}
        self.params = params

    def __cmp__(self, other):
        # Python 2 ordering hook: elements order by their value.
        return cmp(self.value, other.value)

    def __lt__(self, other):
        # Python 3 ordering hook used by sorting.
        return self.value < other.value

    def __str__(self):
        p = [";%s=%s" % (k, v) for k, v in iteritems(self.params)]
        return "%s%s" % (self.value, "".join(p))

    def __bytes__(self):
        return ntob(self.__str__())

    def __unicode__(self):
        return ntou(self.__str__())

    def parse(elementstr):
        """Transform 'token;key=val' to ('token', {'key': 'val'})."""
        # Split the element into a value and parameters. The 'value' may
        # be of the form, "token=token", but we don't split that here.
        atoms = [x.strip() for x in elementstr.split(";") if x.strip()]
        if not atoms:
            initial_value = ''
        else:
            initial_value = atoms.pop(0).strip()
        params = {}
        for atom in atoms:
            atom = [x.strip() for x in atom.split("=", 1) if x.strip()]
            if not atom:
                # A bare "=" has neither key nor value; skip it rather
                # than fail on the pop() below.
                continue
            key = atom.pop(0)
            if atom:
                val = atom[0]
            else:
                # A parameter without "=val" gets an empty value.
                val = ""
            params[key] = val
        return initial_value, params
    parse = staticmethod(parse)

    def from_str(cls, elementstr):
        """Construct an instance from a string of the form 'token;key=val'."""
        ival, params = cls.parse(elementstr)
        return cls(ival, params)
    from_str = classmethod(from_str)
161
# Matches the ";q=" delimiter (optional spaces before and after the "q").
# AcceptElement.from_str splits an element on its first occurrence.
q_separator = re.compile(r'; *q *=')
163
class AcceptElement(HeaderElement):
    """An element (with parameters) from an Accept* header's element list.

    AcceptElement objects are comparable; the more-preferred object will be
    "less than" the less-preferred object. They are also therefore sortable;
    if you sort a list of AcceptElement objects, they will be listed in
    priority order; the most preferred value will be first. Yes, it should
    have been the other way around, but it's too late to fix now.
    """

    def from_str(cls, elementstr):
        """Construct an instance from one element of an Accept* header."""
        qvalue = None
        # The first "q" parameter (if any) separates the initial
        # media-range parameter(s) (if any) from the accept-params.
        atoms = q_separator.split(elementstr, 1)
        media_range = atoms.pop(0).strip()
        if atoms:
            # The qvalue for an Accept header can have extensions. The other
            # headers cannot, but it's easier to parse them as if they did.
            qvalue = HeaderElement.from_str(atoms[0].strip())

        media_type, params = cls.parse(media_range)
        if qvalue is not None:
            # Stored as a HeaderElement; the qvalue property unwraps it.
            params["q"] = qvalue
        return cls(media_type, params)
    from_str = classmethod(from_str)

    def qvalue(self):
        # A missing "q" parameter defaults to "1".
        val = self.params.get("q", "1")
        if isinstance(val, HeaderElement):
            val = val.value
        return float(val)
    qvalue = property(qvalue, doc="The qvalue, or priority, of this value.")

    def __cmp__(self, other):
        # Python 2 ordering hook: by qvalue, ties broken by string form.
        diff = cmp(self.qvalue, other.qvalue)
        if diff == 0:
            diff = cmp(str(self), str(other))
        return diff

    def __lt__(self, other):
        # Python 3 ordering hook: by qvalue, ties broken by string form.
        if self.qvalue == other.qvalue:
            return str(self) < str(other)
        else:
            return self.qvalue < other.qvalue
210
def header_elements(fieldname, fieldvalue):
    """Return a sorted HeaderElement list from a comma-separated header string.

    An empty or missing fieldvalue yields an empty list. Elements of
    "Accept*" and "TE" headers are parsed as AcceptElement; all others as
    HeaderElement. The sorted list is returned in reverse order.
    """
    if not fieldvalue:
        return []

    result = []
    for element in fieldvalue.split(","):
        if fieldname.startswith("Accept") or fieldname == 'TE':
            hv = AcceptElement.from_str(element)
        else:
            hv = HeaderElement.from_str(element)
        result.append(hv)

    return list(reversed(sorted(result)))
225
def decode_TEXT(value):
    r"""Decode :rfc:`2047` TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> "f\xfcr")."""
    try:
        # Python 3
        from email.header import decode_header
    except ImportError:
        from email.Header import decode_header
    atoms = decode_header(value)
    decoded = []
    for atom, charset in atoms:
        if charset is not None:
            atom = atom.decode(charset)
        elif not isinstance(atom, str):
            # On Python 3, decode_header returns bytes for the plain parts
            # of a header that also contains encoded words. Decode them so
            # they can be joined with the decoded text; ISO-8859-1 maps
            # every byte and so cannot fail.
            atom = atom.decode("ISO-8859-1")
        decoded.append(atom)
    return "".join(decoded)
240
def valid_status(status):
    """Return legal HTTP status Code, Reason-phrase and Message.

    The status arg must be an int, or a str that begins with an int.

    If status is an int, or a str and no reason-phrase is supplied,
    a default reason-phrase will be provided.

    Raises ValueError if the code is non-numeric or outside 100-599.
    """
    if not status:
        status = 200

    # Normalize to "code[ reason]" so ints and strings share one path.
    status = str(status)
    parts = status.split(" ", 1)
    if len(parts) == 1:
        # No reason supplied.
        code, = parts
        reason = None
    else:
        code, reason = parts
        reason = reason.strip()

    try:
        code = int(code)
    except ValueError:
        raise ValueError("Illegal response status from server "
                         "(%s is non-numeric)." % repr(code))

    if code < 100 or code > 599:
        raise ValueError("Illegal response status from server "
                         "(%s is out of range)." % repr(code))

    if code not in response_codes:
        # code is unknown but not illegal
        default_reason, message = "", ""
    else:
        default_reason, message = response_codes[code]

    if reason is None:
        reason = default_reason

    return code, reason, message
284
# NOTE: the parse_qs functions that follow are modified version of those
285
# in the python3.0 source - we need to pass through an encoding to the unquote
286
# method, but the default parse_qs function doesn't allow us to. These do.
288
def _parse_qs(qs, keep_blank_values=0, strict_parsing=0, encoding='utf-8'):
    """Parse a query given as a string argument.

    Arguments:

    qs: URL-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        URL encoded queries should be treated as blank strings. A
        true value indicates that blanks should be retained as blank
        strings. The default false value indicates that blank values
        are to be ignored and treated as if they were not included.

    strict_parsing: flag indicating what to do with parsing errors. If
        false (the default), errors are silently ignored. If true,
        errors raise a ValueError exception.

    encoding: passed through to unquote_qs for each name and value.

    Returns a dict, as G-d intended.
    """
    # Both "&" and ";" separate name=value pairs.
    pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
    d = {}
    for name_value in pairs:
        if not name_value and not strict_parsing:
            continue
        nv = name_value.split('=', 1)
        if len(nv) != 2:
            if strict_parsing:
                raise ValueError("bad query field: %r" % (name_value,))
            # Handle case of a control-name with no equal sign
            if keep_blank_values:
                nv.append('')
            else:
                continue
        if len(nv[1]) or keep_blank_values:
            name = unquote_qs(nv[0], encoding)
            value = unquote_qs(nv[1], encoding)
            if name in d:
                # A repeated name turns its entry into a list of values.
                if not isinstance(d[name], list):
                    d[name] = [d[name]]
                d[name].append(value)
            else:
                d[name] = value
    return d
333
# Matches "<digits>,<digits>". parse_query_string applies it with match(),
# so only the start of the query string is tested.
image_map_pattern = re.compile(r"[0-9]+,[0-9]+")
335
def parse_query_string(query_string, keep_blank_values=True, encoding='utf-8'):
    """Build a params dictionary from a query_string.

    Duplicate key/value pairs in the provided query_string will be
    returned as {'key': [val1, val2, ...]}. Single key/values will
    be returned as strings: {'key': 'value'}.

    A query string that starts with "<digits>,<digits>" is returned as
    {'x': int, 'y': int} instead.
    """
    if image_map_pattern.match(query_string):
        # Server-side image map. Map the coords to 'x' and 'y'
        # (like CGI::Request does).
        pm = query_string.split(",")
        pm = {'x': int(pm[0]), 'y': int(pm[1])}
    else:
        pm = _parse_qs(query_string, keep_blank_values, encoding=encoding)
    return pm
352
class CaseInsensitiveDict(dict):
    """A case-insensitive dict subclass.

    Each key is changed on entry to str(key).title().
    """

    # Sentinel that lets pop() tell "no default given" apart from None.
    _pop_missing = object()

    def __getitem__(self, key):
        return dict.__getitem__(self, str(key).title())

    def __setitem__(self, key, value):
        dict.__setitem__(self, str(key).title(), value)

    def __delitem__(self, key):
        dict.__delitem__(self, str(key).title())

    def __contains__(self, key):
        return dict.__contains__(self, str(key).title())

    def get(self, key, default=None):
        return dict.get(self, str(key).title(), default)

    # dict.has_key only exists on Python 2; mirror it only when present.
    if hasattr({}, 'has_key'):
        def has_key(self, key):
            return dict.has_key(self, str(key).title())

    def update(self, E):
        """Copy every item of the mapping E into self, normalizing keys."""
        for k in E.keys():
            self[str(k).title()] = E[k]

    def fromkeys(cls, seq, value=None):
        """Return a new instance with normalized keys from seq, all set to value."""
        newdict = cls()
        for k in seq:
            newdict[str(k).title()] = value
        return newdict
    fromkeys = classmethod(fromkeys)

    def setdefault(self, key, x=None):
        """Return self[key], first setting it to x if the key is absent."""
        key = str(key).title()
        try:
            return self[key]
        except KeyError:
            self[key] = x
            return x

    def pop(self, key, default=_pop_missing):
        """Remove key and return its value.

        If the key is absent, return default when one was given and raise
        KeyError otherwise, matching dict.pop.
        """
        key = str(key).title()
        if default is CaseInsensitiveDict._pop_missing:
            return dict.pop(self, key)
        return dict.pop(self, key, default)
400
# TEXT = <any OCTET except CTLs, but including LWS>
402
# A CRLF is allowed in the definition of TEXT only as part of a header
403
# field continuation. It is expected that the folding LWS will be
404
# replaced with a single SP before interpretation of the TEXT value."
405
# Arguments for translate(table, deletechars), used to strip control
# characters (0-31 and 127) from header names and values before output.
if nativestr == bytestr:
    # Native strings are byte strings: an identity table is required.
    header_translate_table = ''.join([chr(i) for i in xrange(256)])
    header_translate_deletechars = ''.join([chr(i) for i in xrange(32)]) + chr(127)
else:
    # bytes.translate accepts None as the table when only deleting.
    header_translate_table = None
    header_translate_deletechars = bytes(range(32)) + bytes([127])
413
class HeaderMap(CaseInsensitiveDict):
    """A dict subclass for HTTP request and response headers.

    Each key is changed on entry to str(key).title(). This allows headers
    to be case-insensitive and avoid duplicates.

    Values are header values (decoded according to :rfc:`2047` if necessary).
    """

    # NOTE(review): encode() reads self.protocol and self.use_rfc_2047, but
    # their definitions were lost from this listing. The defaults given
    # here are reconstructed -- confirm against the canonical source.
    protocol = (1, 1)
    encodings = ["ISO-8859-1"]

    # Someday, when http-bis is done, this will probably get dropped
    # since few servers, clients, or intermediaries do it. But until then,
    # we're going to obey the spec as is.
    # "Words of *TEXT MAY contain characters from character sets other than
    # ISO-8859-1 only when encoded according to the rules of RFC 2047."
    use_rfc_2047 = True

    def elements(self, key):
        """Return a sorted list of HeaderElements for the given header."""
        key = str(key).title()
        value = self.get(key)
        return header_elements(key, value)

    def values(self, key):
        """Return a sorted list of HeaderElement.value for the given header."""
        return [e.value for e in self.elements(key)]

    def output(self):
        """Transform self into a list of (name, value) tuples."""
        header_list = []
        for k, v in self.items():
            if isinstance(k, unicodestr):
                k = self.encode(k)

            # Non-string values are stringified before encoding.
            if not isinstance(v, basestring):
                v = str(v)

            if isinstance(v, unicodestr):
                v = self.encode(v)

            # See header_translate_* constants above.
            # Replace only if you really know what you're doing.
            k = k.translate(header_translate_table, header_translate_deletechars)
            v = v.translate(header_translate_table, header_translate_deletechars)

            header_list.append((k, v))
        return header_list

    def encode(self, v):
        """Return the given header name or value, encoded for HTTP output.

        Each encoding in self.encodings is tried in order. If none works
        and RFC 2047 encoding is enabled for HTTP/1.1, the value is sent
        as a base64 encoded-word. Otherwise ValueError is raised.
        """
        for enc in self.encodings:
            try:
                return v.encode(enc)
            except UnicodeEncodeError:
                continue

        if self.protocol == (1, 1) and self.use_rfc_2047:
            # Encode RFC-2047 TEXT
            # (e.g. u"\u8200" -> "=?utf-8?b?6IiA?=").
            # We do our own here instead of using the email module
            # because we never want to fold lines--folding has
            # been deprecated by the HTTP working group.
            v = b2a_base64(v.encode('utf-8'))
            return (ntob('=?utf-8?b?') + v.strip(ntob('\n')) + ntob('?='))

        raise ValueError("Could not encode header part %r using "
                         "any of the encodings %r." %
                         (v, self.encodings))
486
"""An internet address.
489
Should be the client's host name. If not available (because no DNS
490
lookup is performed), the IP address should be used instead.
498
def __init__(self, ip, port, name=None):
506
return "httputil.Host(%r, %r, %r)" % (self.ip, self.port, self.name)