1
from __future__ import generators
5
A caching http interface that supports ETags and gzip
8
Requires Python 2.3 or later
11
2007-08-18, Rick: Modified so it's able to use a socks proxy if needed.
15
__author__ = "Joe Gregorio (joe@bitworking.org)"
16
__copyright__ = "Copyright 2006, Joe Gregorio"
17
__contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
19
"Xavier Verges Farrero",
32
import email.FeedParser
45
# remove deprecated warning in python2.6
47
from hashlib import sha1 as _sha, md5 as _md5
54
from gettext import gettext as _
58
from httplib2 import socks
62
# Build the appropriate socket wrapper for ssl
64
import ssl # python 2.6
65
ssl_SSLError = ssl.SSLError
66
def _ssl_wrap_socket(sock, key_file, cert_file,
67
disable_validation, ca_certs):
68
if disable_validation:
69
cert_reqs = ssl.CERT_NONE
71
cert_reqs = ssl.CERT_REQUIRED
72
# We should be specifying SSL version 3 or TLS v1, but the ssl module
73
# doesn't expose the necessary knobs. So we need to go with the default
75
return ssl.wrap_socket(sock, keyfile=key_file, certfile=cert_file,
76
cert_reqs=cert_reqs, ca_certs=ca_certs)
77
except (AttributeError, ImportError):
79
def _ssl_wrap_socket(sock, key_file, cert_file,
80
disable_validation, ca_certs):
81
if not disable_validation:
82
raise CertificateValidationUnsupported(
83
"SSL certificate validation is not supported without "
84
"the ssl module installed. To avoid this error, install "
85
"the ssl module, or explicity disable validation.")
86
ssl_sock = socket.ssl(sock, key_file, cert_file)
87
return httplib.FakeSocket(sock, ssl_sock)
90
if sys.version_info >= (2,3):
91
from iri2uri import iri2uri
96
def has_timeout(timeout): # python 2.6
    """Return True when *timeout* is an explicit, usable timeout value.

    None means "no timeout requested"; on Python 2.6+ the module-level
    sentinel socket._GLOBAL_DEFAULT_TIMEOUT likewise means "use the
    global default", so neither counts as an explicit timeout.
    """
    if timeout is None:
        return False
    # Older Pythons lack the sentinel; getattr then yields None, which
    # *timeout* cannot be at this point, so the check degrades safely.
    sentinel = getattr(socket, '_GLOBAL_DEFAULT_TIMEOUT', None)
    return timeout is not sentinel
101
__all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error',
102
'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
103
'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError',
104
'debuglevel', 'ProxiesUnavailableError']
107
# The httplib debug level, set to a non-zero value to get debug output
112
if sys.version_info < (2,4):
118
def HTTPResponse__getheaders(self):
119
"""Return list of (header, value) tuples."""
121
raise httplib.ResponseNotReady()
122
return self.msg.items()
124
if not hasattr(httplib.HTTPResponse, 'getheaders'):
125
httplib.HTTPResponse.getheaders = HTTPResponse__getheaders
127
# All exceptions raised here derive from HttpLib2Error
class HttpLib2Error(Exception):
    """Base class for every error raised by this module."""
    pass

# Some exceptions can be caught and optionally
# be turned back into responses.
class HttpLib2ErrorWithResponse(HttpLib2Error):
    """An error that also carries the offending response and body."""
    def __init__(self, desc, response, content):
        # Stash the response/content so a caller may choose to turn
        # the failure back into an ordinary (response, content) pair.
        self.content = content
        self.response = response
        HttpLib2Error.__init__(self, desc)

class RedirectMissingLocation(HttpLib2ErrorWithResponse):
    pass

class RedirectLimit(HttpLib2ErrorWithResponse):
    pass

class FailedToDecompressContent(HttpLib2ErrorWithResponse):
    pass

class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse):
    pass

class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse):
    pass

class MalformedHeader(HttpLib2Error):
    pass

class RelativeURIError(HttpLib2Error):
    pass

class ServerNotFoundError(HttpLib2Error):
    pass

class ProxiesUnavailableError(HttpLib2Error):
    pass

class CertificateValidationUnsupported(HttpLib2Error):
    pass

class SSLHandshakeError(HttpLib2Error):
    pass
150
class CertificateHostnameMismatch(SSLHandshakeError):
151
def __init__(self, desc, host, cert):
152
HttpLib2Error.__init__(self, desc)
160
# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)
162
# Pluggable cache storage (supports storing the cache in
163
# flat files by default. We need a plug-in architecture
164
# that can support Berkeley DB and Squid)
167
# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
168
# Does not handle Cache-Control: max-stale
169
# Does not use Age: headers when calculating cache freshness.
172
# The number of redirections to follow before giving up.
173
# Note that only GET redirects are automatically followed.
174
# Will also honor 301 requests by saving that info and never
175
# requesting that URI again.
176
DEFAULT_MAX_REDIRECTS = 5
178
# Default CA certificates file bundled with httplib2.
179
CA_CERTS = os.path.join(
180
os.path.dirname(os.path.abspath(__file__ )), "cacerts.txt")
182
# Which headers are hop-by-hop headers by default
183
HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade']
185
def _get_end2end_headers(response):
186
hopbyhop = list(HOP_BY_HOP)
187
hopbyhop.extend([x.strip() for x in response.get('connection', '').split(',')])
188
return [header for header in response.keys() if header not in hopbyhop]
190
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
193
"""Parses a URI using the regex given in Appendix B of RFC 3986.
195
(scheme, authority, path, query, fragment) = parse_uri(uri)
197
groups = URI.match(uri).groups()
198
return (groups[1], groups[3], groups[4], groups[6], groups[8])
201
(scheme, authority, path, query, fragment) = parse_uri(uri)
202
if not scheme or not authority:
203
raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
204
authority = authority.lower()
205
scheme = scheme.lower()
208
# Could do syntax based normalization of the URI before
209
# computing the digest. See Section 6.2.2 of Std 66.
210
request_uri = query and "?".join([path, query]) or path
211
scheme = scheme.lower()
212
defrag_uri = scheme + "://" + authority + request_uri
213
return scheme, authority, request_uri, defrag_uri
216
# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
217
re_url_scheme = re.compile(r'^\w+://')
218
re_slash = re.compile(r'[?/:|]+')
220
def safename(filename):
221
"""Return a filename suitable for the cache.
223
Strips dangerous and common characters to create a filename we
224
can use to store the cache in.
228
if re_url_scheme.match(filename):
229
if isinstance(filename,str):
230
filename = filename.decode('utf-8')
231
filename = filename.encode('idna')
233
filename = filename.encode('idna')
236
if isinstance(filename,unicode):
237
filename=filename.encode('utf-8')
238
filemd5 = _md5(filename).hexdigest()
239
filename = re_url_scheme.sub("", filename)
240
filename = re_slash.sub(",", filename)
242
# limit length of filename
243
if len(filename)>200:
244
filename=filename[:200]
245
return ",".join((filename, filemd5))
247
NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+')
248
def _normalize_headers(headers):
249
return dict([ (key.lower(), NORMALIZE_SPACE.sub(value, ' ').strip()) for (key, value) in headers.iteritems()])
251
def _parse_cache_control(headers):
253
if headers.has_key('cache-control'):
254
parts = headers['cache-control'].split(',')
255
parts_with_args = [tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=")]
256
parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")]
257
retval = dict(parts_with_args + parts_wo_args)
260
# Whether to use a strict mode to parse WWW-Authenticate headers
261
# Might lead to bad results in case of ill-formed header value,
262
# so disabled by default, falling back to relaxed parsing.
263
# Set to true to turn on, useful for testing servers.
264
USE_WWW_AUTH_STRICT_PARSING = 0
267
# [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" as defined by HTTP
268
# "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space
269
# Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both:
270
# \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?
271
WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$")
272
WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$")
273
UNQUOTE_PAIRS = re.compile(r'\\(.)')
274
def _parse_www_authenticate(headers, headername='www-authenticate'):
275
"""Returns a dictionary of dictionaries, one dict
278
if headers.has_key(headername):
280
authenticate = headers[headername].strip()
281
www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED
283
# Break off the scheme at the beginning of the line
284
if headername == 'authentication-info':
285
(auth_scheme, the_rest) = ('digest', authenticate)
287
(auth_scheme, the_rest) = authenticate.split(" ", 1)
288
# Now loop over all the key value pairs that come after the scheme,
289
# being careful not to roll into the next scheme
290
match = www_auth.search(the_rest)
293
if match and len(match.groups()) == 3:
294
(key, value, the_rest) = match.groups()
295
auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')])
296
match = www_auth.search(the_rest)
297
retval[auth_scheme.lower()] = auth_params
298
authenticate = the_rest.strip()
300
raise MalformedHeader("WWW-Authenticate")
304
def _entry_disposition(response_headers, request_headers):
305
"""Determine freshness from the Date, Expires and Cache-Control headers.
307
We don't handle the following:
309
1. Cache-Control: max-stale
310
2. Age: headers are not used in the calculations.
312
Not that this algorithm is simpler than you might think
313
because we are operating as a private (non-shared) cache.
314
This lets us ignore 's-maxage'. We can also ignore
315
'proxy-invalidate' since we aren't a proxy.
316
We will never return a stale document as
317
fresh as a design decision, and thus the non-implementation
318
of 'max-stale'. This also lets us safely ignore 'must-revalidate'
319
since we operate as if every server has sent 'must-revalidate'.
320
Since we are private we get to ignore both 'public' and
321
'private' parameters. We also ignore 'no-transform' since
322
we don't do any transformations.
323
The 'no-store' parameter is handled at a higher level.
324
So the only Cache-Control parameters we look at are:
333
cc = _parse_cache_control(request_headers)
334
cc_response = _parse_cache_control(response_headers)
336
if request_headers.has_key('pragma') and request_headers['pragma'].lower().find('no-cache') != -1:
337
retval = "TRANSPARENT"
338
if 'cache-control' not in request_headers:
339
request_headers['cache-control'] = 'no-cache'
340
elif cc.has_key('no-cache'):
341
retval = "TRANSPARENT"
342
elif cc_response.has_key('no-cache'):
344
elif cc.has_key('only-if-cached'):
346
elif response_headers.has_key('date'):
347
date = calendar.timegm(email.Utils.parsedate_tz(response_headers['date']))
349
current_age = max(0, now - date)
350
if cc_response.has_key('max-age'):
352
freshness_lifetime = int(cc_response['max-age'])
354
freshness_lifetime = 0
355
elif response_headers.has_key('expires'):
356
expires = email.Utils.parsedate_tz(response_headers['expires'])
358
freshness_lifetime = 0
360
freshness_lifetime = max(0, calendar.timegm(expires) - date)
362
freshness_lifetime = 0
363
if cc.has_key('max-age'):
365
freshness_lifetime = int(cc['max-age'])
367
freshness_lifetime = 0
368
if cc.has_key('min-fresh'):
370
min_fresh = int(cc['min-fresh'])
373
current_age += min_fresh
374
if freshness_lifetime > current_age:
378
def _decompressContent(response, new_content):
379
content = new_content
381
encoding = response.get('content-encoding', None)
382
if encoding in ['gzip', 'deflate']:
383
if encoding == 'gzip':
384
content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read()
385
if encoding == 'deflate':
386
content = zlib.decompress(content)
387
response['content-length'] = str(len(content))
388
# Record the historical presence of the encoding in a way the won't interfere.
389
response['-content-encoding'] = response['content-encoding']
390
del response['content-encoding']
393
raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content)
396
def _updateCache(request_headers, response_headers, content, cache, cachekey):
398
cc = _parse_cache_control(request_headers)
399
cc_response = _parse_cache_control(response_headers)
400
if cc.has_key('no-store') or cc_response.has_key('no-store'):
401
cache.delete(cachekey)
403
info = email.Message.Message()
404
for key, value in response_headers.iteritems():
405
if key not in ['status','content-encoding','transfer-encoding']:
408
# Add annotations to the cache to indicate what headers
409
# are variant for this request.
410
vary = response_headers.get('vary', None)
412
vary_headers = vary.lower().replace(' ', '').split(',')
413
for header in vary_headers:
414
key = '-varied-%s' % header
416
info[key] = request_headers[header]
420
status = response_headers.status
424
status_header = 'status: %d\r\n' % status
426
header_str = info.as_string()
428
header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
429
text = "".join([status_header, header_str, content])
431
cache.set(cachekey, text)
434
dig = _md5("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest()
437
def _wsse_username_token(cnonce, iso_now, password):
438
return base64.b64encode(_sha("%s%s%s" % (cnonce, iso_now, password)).digest()).strip()
441
# For credentials we need two things, first
442
# a pool of credentials to try (not necessarily tied to Basic, Digest, etc.)
443
# Then we also need a list of URIs that have already demanded authentication
444
# That list is tricky since sub-URIs can take the same auth, or the
445
# auth scheme may change as you descend the tree.
446
# So we also need each Auth instance to be able to tell us
447
# how close to the 'top' it is.
449
class Authentication(object):
450
def __init__(self, credentials, host, request_uri, headers, response, content, http):
451
(scheme, authority, path, query, fragment) = parse_uri(request_uri)
454
self.credentials = credentials
457
def depth(self, request_uri):
458
(scheme, authority, path, query, fragment) = parse_uri(request_uri)
459
return request_uri[len(self.path):].count("/")
461
def inscope(self, host, request_uri):
462
# XXX Should we normalize the request_uri?
463
(scheme, authority, path, query, fragment) = parse_uri(request_uri)
464
return (host == self.host) and path.startswith(self.path)
466
def request(self, method, request_uri, headers, content):
467
"""Modify the request headers to add the appropriate
468
Authorization header. Over-rise this in sub-classes."""
471
def response(self, response, content):
472
"""Gives us a chance to update with new nonces
473
or such returned from the last authorized response.
474
Over-rise this in sub-classes if necessary.
476
Return TRUE is the request is to be retried, for
477
example Digest may return stale=true.
483
class BasicAuthentication(Authentication):
    """HTTP Basic auth: sends username:password base64-encoded on every request."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        user_pass = "%s:%s" % self.credentials
        headers['authorization'] = 'Basic ' + base64.b64encode(user_pass).strip()
493
class DigestAuthentication(Authentication):
494
"""Only do qop='auth' and MD5, since that
495
is all Apache currently implements"""
496
def __init__(self, credentials, host, request_uri, headers, response, content, http):
497
Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
498
challenge = _parse_www_authenticate(response, 'www-authenticate')
499
self.challenge = challenge['digest']
500
qop = self.challenge.get('qop', 'auth')
501
self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None
502
if self.challenge['qop'] is None:
503
raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop))
504
self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5').upper()
505
if self.challenge['algorithm'] != 'MD5':
506
raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
507
self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]])
508
self.challenge['nc'] = 1
510
def request(self, method, request_uri, headers, content, cnonce = None):
511
"""Modify the request headers"""
512
H = lambda x: _md5(x).hexdigest()
513
KD = lambda s, d: H("%s:%s" % (s, d))
514
A2 = "".join([method, ":", request_uri])
515
self.challenge['cnonce'] = cnonce or _cnonce()
516
request_digest = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (self.challenge['nonce'],
517
'%08x' % self.challenge['nc'],
518
self.challenge['cnonce'],
519
self.challenge['qop'], H(A2)
521
headers['authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % (
523
self.challenge['realm'],
524
self.challenge['nonce'],
526
self.challenge['algorithm'],
528
self.challenge['qop'],
529
self.challenge['nc'],
530
self.challenge['cnonce'],
532
self.challenge['nc'] += 1
534
def response(self, response, content):
535
if not response.has_key('authentication-info'):
536
challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {})
537
if 'true' == challenge.get('stale'):
538
self.challenge['nonce'] = challenge['nonce']
539
self.challenge['nc'] = 1
542
updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {})
544
if updated_challenge.has_key('nextnonce'):
545
self.challenge['nonce'] = updated_challenge['nextnonce']
546
self.challenge['nc'] = 1
550
class HmacDigestAuthentication(Authentication):
551
"""Adapted from Robert Sayre's code and DigestAuthentication above."""
552
__author__ = "Thomas Broyer (t.broyer@ltgt.net)"
554
def __init__(self, credentials, host, request_uri, headers, response, content, http):
555
Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
556
challenge = _parse_www_authenticate(response, 'www-authenticate')
557
self.challenge = challenge['hmacdigest']
558
# TODO: self.challenge['domain']
559
self.challenge['reason'] = self.challenge.get('reason', 'unauthorized')
560
if self.challenge['reason'] not in ['unauthorized', 'integrity']:
561
self.challenge['reason'] = 'unauthorized'
562
self.challenge['salt'] = self.challenge.get('salt', '')
563
if not self.challenge.get('snonce'):
564
raise UnimplementedHmacDigestAuthOptionError( _("The challenge doesn't contain a server nonce, or this one is empty."))
565
self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1')
566
if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']:
567
raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
568
self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1')
569
if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']:
570
raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." % self.challenge['pw-algorithm']))
571
if self.challenge['algorithm'] == 'HMAC-MD5':
575
if self.challenge['pw-algorithm'] == 'MD5':
576
self.pwhashmod = _md5
578
self.pwhashmod = _sha
579
self.key = "".join([self.credentials[0], ":",
580
self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
581
":", self.challenge['realm']
583
self.key = self.pwhashmod.new(self.key).hexdigest().lower()
585
def request(self, method, request_uri, headers, content):
586
"""Modify the request headers"""
587
keys = _get_end2end_headers(headers)
588
keylist = "".join(["%s " % k for k in keys])
589
headers_val = "".join([headers[k] for k in keys])
590
created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime())
592
request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val)
593
request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
594
headers['authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % (
596
self.challenge['realm'],
597
self.challenge['snonce'],
605
def response(self, response, content):
606
challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {})
607
if challenge.get('reason') in ['integrity', 'stale']:
612
class WsseAuthentication(Authentication):
613
"""This is thinly tested and should not be relied upon.
614
At this time there isn't any third party server to test against.
615
Blogger and TypePad implemented this algorithm at one point
616
but Blogger has since switched to Basic over HTTPS and
617
TypePad has implemented it wrong, by never issuing a 401
618
challenge but instead requiring your client to telepathically know that
619
their endpoint is expecting WSSE profile="UsernameToken"."""
620
def __init__(self, credentials, host, request_uri, headers, response, content, http):
621
Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
623
def request(self, method, request_uri, headers, content):
624
"""Modify the request headers to add the appropriate
625
Authorization header."""
626
headers['authorization'] = 'WSSE profile="UsernameToken"'
627
iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
629
password_digest = _wsse_username_token(cnonce, iso_now, self.credentials[1])
630
headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % (
636
class GoogleLoginAuthentication(Authentication):
637
def __init__(self, credentials, host, request_uri, headers, response, content, http):
638
from urllib import urlencode
639
Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
640
challenge = _parse_www_authenticate(response, 'www-authenticate')
641
service = challenge['googlelogin'].get('service', 'xapi')
642
# Bloggger actually returns the service in the challenge
643
# For the rest we guess based on the URI
644
if service == 'xapi' and request_uri.find("calendar") > 0:
646
# No point in guessing Base or Spreadsheet
647
#elif request_uri.find("spreadsheets") > 0:
650
auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent'])
651
resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'})
652
lines = content.split('\n')
653
d = dict([tuple(line.split("=", 1)) for line in lines if line])
654
if resp.status == 403:
657
self.Auth = d['Auth']
659
def request(self, method, request_uri, headers, content):
660
"""Modify the request headers to add the appropriate
661
Authorization header."""
662
headers['authorization'] = 'GoogleLogin Auth=' + self.Auth
665
# Map auth-scheme tokens (as they appear in WWW-Authenticate) to handlers.
# The visible source was missing the dict's closing brace (an extraction
# artifact); restored here.
AUTH_SCHEME_CLASSES = {
    "basic": BasicAuthentication,
    "wsse": WsseAuthentication,
    "digest": DigestAuthentication,
    "hmacdigest": HmacDigestAuthentication,
    "googlelogin": GoogleLoginAuthentication
}

# Preference order when a server offers several schemes: strongest first.
AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
675
class FileCache(object):
676
"""Uses a local directory as a store for cached files.
677
Not really safe to use if multiple threads or processes are going to
678
be running on the same cache.
680
def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
683
if not os.path.exists(cache):
684
os.makedirs(self.cache)
688
cacheFullPath = os.path.join(self.cache, self.safe(key))
690
f = file(cacheFullPath, "rb")
697
def set(self, key, value):
698
cacheFullPath = os.path.join(self.cache, self.safe(key))
699
f = file(cacheFullPath, "wb")
703
def delete(self, key):
704
cacheFullPath = os.path.join(self.cache, self.safe(key))
705
if os.path.exists(cacheFullPath):
706
os.remove(cacheFullPath)
708
class Credentials(object):
710
self.credentials = []
712
def add(self, name, password, domain=""):
713
self.credentials.append((domain.lower(), name, password))
716
self.credentials = []
718
def iter(self, domain):
719
for (cdomain, name, password) in self.credentials:
720
if cdomain == "" or domain == cdomain:
721
yield (name, password)
723
class KeyCerts(Credentials):
724
"""Identical to Credentials except that
725
name/password are mapped to key/cert."""
729
class ProxyInfo(object):
730
"""Collect information required to use a proxy."""
731
def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None):
732
"""The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX
733
constants. For example:
735
p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', proxy_port=8000)
737
self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns, self.proxy_user, self.proxy_pass = proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass
740
return (self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns,
741
self.proxy_user, self.proxy_pass)
744
return (self.proxy_host != None) and (self.proxy_port != None)
747
class HTTPConnectionWithTimeout(httplib.HTTPConnection):
749
HTTPConnection subclass that supports timeouts
751
All timeouts are in seconds. If None is passed for timeout then
752
Python's default timeout for sockets will be used. See for example
753
the docs of socket.setdefaulttimeout():
754
http://docs.python.org/library/socket.html#socket.setdefaulttimeout
757
def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None):
758
httplib.HTTPConnection.__init__(self, host, port, strict)
759
self.timeout = timeout
760
self.proxy_info = proxy_info
763
"""Connect to the host and port specified in __init__."""
764
# Mostly verbatim from httplib.py.
765
if self.proxy_info and socks is None:
766
raise ProxiesUnavailableError(
767
'Proxy support missing but proxy use was requested!')
768
msg = "getaddrinfo returns an empty list"
769
for res in socket.getaddrinfo(self.host, self.port, 0,
771
af, socktype, proto, canonname, sa = res
773
if self.proxy_info and self.proxy_info.isgood():
774
self.sock = socks.socksocket(af, socktype, proto)
775
self.sock.setproxy(*self.proxy_info.astuple())
777
self.sock = socket.socket(af, socktype, proto)
778
self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
779
# Different from httplib: support timeouts.
780
if has_timeout(self.timeout):
781
self.sock.settimeout(self.timeout)
782
# End of difference from httplib.
783
if self.debuglevel > 0:
784
print "connect: (%s, %s)" % (self.host, self.port)
786
self.sock.connect(sa)
787
except socket.error, msg:
788
if self.debuglevel > 0:
789
print 'connect fail:', (self.host, self.port)
796
raise socket.error, msg
798
class HTTPSConnectionWithTimeout(httplib.HTTPSConnection):
800
This class allows communication via SSL.
802
All timeouts are in seconds. If None is passed for timeout then
803
Python's default timeout for sockets will be used. See for example
804
the docs of socket.setdefaulttimeout():
805
http://docs.python.org/library/socket.html#socket.setdefaulttimeout
807
def __init__(self, host, port=None, key_file=None, cert_file=None,
808
strict=None, timeout=None, proxy_info=None,
809
ca_certs=None, disable_ssl_certificate_validation=False):
810
httplib.HTTPSConnection.__init__(self, host, port=port, key_file=key_file,
811
cert_file=cert_file, strict=strict)
812
self.timeout = timeout
813
self.proxy_info = proxy_info
816
self.ca_certs = ca_certs
817
self.disable_ssl_certificate_validation = \
818
disable_ssl_certificate_validation
820
# The following two methods were adapted from https_wrapper.py, released
821
# with the Google Appengine SDK at
822
# http://googleappengine.googlecode.com/svn-history/r136/trunk/python/google/appengine/tools/https_wrapper.py
823
# under the following license:
825
# Copyright 2007 Google Inc.
827
# Licensed under the Apache License, Version 2.0 (the "License");
828
# you may not use this file except in compliance with the License.
829
# You may obtain a copy of the License at
831
# http://www.apache.org/licenses/LICENSE-2.0
833
# Unless required by applicable law or agreed to in writing, software
834
# distributed under the License is distributed on an "AS IS" BASIS,
835
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
836
# See the License for the specific language governing permissions and
837
# limitations under the License.
840
def _GetValidHostsForCert(self, cert):
841
"""Returns a list of valid host globs for an SSL certificate.
844
cert: A dictionary representing an SSL certificate.
846
list: A list of valid host globs.
848
if 'subjectAltName' in cert:
849
return [x[1] for x in cert['subjectAltName']
850
if x[0].lower() == 'dns']
852
return [x[0][1] for x in cert['subject']
853
if x[0][0].lower() == 'commonname']
855
def _ValidateCertificateHostname(self, cert, hostname):
856
"""Validates that a given hostname is valid for an SSL certificate.
859
cert: A dictionary representing an SSL certificate.
860
hostname: The hostname to test.
862
bool: Whether or not the hostname is valid for this certificate.
864
hosts = self._GetValidHostsForCert(cert)
866
host_re = host.replace('.', '\.').replace('*', '[^.]*')
867
if re.search('^%s$' % (host_re,), hostname, re.I):
872
"Connect to a host on a given (SSL) port."
874
msg = "getaddrinfo returns an empty list"
875
for family, socktype, proto, canonname, sockaddr in socket.getaddrinfo(
876
self.host, self.port, 0, socket.SOCK_STREAM):
878
if self.proxy_info and self.proxy_info.isgood():
879
sock = socks.socksocket(family, socktype, proto)
880
sock.setproxy(*self.proxy_info.astuple())
882
sock = socket.socket(family, socktype, proto)
883
sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
885
if has_timeout(self.timeout):
886
sock.settimeout(self.timeout)
887
sock.connect((self.host, self.port))
888
self.sock =_ssl_wrap_socket(
889
sock, self.key_file, self.cert_file,
890
self.disable_ssl_certificate_validation, self.ca_certs)
891
if self.debuglevel > 0:
892
print "connect: (%s, %s)" % (self.host, self.port)
893
if not self.disable_ssl_certificate_validation:
894
cert = self.sock.getpeercert()
895
hostname = self.host.split(':', 0)[0]
896
if not self._ValidateCertificateHostname(cert, hostname):
897
raise CertificateHostnameMismatch(
898
'Server presented certificate that does not match '
899
'host %s: %s' % (hostname, cert), hostname, cert)
900
except ssl_SSLError, e:
906
# Unfortunately the ssl module doesn't seem to provide any way
907
# to get at more detailed error information, in particular
908
# whether the error is due to certificate validation or
909
# something else (such as SSL protocol mismatch).
910
if e.errno == ssl.SSL_ERROR_SSL:
911
raise SSLHandshakeError(e)
914
except (socket.timeout, socket.gaierror):
916
except socket.error, msg:
917
if self.debuglevel > 0:
918
print 'connect fail:', (self.host, self.port)
925
raise socket.error, msg
927
SCHEME_TO_CONNECTION = {
928
'http': HTTPConnectionWithTimeout,
929
'https': HTTPSConnectionWithTimeout
932
# Use a different connection object for Google App Engine
# NOTE(review): the enclosing try: that normally guards these imports (so the
# module still loads off App Engine) appears to have been dropped by the
# extraction -- confirm against pristine source.
934
from google.appengine.api.urlfetch import fetch
935
from google.appengine.api.urlfetch import InvalidURLError
936
from google.appengine.api.urlfetch import DownloadError
937
from google.appengine.api.urlfetch import ResponseTooLargeError
938
from google.appengine.api.urlfetch import SSLCertificateError
941
class ResponseDict(dict):
942
"""A dictionary that also has a read() method, so that it can pass
943
itself off as an httplib.HTTPResponse()."""
# NOTE(review): the read() method body itself (original lines ~944-947) was
# lost in extraction.
948
# NOTE(review): mangled fragment -- numerous lines were dropped in
# extraction (self.host/self.port assignments, the default scheme, the try:
# guarding fetch(), and the bodies of getresponse()/set_debuglevel()).
# Kept byte-identical; comments only.
class AppEngineHttpConnection(object):
949
"""Emulates an httplib.HTTPConnection object, but actually uses the Google
950
App Engine urlfetch library. This allows the timeout to be properly used on
951
Google App Engine, and avoids using httplib, which on Google App Engine is
952
just another wrapper around urlfetch.
954
def __init__(self, host, port=None, key_file=None, cert_file=None,
955
strict=None, timeout=None, proxy_info=None, ca_certs=None,
956
disable_certificate_validation=False):
959
self.timeout = timeout
960
# urlfetch cannot honor client certs, proxies, or custom CA bundles.
if key_file or cert_file or proxy_info or ca_certs:
961
raise NotSupportedOnThisPlatform()
964
self.validate_certificate = not disable_certificate_validation
967
def request(self, method, url, body, headers):
968
# Calculate the absolute URI, which fetch requires
971
netloc = '%s:%s' % (self.host, self.port)
972
absolute_uri = '%s://%s%s' % (self.scheme, netloc, url)
974
# Redirects are handled by the caller (Http.request), not by urlfetch.
response = fetch(absolute_uri, payload=body, method=method,
975
headers=headers, allow_truncated=False, follow_redirects=False,
976
deadline=self.timeout,
977
validate_certificate=self.validate_certificate)
978
self.response = ResponseDict(response.headers)
979
self.response['status'] = response.status_code
980
setattr(self.response, 'read', lambda : response.content)
982
# Make sure the exceptions raised match the exceptions expected.
983
except InvalidURLError:
984
raise socket.gaierror('')
985
except (DownloadError, ResponseTooLargeError, SSLCertificateError):
986
raise httplib.HTTPException()
988
def getresponse(self):
991
def set_debuglevel(self, level):
1001
class AppEngineHttpsConnection(AppEngineHttpConnection):
    """Same as AppEngineHttpConnection, but for HTTPS URIs."""

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 strict=None, timeout=None, proxy_info=None):
        # Let the base class do all of the setup, then switch the scheme so
        # the absolute URI assembled in request() starts with https://.
        AppEngineHttpConnection.__init__(self, host, port, key_file,
                                         cert_file, strict, timeout,
                                         proxy_info)
        self.scheme = 'https'
1009
# Update the connection classes to use the Google App Engine specific ones.
1010
# NOTE(review): the dict's closing brace appears to have been dropped by the
# extraction (artifact numbering jumps from 1012 to 1020) -- restore it.
SCHEME_TO_CONNECTION = {
1011
'http': AppEngineHttpConnection,
1012
'https': AppEngineHttpsConnection
1020
# NOTE(review): mangled fragment of the Http class. The 'class Http' line,
# most of the class docstring, and several __init__ branches (e.g. the else:
# assigning a caller-supplied cache object) were lost in extraction.
# Kept byte-identical; comments only.
"""An HTTP client that handles:
1032
def __init__(self, cache=None, timeout=None, proxy_info=None,
1033
ca_certs=None, disable_ssl_certificate_validation=False):
1035
The value of proxy_info is a ProxyInfo instance.
1037
If 'cache' is a string then it is used as a directory name for
1038
a disk cache. Otherwise it must be an object that supports the
1039
same interface as FileCache.
1041
All timeouts are in seconds. If None is passed for timeout
1042
then Python's default timeout for sockets will be used. See
1043
for example the docs of socket.setdefaulttimeout():
1044
http://docs.python.org/library/socket.html#socket.setdefaulttimeout
1046
ca_certs is the path of a file containing root CA certificates for SSL
1047
server certificate validation. By default, a CA cert file bundled with
1050
If disable_ssl_certificate_validation is true, SSL cert validation will
1053
self.proxy_info = proxy_info
1054
self.ca_certs = ca_certs
1055
self.disable_ssl_certificate_validation = \
1056
disable_ssl_certificate_validation
1058
# Map domain name to an httplib connection
1059
self.connections = {}
1060
# The location of the cache, for now a directory
1061
# where cached responses are held.
1062
if cache and isinstance(cache, basestring):
1063
self.cache = FileCache(cache)
1068
self.credentials = Credentials()
1071
self.certificates = KeyCerts()
1073
# authorization objects
1074
self.authorizations = []
1076
# If set to False then no redirects are followed, even safe ones.
1077
self.follow_redirects = True
1079
# Which HTTP methods do we apply optimistic concurrency to, i.e.
1080
# which methods get an "if-match:" etag header added to them.
1081
self.optimistic_concurrency_methods = ["PUT", "PATCH"]
1083
# If 'follow_redirects' is True, and this is set to True then
1084
# all redirects are followed, including unsafe ones.
1085
self.follow_all_redirects = False
1087
self.ignore_etag = False
1089
self.force_exception_to_status_code = False
1091
self.timeout = timeout
1093
def _auth_from_challenge(self, host, request_uri, headers, response, content):
    """Yield Authorization objects capable of answering the server's
    WWW-Authenticate challenge, best scheme first."""
    challenges = _parse_www_authenticate(response, 'www-authenticate')
    for cred in self.credentials.iter(host):
        for scheme in AUTH_SCHEME_ORDER:
            if scheme in challenges:
                yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)
1103
def add_credentials(self, name, password, domain=""):
    """Register a name/password pair to be offered whenever a request
    requires authentication; 'domain' restricts where it applies."""
    self.credentials.add(name, password, domain)
1108
def add_certificate(self, key, cert, domain):
    """Register a key/cert pair to be presented whenever a request
    requires authentication for the given domain."""
    self.certificates.add(key, cert, domain)
1113
def clear_credentials(self):
    """Forget every stored name/password pair, and discard any cached
    authorization state that was derived from them."""
    self.credentials.clear()
    self.authorizations = []
1119
# NOTE(review): mangled fragment. The retry loop, conn.connect() call, the
# try: scaffolding, and several except-branch bodies were lost in extraction
# (gaps in the artifact numbering, e.g. 1145-1157). Kept byte-identical.
def _conn_request(self, conn, request_uri, method, body, headers):
1122
if conn.sock is None:
1124
conn.request(method, request_uri, body, headers)
1125
except socket.timeout:
1127
except socket.gaierror:
1129
# DNS failure: rebrand as a library-specific error for callers.
raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
1130
except ssl_SSLError:
1133
except socket.error, e:
1135
if hasattr(e, 'args'):
1136
err = getattr(e, 'args')[0]
1139
if err == errno.ECONNREFUSED: # Connection refused
1141
except httplib.HTTPException:
1142
# Just because the server closed the connection doesn't apparently mean
1143
# that the server didn't send a response.
1144
if conn.sock is None:
1158
response = conn.getresponse()
1159
except (socket.error, httplib.HTTPException):
1168
# HEAD responses carry no body; reading would block on some servers.
if method == "HEAD":
1171
content = response.read()
1172
response = Response(response)
1173
if method != "HEAD":
1174
content = _decompressContent(response, content)
1176
return (response, content)
1179
# NOTE(review): mangled fragment. Guard lines were lost in extraction
# (e.g. the 'if auth:' guards before auth.request()/auth.response() and the
# 'if redirections:' / 'else:' pair around the recursive request call).
# Kept byte-identical; comments only.
def _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey):
1180
"""Do the actual request using the connection object
1181
and also follow one level of redirects if necessary"""
1183
# Pick the most specific in-scope authorization (smallest depth sorts first).
auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
1184
auth = auths and sorted(auths)[0][1] or None
1186
auth.request(method, request_uri, headers, body)
1188
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
1191
# A stale-digest response means we can retry once with fresh auth headers.
if auth.response(response, body):
1192
auth.request(method, request_uri, headers, body)
1193
(response, content) = self._conn_request(conn, request_uri, method, body, headers )
1194
response._stale_digest = 1
1196
if response.status == 401:
1197
for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
1198
authorization.request(method, request_uri, headers, body)
1199
(response, content) = self._conn_request(conn, request_uri, method, body, headers, )
1200
if response.status != 401:
1201
# This scheme worked; remember it for future requests in scope.
self.authorizations.append(authorization)
1202
authorization.response(response, body)
1205
if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303):
1206
if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
1207
# Pick out the location header and basically start from the beginning
1208
# remembering first to strip the ETag header and decrement our 'depth'
1210
if not response.has_key('location') and response.status != 300:
1211
raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content)
1212
# Fix-up relative redirects (which violate an RFC 2616 MUST)
1213
if response.has_key('location'):
1214
location = response['location']
1215
(scheme, authority, path, query, fragment) = parse_uri(location)
1216
if authority == None:
1217
response['location'] = urlparse.urljoin(absolute_uri, location)
1218
if response.status == 301 and method in ["GET", "HEAD"]:
1219
response['-x-permanent-redirect-url'] = response['location']
1220
if not response.has_key('content-location'):
1221
response['content-location'] = absolute_uri
1222
_updateCache(headers, response, content, self.cache, cachekey)
1223
# Validators must not leak into the redirected request.
if headers.has_key('if-none-match'):
1224
del headers['if-none-match']
1225
if headers.has_key('if-modified-since'):
1226
del headers['if-modified-since']
1227
if response.has_key('location'):
1228
location = response['location']
1229
old_response = copy.deepcopy(response)
1230
if not old_response.has_key('content-location'):
1231
old_response['content-location'] = absolute_uri
1232
redirect_method = method
1233
# 302/303 redirects are re-issued as GET per common browser behavior.
if response.status in [302, 303]:
1234
redirect_method = "GET"
1236
(response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1)
1237
response.previous = old_response
1239
raise RedirectLimit("Redirected more times than rediection_limit allows.", response, content)
1240
elif response.status in [200, 203] and method in ["GET", "HEAD"]:
1241
# Don't cache 206's since we aren't going to handle byte range requests
1242
if not response.has_key('content-location'):
1243
response['content-location'] = absolute_uri
1244
_updateCache(headers, response, content, self.cache, cachekey)
1246
return (response, content)
1248
def _normalize_headers(self, headers):
    """Return the headers with names lower-cased, delegating to the
    module-level _normalize_headers helper of the same name."""
    return _normalize_headers(headers)
1251
# Need to catch and rebrand some exceptions
1252
# Then need to optionally turn all exceptions into status codes
1253
# including all socket.* and httplib.* exceptions.
1256
# NOTE(review): mangled fragment of Http.request(). Many lines were lost in
# extraction (the try: blocks around urlnorm/cache-read/_request, several
# else: branches, the dict-literal closers in the exception fallbacks, and
# parts of the vary-header loop). Kept byte-identical; comments only.
def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None):
1257
""" Performs a single HTTP request.
1258
The 'uri' is the URI of the HTTP resource and can begin
1259
with either 'http' or 'https'. The value of 'uri' must be an absolute URI.
1261
The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc.
1262
There is no restriction on the methods allowed.
1264
The 'body' is the entity body to be sent with the request. It is a string
1267
Any extra headers that are to be sent with the request should be provided in the
1268
'headers' dictionary.
1270
The maximum number of redirect to follow before raising an
1271
exception is 'redirections. The default is 5.
1273
The return value is a tuple of (response, content), the first
1274
being and instance of the 'Response' class, the second being
1275
a string that contains the response entity body.
1281
headers = self._normalize_headers(headers)
1283
if not headers.has_key('user-agent'):
1284
headers['user-agent'] = "Python-httplib2/%s (gzip)" % __version__
1288
(scheme, authority, request_uri, defrag_uri) = urlnorm(uri)
1289
domain_port = authority.split(":")[0:2]
1290
# http on port 443 is almost certainly meant to be https.
if len(domain_port) == 2 and domain_port[1] == '443' and scheme == 'http':
1292
authority = domain_port[0]
1294
# One cached connection per scheme+authority pair.
conn_key = scheme+":"+authority
1295
if conn_key in self.connections:
1296
conn = self.connections[conn_key]
1298
if not connection_type:
1299
connection_type = SCHEME_TO_CONNECTION[scheme]
1300
certs = list(self.certificates.iter(authority))
1301
if issubclass(connection_type, HTTPSConnectionWithTimeout):
1303
conn = self.connections[conn_key] = connection_type(
1304
authority, key_file=certs[0][0],
1305
cert_file=certs[0][1], timeout=self.timeout,
1306
proxy_info=self.proxy_info,
1307
ca_certs=self.ca_certs,
1308
disable_ssl_certificate_validation=
1309
self.disable_ssl_certificate_validation)
1311
conn = self.connections[conn_key] = connection_type(
1312
authority, timeout=self.timeout,
1313
proxy_info=self.proxy_info,
1314
ca_certs=self.ca_certs,
1315
disable_ssl_certificate_validation=
1316
self.disable_ssl_certificate_validation)
1318
conn = self.connections[conn_key] = connection_type(
1319
authority, timeout=self.timeout,
1320
proxy_info=self.proxy_info)
1321
conn.set_debuglevel(debuglevel)
1323
# Range requests must not be transparently gunzipped, so skip the header.
if 'range' not in headers and 'accept-encoding' not in headers:
1324
headers['accept-encoding'] = 'gzip, deflate'
1326
info = email.Message.Message()
1329
cachekey = defrag_uri
1330
cached_value = self.cache.get(cachekey)
1332
# info = email.message_from_string(cached_value)
1334
# Need to replace the line above with the kludge below
1335
# to fix the non-existent bug not fixed in this
1336
# bug report: http://mail.python.org/pipermail/python-bugs-list/2005-September/030289.html
1338
info, content = cached_value.split('\r\n\r\n', 1)
1339
feedparser = email.FeedParser.FeedParser()
1340
feedparser.feed(info)
1341
info = feedparser.close()
1342
feedparser._parse = None
1344
# Unreadable cache entry: drop it rather than serve garbage.
self.cache.delete(cachekey)
1350
if method in self.optimistic_concurrency_methods and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers:
1351
# http://www.w3.org/1999/04/Editing/
1352
headers['if-match'] = info['etag']
1354
if method not in ["GET", "HEAD"] and self.cache and cachekey:
1355
# RFC 2616 Section 13.10
1356
self.cache.delete(cachekey)
1358
# Check the vary header in the cache to see if this request
1359
# matches what varies in the cache.
1360
if method in ['GET', 'HEAD'] and 'vary' in info:
1362
vary_headers = vary.lower().replace(' ', '').split(',')
1363
for header in vary_headers:
1364
key = '-varied-%s' % header
1366
if headers.get(header, None) != value:
1370
if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
1371
if info.has_key('-x-permanent-redirect-url'):
1372
# Should cached permanent redirects be counted in our redirection count? For now, yes.
1373
if redirections <= 0:
1374
raise RedirectLimit("Redirected more times than rediection_limit allows.", {}, "")
1375
(response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)
1376
response.previous = Response(info)
1377
response.previous.fromcache = True
1379
# Determine our course of action:
1380
# Is the cached entry fresh or stale?
1381
# Has the client requested a non-cached response?
1383
# There seems to be three possible answers:
1384
# 1. [FRESH] Return the cache entry w/o doing a GET
1385
# 2. [STALE] Do the GET (but add in cache validators if available)
1386
# 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
1387
entry_disposition = _entry_disposition(info, headers)
1389
if entry_disposition == "FRESH":
1390
if not cached_value:
1391
info['status'] = '504'
1393
response = Response(info)
1395
response.fromcache = True
1396
return (response, content)
1398
if entry_disposition == "STALE":
1399
if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
1400
headers['if-none-match'] = info['etag']
1401
if info.has_key('last-modified') and not 'last-modified' in headers:
1402
headers['if-modified-since'] = info['last-modified']
1403
elif entry_disposition == "TRANSPARENT":
1406
(response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
1408
if response.status == 304 and method == "GET":
1409
# Rewrite the cache entry with the new end-to-end headers
1410
# Take all headers that are in response
1411
# and overwrite their values in info.
1412
# unless they are hop-by-hop, or are listed in the connection header.
1414
for key in _get_end2end_headers(response):
1415
info[key] = response[key]
1416
merged_response = Response(info)
1417
if hasattr(response, "_stale_digest"):
1418
merged_response._stale_digest = response._stale_digest
1419
_updateCache(headers, merged_response, content, self.cache, cachekey)
1420
response = merged_response
1421
response.status = 200
1422
response.fromcache = True
1424
elif response.status == 200:
1425
content = new_content
1427
self.cache.delete(cachekey)
1428
content = new_content
1430
cc = _parse_cache_control(headers)
1431
# only-if-cached with no usable cache entry => 504 per RFC 2616 14.9.4.
if cc.has_key('only-if-cached'):
1432
info['status'] = '504'
1433
response = Response(info)
1436
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
1437
except Exception, e:
1438
if self.force_exception_to_status_code:
1439
if isinstance(e, HttpLib2ErrorWithResponse):
1440
response = e.response
1442
response.status = 500
1443
response.reason = str(e)
1444
elif isinstance(e, socket.timeout):
1445
content = "Request Timeout"
1446
response = Response( {
1447
"content-type": "text/plain",
1449
"content-length": len(content)
1451
response.reason = "Request Timeout"
1454
response = Response( {
1455
"content-type": "text/plain",
1457
"content-length": len(content)
1459
response.reason = "Bad Request"
1464
return (response, content)
1468
# NOTE(review): mangled fragment. The class-attribute assignment lines
# (fromcache, version, status, reason defaults) were lost in extraction --
# only their trailing docstrings remain below -- as were the dict branch of
# __init__ and the 'dict' special-case in __getattr__. Kept byte-identical.
class Response(dict):
1469
"""An object more like email.Message than httplib.HTTPResponse."""
1471
"""Is this response from our local cache"""
1474
"""HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1. """
1477
"Status code returned by server. "
1480
"""Reason phrase returned by server."""
1485
def __init__(self, info):
1486
# info is either an email.Message or
1487
# an httplib.HTTPResponse object.
1488
if isinstance(info, httplib.HTTPResponse):
1489
for key, value in info.getheaders():
1490
self[key.lower()] = value
1491
self.status = info.status
1492
self['status'] = str(self.status)
1493
self.reason = info.reason
1494
self.version = info.version
1495
elif isinstance(info, email.Message.Message):
1496
for key, value in info.items():
1498
self.status = int(self['status'])
1500
for key, value in info.iteritems():
1502
self.status = int(self.get('status', self.status))
1505
def __getattr__(self, name):
1509
raise AttributeError, name