2
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
4
# This module is part of urllib3 and is released under
5
# the MIT License: http://www.opensource.org/licenses/mit-license.php
8
from base64 import b64encode
9
from binascii import hexlify, unhexlify
10
from collections import namedtuple
11
from hashlib import md5, sha1
12
from socket import error as SocketError, _GLOBAL_DEFAULT_TIMEOUT
16
from select import poll, POLLIN
17
except ImportError: # `poll` doesn't exist on OSX and other platforms
20
from select import select
21
except ImportError: # `select` doesn't exist on AppEngine.
24
try: # Test for SSL features
29
from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23
30
from ssl import SSLContext # Modern SSL?
31
from ssl import HAS_SNI # Has SNI?
35
from .packages import six
36
from .exceptions import LocationParseError, SSLError, TimeoutStateError
40
# The default timeout to use for socket connections. This is the attribute used
41
# by httplib to define the default timeout
46
Retrieve the current time. This function is mocked out in unit testing.
51
class Timeout(object):
53
Utility object for storing timeout values.
57
.. code-block:: python
59
timeout = urllib3.util.Timeout(connect=2.0, read=7.0)
60
pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout)
61
pool.request(...) # Etc, etc
64
The maximum amount of time to wait for a connection attempt to a server
65
to succeed. Omitting the parameter will default the connect timeout to
66
the system default, probably `the global default timeout in socket.py
67
<http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
68
None will set an infinite timeout for connection attempts.
70
:type connect: integer, float, or None
73
The maximum amount of time to wait between consecutive
74
read operations for a response from the server. Omitting
75
the parameter will default the read timeout to the system
76
default, probably `the global default timeout in socket.py
77
<http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
78
None will set an infinite timeout.
80
:type read: integer, float, or None
83
This combines the connect and read timeouts into one; the read timeout
84
will be set to the time leftover from the connect attempt. In the
85
event that both a connect timeout and a total are specified, or a read
86
timeout and a total are specified, the shorter timeout will be applied.
90
:type total: integer, float, or None
94
Many factors can affect the total amount of time for urllib3 to return
95
an HTTP response. Specifically, Python's DNS resolver does not obey the
96
timeout specified on the socket. Other factors that can affect total
97
request time include high CPU load, high swap, the program running at a
98
low priority level, or other behaviors. The observed running time for
99
urllib3 to return a response may be greater than the value passed to
102
In addition, the read and total timeouts only measure the time between
103
read operations on the socket connecting the client and the server,
104
not the total amount of time for the request to return a complete
105
response. For most requests, the timeout is raised because the server
106
has not sent the first byte in the specified time. This is not always
107
the case; if a server streams one byte every fifteen seconds, a timeout
108
of 20 seconds will not ever trigger, even though the request will
109
take several minutes to complete.
111
If your goal is to cut off any request after a set amount of wall clock
112
time, consider having a second "watcher" thread to cut off a slow
116
#: A sentinel object representing the default timeout value
117
DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT
119
def __init__(self, total=None, connect=_Default, read=_Default):
    """ Validate and store the timeout configuration.

    :param total: Combined budget for connect and read; ``None`` means no
        overall limit is applied.
    :type total: integer, float, or None
    :param connect: Connect timeout. When omitted it resolves to
        :attr:`DEFAULT_TIMEOUT` during validation.
    :type connect: integer, float, or None
    :param read: Read timeout. When omitted it resolves to
        :attr:`DEFAULT_TIMEOUT` during validation.
    :type read: integer, float, or None

    :raises ValueError: if any of the values is rejected by
        :meth:`_validate_timeout`.
    """
    check = self._validate_timeout
    # Keep the connect -> read -> total order: it decides which invalid
    # argument gets reported first.
    self._connect = check(connect, 'connect')
    self._read = check(read, 'read')
    self.total = check(total, 'total')
    # The connect timer stays unset until start_connect() is called.
    self._start_connect = None
126
return '%s(connect=%r, read=%r, total=%r)' % (
127
type(self).__name__, self._connect, self._read, self.total)
131
def _validate_timeout(cls, value, name):
132
""" Check that a timeout attribute is valid
134
:param value: The timeout value to validate
135
:param name: The name of the timeout attribute to validate. This is used
136
for clear error messages
138
:raises ValueError: if the type is not an integer or a float, or if it
139
is a numeric value less than zero
141
if value is _Default:
142
return cls.DEFAULT_TIMEOUT
144
if value is None or value is cls.DEFAULT_TIMEOUT:
149
except (TypeError, ValueError):
150
raise ValueError("Timeout value %s was %s, but it must be an "
151
"int or float." % (name, value))
155
raise ValueError("Attempted to set %s timeout to %s, but the "
156
"timeout cannot be set to a value less "
157
"than 0." % (name, value))
158
except TypeError: # Python 3
159
raise ValueError("Timeout value %s was %s, but it must be an "
160
"int or float." % (name, value))
165
def from_float(cls, timeout):
    """ Build a :class:`Timeout` from a single legacy timeout value.

    httplib-style callers pass one number that applies to both the
    connect() and the recv() socket calls. This wraps that value in a
    :class:`Timeout` whose connect and read timeouts are both set to
    ``timeout``; the total timeout is left at its default.

    :param timeout: The legacy timeout value
    :type timeout: integer, float, sentinel default object, or None
    :return: a Timeout object
    :rtype: :class:`Timeout`
    """
    # The same value is deliberately used for both phases.
    return Timeout(connect=timeout, read=timeout)
181
""" Create a copy of the timeout object
183
Timeout properties are stored per-pool but each request needs a fresh
184
Timeout object to ensure each one has its own start/stop configured.
186
:return: a copy of the timeout object
187
:rtype: :class:`Timeout`
189
# We can't use copy.deepcopy because that will also create a new object
190
# for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to
191
# detect the user default.
192
return Timeout(connect=self._connect, read=self._read,
195
def start_connect(self):
    """ Record the moment a connect() attempt begins.

    :return: the timestamp stored as the start of the connect attempt,
        as produced by ``current_time()``
    :raises urllib3.exceptions.TimeoutStateError: if the timer has
        already been started on this object.
    """
    if self._start_connect is None:
        self._start_connect = current_time()
        return self._start_connect
    # A second start would silently reset the clock, so refuse it.
    raise TimeoutStateError("Timeout timer has already been started.")
206
def get_connect_duration(self):
    """ Report how much time has passed since :meth:`start_connect`.

    :return: the difference between ``current_time()`` now and the
        timestamp recorded by :meth:`start_connect`
    :raises urllib3.exceptions.TimeoutStateError: if the timer has not
        been started yet.
    """
    started_at = self._start_connect
    if started_at is None:
        raise TimeoutStateError("Can't get connect duration for timer "
                                "that has not started.")
    return current_time() - started_at
220
def connect_timeout(self):
221
""" Get the value to use when setting a connection timeout.
223
This will be a positive float or integer, the value None
224
(never timeout), or the default system timeout.
226
:return: the connect timeout
227
:rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
229
if self.total is None:
232
if self._connect is None or self._connect is self.DEFAULT_TIMEOUT:
235
return min(self._connect, self.total)
238
def read_timeout(self):
239
""" Get the value for the read timeout.
241
This assumes some time has elapsed in the connection timeout and
242
computes the read timeout appropriately.
244
If self.total is set, the read timeout is dependent on the amount of
245
time taken by the connect timeout. If the connection time has not been
246
established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be
249
:return: the value to use for the read timeout
250
:rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
251
:raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect`
252
has not yet been called on this object.
254
if (self.total is not None and
255
self.total is not self.DEFAULT_TIMEOUT and
256
self._read is not None and
257
self._read is not self.DEFAULT_TIMEOUT):
258
# in case the connect timeout has not yet been established.
259
if self._start_connect is None:
261
return max(0, min(self.total - self.get_connect_duration(),
263
elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT:
264
return max(0, self.total - self.get_connect_duration())
269
class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])):
271
Datastructure for representing an HTTP URL. Used as a return value for
276
def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None):
277
return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment)
281
"""For backwards-compatibility with urlparse. We're nice like that."""
285
def request_uri(self):
286
"""Absolute path including the query string."""
287
uri = self.path or '/'
289
if self.query is not None:
290
uri += '?' + self.query
296
"""Network location including host and port"""
298
return '%s:%d' % (self.host, self.port)
302
def split_first(s, delims):
304
Given a string and an iterable of delimiters, split on the first found
305
delimiter. Return two split parts and the matched delimiter.
307
If not found, then the first part is the full input string.
311
>>> split_first('foo/bar?baz', '?/=')
312
('foo', 'bar?baz', '/')
313
>>> split_first('foo/bar?baz', '123')
314
('foo/bar?baz', '', None)
316
Scales linearly with number of delims. Not ideal for large number of delims.
325
if min_idx is None or idx < min_idx:
329
if min_idx is None or min_idx < 0:
332
return s[:min_idx], s[min_idx+1:], min_delim
337
Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
338
performed to parse incomplete urls. Fields not provided will be None.
340
Partly backwards-compatible with :mod:`urlparse`.
344
>>> parse_url('http://google.com/mail/')
345
Url(scheme='http', host='google.com', port=None, path='/', ...)
346
>>> parse_url('google.com:80')
347
Url(scheme=None, host='google.com', port=80, path=None, ...)
348
>>> parse_url('/foo?bar')
349
Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
352
# While this code has overlap with stdlib's urlparse, it is much
353
# simplified for our needs and less annoying.
354
# Additionally, this implementation does silly things to be optimal
367
scheme, url = url.split('://', 1)
369
# Find the earliest Authority Terminator
370
# (http://tools.ietf.org/html/rfc3986#section-3.2)
371
url, path_, delim = split_first(url, ['/', '?', '#'])
374
# Reassemble the path
379
# Last '@' denotes end of auth part
380
auth, url = url.rsplit('@', 1)
383
if url and url[0] == '[':
384
host, url = url.split(']', 1)
389
_host, port = url.split(':', 1)
395
# If given, ports must be integers.
396
if not port.isdigit():
397
raise LocationParseError("Failed to parse: %s" % url)
400
# Blank ports are cool, too. (rfc3986#section-3.2.3)
403
elif not host and url:
407
return Url(scheme, auth, host, port, path, query, fragment)
411
path, fragment = path.split('#', 1)
415
path, query = path.split('?', 1)
417
return Url(scheme, auth, host, port, path, query, fragment)
422
Deprecated. Use :func:`.parse_url` instead.
425
return p.scheme or 'http', p.hostname, p.port
428
def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
429
basic_auth=None, proxy_basic_auth=None):
431
Shortcuts for generating request headers.
434
If ``True``, adds 'connection: keep-alive' header.
436
:param accept_encoding:
437
Can be a boolean, list, or string.
438
``True`` translates to 'gzip,deflate'.
439
List will get joined by comma.
440
String will be used as provided.
443
String representing the user-agent you want, such as
447
Colon-separated username:password string for 'authorization: basic ...'
450
:param proxy_basic_auth:
451
Colon-separated username:password string for 'proxy-authorization: basic ...'
456
>>> make_headers(keep_alive=True, user_agent="Batman/1.0")
457
{'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
458
>>> make_headers(accept_encoding=True)
459
{'accept-encoding': 'gzip,deflate'}
463
if isinstance(accept_encoding, str):
465
elif isinstance(accept_encoding, list):
466
accept_encoding = ','.join(accept_encoding)
468
accept_encoding = 'gzip,deflate'
469
headers['accept-encoding'] = accept_encoding
472
headers['user-agent'] = user_agent
475
headers['connection'] = 'keep-alive'
478
headers['authorization'] = 'Basic ' + \
479
b64encode(six.b(basic_auth)).decode('utf-8')
482
headers['proxy-authorization'] = 'Basic ' + \
483
b64encode(six.b(proxy_basic_auth)).decode('utf-8')
488
def is_connection_dropped(conn): # Platform-specific
490
Returns True if the connection is dropped and should be closed.
493
:class:`httplib.HTTPConnection` object.
495
Note: For platforms like AppEngine, this will always return ``False`` to
496
let the platform handle connection recycling transparently for us.
498
sock = getattr(conn, 'sock', False)
499
if not sock: # Platform-specific: AppEngine
503
if not select: # Platform-specific: AppEngine
507
return select([sock], [], [], 0.0)[0]
511
# This version is better on platforms that support it.
513
p.register(sock, POLLIN)
514
for (fno, ev) in p.poll(0.0):
515
if fno == sock.fileno():
516
# Either data is buffered (bad), or the connection is dropped.
520
def resolve_cert_reqs(candidate):
522
Resolves the argument to a numeric constant, which can be passed to
523
the wrap_socket function/method from the ssl module.
524
Defaults to :data:`ssl.CERT_NONE`.
525
If given a string it is assumed to be the name of the constant in the
526
:mod:`ssl` module or its abbreviation.
527
(So you can specify `REQUIRED` instead of `CERT_REQUIRED`.)
528
If it's neither `None` nor a string we assume it is already the numeric
529
constant which can directly be passed to wrap_socket.
531
if candidate is None:
534
if isinstance(candidate, str):
535
res = getattr(ssl, candidate, None)
537
res = getattr(ssl, 'CERT_' + candidate)
543
def resolve_ssl_version(candidate):
545
like resolve_cert_reqs
547
if candidate is None:
548
return PROTOCOL_SSLv23
550
if isinstance(candidate, str):
551
res = getattr(ssl, candidate, None)
553
res = getattr(ssl, 'PROTOCOL_' + candidate)
559
def assert_fingerprint(cert, fingerprint):
561
Checks if given fingerprint matches the supplied certificate.
564
Certificate as bytes object.
566
Fingerprint as string of hexdigits, can be interspersed by colons.
569
# Maps the length of a digest to a possible hash function producing
576
fingerprint = fingerprint.replace(':', '').lower()
578
digest_length, rest = divmod(len(fingerprint), 2)
580
if rest or digest_length not in hashfunc_map:
581
raise SSLError('Fingerprint is of invalid length.')
583
# We need encode() here for py32; works on py2 and py33.
584
fingerprint_bytes = unhexlify(fingerprint.encode())
586
hashfunc = hashfunc_map[digest_length]
588
cert_digest = hashfunc(cert).digest()
590
if not cert_digest == fingerprint_bytes:
591
raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".'
592
.format(hexlify(fingerprint_bytes),
593
hexlify(cert_digest)))
595
def is_fp_closed(obj):
597
Checks whether a given file-like object is closed.
600
The file-like object to check.
602
if hasattr(obj, 'fp'):
603
# Object is a container for another file-like object that gets released
604
# on exhaustion (e.g. HTTPResponse)
605
return obj.fp is None
610
if SSLContext is not None: # Python 3.2+
611
def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
612
ca_certs=None, server_hostname=None,
615
All arguments except `server_hostname` have the same meaning as for
616
:func:`ssl.wrap_socket`
618
:param server_hostname:
619
Hostname of the expected certificate
621
context = SSLContext(ssl_version)
622
context.verify_mode = cert_reqs
624
# Disable TLS compression to mitigate CRIME attack (issue #309)
625
OP_NO_COMPRESSION = 0x20000
626
context.options |= OP_NO_COMPRESSION
630
context.load_verify_locations(ca_certs)
631
# Py32 raises IOError
632
# Py33 raises FileNotFoundError
633
except Exception as e: # Reraise as SSLError
636
# FIXME: This block needs a test.
637
context.load_cert_chain(certfile, keyfile)
638
if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI
639
return context.wrap_socket(sock, server_hostname=server_hostname)
640
return context.wrap_socket(sock)
642
else: # Python 3.1 and earlier
643
def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
644
ca_certs=None, server_hostname=None,
646
return wrap_socket(sock, keyfile=keyfile, certfile=certfile,
647
ca_certs=ca_certs, cert_reqs=cert_reqs,
648
ssl_version=ssl_version)