1
# urllib3/connectionpool.py
2
# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
4
# This module is part of urllib3 and is released under
5
# the MIT License: http://www.opensource.org/licenses/mit-license.php
10
from socket import error as SocketError, timeout as SocketTimeout
13
from http.client import HTTPConnection, HTTPException
14
from http.client import HTTP_PORT, HTTPS_PORT
16
from httplib import HTTPConnection, HTTPException
17
from httplib import HTTP_PORT, HTTPS_PORT
20
from queue import LifoQueue, Empty, Full
22
from Queue import LifoQueue, Empty, Full
25
try: # Compiled with SSL?
26
HTTPSConnection = object
31
from http.client import HTTPSConnection
33
from httplib import HTTPSConnection
36
BaseSSLError = ssl.SSLError
38
except (ImportError, AttributeError):
42
from .request import RequestMethods
43
from .response import HTTPResponse
44
from .util import get_host, is_connection_dropped
45
from .exceptions import (
53
from .packages.ssl_match_hostname import match_hostname, CertificateError
57
xrange = six.moves.xrange
59
log = logging.getLogger(__name__)
69
## Connection objects (extension of httplib)
71
class VerifiedHTTPSConnection(HTTPSConnection):
73
Based on httplib.HTTPSConnection but wraps the socket with
79
def set_cert(self, key_file=None, cert_file=None,
             cert_reqs='CERT_NONE', ca_certs=None):
    """
    Store the certificate settings used when this connection's socket is
    wrapped with :func:`ssl.wrap_socket`.

    :param key_file:
        Stored unchanged as ``self.key_file``.

    :param cert_file:
        Stored unchanged as ``self.cert_file``.

    :param cert_reqs:
        Verification level. Either one of the names ``'CERT_NONE'``,
        ``'CERT_OPTIONAL'``, ``'CERT_REQUIRED'`` or the matching
        ``ssl.CERT_*`` constant itself. Any other value (including
        ``None``) falls back to ``ssl.CERT_NONE``.

    :param ca_certs:
        Stored unchanged as ``self.ca_certs``.
    """
    ssl_req_scheme = {
        'CERT_NONE': ssl.CERT_NONE,
        'CERT_OPTIONAL': ssl.CERT_OPTIONAL,
        'CERT_REQUIRED': ssl.CERT_REQUIRED,
    }

    if cert_reqs in ssl_req_scheme:
        resolved_reqs = ssl_req_scheme[cert_reqs]
    elif cert_reqs in ssl_req_scheme.values():
        # The caller handed over an ``ssl.CERT_*`` constant directly. A
        # name-only lookup would miss it and silently turn a request for
        # verification into CERT_NONE.
        resolved_reqs = cert_reqs
    else:
        # NOTE(review): unknown values keep the historical fallback to
        # CERT_NONE, i.e. no verification. Callers relying on
        # verification should pass one of the documented values.
        resolved_reqs = ssl.CERT_NONE

    self.key_file = key_file
    self.cert_file = cert_file
    self.cert_reqs = resolved_reqs
    self.ca_certs = ca_certs
93
# Add certificate verification
94
sock = socket.create_connection((self.host, self.port), self.timeout)
96
# Wrap socket using verification with the root certs in
98
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file,
99
cert_reqs=self.cert_reqs,
100
ca_certs=self.ca_certs)
102
match_hostname(self.sock.getpeercert(), self.host)
107
class ConnectionPool(object):
109
Base class for all connection pools, such as
110
:class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`.
116
def __init__(self, host, port=None):
121
return '%s(host=%r, port=%r)' % (type(self).__name__,
122
self.host, self.port)
125
class HTTPConnectionPool(ConnectionPool, RequestMethods):
127
Thread-safe connection pool for one host.
130
Host used for this HTTP Connection (e.g. "localhost"), passed into
131
:class:`httplib.HTTPConnection`.
134
Port used for this HTTP Connection (None is equivalent to 80), passed
135
into :class:`httplib.HTTPConnection`.
138
Causes BadStatusLine to be raised if the status line can't be parsed
139
as a valid HTTP/1.0 or 1.1 status line, passed into
140
:class:`httplib.HTTPConnection`.
143
Socket timeout for each individual connection, can be a float. None
147
Number of connections to save that can be reused. More than 1 is useful
148
in multithreaded situations. If ``block`` is set to false, more
149
connections will be created but they will not be saved once they've
153
If set to True, no more than ``maxsize`` connections will be used at
154
a time. When no free connections are available, the call will block
155
until a connection has been released. This is a useful side effect for
156
particular multithreaded situations where one does not want to use more
157
than maxsize connections per host to prevent flooding.
160
Headers to include with all requests, unless other headers are given
166
def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1,
167
block=False, headers=None):
168
super(HTTPConnectionPool, self).__init__(host, port)
171
self.timeout = timeout
172
self.pool = self.QueueCls(maxsize)
174
self.headers = headers or {}
176
# Fill the queue up so that doing get() on it will block properly
177
for _ in xrange(maxsize):
180
# These are mostly for testing and debugging purposes.
181
self.num_connections = 0
182
self.num_requests = 0
186
Return a fresh :class:`httplib.HTTPConnection`.
188
self.num_connections += 1
189
log.info("Starting new HTTP connection (%d): %s" %
190
(self.num_connections, self.host))
191
return HTTPConnection(host=self.host, port=self.port)
193
def _get_conn(self, timeout=None):
195
Get a connection. Will return a pooled connection if one is available.
197
If no connections are available and :prop:`.block` is ``False``, then a
198
fresh connection is returned.
201
Seconds to wait before giving up and raising
202
:class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and
203
:prop:`.block` is ``True``.
207
conn = self.pool.get(block=self.block, timeout=timeout)
209
# If this is a persistent connection, check if it got disconnected
210
if conn and is_connection_dropped(conn):
211
log.info("Resetting dropped connection: %s" % self.host)
216
raise EmptyPoolError(self,
217
"Pool reached maximum size and no more "
218
"connections are allowed.")
219
pass # Oh well, we'll create a new connection then
221
return conn or self._new_conn()
223
def _put_conn(self, conn):
225
Put a connection back into the pool.
228
Connection object for the current host and port as returned by
229
:meth:`._new_conn` or :meth:`._get_conn`.
231
If the pool is already full, the connection is discarded because we
232
exceeded maxsize. If connections are discarded frequently, then maxsize
236
self.pool.put(conn, block=False)
238
# This should never happen if self.block == True
239
log.warning("HttpConnectionPool is full, discarding connection: %s"
242
def _make_request(self, conn, method, url, timeout=_Default,
243
**httplib_request_kw):
245
Perform a request on a given httplib connection object taken from our
248
self.num_requests += 1
250
if timeout is _Default:
251
timeout = self.timeout
253
conn.timeout = timeout # This only does anything in Py26+
254
conn.request(method, url, **httplib_request_kw)
257
sock = getattr(conn, 'sock', False) # AppEngine doesn't have sock attr.
259
sock.settimeout(timeout)
261
httplib_response = conn.getresponse()
263
log.debug("\"%s %s %s\" %s %s" %
265
conn._http_vsn_str, # pylint: disable-msg=W0212
266
httplib_response.status, httplib_response.length))
268
return httplib_response
271
def is_same_host(self, url):
    """
    Check if the given ``url`` is a member of the same host as this
    connection pool.

    :param url:
        URL to test. A URL starting with ``/`` is treated as relative
        to this pool and always matches.

    :returns:
        ``True`` if ``url`` is relative, or if its scheme, host and port
        equal this pool's ``scheme``, ``host`` and ``port``. An omitted
        port and the scheme's default port are treated as equivalent.
    """
    # A path-only URL names no host of its own, so there is nothing to
    # parse or compare.
    if url.startswith('/'):
        return True

    # TODO: Add optional support for socket.gethostbyname checking.
    scheme, host, port = get_host(url)

    default_port = port_by_scheme.get(scheme)
    if self.port and not port:
        # Use explicit default port for comparison when none is given.
        port = default_port
    elif not self.port and port == default_port:
        # The pool was created without a port; a URL that spells out the
        # scheme's default port still points at the same endpoint.
        port = None

    return (scheme, host, port) == (self.scheme, self.host, self.port)
286
def urlopen(self, method, url, body=None, headers=None, retries=3,
287
redirect=True, assert_same_host=True, timeout=_Default,
288
pool_timeout=None, release_conn=None, **response_kw):
290
Get a connection from the pool and perform an HTTP request. This is the
291
lowest level call for making a request, so you'll need to specify all
296
More commonly, it's appropriate to use a convenience method provided
297
by :class:`.RequestMethods`, such as :meth:`request`.
301
`release_conn` will only behave as expected if
302
`preload_content=False` because we want to make
303
`preload_content=False` the default behaviour someday soon without
304
breaking backwards compatibility.
307
HTTP request method (such as GET, POST, PUT, etc.)
310
Data to send in the request body (useful for creating
311
POST requests, see HTTPConnectionPool.post_url for
315
Dictionary of custom headers to send, such as User-Agent,
316
If-None-Match, etc. If None, pool headers are used. If provided,
317
these headers completely replace any pool-specific headers.
320
Number of retries to allow before raising a MaxRetryError exception.
323
Automatically handle redirects (status codes 301, 302, 303, 307),
324
each redirect counts as a retry.
326
:param assert_same_host:
327
If ``True``, will make sure that the host of the pool requests is
328
consistent else will raise HostChangedError. When False, you can
329
use the pool on an HTTP proxy and request foreign hosts.
332
If specified, overrides the default timeout for this one request.
335
If set and the pool is set to block=True, then this method will
336
block for ``pool_timeout`` seconds and raise EmptyPoolError if no
337
connection is available within the time period.
340
If False, then the urlopen call will not release the connection
341
back into the pool once a response is received (but will release if
342
you read the entire contents of the response such as when
343
`preload_content=True`). This is useful if you're not preloading
344
the response's content immediately. You will need to call
345
``r.release_conn()`` on the response ``r`` to return the connection
346
back into the pool. If None, it takes the value of
347
``response_kw.get('preload_content', True)``.
349
:param \**response_kw:
350
Additional parameters are passed to
351
:meth:`urllib3.response.HTTPResponse.from_httplib`
354
headers = self.headers
357
raise MaxRetryError(self, url)
359
if timeout is _Default:
360
timeout = self.timeout
362
if release_conn is None:
363
release_conn = response_kw.get('preload_content', True)
366
if assert_same_host and not self.is_same_host(url):
367
host = "%s://%s" % (self.scheme, self.host)
369
host = "%s:%d" % (host, self.port)
371
raise HostChangedError(self, url, retries - 1)
376
# Request a connection from the queue
377
# (Could raise SocketError: Bad file descriptor)
378
conn = self._get_conn(timeout=pool_timeout)
380
# Make the request on the httplib connection object
381
httplib_response = self._make_request(conn, method, url,
383
body=body, headers=headers)
385
# If we're going to release the connection in ``finally:``, then
386
# the request doesn't need to know about the connection. Otherwise
387
# it will also try to release it and we'll have a double-release
389
response_conn = not release_conn and conn
391
# Import httplib's response into our own wrapper object
392
response = HTTPResponse.from_httplib(httplib_response,
394
connection=response_conn,
398
# The connection will be put back into the pool when
399
# ``response.release_conn()`` is called (implicitly by
400
# ``response.read()``)
404
raise TimeoutError(self, "Request timed out. (pool_timeout=%s)" %
407
except SocketTimeout as e:
408
# Timed out by socket
409
raise TimeoutError(self, "Request timed out. (timeout=%s)" %
412
except BaseSSLError as e:
413
# SSL certificate error
416
except CertificateError as e:
420
except (HTTPException, SocketError) as e:
421
# Connection broken, discard. It will be replaced next _get_conn().
423
# This is necessary so we can access e below
427
if conn and release_conn:
428
# Put the connection back to be reused
432
log.warn("Retrying (%d attempts remain) after connection "
433
"broken by '%r': %s" % (retries, err, url))
434
return self.urlopen(method, url, body, headers, retries - 1,
435
redirect, assert_same_host) # Try again
438
redirect_location = redirect and response.get_redirect_location()
439
if redirect_location:
440
log.info("Redirecting %s -> %s" % (url, redirect_location))
441
return self.urlopen(method, redirect_location, body, headers,
442
retries - 1, redirect, assert_same_host)
447
class HTTPSConnectionPool(HTTPConnectionPool):
449
Same as :class:`.HTTPConnectionPool`, but HTTPS.
451
When Python is compiled with the :mod:`ssl` module, then
452
:class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates,
453
instead of :class:`httplib.HTTPSConnection`.
455
The ``key_file``, ``cert_file``, ``cert_reqs``, and ``ca_certs`` parameters
456
are only used if :mod:`ssl` is available and are fed into
457
:meth:`ssl.wrap_socket` to upgrade the connection socket into an SSL socket.
462
def __init__(self, host, port=None,
463
strict=False, timeout=None, maxsize=1,
464
block=False, headers=None,
465
key_file=None, cert_file=None,
466
cert_reqs='CERT_NONE', ca_certs=None):
468
super(HTTPSConnectionPool, self).__init__(host, port,
469
strict, timeout, maxsize,
471
self.key_file = key_file
472
self.cert_file = cert_file
473
self.cert_reqs = cert_reqs
474
self.ca_certs = ca_certs
478
Return a fresh :class:`httplib.HTTPSConnection`.
480
self.num_connections += 1
481
log.info("Starting new HTTPS connection (%d): %s"
482
% (self.num_connections, self.host))
484
if not ssl: # Platform-specific: Python compiled without +ssl
485
if not HTTPSConnection or HTTPSConnection is object:
486
raise SSLError("Can't connect to HTTPS URL because the SSL "
487
"module is not available.")
489
return HTTPSConnection(host=self.host, port=self.port)
491
connection = VerifiedHTTPSConnection(host=self.host, port=self.port)
492
connection.set_cert(key_file=self.key_file, cert_file=self.cert_file,
493
cert_reqs=self.cert_reqs, ca_certs=self.ca_certs)
497
def connection_from_url(url, **kw):
    """
    Build a connection pool for the host named in ``url``.

    Saves the caller from splitting the URL into scheme, host and port
    before constructing a pool by hand. An ``https`` scheme yields an
    :class:`.HTTPSConnectionPool`; any other scheme yields an
    :class:`.HTTPConnectionPool`.

    :param url:
        Absolute URL string that must include the scheme. Port is optional.

    :param kw:
        Extra keyword arguments handed unchanged to the constructor of the
        selected pool class. Useful for specifying things like
        timeout, maxsize, headers, etc.

    Example: ::

        >>> conn = connection_from_url('http://google.com/')
        >>> r = conn.request('GET', '/')
    """
    scheme, host, port = get_host(url)
    pool_cls = (HTTPSConnectionPool if scheme == 'https'
                else HTTPConnectionPool)
    return pool_cls(host, port=port, **kw)