21
21
from . import __version__
22
22
from . import certs
23
23
from .compat import parse_http_list as _parse_list_header
24
from .compat import quote, urlparse, bytes, str, OrderedDict, urlunparse
24
from .compat import (quote, urlparse, bytes, str, OrderedDict, urlunparse,
25
is_py2, is_py3, builtin_str, getproxies, proxy_bypass)
25
26
from .cookies import RequestsCookieJar, cookiejar_from_dict
26
27
from .structures import CaseInsensitiveDict
28
from .exceptions import MissingSchema, InvalidURL
28
30
_hush_pyflakes = (RequestsCookieJar,)
266
268
charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
269
pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
270
xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')
268
return charset_re.findall(content)
272
return (charset_re.findall(content) +
273
pragma_re.findall(content) +
274
xml_re.findall(content))
271
277
def get_encoding_from_headers(headers):
361
367
for i in range(1, len(parts)):
362
368
h = parts[i][0:2]
363
369
if len(h) == 2 and h.isalnum():
373
raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)
365
375
if c in UNRESERVED_SET:
366
376
parts[i] = c + parts[i][2:]
386
396
def get_environ_proxies(url):
    """Return a dict of environment proxies that apply to *url*.

    :param url: the URL being requested; only its netloc is inspected.
    :return: ``{}`` when the host matches ``no_proxy`` or the platform's
        proxy-bypass settings, otherwise the mapping from ``getproxies()``.
    """
    # Environment variables may be spelled lower- or upper-case.
    get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())

    # First check whether no_proxy is defined. If it is, check that the URL
    # we're getting isn't in the no_proxy list.
    no_proxy = get_proxy('no_proxy')
    netloc = urlparse(url).netloc

    if no_proxy:
        # We need to check whether we match here. We need to see if we match
        # the end of the netloc, both with and without the port.
        no_proxy = no_proxy.replace(' ', '').split(',')

        for host in no_proxy:
            if netloc.endswith(host) or netloc.split(':')[0].endswith(host):
                # The URL does match something in no_proxy, so we don't want
                # to apply the proxies on this URL.
                return {}

    # If the system proxy settings indicate that this URL should be bypassed,
    # don't proxy.
    if proxy_bypass(netloc):
        return {}

    # If we get here, we either didn't have no_proxy set or we're not going
    # anywhere that no_proxy applies to, and the system settings don't require
    # bypassing the proxy for the current URL.
    return getproxies()
421
428
def default_user_agent():
529
def prepend_scheme_if_needed(url, new_scheme):
    '''Given a URL that may or may not have a scheme, prepend the given scheme.
    Does not replace a present scheme with the one provided as an argument.'''
    scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)

    # urlparse is a finicky beast, and sometimes decides that there isn't a
    # netloc present. Assume that it's being over-cautious, and switch netloc
    # and path if urlparse decided there was no netloc.
    # NOTE: the guard is essential; swapping unconditionally would mangle any
    # URL that already carries a netloc (e.g. 'http://host/path').
    if not netloc:
        netloc, path = path, netloc

    return urlunparse((scheme, netloc, path, params, query, fragment))
536
def except_on_missing_scheme(url):
    """Given a URL, raise a MissingSchema exception if the scheme is missing.

    :param url: proxy URL to validate.
    :raises MissingSchema: when the URL has no explicit scheme.
    """
    scheme, netloc, path, params, query, fragment = urlparse(url)

    # Only the scheme matters here; the other components are ignored.
    if not scheme:
        raise MissingSchema('Proxy URLs must have explicit schemes.')
543
545
def get_auth_from_url(url):
    """Given a url with authentication components, extract them into a tuple of
    (username, password).

    :param url: URL string, possibly empty or None.
    :return: ``(username, password)`` from the URL's netloc; ``('', '')`` when
        no URL is given.
    """
    if url:
        parsed = urlparse(url)
        return (parsed.username, parsed.password)
    else:
        # No URL at all -> empty credentials rather than an error.
        return ('', '')
555
def to_native_string(string, encoding='ascii'):
    """
    Given a string object, regardless of type, returns a representation of that
    string in the native string type, encoding and decoding where necessary.
    This assumes ASCII unless told otherwise.

    :param string: a native `str` or a bytes/unicode object.
    :param encoding: codec used when a conversion is required.
    """
    out = None

    if isinstance(string, builtin_str):
        # Already the native string type -- no conversion needed.
        out = string
    else:
        if is_py2:
            # Python 2: native str is bytes, so encode unicode input.
            out = string.encode(encoding)
        else:
            # Python 3: native str is unicode, so decode bytes input.
            out = string.decode(encoding)

    return out