1
"""PyPI and direct package downloading"""
10
from functools import wraps
13
from urllib.parse import splituser
15
from urllib2 import splituser
17
from setuptools.extern import six
18
from setuptools.extern.six.moves import urllib, http_client, configparser, map
21
from pkg_resources import (
22
CHECKOUT_DIST, Distribution, BINARY_DIST, normalize_path, SOURCE_DIST,
23
require, Environment, find_distributions, safe_name, safe_version,
24
to_filename, Requirement, DEVELOP_DIST,
26
from setuptools import ssl_support
27
from distutils import log
28
from distutils.errors import DistutilsError
29
from fnmatch import translate
30
from setuptools.py26compat import strip_fragment
31
from setuptools.py27compat import get_all_headers
33
# Matches a URL fragment of the form ``egg=<name>``.
EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.]+)$')
# Captures the target of an ``href`` attribute (quoted or bare).
HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I)
# this is here to fix emacs' cruddy broken syntax highlighting
# Raw strings are used so that regex escapes such as ``\s`` and ``\?`` are
# handed to ``re`` untouched instead of being (invalid) string escapes.
PYPI_MD5 = re.compile(
    r'<a href="([^"#]+)">([^<]+)</a>\n\s+\(<a (?:title="MD5 hash"\n\s+)'
    r'href="[^?]+\?:action=show_md5&digest=([0-9a-f]{32})">md5</a>\)'
)
# Bound ``match`` method: recognises a leading ``scheme:`` of two or more
# characters.
URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match
# Source archive suffixes, in the order they are tried.
EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()
44
'PackageIndex', 'distros_for_url', 'parse_bdist_wininst',
45
'interpret_distro_name',
50
# Value of the User-Agent header added to outgoing requests.
_tmpl = "setuptools/{setuptools.__version__} Python-urllib/{py_major}"
# Build "major.minor" from version_info: slicing sys.version[:3] truncates
# two-digit minor versions (e.g. "3.10.4" would be reported as "3.1").
user_agent = _tmpl.format(
    py_major='%d.%d' % sys.version_info[:2], **globals()
)
54
def parse_requirement_arg(spec):
56
return Requirement.parse(spec)
59
"Not a URL, existing file, or requirement spec: %r" % (spec,)
63
def parse_bdist_wininst(name):
64
"""Return (base,pyversion) or (None,None) for possible .exe name"""
67
base, py_ver, plat = None, None, None
69
if lower.endswith('.exe'):
70
if lower.endswith('.win32.exe'):
73
elif lower.startswith('.win32-py', -16):
77
elif lower.endswith('.win-amd64.exe'):
80
elif lower.startswith('.win-amd64-py', -20):
84
return base, py_ver, plat
87
def egg_info_for_url(url):
88
parts = urllib.parse.urlparse(url)
89
scheme, server, path, parameters, query, fragment = parts
90
base = urllib.parse.unquote(path.split('/')[-1])
91
if server == 'sourceforge.net' and base == 'download': # XXX Yuck
92
base = urllib.parse.unquote(path.split('/')[-2])
94
base, fragment = base.split('#', 1)
98
def distros_for_url(url, metadata=None):
99
"""Yield egg or source distribution objects that might be found at a URL"""
100
base, fragment = egg_info_for_url(url)
101
for dist in distros_for_location(url, base, metadata):
104
match = EGG_FRAGMENT.match(fragment)
106
for dist in interpret_distro_name(
107
url, match.group(1), metadata, precedence=CHECKOUT_DIST
112
def distros_for_location(location, basename, metadata=None):
    """Return the egg or source distribution candidates implied by `basename`.

    An ``.egg`` name containing a dash has a single interpretation; Windows
    installers and source archives are handed to ``interpret_distro_name``.
    An empty list is returned when no known extension matches.
    """
    if basename.endswith('.egg.zip'):
        # drop the trailing ".zip" so the name is handled as a plain egg
        basename = basename[:-len('.zip')]

    if '-' in basename and basename.endswith('.egg'):
        # only one, unambiguous interpretation
        return [Distribution.from_location(location, basename, metadata)]

    if basename.endswith('.exe'):
        win_base, py_ver, platform = parse_bdist_wininst(basename)
        if win_base is not None:
            return interpret_distro_name(
                location, win_base, metadata, py_ver, BINARY_DIST, platform
            )

    # Try source distro extensions (.zip, .tgz, etc.); first match wins.
    matched = next(
        (ext for ext in EXTENSIONS if basename.endswith(ext)), None
    )
    if matched is None:
        return []  # no extension matched
    stem = basename[:-len(matched)]
    return interpret_distro_name(location, stem, metadata)
134
def distros_for_filename(filename, metadata=None):
    """Return candidate distributions for a local `filename`.

    The normalized path is used as the location and the final path
    component as the basename passed to ``distros_for_location``.
    """
    location = normalize_path(filename)
    basename = os.path.basename(filename)
    return distros_for_location(location, basename, metadata)
141
def interpret_distro_name(
142
location, basename, metadata, py_version=None, precedence=SOURCE_DIST,
145
"""Generate alternative interpretations of a source distro name
147
Note: if `location` is a filesystem filename, you should call
148
``pkg_resources.normalize_path()`` on it before passing it to this
151
# Generate alternative interpretations of a source distro name
152
# Because some packages are ambiguous as to name/versions split
153
# e.g. "adns-python-1.1.0", "egenix-mx-commercial", etc.
154
# So, we generate each possible interpretation (e.g. "adns, python-1.1.0"
155
# "adns-python, 1.1.0", and "adns-python-1.1.0, no version"). In practice,
156
# the spurious interpretations should be ignored, because in the event
157
# there's also an "adns" package, the spurious "python-1.1.0" version will
158
# compare lower than any numeric version number, and is therefore unlikely
159
# to match a request for it. It's still a potential problem, though, and
160
# in the long run PyPI and the distutils should go for "safe" names and
161
# versions in distribution archive names (sdist and bdist).
163
parts = basename.split('-')
164
if not py_version and any(re.match('py\d\.\d$', p) for p in parts[2:]):
165
# it is a bdist_dumb, not an sdist -- bail out
168
for p in range(1, len(parts) + 1):
170
location, metadata, '-'.join(parts[:p]), '-'.join(parts[p:]),
171
py_version=py_version, precedence=precedence,
176
# From Python 2.7 docs
177
def unique_everseen(iterable, key=None):
178
"List unique elements, preserving order. Remember all elements ever seen."
179
# unique_everseen('AAAABBBCCDAABBB') --> A B C D
180
# unique_everseen('ABBCcAD', str.lower) --> A B C D
184
for element in six.moves.filterfalse(seen.__contains__, iterable):
188
for element in iterable:
195
def unique_values(func):
197
Wrap a function returning an iterable such that the resulting iterable
198
only ever yields unique items.
202
def wrapper(*args, **kwargs):
203
return unique_everseen(func(*args, **kwargs))
208
REL = re.compile("""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I)
209
# this line is here to fix emacs' cruddy broken syntax highlighting
213
def find_external_links(url, page):
214
"""Find rel="homepage" and rel="download" links in `page`, yielding URLs"""
216
for match in REL.finditer(page):
217
tag, rel = match.groups()
218
rels = set(map(str.strip, rel.lower().split(',')))
219
if 'homepage' in rels or 'download' in rels:
220
for match in HREF.finditer(tag):
221
yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
223
for tag in ("<th>Home Page", "<th>Download URL"):
226
match = HREF.search(page, pos)
228
yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
231
class ContentChecker(object):
233
A null content checker that defines the interface for checking content
236
def feed(self, block):
238
Feed a block of data to the hash.
244
Check the hash. Return False if validation fails.
248
def report(self, reporter, template):
250
Call reporter with information about the checker (hash name)
251
substituted into the template.
256
class HashChecker(ContentChecker):
257
pattern = re.compile(
258
r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)='
259
r'(?P<expected>[a-f0-9]+)'
262
def __init__(self, hash_name, expected):
263
self.hash_name = hash_name
264
self.hash = hashlib.new(hash_name)
265
self.expected = expected
268
def from_url(cls, url):
269
"Construct a (possibly null) ContentChecker from a URL"
270
fragment = urllib.parse.urlparse(url)[-1]
272
return ContentChecker()
273
match = cls.pattern.search(fragment)
275
return ContentChecker()
276
return cls(**match.groupdict())
278
def feed(self, block):
279
self.hash.update(block)
282
return self.hash.hexdigest() == self.expected
284
def report(self, reporter, template):
285
msg = template % self.hash_name
289
class PackageIndex(Environment):
290
"""A distribution index that scans web pages for download URLs"""
293
self, index_url="https://pypi.python.org/simple", hosts=('*',),
294
ca_bundle=None, verify_ssl=True, *args, **kw
296
Environment.__init__(self, *args, **kw)
297
self.index_url = index_url + "/" [:not index_url.endswith('/')]
298
self.scanned_urls = {}
299
self.fetched_urls = {}
300
self.package_pages = {}
301
self.allows = re.compile('|'.join(map(translate, hosts))).match
305
and ssl_support.is_available
306
and (ca_bundle or ssl_support.find_ca_bundle())
309
self.opener = ssl_support.opener_for(ca_bundle)
311
self.opener = urllib.request.urlopen
313
def process_url(self, url, retrieve=False):
314
"""Evaluate a URL as a possible download, and maybe retrieve it"""
315
if url in self.scanned_urls and not retrieve:
317
self.scanned_urls[url] = True
318
if not URL_SCHEME(url):
319
self.process_filename(url)
322
dists = list(distros_for_url(url))
324
if not self.url_ok(url):
326
self.debug("Found link: %s", url)
328
if dists or not retrieve or url in self.fetched_urls:
329
list(map(self.add, dists))
330
return # don't need the actual page
332
if not self.url_ok(url):
333
self.fetched_urls[url] = True
336
self.info("Reading %s", url)
337
self.fetched_urls[url] = True # prevent multiple fetch attempts
338
tmpl = "Download error on %s: %%s -- Some packages may not be found!"
339
f = self.open_url(url, tmpl % url)
342
self.fetched_urls[f.url] = True
343
if 'html' not in f.headers.get('content-type', '').lower():
344
f.close() # not html, we can't process it
347
base = f.url # handle redirects
349
if not isinstance(page, str): # We are in Python 3 and got bytes. We want str.
350
if isinstance(f, urllib.error.HTTPError):
351
# Errors have no charset, assume latin1:
354
charset = f.headers.get_param('charset') or 'latin-1'
355
page = page.decode(charset, "ignore")
357
for match in HREF.finditer(page):
358
link = urllib.parse.urljoin(base, htmldecode(match.group(1)))
359
self.process_url(link)
360
if url.startswith(self.index_url) and getattr(f, 'code', None) != 404:
361
page = self.process_index(url, page)
363
def process_filename(self, fn, nested=False):
364
# process filenames or directories
365
if not os.path.exists(fn):
366
self.warn("Not found: %s", fn)
369
if os.path.isdir(fn) and not nested:
370
path = os.path.realpath(fn)
371
for item in os.listdir(path):
372
self.process_filename(os.path.join(path, item), True)
374
dists = distros_for_filename(fn)
376
self.debug("Found: %s", fn)
377
list(map(self.add, dists))
379
def url_ok(self, url, fatal=False):
381
is_file = s and s.group(1).lower() == 'file'
382
if is_file or self.allows(urllib.parse.urlparse(url)[1]):
384
msg = ("\nNote: Bypassing %s (disallowed host; see "
385
"http://bit.ly/1dg9ijs for details).\n")
387
raise DistutilsError(msg % url)
391
def scan_egg_links(self, search_path):
392
dirs = filter(os.path.isdir, search_path)
396
for entry in os.listdir(path)
397
if entry.endswith('.egg-link')
399
list(itertools.starmap(self.scan_egg_link, egg_links))
401
def scan_egg_link(self, path, entry):
402
with open(os.path.join(path, entry)) as raw_lines:
403
# filter non-empty lines
404
lines = list(filter(None, map(str.strip, raw_lines)))
407
# format is not recognized; punt
410
egg_path, setup_path = lines
412
for dist in find_distributions(os.path.join(path, egg_path)):
413
dist.location = os.path.join(path, *lines)
414
dist.precedence = SOURCE_DIST
417
def process_index(self, url, page):
418
"""Process the contents of a PyPI page"""
421
# Process a URL to see if it's for a package page
422
if link.startswith(self.index_url):
424
urllib.parse.unquote, link[len(self.index_url):].split('/')
426
if len(parts) == 2 and '#' not in parts[1]:
427
# it's a package page, sanitize and index it
428
pkg = safe_name(parts[0])
429
ver = safe_version(parts[1])
430
self.package_pages.setdefault(pkg.lower(), {})[link] = True
431
return to_filename(pkg), to_filename(ver)
434
# process an index page into the package-page index
435
for match in HREF.finditer(page):
437
scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
441
pkg, ver = scan(url) # ensure this page is in the page index
443
# process individual package page
444
for new_url in find_external_links(url, page):
445
# Process the found URL
446
base, frag = egg_info_for_url(new_url)
447
if base.endswith('.py') and not frag:
449
new_url += '#egg=%s-%s' % (pkg, ver)
451
self.need_version_info(url)
452
self.scan_url(new_url)
455
lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
458
return "" # no sense double-scanning non-package pages
460
def need_version_info(self, url):
462
"Page at %s links to .py file(s) without version info; an index "
463
"scan is required.", url
466
def scan_all(self, msg=None, *args):
467
if self.index_url not in self.fetched_urls:
469
self.warn(msg, *args)
471
"Scanning index of all packages (this may take a while)"
473
self.scan_url(self.index_url)
475
def find_packages(self, requirement):
476
self.scan_url(self.index_url + requirement.unsafe_name + '/')
478
if not self.package_pages.get(requirement.key):
479
# Fall back to safe version of the name
480
self.scan_url(self.index_url + requirement.project_name + '/')
482
if not self.package_pages.get(requirement.key):
483
# We couldn't find the target package, so search the index page too
484
self.not_found_in_index(requirement)
486
for url in list(self.package_pages.get(requirement.key, ())):
487
# scan each page that might be related to the desired package
490
def obtain(self, requirement, installer=None):
492
self.find_packages(requirement)
493
for dist in self[requirement.key]:
494
if dist in requirement:
496
self.debug("%s does not match %s", requirement, dist)
497
return super(PackageIndex, self).obtain(requirement, installer)
499
def check_hash(self, checker, filename, tfp):
501
checker is a ContentChecker
503
checker.report(self.debug,
504
"Validating %%s checksum for %s" % filename)
505
if not checker.is_valid():
508
raise DistutilsError(
509
"%s validation failed for %s; "
510
"possible download problem?" % (
511
checker.hash.name, os.path.basename(filename))
514
def add_find_links(self, urls):
515
"""Add `urls` to the list that will be prescanned for searches"""
518
self.to_scan is None # if we have already "gone online"
519
or not URL_SCHEME(url) # or it's a local file/directory
520
or url.startswith('file:')
521
or list(distros_for_url(url)) # or a direct package link
523
# then go ahead and process it now
526
# otherwise, defer retrieval till later
527
self.to_scan.append(url)
530
"""Scan urls scheduled for prescanning (e.g. --find-links)"""
532
list(map(self.scan_url, self.to_scan))
533
self.to_scan = None # from now on, go ahead and process immediately
535
def not_found_in_index(self, requirement):
536
if self[requirement.key]: # we've seen at least one distro
537
meth, msg = self.info, "Couldn't retrieve index page for %r"
538
else: # no distros seen for this name, might be misspelled
539
meth, msg = (self.warn,
540
"Couldn't find index page for %r (maybe misspelled?)")
541
meth(msg, requirement.unsafe_name)
544
def download(self, spec, tmpdir):
545
"""Locate and/or download `spec` to `tmpdir`, returning a local path
547
`spec` may be a ``Requirement`` object, or a string containing a URL,
548
an existing local filename, or a project/version requirement spec
549
(i.e. the string form of a ``Requirement`` object). If it is the URL
550
of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one
551
that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is
552
automatically created alongside the downloaded file.
554
If `spec` is a ``Requirement`` object or a string containing a
555
project/version requirement spec, this method returns the location of
556
a matching distribution (possibly after downloading it to `tmpdir`).
557
If `spec` is a locally existing file or directory name, it is simply
558
returned unchanged. If `spec` is a URL, it is downloaded to a subpath
559
of `tmpdir`, and the local filename is returned. Various errors may be
560
raised if a problem occurs during downloading.
562
if not isinstance(spec, Requirement):
563
scheme = URL_SCHEME(spec)
565
# It's a url, download it to tmpdir
566
found = self._download_url(scheme.group(1), spec, tmpdir)
567
base, fragment = egg_info_for_url(spec)
568
if base.endswith('.py'):
569
found = self.gen_setup(found, fragment, tmpdir)
571
elif os.path.exists(spec):
572
# Existing file or directory, just return it
575
spec = parse_requirement_arg(spec)
576
return getattr(self.fetch_distribution(spec, tmpdir), 'location', None)
578
def fetch_distribution(
579
self, requirement, tmpdir, force_scan=False, source=False,
580
develop_ok=False, local_index=None
582
"""Obtain a distribution suitable for fulfilling `requirement`
584
`requirement` must be a ``pkg_resources.Requirement`` instance.
585
If necessary, or if the `force_scan` flag is set, the requirement is
586
searched for in the (online) package index as well as the locally
587
installed packages. If a distribution matching `requirement` is found,
588
the returned distribution's ``location`` is the value you would have
589
gotten from calling the ``download()`` method with the matching
590
distribution's URL or filename. If no matching distribution is found,
591
``None`` is returned.
593
If the `source` flag is set, only source distributions and source
594
checkout links will be considered. Unless the `develop_ok` flag is
595
set, development and system eggs (i.e., those using the ``.egg-info``
596
format) will be ignored.
598
# process a Requirement
599
self.info("Searching for %s", requirement)
603
def find(req, env=None):
606
# Find a matching distribution; may be called more than once
608
for dist in env[req.key]:
610
if dist.precedence == DEVELOP_DIST and not develop_ok:
611
if dist not in skipped:
612
self.warn("Skipping development or system egg: %s", dist)
616
if dist in req and (dist.precedence <= SOURCE_DIST or not source):
617
dist.download_location = self.download(dist.location, tmpdir)
618
if os.path.exists(dist.download_location):
623
self.find_packages(requirement)
624
dist = find(requirement)
626
if not dist and local_index is not None:
627
dist = find(requirement, local_index)
630
if self.to_scan is not None:
632
dist = find(requirement)
634
if dist is None and not force_scan:
635
self.find_packages(requirement)
636
dist = find(requirement)
640
"No local packages or working download links found for %s%s",
641
(source and "a source distribution of " or ""),
645
self.info("Best match: %s", dist)
646
return dist.clone(location=dist.download_location)
648
def fetch(self, requirement, tmpdir, force_scan=False, source=False):
649
"""Obtain a file suitable for fulfilling `requirement`
651
DEPRECATED; use the ``fetch_distribution()`` method now instead. For
652
backward compatibility, this routine is identical but returns the
653
``location`` of the downloaded distribution instead of a distribution
656
dist = self.fetch_distribution(requirement, tmpdir, force_scan, source)
661
def gen_setup(self, filename, fragment, tmpdir):
662
match = EGG_FRAGMENT.match(fragment)
665
interpret_distro_name(filename, match.group(1), None) if d.version
668
if len(dists) == 1: # unambiguous ``#egg`` fragment
669
basename = os.path.basename(filename)
671
# Make sure the file has been downloaded to the temp dir.
672
if os.path.dirname(filename) != tmpdir:
673
dst = os.path.join(tmpdir, basename)
674
from setuptools.command.easy_install import samefile
675
if not samefile(filename, dst):
676
shutil.copy2(filename, dst)
679
with open(os.path.join(tmpdir, 'setup.py'), 'w') as file:
681
"from setuptools import setup\n"
682
"setup(name=%r, version=%r, py_modules=[%r])\n"
684
dists[0].project_name, dists[0].version,
685
os.path.splitext(basename)[0]
691
raise DistutilsError(
692
"Can't unambiguously interpret project/version identifier %r; "
693
"any dashes in the name or version should be escaped using "
694
"underscores. %r" % (fragment, dists)
697
raise DistutilsError(
698
"Can't process plain .py files without an '#egg=name-version'"
699
" suffix to enable automatic setup script generation."
704
def _download_to(self, url, filename):
705
self.info("Downloading %s", url)
707
fp, info = None, None
709
checker = HashChecker.from_url(url)
710
fp = self.open_url(strip_fragment(url))
711
if isinstance(fp, urllib.error.HTTPError):
712
raise DistutilsError(
713
"Can't download %s: %s %s" % (url, fp.code, fp.msg)
717
bs = self.dl_blocksize
719
if "content-length" in headers:
720
# Some servers return multiple Content-Length headers :(
721
sizes = get_all_headers(headers, 'Content-Length')
722
size = max(map(int, sizes))
723
self.reporthook(url, filename, blocknum, bs, size)
724
with open(filename, 'wb') as tfp:
731
self.reporthook(url, filename, blocknum, bs, size)
734
self.check_hash(checker, filename, tfp)
740
def reporthook(self, url, filename, blocknum, blksize, size):
743
def open_url(self, url, warning=None):
744
if url.startswith('file:'):
745
return local_open(url)
747
return open_with_auth(url, self.opener)
748
except (ValueError, http_client.InvalidURL) as v:
749
msg = ' '.join([str(arg) for arg in v.args])
751
self.warn(warning, msg)
753
raise DistutilsError('%s %s' % (url, msg))
754
except urllib.error.HTTPError as v:
756
except urllib.error.URLError as v:
758
self.warn(warning, v.reason)
760
raise DistutilsError("Download error for %s: %s"
762
except http_client.BadStatusLine as v:
764
self.warn(warning, v.line)
766
raise DistutilsError(
767
'%s returned a bad status line. The server might be '
771
except http_client.HTTPException as v:
773
self.warn(warning, v)
775
raise DistutilsError("Download error for %s: %s"
778
def _download_url(self, scheme, url, tmpdir):
779
# Determine download filename
781
name, fragment = egg_info_for_url(url)
784
name = name.replace('..', '.').replace('\\', '_')
786
name = "__downloaded__" # default if URL has no path contents
788
if name.endswith('.egg.zip'):
789
name = name[:-4] # strip the extra .zip before download
791
filename = os.path.join(tmpdir, name)
795
if scheme == 'svn' or scheme.startswith('svn+'):
796
return self._download_svn(url, filename)
797
elif scheme == 'git' or scheme.startswith('git+'):
798
return self._download_git(url, filename)
799
elif scheme.startswith('hg+'):
800
return self._download_hg(url, filename)
801
elif scheme == 'file':
802
return urllib.request.url2pathname(urllib.parse.urlparse(url)[2])
804
self.url_ok(url, True) # raises error if not allowed
805
return self._attempt_download(url, filename)
807
def scan_url(self, url):
    """Process `url` via ``process_url`` with retrieval switched on."""
    retrieve = True
    self.process_url(url, retrieve)
810
def _attempt_download(self, url, filename):
811
headers = self._download_to(url, filename)
812
if 'html' in headers.get('content-type', '').lower():
813
return self._download_html(url, headers, filename)
817
def _download_html(self, url, headers, filename):
818
file = open(filename)
821
# Check for a subversion index page
822
if re.search(r'<title>([^- ]+ - )?Revision \d+:', line):
823
# it's a subversion index page:
826
return self._download_svn(url, filename)
827
break # not an index page
830
raise DistutilsError("Unexpected HTML page found at " + url)
832
def _download_svn(self, url, filename):
833
url = url.split('#', 1)[0] # remove any fragment for svn's sake
835
if url.lower().startswith('svn:') and '@' in url:
836
scheme, netloc, path, p, q, f = urllib.parse.urlparse(url)
837
if not netloc and path.startswith('//') and '/' in path[2:]:
838
netloc, path = path[2:].split('/', 1)
839
auth, host = splituser(netloc)
842
user, pw = auth.split(':', 1)
843
creds = " --username=%s --password=%s" % (user, pw)
845
creds = " --username=" + auth
847
parts = scheme, netloc, url, p, q, f
848
url = urllib.parse.urlunparse(parts)
849
self.info("Doing subversion checkout from %s to %s", url, filename)
850
os.system("svn checkout%s -q %s %s" % (creds, url, filename))
854
def _vcs_split_rev_from_url(url, pop_prefix=False):
855
scheme, netloc, path, query, frag = urllib.parse.urlsplit(url)
857
scheme = scheme.split('+', 1)[-1]
859
# Some fragment identification fails
860
path = path.split('#', 1)[0]
864
path, rev = path.rsplit('@', 1)
866
# Also, discard fragment
867
url = urllib.parse.urlunsplit((scheme, netloc, path, query, ''))
871
def _download_git(self, url, filename):
872
filename = filename.split('#', 1)[0]
873
url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)
875
self.info("Doing git clone from %s to %s", url, filename)
876
os.system("git clone --quiet %s %s" % (url, filename))
879
self.info("Checking out %s", rev)
880
os.system("(cd %s && git checkout --quiet %s)" % (
887
def _download_hg(self, url, filename):
888
filename = filename.split('#', 1)[0]
889
url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)
891
self.info("Doing hg clone from %s to %s", url, filename)
892
os.system("hg clone --quiet %s %s" % (url, filename))
895
self.info("Updating to %s", rev)
896
os.system("(cd %s && hg up -C -r %s >&-)" % (
903
def debug(self, msg, *args):
    """Forward `msg` and its format arguments to ``log.debug``."""
    log.debug(msg, *args)
906
def info(self, msg, *args):
909
def warn(self, msg, *args):
913
# This pattern matches a character entity reference (a decimal numeric
914
# reference, a hexadecimal numeric reference, or a named reference).
915
entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
919
if not isinstance(c, int):
926
def decode_entity(match):
927
what = match.group(1)
928
if what.startswith('#x'):
929
what = int(what[2:], 16)
930
elif what.startswith('#'):
933
what = six.moves.html_entities.name2codepoint.get(what, match.group(0))
937
def htmldecode(text):
    """Return `text` with each HTML entity replaced via ``decode_entity``."""
    decoded = entity_sub(decode_entity, text)
    return decoded
942
def socket_timeout(timeout=15):
943
def _socket_timeout(func):
944
def _socket_timeout(*args, **kwargs):
945
old_timeout = socket.getdefaulttimeout()
946
socket.setdefaulttimeout(timeout)
948
return func(*args, **kwargs)
950
socket.setdefaulttimeout(old_timeout)
952
return _socket_timeout
954
return _socket_timeout
957
def _encode_auth(auth):
959
A function compatible with Python 2.3-3.3 that will encode
960
auth from a URL suitable for an HTTP header.
961
>>> str(_encode_auth('username%3Apassword'))
962
'dXNlcm5hbWU6cGFzc3dvcmQ='
964
Long auth strings should not cause a newline to be inserted.
965
>>> long_auth = 'username:' + 'password'*10
966
>>> chr(10) in str(_encode_auth(long_auth))
969
auth_s = urllib.parse.unquote(auth)
971
auth_bytes = auth_s.encode()
972
# use the legacy interface for Python 2.3 support
973
encoded_bytes = base64.encodestring(auth_bytes)
974
# convert back to a string
975
encoded = encoded_bytes.decode()
976
# strip the trailing carriage return
977
return encoded.replace('\n', '')
980
class Credential(object):
982
A username/password pair. Use like a namedtuple.
985
def __init__(self, username, password):
986
self.username = username
987
self.password = password
994
return '%(username)s:%(password)s' % vars(self)
997
class PyPIConfig(configparser.RawConfigParser):
1002
defaults = dict.fromkeys(['username', 'password', 'repository'], '')
1003
configparser.RawConfigParser.__init__(self, defaults)
1005
rc = os.path.join(os.path.expanduser('~'), '.pypirc')
1006
if os.path.exists(rc):
1010
def creds_by_repository(self):
    """Build a dict from the pairs ``_get_repo_cred`` returns.

    Only sections whose 'repository' option is non-blank after stripping
    whitespace are included.
    """
    configured = [
        name for name in self.sections()
        if self.get(name, 'repository').strip()
    ]
    return dict(self._get_repo_cred(name) for name in configured)
1018
def _get_repo_cred(self, section):
    """Return ``(repository, Credential)`` for `section`.

    Each option value has surrounding whitespace stripped.
    """
    def option(name):
        return self.get(section, name).strip()

    repo = option('repository')
    username = option('username')
    password = option('password')
    return repo, Credential(username, password)
1025
def find_credential(self, url):
1027
If the URL indicated appears to be a repository defined in this
1028
config, return the credential for that repository.
1030
for repository, cred in self.creds_by_repository.items():
1031
if url.startswith(repository):
1035
def open_with_auth(url, opener=urllib.request.urlopen):
1036
"""Open a urllib2 request, handling HTTP authentication"""
1038
scheme, netloc, path, params, query, frag = urllib.parse.urlparse(url)
1040
# Double scheme does not raise on Mac OS X as revealed by a
1041
# failing test. We would expect "nonnumeric port". Refs #20.
1042
if netloc.endswith(':'):
1043
raise http_client.InvalidURL("nonnumeric port: ''")
1045
if scheme in ('http', 'https'):
1046
auth, host = splituser(netloc)
1051
cred = PyPIConfig().find_credential(url)
1054
info = cred.username, url
1055
log.info('Authenticating as %s for %s (from .pypirc)', *info)
1058
auth = "Basic " + _encode_auth(auth)
1059
parts = scheme, host, path, params, query, frag
1060
new_url = urllib.parse.urlunparse(parts)
1061
request = urllib.request.Request(new_url)
1062
request.add_header("Authorization", auth)
1064
request = urllib.request.Request(url)
1066
request.add_header('User-Agent', user_agent)
1067
fp = opener(request)
1070
# Put authentication info back into request URL if same host,
1071
# so that links found on the page will work
1072
s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url)
1073
if s2 == scheme and h2 == host:
1074
parts = s2, netloc, path2, param2, query2, frag2
1075
fp.url = urllib.parse.urlunparse(parts)
1080
# adding a timeout to avoid freezing package_index
1081
open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
1084
def fix_sf_url(url):
    """Return `url` unchanged; retained only for backward compatibility."""
    return url
1088
def local_open(url):
1089
"""Read a local path, with special support for directories"""
1090
scheme, server, path, param, query, frag = urllib.parse.urlparse(url)
1091
filename = urllib.request.url2pathname(path)
1092
if os.path.isfile(filename):
1093
return urllib.request.urlopen(url)
1094
elif path.endswith('/') and os.path.isdir(filename):
1096
for f in os.listdir(filename):
1097
filepath = os.path.join(filename, f)
1098
if f == 'index.html':
1099
with open(filepath, 'r') as fp:
1102
elif os.path.isdir(filepath):
1104
files.append('<a href="{name}">{name}</a>'.format(name=f))
1106
tmpl = ("<html><head><title>{url}</title>"
1107
"</head><body>{files}</body></html>")
1108
body = tmpl.format(url=url, files='\n'.join(files))
1109
status, message = 200, "OK"
1111
status, message, body = 404, "Path not found", "Not found"
1113
headers = {'content-type': 'text/html'}
1114
body_stream = six.StringIO(body)
1115
return urllib.error.HTTPError(url, status, message, headers, body_stream)