~ubuntu-branches/debian/sid/python-pip/sid

Viewing changes to pip/vendor/distlib/locators.py

Committer: Package Import Robot
Author(s): Barry Warsaw
Date: 2013-08-19 18:33:23 UTC
mfrom: (1.2.5)
Revision ID: package-import@ubuntu.com-20130819183323-8xyoldb2798iil6e

Tags: 1.4.1-1

* Team upload.
* New upstream release.
  - d/control: Update Standards-Version to 3.9.4 with no additional
    changes required.
  - d/patches/no-python-specific-scripts.patch: Refreshed.
  - d/patches/format_egg_string.patch: Refreshed.
  - d/patches/system-ca-certificates.patch: Refreshed.

files added:
docs/configuration.rst

docs/cookbook.rst

docs/development.rst

docs/index.rst

docs/installing.rst

docs/logic.rst

docs/news.rst

docs/other-tools.rst

docs/quickstart.rst

docs/usage.rst

pip/commands/wheel.py

pip/pep425tags.py

pip/vendor

pip/vendor/__init__.py

pip/vendor/distlib

pip/vendor/distlib/__init__.py

pip/vendor/distlib/_backport

pip/vendor/distlib/_backport/__init__.py

pip/vendor/distlib/_backport/misc.py

pip/vendor/distlib/_backport/shutil.py

pip/vendor/distlib/_backport/sysconfig.py

pip/vendor/distlib/_backport/tarfile.py

pip/vendor/distlib/compat.py

pip/vendor/distlib/database.py

pip/vendor/distlib/index.py

pip/vendor/distlib/locators.py

pip/vendor/distlib/manifest.py

pip/vendor/distlib/markers.py

pip/vendor/distlib/metadata.py

pip/vendor/distlib/resources.py

pip/vendor/distlib/scripts.py

pip/vendor/distlib/util.py

pip/vendor/distlib/version.py

pip/vendor/distlib/wheel.py

pip/vendor/html5lib

pip/vendor/html5lib/__init__.py

pip/vendor/html5lib/constants.py

pip/vendor/html5lib/filters

pip/vendor/html5lib/filters/__init__.py

pip/vendor/html5lib/filters/_base.py

pip/vendor/html5lib/filters/alphabeticalattributes.py

pip/vendor/html5lib/filters/inject_meta_charset.py

pip/vendor/html5lib/filters/lint.py

pip/vendor/html5lib/filters/optionaltags.py

pip/vendor/html5lib/filters/sanitizer.py

pip/vendor/html5lib/filters/whitespace.py

pip/vendor/html5lib/html5parser.py

pip/vendor/html5lib/ihatexml.py

pip/vendor/html5lib/inputstream.py

pip/vendor/html5lib/sanitizer.py

pip/vendor/html5lib/serializer

pip/vendor/html5lib/serializer/__init__.py

pip/vendor/html5lib/serializer/htmlserializer.py

pip/vendor/html5lib/tokenizer.py

pip/vendor/html5lib/treebuilders

pip/vendor/html5lib/treebuilders/__init__.py

pip/vendor/html5lib/treebuilders/_base.py

pip/vendor/html5lib/treebuilders/dom.py

pip/vendor/html5lib/treebuilders/etree.py

pip/vendor/html5lib/treebuilders/etree_lxml.py

pip/vendor/html5lib/treewalkers

pip/vendor/html5lib/treewalkers/__init__.py

pip/vendor/html5lib/treewalkers/_base.py

pip/vendor/html5lib/treewalkers/dom.py

pip/vendor/html5lib/treewalkers/etree.py

pip/vendor/html5lib/treewalkers/genshistream.py

pip/vendor/html5lib/treewalkers/lxmletree.py

pip/vendor/html5lib/treewalkers/pulldom.py

pip/vendor/html5lib/trie

pip/vendor/html5lib/trie/__init__.py

pip/vendor/html5lib/trie/_base.py

pip/vendor/html5lib/trie/datrie.py

pip/vendor/html5lib/trie/py.py

pip/vendor/html5lib/utils.py

pip/vendor/six.py

pip/wheel.py

files removed:
docs/configuration.txt

docs/cookbook.txt

docs/development.txt

docs/index.txt

docs/installing.txt

docs/logic.txt

docs/news.txt

docs/other-tools.txt

docs/quickstart.txt

docs/usage.txt

pip/backwardcompat/socket_create_connection.py

files modified:
.pc/format_egg_string.patch/pip/req.py

.pc/no-python-specific-scripts.patch/setup.py

.pc/system-ca-certificates.patch/pip/download.py

.pc/system-ca-certificates.patch/pip/locations.py

.pc/system-ca-certificates.patch/setup.py

AUTHORS.txt

CHANGES.txt

MANIFEST.in

PKG-INFO

PROJECT.txt

README.rst

debian/changelog

debian/control

debian/patches/format_egg_string.patch

debian/patches/no-python-specific-scripts.patch

debian/patches/system-ca-certificates.patch

pip.egg-info/PKG-INFO

pip.egg-info/SOURCES.txt

pip.egg-info/requires.txt

pip/__init__.py

pip/backwardcompat/__init__.py

pip/backwardcompat/ssl_match_hostname.py

pip/basecommand.py

pip/baseparser.py

pip/cmdoptions.py

pip/commands/__init__.py

pip/commands/bundle.py

pip/commands/install.py

pip/commands/list.py

pip/commands/search.py

pip/commands/zip.py

pip/download.py

pip/exceptions.py

pip/index.py

pip/locations.py

pip/req.py

pip/util.py

pip/vcs/git.py

pip/vcs/subversion.py

setup.cfg

setup.py

Show diffs side-by-side

added added

removed removed

pip/vendor/distlib/locators.py

# -*- coding: utf-8 -*-

# Licensed to the Python Software Foundation under a contributor agreement.

# See LICENSE.txt and CONTRIBUTORS.txt.

import gzip

from io import BytesIO

import json

import logging

import os

import posixpath

import re

import threading

import zlib

from . import DistlibException

from .compat import (urljoin, urlparse, urlunparse, url2pathname, pathname2url,

queue, quote, unescape, string_types, build_opener,

HTTPRedirectHandler as BaseRedirectHandler,

Request, HTTPError, URLError)

from .database import Distribution, DistributionPath, make_dist

from .metadata import Metadata

from .util import (cached_property, parse_credentials, ensure_slash,

split_filename, get_project_data, parse_requirement,

ServerProxy)

from .version import get_scheme, UnsupportedVersionError

from .wheel import Wheel, is_compatible

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Matches a URL fragment of the form "md5=<lowercase hex digits>"; group 1
# is the digest.
MD5_HASH = re.compile('^md5=([a-f0-9]+)$')

# Pulls the charset parameter out of a Content-Type header value (group 1).
CHARSET = re.compile(r';\s*charset\s*=\s*(.*)\s*$', re.I)

# Content types which are treated as scrapable pages when fetching.
HTML_CONTENT_TYPE = re.compile('text/html|application/x(ht)?ml')

# Index URL used by get_all_distribution_names() when none is given.
DEFAULT_INDEX = 'http://python.org/pypi'

def get_all_distribution_names(url=None):
    """
    Ask an index for the names of all the distributions it knows about.

    :param url: The URL of the index. When ``None``, ``DEFAULT_INDEX`` is
                used.
    :return: A list of all known distribution names.
    """
    index_url = DEFAULT_INDEX if url is None else url
    # The query is made through a ServerProxy with a 3 second timeout.
    return ServerProxy(index_url, timeout=3.0).list_packages()

class RedirectHandler(BaseRedirectHandler):
    """
    A redirect handler which works around a bug in some Python 3.2.x
    releases.

    In those releases (e.g. 3.2.2 on Ubuntu Oneiric) the base handler bails
    out when a Location header holds something like ``/abc``, complaining
    that the scheme ``''`` is bogus, when it should really take the scheme
    from the request's URL. See Python issue #13696.
    """

    def http_error_302(self, req, fp, code, msg, headers):
        """
        Handle a redirect, first making a scheme-less target absolute by
        joining it to the URL of the request.

        Returns ``None`` if there is no usable ``location`` or ``uri``
        header; otherwise returns the result of the base class handler.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.
        present = [k for k in ('location', 'uri') if k in headers]
        target = headers[present[0]] if present else None
        if target is None:
            return
        key = present[0]
        if urlparse(target).scheme == '':
            target = urljoin(req.get_full_url(), target)
            # Write the absolute URL back so that the base class sees it.
            if hasattr(headers, 'replace_header'):
                headers.replace_header(key, target)
            else:
                headers[key] = target
        return BaseRedirectHandler.http_error_302(self, req, fp, code, msg,
                                                  headers)

    # The other redirect status codes are handled in exactly the same way.
    http_error_301 = http_error_303 = http_error_307 = http_error_302

class Locator(object):
    """
    The common base for locators - objects which locate distributions.

    Subclasses provide :meth:`_get_project` (and usually
    :meth:`get_distribution_names`). This class adds a per-project cache,
    URL scoring, analysis of candidate download URLs and :meth:`locate`.
    """
    source_extensions = ('.tar.gz', '.tar.bz2', '.tar', '.zip', '.tgz', '.tbz')
    binary_extensions = ('.egg', '.exe', '.whl')
    excluded_extensions = ('.pdf',)

    # The tags saying which wheels should be matched. The default of None
    # matches against the tags compatible with the running Python. To match
    # something else, set wheel_tags on a locator instance to a list of
    # (pyver, abi, arch) tuples.
    wheel_tags = None

    downloadable_extensions = source_extensions + ('.whl',)

    def __init__(self, scheme='default'):
        """
        Initialise an instance.

        :param scheme: The version scheme to use, needed because locators
                       look for the most recent versions. The default is
                       the current PEP-recommended scheme - use
                       ``'legacy'`` if you need to support existing
                       distributions on PyPI.
        """
        self._cache = {}
        self.scheme = scheme
        # Handlers on some platforms have bugs, so a dedicated opener is
        # built rather than relying on plain urlopen.
        self.opener = build_opener(RedirectHandler())

    def clear_cache(self):
        """Forget every cached project lookup."""
        self._cache.clear()

    def _get_scheme(self):
        return self._scheme

    def _set_scheme(self, value):
        self._scheme = value

    scheme = property(_get_scheme, _set_scheme)

    def _get_project(self, name):
        """
        Return a dictionary mapping the available versions of a project to
        Distribution instances.

        Subclasses must implement this.
        """
        raise NotImplementedError('Please implement in the subclass')

    def get_distribution_names(self):
        """
        Return the names of all the distributions this locator knows of.
        """
        raise NotImplementedError('Please implement in the subclass')

    def get_project(self, name):
        """
        Return a dictionary mapping the available versions of a project to
        Distribution instances.

        The real work is delegated to _get_project; this method only puts a
        cache in front of it. No caching is done when the cache is ``None``.
        """
        if self._cache is None:
            return self._get_project(name)
        if name in self._cache:
            return self._cache[name]
        found = self._get_project(name)
        self._cache[name] = found
        return found

    def score_url(self, url):
        """
        Compute a sortable score for an URL, used to pick the preferred URL
        for a given project release.
        """
        parts = urlparse(url)
        not_https = parts.scheme != 'https'
        on_pypi = 'pypi.python.org' in parts.netloc
        return (not_https, on_pypi, posixpath.basename(parts.path))

    def prefer_url(self, url1, url2):
        """
        Pick one of two URLs which are both candidate archives for the same
        version of a distribution (for example, .tar.gz vs. zip).

        ``url2`` wins unless ``url1`` has the strictly higher score; a
        ``url1`` of ``'UNKNOWN'`` always loses. With the current scoring
        this favours http:// URLs over https://, then archives from PyPI
        over those from elsewhere, then the archive name.
        """
        if url1 == 'UNKNOWN':
            return url2
        keep_first = self.score_url(url1) > self.score_url(url2)
        chosen = url1 if keep_first else url2
        if chosen != url2:
            logger.debug('Not replacing %r with %r', url1, url2)
        else:
            logger.debug('Replacing %r with %r', url1, url2)
        return chosen

    def split_filename(self, filename, project_name):
        """
        Try to split a filename into project name, version and Python
        version.
        """
        return split_filename(filename, project_name)

    def convert_url_to_download_info(self, url, project_name):
        """
        Decide whether a URL (typically scraped from an HTML page) is a
        candidate download URL for a project.

        If it is, a dictionary is returned with keys "name", "version",
        "filename" and "url", plus "python-version" and "md5_digest" where
        they could be determined; otherwise, None is returned.
        """
        def same_project(name1, name2):
            first, second = name1.lower(), name2.lower()
            # distribute replaces '-' by '_' in project names, so it
            # can tell where the version starts in a filename.
            return (first == second or
                    first.replace('_', '-') == second.replace('_', '-'))

        def download_url():
            # The candidate URL with its fragment removed.
            return urlunparse((scheme, netloc, origpath, params, query, ''))

        def add_digest(info):
            # An "md5=..." fragment carries the digest of the archive.
            found = MD5_HASH.match(frag)
            if found:
                info['md5_digest'] = found.group(1)

        result = None
        scheme, netloc, path, params, query, frag = urlparse(url)
        if frag.lower().startswith('egg='):
            logger.debug('%s: version hint in fragment: %r',
                         project_name, frag)
        origpath = path
        if path.endswith('/'):
            path = path[:-1]
        if path.endswith('.whl'):
            try:
                wheel = Wheel(path)
                if is_compatible(wheel, self.wheel_tags):
                    include = (project_name is None or
                               same_project(wheel.name, project_name))
                    if include:
                        result = {
                            'name': wheel.name,
                            'version': wheel.version,
                            'filename': wheel.filename,
                            'url': download_url(),
                            'python-version': ', '.join(
                                ['.'.join(list(v[2:])) for v in wheel.pyver]),
                        }
                        add_digest(result)
            except Exception:
                # Best effort: an unusable wheel path is merely reported.
                logger.warning('invalid path for wheel: %s', path)
        elif path.endswith(self.downloadable_extensions):
            path = filename = posixpath.basename(path)
            for ext in self.downloadable_extensions:
                if not path.endswith(ext):
                    continue
                path = path[:-len(ext)]
                parts = self.split_filename(path, project_name)
                if not parts:
                    logger.debug('No match for project/version: %s', path)
                else:
                    name, version, pyver = parts
                    if not project_name or same_project(project_name, name):
                        result = {
                            'name': name,
                            'version': version,
                            'filename': filename,
                            'url': download_url(),
                        }
                        if pyver:
                            result['python-version'] = pyver
                        add_digest(result)
                # Only the first extension which matches is considered.
                break
        return result

    def _update_version_data(self, result, info):
        """
        Merge a dictionary describing one specific version into a result
        dictionary (the final result from _get_project).

        ``info`` typically holds information gleaned from a filename or URL
        for an archive of the distribution; its "name" and "version" keys
        are removed in the process.
        """
        name = info.pop('name')
        version = info.pop('version')
        if version not in result:
            dist = make_dist(name, version, scheme=self.scheme)
        else:
            dist = result[version]
        md = dist.metadata
        dist.md5_digest = info.get('md5_digest')
        if 'python-version' in info:
            md['Requires-Python'] = info['python-version']
        current_url = md['Download-URL']
        if current_url != info['url']:
            md['Download-URL'] = self.prefer_url(current_url, info['url'])
        dist.locator = self
        result[version] = dist

    def locate(self, requirement, prereleases=False):
        """
        Find the most recent distribution which satisfies the given
        requirement.

        :param requirement: A requirement of the form 'foo (1.0)' or perhaps
                            'foo (>= 1.0, < 2.0, != 1.3)'
        :param prereleases: If ``True``, pre-release versions may be
                            located. Otherwise, pre-release versions are
                            not returned.
        :return: A :class:`Distribution` instance, or ``None`` if no such
                 distribution could be located.
        :raises DistlibException: if the requirement cannot be parsed.
        """
        result = None
        scheme = get_scheme(self.scheme)
        r = parse_requirement(requirement)
        if r is None:
            raise DistlibException('Not a valid requirement: %r' % requirement)
        if r.extras:
            # lose the extras part of the requirement
            requirement = r.requirement
        matcher = scheme.matcher(requirement)
        vcls = matcher.version_class
        logger.debug('matcher: %s (%s)', matcher, type(matcher).__name__)
        versions = self.get_project(matcher.name)
        if versions:
            candidates = []
            for k in versions:
                # sometimes, versions are invalid
                try:
                    if not matcher.match(k):
                        logger.debug('%s did not match %r', matcher, k)
                    elif prereleases or not vcls(k).is_prerelease:
                        candidates.append(k)
                    else:
                        logger.debug('skipping pre-release version %s', k)
                except Exception:
                    # A version which cannot be matched is left out.
                    logger.warning('error matching %s with %r', matcher, k)
            if len(candidates) > 1:
                candidates.sort(key=scheme.key)
            if candidates:
                logger.debug('sorted list: %s', candidates)
                result = versions[candidates[-1]]
        if result and r.extras:
            result.extras = r.extras
        return result

337

338

339

class PyPIRPCLocator(Locator):
    """
    A locator which finds distributions via XML-RPC. As a consequence it
    cannot be used with simple mirrors (that only mirror file content).
    """
    def __init__(self, url, **kwargs):
        """
        Initialise an instance.

        :param url: The URL to use for XML-RPC.
        :param kwargs: Passed to the superclass constructor.
        """
        super(PyPIRPCLocator, self).__init__(**kwargs)
        self.base_url = url
        self.client = ServerProxy(url, timeout=3.0)

    def get_distribution_names(self):
        """
        Return the names of all the distributions this locator knows of,
        as a set.
        """
        names = self.client.list_packages()
        return set(names)

    def _get_project(self, name):
        """
        Return a dictionary mapping versions of the named project to
        Distribution instances, built from the server's release data.

        A release for which the server reports no URLs is left out.
        """
        found = {}
        rpc = self.client
        for version in rpc.package_releases(name, True):
            urls = rpc.release_urls(name, version)
            data = rpc.release_data(name, version)
            metadata = Metadata(scheme=self.scheme)
            metadata.update(data)
            dist = Distribution(metadata)
            if not urls:
                continue
            # Only the first URL reported for the release is used.
            first = urls[0]
            metadata['Download-URL'] = first['url']
            dist.md5_digest = first.get('md5_digest')
            dist.locator = self
            found[version] = dist
        return found

377

378

class PyPIJSONLocator(Locator):
    """
    A locator which uses PyPI's JSON interface. It's very limited in
    functionality and probably not worth using.
    """
    def __init__(self, url, **kwargs):
        """
        Initialise an instance.

        :param url: The base URL of the JSON interface.
        :param kwargs: Passed to the superclass constructor.
        """
        super(PyPIJSONLocator, self).__init__(**kwargs)
        self.base_url = ensure_slash(url)

    def get_distribution_names(self):
        """
        Return the names of all the distributions this locator knows of.

        This locator cannot provide them, so this always raises
        NotImplementedError.
        """
        raise NotImplementedError('Not available from this locator')

    def _get_project(self, name):
        """
        Fetch ``<base_url><name>/json`` and return a dictionary mapping the
        version it describes to a Distribution instance.

        Any failure is logged and an empty dictionary is returned.
        """
        found = {}
        json_url = urljoin(self.base_url, '%s/json' % quote(name))
        try:
            response = self.opener.open(json_url)
            text = response.read().decode()  # for now
            payload = json.loads(text)
            md = Metadata(scheme=self.scheme)
            md.update(payload['info'])
            dist = Distribution(md)
            urls = payload['urls']
            if urls:
                # Only the first URL listed is used.
                first = urls[0]
                md['Download-URL'] = first['url']
                dist.md5_digest = first.get('md5_digest')
                dist.locator = self
                found[md.version] = dist
        except Exception as e:
            logger.exception('JSON fetch failed: %s', e)
        return found

413

414

415

class Page(object):
    """
    This class represents a scraped HTML page.
    """
    # The following slightly hairy-looking regex just looks for the contents of
    # an anchor link, which has an attribute "href" either immediately preceded
    # or immediately followed by a "rel" attribute. The attribute values can be
    # declared with double quotes, single quotes or no quotes - which leads to
    # the length of the expression.
    #
    # It is a raw string so that the regex escapes reach the re module
    # untouched, rather than being invalid string escape sequences.
    _href = re.compile(r"""
(rel\s*=\s*(?:"(?P<rel1>[^"]*)"|'(?P<rel2>[^']*)'|(?P<rel3>[^>\s\n]*))\s+)?
href\s*=\s*(?:"(?P<url1>[^"]*)"|'(?P<url2>[^']*)'|(?P<url3>[^>\s\n]*))
(\s+rel\s*=\s*(?:"(?P<rel4>[^"]*)"|'(?P<rel5>[^']*)'|(?P<rel6>[^>\s\n]*)))?
""", re.I | re.S | re.X)
    # Finds the target of a <base href=...> element, if the page has one.
    _base = re.compile(r"""<base\s+href\s*=\s*['"]?([^'">]+)""", re.I | re.S)

    def __init__(self, data, url):
        """
        Initialise an instance with the Unicode page contents and the URL they
        came from.

        :param data: The text of the page.
        :param url: The URL the page was fetched from. It is also the base
                    for relative links, unless the page declares its own
                    base URL.
        """
        self.data = data
        self.base_url = self.url = url
        m = self._base.search(self.data)
        if m:
            self.base_url = m.group(1)

    # Matches any single character which is percent-encoded in a link URL.
    _clean_re = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I)

    @cached_property
    def links(self):
        """
        Return the URLs of all the links on a page together with information
        about their "rel" attribute, for determining which ones to treat as
        downloads and which ones to queue for further scraping.

        :return: A list of (url, rel) tuples, without duplicates, sorted by
                 URL in descending order. ``rel`` is ``''`` for a link
                 which has no "rel" attribute.
        """
        def percent_encode(m):
            # Zero-padded so that a character below 0x10 still gives two
            # hex digits (e.g. %01 rather than "% 1").
            return '%%%02x' % ord(m.group(0))

        result = set()
        for match in self._href.finditer(self.data):
            # Groups which took no part in the match come back as ''.
            d = match.groupdict('')
            rel = (d['rel1'] or d['rel2'] or d['rel3'] or
                   d['rel4'] or d['rel5'] or d['rel6'])
            url = d['url1'] or d['url2'] or d['url3']
            url = urljoin(self.base_url, url)
            url = unescape(url)
            url = self._clean_re.sub(percent_encode, url)
            result.add((url, rel))
        # We sort the result, hoping to bring the most recent versions
        # to the front
        return sorted(result, key=lambda t: t[0], reverse=True)

471

472

473

class SimpleScrapingLocator(Locator):
    """
    A locator which scrapes HTML pages to locate downloads for a distribution.
    This runs multiple threads to do the I/O; performance is at least as good
    as pip's PackageFinder, which works in an analogous fashion.
    """

    # These are used to deal with various Content-Encoding schemes. Each
    # value takes the encoded bytes and returns the decoded bytes.
    decoders = {
        'deflate': zlib.decompress,
        'gzip': lambda b: gzip.GzipFile(fileobj=BytesIO(b)).read(),
        'none': lambda b: b,
    }

    def __init__(self, url, timeout=None, num_workers=10, **kwargs):
        """
        Initialise an instance.

        :param url: The root URL to use for scraping.
        :param timeout: The timeout, in seconds, to be applied to requests.
                        This defaults to ``None`` (no timeout specified).
        :param num_workers: The number of worker threads you want to do I/O,
                            This defaults to 10.
        :param kwargs: Passed to the superclass.
        """
        super(SimpleScrapingLocator, self).__init__(**kwargs)
        self.base_url = ensure_slash(url)
        self.timeout = timeout
        self._page_cache = {}
        self._seen = set()
        self._to_fetch = queue.Queue()
        self._bad_hosts = set()
        self.skip_externals = False
        self.num_workers = num_workers
        self._lock = threading.RLock()

    def _prepare_threads(self):
        """
        Threads are created only when get_project is called, and terminate
        before it returns. They are there primarily to parallelise I/O (i.e.
        fetching web pages).
        """
        self._threads = []
        for i in range(self.num_workers):
            t = threading.Thread(target=self._fetch)
            # The attribute is used because Thread.setDaemon() is
            # deprecated.
            t.daemon = True
            t.start()
            self._threads.append(t)

    def _wait_threads(self):
        """
        Tell all the threads to terminate (by sending a sentinel value) and
        wait for them to do so.
        """
        # Note that you need two loops, since you can't say which
        # thread will get each sentinel
        for t in self._threads:
            self._to_fetch.put(None)    # sentinel
        for t in self._threads:
            t.join()
        self._threads = []

    def _get_project(self, name):
        """
        Scrape the pages reachable from ``<base_url><name>/`` and return a
        dictionary mapping the versions found to Distribution instances.

        The set of seen links and the page cache are emptied first. Worker
        threads exist only for the duration of the call.
        """
        self.result = result = {}
        self.project_name = name
        url = urljoin(self.base_url, '%s/' % quote(name))
        self._seen.clear()
        self._page_cache.clear()
        self._prepare_threads()
        try:
            logger.debug('Queueing %s', url)
            self._to_fetch.put(url)
            # Returns once every queued URL has been marked as done.
            self._to_fetch.join()
        finally:
            self._wait_threads()
        del self.result
        return result

    platform_dependent = re.compile(r'\b(linux-(i\d86|x86_64|arm\w+)|'
                                    r'win(32|-amd64)|macosx-?\d+)\b', re.I)

    def _is_platform_dependent(self, url):
        """
        Does an URL refer to a platform-specific download?

        Returns the regex match (truthy) if so, else ``None``.
        """
        return self.platform_dependent.search(url)

    def _process_download(self, url):
        """
        See if an URL is a suitable download for a project.

        If it is, register information in the result dictionary (for
        _get_project) about the specific version it's for.

        Note that the return value isn't actually used other than as a boolean
        value.
        """
        if self._is_platform_dependent(url):
            info = None
        else:
            info = self.convert_url_to_download_info(url, self.project_name)
        logger.debug('process_download: %s -> %s', url, info)
        if info:
            with self._lock:    # needed because self.result is shared
                self._update_version_data(self.result, info)
        return info

    def _should_queue(self, link, referrer, rel):
        """
        Determine whether a link URL from a referring page and with a
        particular "rel" attribute should be queued for scraping.
        """
        scheme, netloc, path, _, _, _ = urlparse(link)
        if path.endswith(self.source_extensions + self.binary_extensions +
                         self.excluded_extensions):
            result = False
        elif self.skip_externals and not link.startswith(self.base_url):
            result = False
        elif not referrer.startswith(self.base_url):
            result = False
        elif rel not in ('homepage', 'download'):
            result = False
        elif scheme not in ('http', 'https', 'ftp'):
            result = False
        elif self._is_platform_dependent(link):
            result = False
        else:
            host = netloc.split(':', 1)[0]
            if host.lower() == 'localhost':
                result = False
            else:
                result = True
        logger.debug('should_queue: %s (%s) from %s -> %s', link, rel,
                     referrer, result)
        return result

    def _fetch(self):
        """
        Get a URL to fetch from the work queue, get the HTML page, examine its
        links for download candidates and candidates for further scraping.

        This is a handy method to run in a thread. It loops until it takes
        a false value (the sentinel) from the queue.
        """
        while True:
            url = self._to_fetch.get()
            try:
                if url:
                    page = self.get_page(url)
                    if page is None:    # e.g. after an error
                        continue
                    for link, rel in page.links:
                        if link not in self._seen:
                            self._seen.add(link)
                            if (not self._process_download(link) and
                                    self._should_queue(link, url, rel)):
                                logger.debug('Queueing %s from %s', link, url)
                                self._to_fetch.put(link)
            finally:
                # always do this, to avoid hangs :-)
                self._to_fetch.task_done()
            if not url:
                break

    def get_page(self, url):
        """
        Get the HTML for an URL, possibly from an in-memory cache.

        Failures are cached too (as ``None``), and a host for which a
        URLError occurred is not contacted again. The cache is emptied at
        the start of each _get_project call but is not otherwise cleared,
        so its contents may go stale on a long-lived locator instance.

        :return: A :class:`Page`, or ``None`` if the URL could not be
                 fetched or its content type is not an HTML/XML one.
        """
        # http://peak.telecommunity.com/DevCenter/EasyInstall#package-index-api
        scheme, netloc, path, _, _, _ = urlparse(url)
        if scheme == 'file' and os.path.isdir(url2pathname(path)):
            url = urljoin(ensure_slash(url), 'index.html')

        if url in self._page_cache:
            result = self._page_cache[url]
            logger.debug('Returning %s from cache: %s', url, result)
        else:
            host = netloc.split(':', 1)[0]
            result = None
            if host in self._bad_hosts:
                logger.debug('Skipping %s due to bad host %s', url, host)
            else:
                req = Request(url, headers={'Accept-encoding': 'identity'})
                try:
                    logger.debug('Fetching %s', url)
                    resp = self.opener.open(req, timeout=self.timeout)
                    logger.debug('Fetched %s', url)
                    headers = resp.info()
                    content_type = headers.get('Content-Type', '')
                    if HTML_CONTENT_TYPE.match(content_type):
                        final_url = resp.geturl()
                        data = resp.read()
                        encoding = headers.get('Content-Encoding')
                        if encoding:
                            decoder = self.decoders[encoding]   # fail if not found
                            data = decoder(data)
                        encoding = 'utf-8'
                        m = CHARSET.search(content_type)
                        if m:
                            encoding = m.group(1)
                        try:
                            data = data.decode(encoding)
                        except (UnicodeError, LookupError):
                            # LookupError covers a charset name which
                            # Python has no codec for.
                            data = data.decode('latin-1')    # fallback
                        result = Page(data, final_url)
                        # Cached under the post-redirect URL as well.
                        self._page_cache[final_url] = result
                except HTTPError as e:
                    if e.code != 404:
                        logger.exception('Fetch failed: %s: %s', url, e)
                except URLError as e:
                    logger.exception('Fetch failed: %s: %s', url, e)
                    with self._lock:
                        self._bad_hosts.add(host)
                except Exception as e:
                    logger.exception('Fetch failed: %s: %s', url, e)
                finally:
                    self._page_cache[url] = result   # even if None (failure)
        return result

    _distname_re = re.compile('<a href=[^>]*>([^<]+)<')

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.

        The names are the link texts found on the page at the base URL.

        :raises DistlibException: if that page cannot be obtained.
        """
        result = set()
        page = self.get_page(self.base_url)
        if not page:
            raise DistlibException('Unable to get %s' % self.base_url)
        for match in self._distname_re.finditer(page.data):
            result.add(match.group(1))
        return result

708

709

class DirectoryLocator(Locator):
    """
    A locator which finds distributions in a directory tree.
    """

    def __init__(self, path, **kwargs):
        """
        Initialise an instance.

        :param path: The root of the directory tree to search.
        :param kwargs: Passed to the superclass constructor,
                       except for:
                       * recursive - if True (the default), subdirectories are
                         recursed into. If False, only the top-level directory
                         is searched,
        :raises DistlibException: if ``path`` is not a directory.
        """
        self.recursive = kwargs.pop('recursive', True)
        super(DirectoryLocator, self).__init__(**kwargs)
        base = os.path.abspath(path)
        if not os.path.isdir(base):
            raise DistlibException('Not a directory: %r' % base)
        self.base_dir = base

    def should_include(self, filename, parent):
        """
        Say whether a filename should be considered as a candidate for a
        distribution archive. The directory containing the file is passed
        in as well, though the current implementation ignores it.
        """
        return filename.endswith(self.downloadable_extensions)

    def _iter_download_info(self, project_name):
        """
        Walk the tree and yield the download info dictionary for each
        candidate archive which belongs to ``project_name`` (``None``
        accepts any project).
        """
        for root, dirs, files in os.walk(self.base_dir):
            for candidate in files:
                if not self.should_include(candidate, root):
                    continue
                full_path = os.path.abspath(os.path.join(root, candidate))
                url = urlunparse(('file', '', pathname2url(full_path),
                                  '', '', ''))
                info = self.convert_url_to_download_info(url, project_name)
                if info:
                    yield info
            if not self.recursive:
                # The first directory walked is the top-level one.
                break

    def _get_project(self, name):
        """
        Return a dictionary mapping the versions of the named project found
        in the tree to Distribution instances.
        """
        found = {}
        for info in self._iter_download_info(name):
            self._update_version_data(found, info)
        return found

    def get_distribution_names(self):
        """
        Return the names of all the distributions this locator knows of.
        """
        names = set()
        for info in self._iter_download_info(None):
            names.add(info['name'])
        return names

773

774

class JSONLocator(Locator):
    """
    A locator which relies on special extended metadata (not available on
    PyPI) and which underpins performant dependency resolution in distlib.
    Other locators require archive downloads before dependencies can be
    determined! As you might imagine, that can be slow.
    """
    def get_distribution_names(self):
        """
        Return the names of all the distributions this locator knows of.

        This locator cannot provide them, so this always raises
        NotImplementedError.
        """
        raise NotImplementedError('Not available from this locator')

    def _get_project(self, name):
        """
        Return a dictionary mapping versions of the named project to
        Distribution instances, built from the project's extended metadata.

        Only files recorded as source sdists are considered.
        """
        found = {}
        data = get_project_data(name)
        if not data:
            return found
        for entry in data.get('files', []):
            if entry['ptype'] == 'sdist' and entry['pyversion'] == 'source':
                dist = make_dist(data['name'], entry['version'],
                                 scheme=self.scheme)
                md = dist.metadata
                md['Download-URL'] = entry['url']
                dist.md5_digest = entry.get('digest')
                md.dependencies = entry.get('requirements', {})
                dist.exports = entry.get('exports', {})
                found[dist.version] = dist
        return found

803

804

class DistPathLocator(Locator):
    """
    A locator which finds installed distributions in a path. It can be
    useful for adding to an :class:`AggregatingLocator`.
    """
    def __init__(self, distpath, **kwargs):
        """
        Initialise an instance.

        :param distpath: A :class:`DistributionPath` instance to search.
        :param kwargs: Passed to the superclass constructor.
        """
        super(DistPathLocator, self).__init__(**kwargs)
        assert isinstance(distpath, DistributionPath)
        self.distpath = distpath

    def _get_project(self, name):
        """
        Return a dictionary holding the distribution the path has under
        this name, keyed by its version, or an empty dictionary if there
        is none.
        """
        dist = self.distpath.get_distribution(name)
        return {} if dist is None else {dist.version: dist}

826

827

828

class AggregatingLocator(Locator):
    """
    Chain and/or merge the results of a list of locators.
    """

    def __init__(self, *locators, **kwargs):
        """
        Initialise an instance.

        :param locators: The list of locators to search.
        :param kwargs: Passed to the superclass constructor,
                       except for:
                       * merge - if False (the default), the first
                         successful search from any of the locators is
                         returned. If True, the results from all locators
                         are merged (this can be slow).
        """
        # Both attributes are set before the superclass constructor runs,
        # as in the original ordering. NOTE(review): the scheme setter
        # below iterates self.locators, so presumably the base constructor
        # may assign ``scheme`` - confirm before reordering.
        self.locators = locators
        self.merge = kwargs.pop('merge', False)
        super(AggregatingLocator, self).__init__(**kwargs)

    def clear_cache(self):
        """
        Clear this locator's cache, then the cache of each wrapped locator.
        """
        super(AggregatingLocator, self).clear_cache()
        for child in self.locators:
            child.clear_cache()

    def _set_scheme(self, value):
        """
        Store the scheme and propagate it to every wrapped locator.
        """
        self._scheme = value
        for child in self.locators:
            child.scheme = value

    # Reuse the base class getter; only the setter is specialised.
    scheme = property(Locator.scheme.fget, _set_scheme)

    def _get_project(self, name):
        """
        Query the wrapped locators, in order, for the named project.

        When not merging, the first non-empty result is returned as is.
        When merging, all non-empty results are folded into one dictionary,
        with later locators overriding earlier ones for the same key.
        """
        combined = {}
        for child in self.locators:
            found = child.get_project(name)
            if not found:
                continue
            if not self.merge:
                return found
            combined.update(found)
        return combined

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.

        This is the union of the names reported by the wrapped locators;
        locators raising ``NotImplementedError`` are skipped.
        """
        names = set()
        for child in self.locators:
            try:
                child_names = child.get_distribution_names()
            except NotImplementedError:
                continue
            names |= child_names
        return names

883

884

885

# Module-level default locator. It is an AggregatingLocator with ``merge``
# left at its default of False, so the locators are consulted in the order
# given and the first non-empty result wins: JSONLocator first, then the
# scraper for the PyPI "simple" index.
# NOTE(review): timeout=3.0 is presumably in seconds - confirm against
# SimpleScrapingLocator.
default_locator = AggregatingLocator(
                    JSONLocator(),
                    SimpleScrapingLocator('https://pypi.python.org/simple/',
                                          timeout=3.0))

# Expose the default locator's ``locate`` method as a module-level callable.
locate = default_locator.locate

891

892

class DependencyFinder(object):
    """
    Locate dependencies for distributions.
    """

    def __init__(self, locator=None):
        """
        Initialise an instance, using the specified locator
        to locate distributions.

        :param locator: The locator to use. If omitted (or falsy), the
                        module-level ``default_locator`` is used.
        """
        self.locator = locator or default_locator
        self.scheme = get_scheme(self.locator.scheme)

    def _get_name_and_version(self, p):
        """
        A utility method used to get name and version from e.g. a
        Provides-Dist value.

        :param p: A value in a form foo (1.0)
        :return: The lower-cased name and the version as a tuple. The
                 version is ``None`` if ``p`` has no version part.
        :raises DistlibException: if the version part is not a non-empty
                                  string enclosed in parentheses.
        """
        comps = p.strip().rsplit(' ', 1)
        name = comps[0]
        version = None
        if len(comps) == 2:
            version = comps[1]
            if len(version) < 3 or version[0] != '(' or version[-1] != ')':
                raise DistlibException('Ill-formed provides field: %r' % p)
            version = version[1:-1]  # trim off parentheses
        # Name in lower case for case-insensitivity
        return name.lower(), version

    def add_distribution(self, dist):
        """
        Add a distribution to the finder. This will update internal
        information about who provides what.

        :param dist: The distribution to add.
        """
        logger.debug('adding distribution %s', dist)
        name = dist.key
        self.dists_by_name[name] = dist
        self.dists[(name, dist.version)] = dist
        for p in dist.provides:
            name, version = self._get_name_and_version(p)
            logger.debug('Add to provided: %s, %s, %s', name, version, dist)
            self.provided.setdefault(name, set()).add((version, dist))

    def remove_distribution(self, dist):
        """
        Remove a distribution from the finder. This will update internal
        information about who provides what.

        :param dist: The distribution to remove.
        """
        logger.debug('removing distribution %s', dist)
        name = dist.key
        del self.dists_by_name[name]
        del self.dists[(name, dist.version)]
        for p in dist.provides:
            name, version = self._get_name_and_version(p)
            logger.debug('Remove from provided: %s, %s, %s', name, version,
                         dist)
            s = self.provided[name]
            s.remove((version, dist))
            # Drop the entry once nothing provides this name any more.
            if not s:
                del self.provided[name]

    def get_matcher(self, reqt):
        """
        Get a version matcher for a requirement.

        :param reqt: The requirement
        :type reqt: str
        :return: A version matcher (an instance of
                 :class:`distlib.version.Matcher`).
        """
        try:
            matcher = self.scheme.matcher(reqt)
        except UnsupportedVersionError:
            # XXX compat-mode if cannot read the version: fall back to a
            # matcher built from the first word of the requirement only.
            name = reqt.split()[0]
            matcher = self.scheme.matcher(name)
        return matcher

    def find_providers(self, reqt):
        """
        Find a distribution which can fulfill a requirement.

        :param reqt: The requirement.
        :type reqt: str
        :return: A set holding a distribution which can fulfill the
                 requirement. The search stops at the first match, so the
                 set has at most one element; it is empty if nothing
                 already known to the finder matches.
        """
        matcher = self.get_matcher(reqt)
        name = matcher.key   # case-insensitive
        result = set()
        provided = self.provided
        if name in provided:
            for version, provider in provided[name]:
                try:
                    match = matcher.match(version)
                except UnsupportedVersionError:
                    # A version the scheme cannot parse never matches.
                    match = False

                if match:
                    result.add(provider)
                    break
        return result

    def try_to_replace(self, provider, other, problems):
        """
        Attempt to replace one provider with another. This is typically used
        when resolving dependencies from multiple sources, e.g. A requires
        (B >= 1.0) while C requires (B >= 1.1).

        For successful replacement, ``provider`` must meet all the
        requirements which ``other`` fulfills.

        :param provider: The provider we are trying to replace with.
        :param other: The provider we're trying to replace.
        :param problems: A set to which, if False is returned, a description
                         of what prevented replacement is added. This is
                         currently a tuple of the literal string
                         'cantreplace', ``provider``, ``other`` and a
                         frozenset of the requirements that ``provider``
                         couldn't fulfill.
        :return: True if we can replace ``other`` with ``provider``, else
                 False.
        """
        rlist = self.reqts[other]
        unmatched = set()
        for s in rlist:
            matcher = self.get_matcher(s)
            if not matcher.match(provider.version):
                unmatched.add(s)
        if unmatched:
            # can't replace other with provider.
            # The tuple is stored in a set, so every element must be
            # hashable: a plain (mutable) set here would raise TypeError.
            problems.add(('cantreplace', provider, other,
                          frozenset(unmatched)))
            result = False
        else:
            # can replace other with provider: move the requirements
            # recorded against other over to provider.
            self.remove_distribution(other)
            del self.reqts[other]
            for s in rlist:
                self.reqts.setdefault(provider, set()).add(s)
            self.add_distribution(provider)
            result = True
        return result

    def find(self, requirement, tests=False, prereleases=False):
        """
        Find a distribution matching requirement and all distributions
        it depends on. Use the ``tests`` argument to determine whether
        distributions used only for testing should be included in the
        results. Allow ``requirement`` to be either a :class:`Distribution`
        instance or a string expressing a requirement. If ``prereleases``
        is True, allow pre-release versions to be returned - otherwise,
        don't.

        Return a set of :class:`Distribution` instances and a set of
        problems.

        The distribution found for (or passed as) ``requirement`` has its
        :attr:`requested` attribute set to ``True``. Every returned
        distribution has its :attr:`build_time_dependency` attribute set:
        ``False`` for ``requirement`` itself and for distributions reached
        from it through a chain of ``requires`` entries, ``True`` for all
        others.

        Each problem is a tuple: either the string ``'unsatisfied'`` and
        the requirement which couldn't be satisfied by any distribution
        known to the locator, or a ``'cantreplace'`` tuple as described in
        :meth:`try_to_replace`.

        :raises DistlibException: if ``requirement`` is not a
                                  :class:`Distribution` and cannot be
                                  located.
        """

        # Each call starts from a clean slate.
        self.provided = {}
        self.dists = {}
        self.dists_by_name = {}
        self.reqts = {}

        if isinstance(requirement, Distribution):
            dist = odist = requirement
            logger.debug('passed %s as requirement', odist)
        else:
            dist = odist = self.locator.locate(requirement,
                                               prereleases=prereleases)
            if dist is None:
                raise DistlibException('Unable to locate %r' % requirement)
            logger.debug('located %s', odist)
        dist.requested = True
        problems = set()
        todo = set([dist])
        install_dists = set([odist])
        while todo:
            dist = todo.pop()
            name = dist.key     # case-insensitive
            if name not in self.dists_by_name:
                self.add_distribution(dist)
            else:
                other = self.dists_by_name[name]
                if other != dist:
                    self.try_to_replace(dist, other, problems)

            ireqts = dist.requires
            sreqts = dist.setup_requires
            ereqts = set()
            # Test requirements are only considered, when asked for, for
            # distributions which are going to be installed.
            if not tests or dist not in install_dists:
                treqts = set()
            else:
                treqts = dist.test_requires
            all_reqts = ireqts | sreqts | treqts | ereqts
            for r in all_reqts:
                providers = self.find_providers(r)
                if not providers:
                    logger.debug('No providers found for %r', r)
                    provider = self.locator.locate(r, prereleases=prereleases)
                    if provider is None:
                        logger.debug('Cannot satisfy %r', r)
                        problems.add(('unsatisfied', r))
                    else:
                        n, v = provider.key, provider.version
                        if (n, v) not in self.dists:
                            todo.add(provider)
                        providers.add(provider)
                        if r in ireqts and dist in install_dists:
                            install_dists.add(provider)
                            logger.debug('Adding %s to install_dists',
                                         provider.name_and_version)
                for p in providers:
                    name = p.key
                    if name not in self.dists_by_name:
                        self.reqts.setdefault(p, set()).add(r)
                    else:
                        other = self.dists_by_name[name]
                        if other != p:
                            # see if other can be replaced by p
                            self.try_to_replace(p, other, problems)

        dists = set(self.dists.values())
        for dist in dists:
            dist.build_time_dependency = dist not in install_dists
            if dist.build_time_dependency:
                logger.debug('%s is a build-time dependency only.',
                             dist.name_and_version)
        logger.debug('find done for %s', odist)
        return dists, problems

Older »