~j5-dev/+junk/setuptools-0.6c11

« back to all changes in this revision

Viewing changes to setuptools/package_index.py

Committer: Bazaar Package Importer
Author(s): Matthias Klose
Date: 2007-07-25 02:11:49 UTC
mfrom: (1.1.7 upstream)
Revision ID: james.westby@ubuntu.com-20070725021149-uf4ym3gcbxheh0xu

Tags: 0.6c6-1

http://bugs.debian.org/433556

New upstream version (release candidate 6). Closes: #433556.

Show diffs side-by-side

added added

removed removed

setuptools/package_index.py

132

rels = map(str.strip, rel.lower().split(','))

133

if 'homepage' in rels or 'download' in rels:

134

for match in HREF.finditer(tag):

135

yield urlparse.urljoin(url, match.group(1))

135

yield urlparse.urljoin(url, htmldecode(match.group(1)))

136

137

for tag in ("<th>Home Page", "<th>Download URL"):

138

pos = page.find(tag)

139

if pos!=-1:

140

match = HREF.search(page,pos)

141

if match:

142

yield urlparse.urljoin(url, match.group(1))

142

yield urlparse.urljoin(url, htmldecode(match.group(1)))

143

144

user_agent = "Python-urllib/%s setuptools/%s" % (

145

urllib2.__version__, require('setuptools')[0].version

166

"""Evaluate a URL as a possible download, and maybe retrieve it"""

167

if url in self.scanned_urls and not retrieve:

168

return

169

170

169

self.scanned_urls[url] = True

171

170

if not URL_SCHEME(url):

172

171

self.process_filename(url)

187

186

return

188

187

189

188

self.info("Reading %s", url)

190

f = self.open_url(url)

189

f = self.open_url(url, "Download error: %s -- Some packages may not be found!")

190

if f is None: return

191

self.fetched_urls[url] = self.fetched_urls[f.url] = True

192

193

if 'html' not in f.headers.get('content-type', '').lower():

200

if url.startswith(self.index_url) and getattr(f,'code',None)!=404:

201

page = self.process_index(url, page)

202

for match in HREF.finditer(page):

203

link = urlparse.urljoin(base, match.group(1))

203

link = urlparse.urljoin(base, htmldecode(match.group(1)))

204

self.process_url(link)

205

206

def process_filename(self, fn, nested=False):

228

else:

229

self.warn(msg, url)

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

231

def scan_egg_links(self, search_path):
    """Look through every directory in `search_path` for ``.egg-link`` files.

    Entries of `search_path` that are not directories are ignored.  Each
    directory entry whose name ends in ``.egg-link`` is passed, together
    with its containing directory, to ``self.scan_egg_link``.
    """
    for location in search_path:
        if not os.path.isdir(location):
            continue
        for name in os.listdir(location):
            if name.endswith('.egg-link'):
                self.scan_egg_link(location, name)

237

238

def scan_egg_link(self, path, entry):
    """Register the distribution(s) referenced by one ``.egg-link`` file.

    `path` is the directory containing the link file and `entry` is the
    link file's name.  The file is read, each line is stripped, and blank
    lines are discarded.  Only files with exactly two remaining lines are
    acted on: every distribution found under ``os.path.join(path,
    lines[0])`` has its location set to ``os.path.join(path, *lines)``, is
    marked with ``SOURCE_DIST`` precedence, and is added to this index.
    Files with any other number of non-blank lines are silently ignored.
    """
    # Close the file explicitly instead of relying on garbage collection
    # to release the handle.
    link_file = open(os.path.join(path, entry))
    try:
        stripped = [line.strip() for line in link_file]
    finally:
        link_file.close()
    lines = [line for line in stripped if line]

    if len(lines) == 2:
        for dist in find_distributions(os.path.join(path, lines[0])):
            dist.location = os.path.join(path, *lines)
            dist.precedence = SOURCE_DIST
            self.add(dist)

245

246

247

def process_index(self,url,page):

262

263

# process an index page into the package-page index

264

for match in HREF.finditer(page):

265

scan( urlparse.urljoin(url, match.group(1)) )

265

scan( urlparse.urljoin(url, htmldecode(match.group(1))) )

266

267

pkg, ver = scan(url) # ensure this page is in the page index

268

if pkg:

572

pass # no-op

573

574

575

def open_url(self, url):

575

def open_url(self, url, warning=None):

576

if url.startswith('file:'):

577

return local_open(url)

578

try:

580

except urllib2.HTTPError, v:

581

return v

582

except urllib2.URLError, v:

583

raise DistutilsError("Download error: %s" % v.reason)

583

if warning: self.warn(warning, v.reason)

584

else:

585

raise DistutilsError("Download error for %s: %s"

586

% (url, v.reason))

584

587

585

588

def _download_url(self, scheme, url, tmpdir):

586

589

# Determine download filename

608

611

self.url_ok(url, True) # raises error if not allowed

609

612

return self._attempt_download(url, filename)

610

613

614

615

611

616

def scan_url(self, url):
    """Process `url` via ``process_url``, passing ``True`` as its second
    argument."""
    self.process_url(url, True)

613

618

614

619

615

616

620

def _attempt_download(self, url, filename):

617

621

headers = self._download_to(url, filename)

618

622

if 'html' in headers['content-type'].lower():

650

654

def warn(self, msg, *args):
    """Forward `msg` and its format arguments to ``log.warn``."""
    log.warn(msg, *args)

652

656

657

# This pattern matches a character entity reference (a decimal numeric

658

# reference, a hexadecimal numeric reference, or a named reference).
# The trailing ";" is optional.  Group 1 is the reference body without
# the leading "&" and trailing ";" (e.g. "#65", "#x41" or "amp").

659

entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub

660

661

def uchr(c):
    """Turn an integer code point into a one-character string.

    Values that are not ``int`` instances are returned unchanged.  Code
    points above 255 are converted with ``unichr``; all others with
    ``chr``.
    """
    if isinstance(c, int):
        if c > 255:
            return unichr(c)
        return chr(c)
    return c

666

667

def decode_entity(match):
    """Return the replacement text for one entity-reference regex match.

    `match` must expose the reference body as ``group(1)`` (for example
    ``#65``, ``#x41`` or ``amp``) and the full matched text as
    ``group(0)``.  Numeric references are parsed as decimal or hexadecimal
    code points; named references are looked up in
    ``htmlentitydefs.name2codepoint``.  The resulting code point is
    converted with ``uchr``.

    The original matched text is returned unchanged when the name is
    unknown, or when a numeric reference is larger than
    ``sys.maxunicode`` and therefore cannot be represented as a character
    on this interpreter.
    """
    import sys

    what = match.group(1)
    if what.startswith('#x'):
        code = int(what[2:], 16)
    elif what.startswith('#'):
        code = int(what[1:])
    else:
        from htmlentitydefs import name2codepoint
        code = name2codepoint.get(what)
        if code is None:
            # Unknown entity name: leave the reference exactly as written.
            return match.group(0)

    if code > sys.maxunicode:
        # unichr() would raise ValueError here (and a Python 2 long would
        # slip through uchr() as a non-string), aborting the whole
        # substitution; keep the undecodable reference verbatim instead.
        return match.group(0)
    return uchr(code)

677

678

def htmldecode(text):
    """Return `text` with each entity reference matched by ``entity_sub``
    replaced by the result of ``decode_entity``."""
    decoded = entity_sub(decode_entity, text)
    return decoded

681

682

683

684

685

686

687

688

689

690

691

692

693

694

653

695

654

696

655

697

Older »