1
"""Integration with Python standard library module urllib2.
3
Also includes a redirection bugfix, support for parsing HTML HEAD blocks for
4
the META HTTP-EQUIV tag contents, and following Refresh header redirects.
6
Copyright 2002-2003 John J Lee <jjl@pobox.com>
8
This code is free software; you can redistribute it and/or modify it under
9
the terms of the BSD License (see the file COPYING included with the
17
from _ClientCookie import CookieJar, request_host
18
from _Util import isstringlike, startswith, getheaders
19
from _Debug import getLogger
20
info = getLogger("ClientCookie").info
28
CHUNK = 1024 # size of chunks fed to HTML HEAD parser, in bytes
31
"""Return method names of class instance.
33
dir(obj) doesn't work across Python versions, this does.
36
return methnames_of_instance_as_dict(obj).keys()
38
def methnames_of_instance_as_dict(inst):
    """Return {method_name: None} for all callables reachable from inst.

    Includes methods from the instance's class (and bases) as well as any
    callables bound directly on the instance itself.
    """
    names = {}
    names.update(methnames_of_class_as_dict(inst.__class__))
    for methname in dir(inst):
        candidate = getattr(inst, methname)
        if callable(candidate):
            names[methname] = None
    return names
47
def methnames_of_class_as_dict(klass):
    """Return {method_name: None} for all callables on klass and its bases.

    Recurses through __bases__ so inherited methods are included even on
    Python versions where dir() does not report them.
    """
    names = {}
    for methname in dir(klass):
        candidate = getattr(klass, methname)
        if callable(candidate):
            names[methname] = None
    for baseclass in klass.__bases__:
        names.update(methnames_of_class_as_dict(baseclass))
    return names
58
from urllib2 import AbstractHTTPHandler
from urllib2 import URLError, HTTPError

import urlparse, urllib2, urllib, httplib, robotparser
import types, string, socket, bisect, time
from cStringIO import StringIO

from _Util import response_seek_wrapper

try:
    import threading
    _threading = threading; del threading
except ImportError:
    import dummy_threading
    _threading = dummy_threading; del dummy_threading
74
# This fixes a bug in urllib2 as of Python 2.1.3 and 2.2.2
#  (http://www.python.org/sf/549151)
# 2.2.3 is broken here (my fault!), 2.3 is fixed.
class HTTPRedirectHandler(urllib2.BaseHandler):
    """Redirection handler with cookie-aware infinite-loop protection.

    Handles 301, 302, 303 and 307 responses, plus the pseudo-code
    "refresh" generated by HTTPRefreshProcessor.
    """
    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    # Implementation notes:

    # To avoid the server sending us into an infinite loop, the request
    # object needs to track what URLs we have already seen.  Do this by
    # adding a handler-specific attribute to the Request object.  The value
    # of the dict is used to count the number of times the same url has
    # been visited.  This is needed because this isn't necessarily a loop:
    # there is more than one way to redirect (Refresh, 302, 303, 307).

    # Another handler-specific Request attribute, original_url, is used to
    # remember the URL of the original request so that it is possible to
    # decide whether or not RFC 2965 cookies should be turned on during
    # redirect.

    # Always unhandled redirection codes:
    # 300 Multiple Choices: should not handle this here.
    # 304 Not Modified: no need to handle here: only of interest to caches
    #     that do conditional GETs
    # 305 Use Proxy: probably not worth dealing with here
    # 306 Unused: what was this for in the previous versions of protocol??

    def redirect_request(self, newurl, req, fp, code, msg, headers):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a redirection
        response is received.  If a redirection should take place, return a
        new Request to allow http_error_30x to perform the redirect;
        otherwise, raise HTTPError to indicate that the redirect should
        not be followed.
        """
        if code in (301, 302, 303, "refresh") or \
               (code == 307 and not req.has_data()):
            # Strictly (according to RFC 2616), 301 or 302 in response to
            # a POST MUST NOT cause a redirection without confirmation
            # from the user (of urllib2, in this case).  In practice,
            # essentially all clients do redirect in this case, so we do
            # the same.
            return Request(newurl, headers=req.headers)
        else:
            raise HTTPError(req.get_full_url(), code, msg, headers, fp)

    def http_error_302(self, req, fp, code, msg, headers):
        # Follow a redirect, tracking visited URLs to detect loops.
        if headers.has_key('location'):
            newurl = getheaders(headers, 'location')[0]
        elif headers.has_key('uri'):
            newurl = getheaders(headers, 'uri')[0]
        else:
            return
        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(newurl, req, fp, code, msg, headers)
        if new is None:
            return

        # remember where we started from
        try: new.origin_req_host = req.origin_req_host
        except AttributeError: pass

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)

    http_error_301 = http_error_303 = http_error_307 = http_error_302
    http_error_refresh = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
191
class Request(urllib2.Request):
    """urllib2.Request plus headers that are not propagated on redirect."""

    # NOTE: mutable default headers={} matches the urllib2.Request
    # interface; it is passed through without being mutated here.
    def __init__(self, url, data=None, headers={}):
        urllib2.Request.__init__(self, url, data, headers)
        self.unredirected_hdrs = {}

    def add_unredirected_header(self, key, val):
        """Add a header that will not be added to a redirected request."""
        self.unredirected_hdrs[string.capitalize(key)] = val

    def has_header(self, header_name):
        """True iff request has named header (regular or unredirected)."""
        if (self.headers.has_key(header_name) or
            self.unredirected_hdrs.has_key(header_name)):
            return True
        return False

    def get_header(self, header_name, default=None):
        """Return a header value; regular headers take precedence."""
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def iter_headers(self):
        """Return (name, value) pairs for all headers, regular ones winning."""
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return hdrs.items()
219
class BaseProcessor:
    """Base class for request/response processors.

    Processors are sorted by processor_order (lower runs first); __cmp__
    implements that ordering for insort in OpenerDirector.add_handler.
    """
    processor_order = 500

    def add_parent(self, parent):
        # parent is the OpenerDirector this processor is registered with
        self.parent = parent
    def close(self):
        self.parent = None
    def __cmp__(self, other):
        # non-processors compare equal so mixed sorts don't blow up
        if not hasattr(other, "processor_order"):
            return 0
        return cmp(self.processor_order, other.processor_order)
234
class HTTPRequestUpgradeProcessor(BaseProcessor):
    # upgrade Request to class with support for headers that don't get
    # redirected
    processor_order = 0  # before anything else

    def http_request(self, request):
        """Rebuild a plain urllib2.Request as this module's Request."""
        if not hasattr(request, "add_unredirected_header"):
            newrequest = Request(request._Request__original, request.data,
                                 request.headers)
            # preserve handler-specific attributes if present
            try: newrequest.origin_req_host = request.origin_req_host
            except AttributeError: pass
            try: newrequest.unverifiable = request.unverifiable
            except AttributeError: pass
            request = newrequest
        return request

    https_request = http_request
253
class HTTPEquivProcessor(BaseProcessor):
    """Append META HTTP-EQUIV headers to regular HTTP headers."""
    def http_response(self, request, response):
        if not hasattr(response, "seek"):
            response = response_seek_wrapper(response)
        # grab HTTP-EQUIV headers and add them to the true HTTP headers
        headers = response.info()
        for hdr, val in parse_head(response):
            headers[hdr] = val
        # rewind so downstream consumers see the whole body
        response.seek(0)
        return response

    https_response = http_response
267
# XXX ATM this only takes notice of http responses -- probably
# should be independent of protocol scheme (http, ftp, etc.)
class SeekableProcessor(BaseProcessor):
    """Make responses seekable."""

    def http_response(self, request, response):
        if not hasattr(response, "seek"):
            return response_seek_wrapper(response)
        return response

    https_response = http_response
279
# XXX if this gets added to urllib2, unverifiable would end up as an
# attribute / method on Request.
class HTTPCookieProcessor(BaseProcessor):
    """Handle HTTP cookies."""
    def __init__(self, cookies=None):
        if cookies is None:
            cookies = CookieJar()
        self.cookies = cookies

    def _unverifiable(self, request):
        """Return true if this transaction is 'unverifiable' (RFC 2965).

        A request is unverifiable if it results from a redirect or was
        explicitly flagged as such by the caller.
        """
        if hasattr(request, "redirect_dict") and request.redirect_dict:
            redirect = True
        else:
            redirect = False
        if (redirect or
            (hasattr(request, "unverifiable") and request.unverifiable)):
            unverifiable = True
        else:
            unverifiable = False
        return unverifiable

    def http_request(self, request):
        unverifiable = self._unverifiable(request)
        if not unverifiable:
            # Stuff request-host of this origin transaction into Request
            # object, because we need to know it to know whether cookies
            # should be in operation during derived requests (redirects,
            # specifically -- including refreshes).
            request.origin_req_host = request_host(request)
        self.cookies.add_cookie_header(request, unverifiable)
        return request

    def http_response(self, request, response):
        unverifiable = self._unverifiable(request)
        self.cookies.extract_cookies(response, request, unverifiable)
        return response

    https_request = http_request
    https_response = http_response
319
class RobotExclusionError(urllib2.HTTPError):
    """HTTPError raised when robots.txt forbids fetching a URL.

    The offending Request is kept on .request for inspection.
    """
    def __init__(self, request, *args):
        apply(urllib2.HTTPError.__init__, (self,)+args)
        self.request = request
324
class HTTPRobotRulesProcessor(BaseProcessor):
    """Enforce robots.txt rules before a request is sent."""
    # before redirections and response debugging, after everything else
    processor_order = 800

    def __init__(self, rfp_class=robotparser.RobotFileParser):
        self.rfp_class = rfp_class
        # cache the parsed robots.txt per host
        self.rfp = None
        self._host = None

    def http_request(self, request):
        host = request.get_host()
        if host != self._host:
            self.rfp = self.rfp_class()
            self.rfp.set_url("http://"+host+"/robots.txt")
            self.rfp.read()
            self._host = host

        ua = request.get_header("User-agent", "")
        if self.rfp.can_fetch(ua, request.get_full_url()):
            return request
        else:
            msg = "request disallowed by robots.txt"
            raise RobotExclusionError(
                request,
                request.get_full_url(),
                403, msg,
                httplib.HTTPMessage(StringIO()), StringIO(msg))

    https_request = http_request
352
class HTTPRefererProcessor(BaseProcessor):
    """Add Referer header to requests.

    This only makes sense if you use each RefererProcessor for a single
    chain of requests only (so, for example, if you use a single
    HTTPRefererProcessor to fetch a series of URLs extracted from a single
    page, this will break).

    """
    def __init__(self):
        self.referer = None

    def http_request(self, request):
        if ((self.referer is not None) and
            not request.has_header("Referer")):
            request.add_unredirected_header("Referer", self.referer)
        return request

    def http_response(self, request, response):
        # remember this URL as the Referer for the next request
        self.referer = response.geturl()
        return response

    https_request = http_request
    https_response = http_response
377
class HTTPResponseDebugProcessor(BaseProcessor):
    """Log full response bodies for debugging."""
    processor_order = 900  # before redirections, after everything else

    def http_response(self, request, response):
        if not hasattr(response, "seek"):
            response = response_seek_wrapper(response)
        info(response.read())
        info("*****************************************************")
        # rewind so the body is still readable downstream
        response.seek(0)
        return response

    https_response = http_response
390
class HTTPRedirectDebugProcessor(BaseProcessor):
    """Log each redirected request for debugging."""
    def http_request(self, request):
        # redirect_dict is only set on requests created by a redirect
        if hasattr(request, "redirect_dict"):
            info("redirecting to %s", request.get_full_url())
        return request
396
class HTTPRefreshProcessor(BaseProcessor):
    """Perform HTTP Refresh redirections.

    Note that if a non-200 HTTP code has occurred (for example, a 30x
    redirect), this processor will do nothing.

    By default, only zero-time Refresh headers are redirected.  Use the
    max_time constructor argument to allow Refresh with longer pauses.  Use
    the honor_time argument to control whether the requested pause is
    honoured (with a time.sleep()) or skipped in favour of immediate
    redirection.

    """
    processor_order = 1000

    def __init__(self, max_time=0, honor_time=True):
        self.max_time = max_time
        self.honor_time = honor_time

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        if code == 200 and hdrs.has_key("refresh"):
            refresh = getheaders(hdrs, "refresh")[0]
            # header format: "<pause>; url=<newurl>"
            i = string.find(refresh, ";")
            if i != -1:
                pause, newurl_spec = refresh[:i], refresh[i+1:]
                i = string.find(newurl_spec, "=")
                if i != -1:
                    pause = int(pause)
                    if (self.max_time is None) or (pause <= self.max_time):
                        if pause != 0 and self.honor_time:
                            time.sleep(pause)
                        newurl = newurl_spec[i+1:]
                        # hand off to the redirect machinery as a
                        # pseudo-code "refresh" error
                        hdrs["location"] = newurl
                        # XXX worry about recursion
                        response = self.parent.error(
                            'http', request, response,
                            "refresh", msg, hdrs)

        return response

    https_response = http_response
439
class HTTPErrorProcessor(BaseProcessor):
    """Process HTTP error responses.

    The purpose of this handler is to to allow other response processors a
    look-in by removing the call to parent.error() from
    AbstractHTTPHandler.

    For non-200 error codes, this just passes the job on to the
    Handler.<proto>_error_<code> methods, via the OpenerDirector.error
    method.  Eventually, urllib2.HTTPDefaultErrorHandler will raise an
    HTTPError if no other handler handles the error.

    """
    processor_order = 1000  # after all other processors

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        if code != 200:
            response = self.parent.error(
                'http', request, response, code, msg, hdrs)

        return response

    https_response = http_response
466
def insort(a, x, lo=0, hi=None, lt=lambda x,y: x<y):
    """Insert x into sorted list a, keeping it sorted.

    Like bisect.insort, but takes a comparison function lt so objects
    without rich comparisons (e.g. processors ordered by processor_order)
    can be sorted.  Inserts after existing equal entries.
    """
    if hi is None:
        hi = len(a)
    while lo < hi:
        mid = divmod((lo+hi), 2)[0]
        if lt(x, a[mid]): hi = mid
        else: lo = mid+1
    a.insert(lo, x)
475
class OpenerDirector(urllib2.OpenerDirector):
    """OpenerDirector extended with request/response processor support."""

    def __init__(self):
        urllib2.OpenerDirector.__init__(self)
        # protocol -> sorted list of processors
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        # Classify each method of handler by its name:
        #   <proto>_error_<kind>, <proto>_open, <proto>_request/_response.
        # NOTE(review): reconstructed from a damaged source -- confirm the
        # registration details against the upstream ClientCookie release.
        added = False
        for meth in methnames(handler):
            i = string.find(meth, "_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if startswith(condition, "error"):
                j = string.find(meth[i+1:], "_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                map = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = map
            elif (condition == "open" and
                  protocol != "do"):  # hack: see below
                kind = protocol
                map = self.handle_open
            elif (condition in ["response", "request"] and
                  protocol != "redirect"):  # yucky hack
                # hack above is to fix HTTPRedirectHandler problem, which
                # appears to above line to be a processor because of the
                # redirect_request method :-((
                kind = protocol
                map = getattr(self, "process_"+condition)
            else:
                continue

            if map.has_key(kind):
                if condition in ["response", "request"]:
                    # keep processors sorted by processor_order
                    lt = lambda x,y: x.processor_order < y.processor_order
                    insort(map[kind], handler, lt=lt)
                else:
                    map[kind].append(handler)
            else:
                map[kind] = [handler]
            added = True

        if added:
            # XXX why does self.handlers need to be sorted?
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def _request(self, url_or_req, data):
        """Coerce a URL string or Request into a Request with data set."""
        if isstringlike(url_or_req):
            req = Request(url_or_req, data)
        else:
            # already a urllib2.Request instance
            req = url_or_req
            if data is not None:
                req.add_data(data)
        return req

    def open(self, fullurl, data=None):
        req = self._request(fullurl, data)
        type_ = req.get_type()

        # pre-process request
        # XXX should we allow a Processor to change the type (URL
        # scheme) of the request?
        meth_name = type_+"_request"
        for processor in self.process_request.get(type_, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = urllib2.OpenerDirector.open(self, req, data)

        # post-process response
        meth_name = type_+"_response"
        for processor in self.process_response.get(type_, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def error(self, proto, *args):
        if proto in ['http', 'https']:
            # XXX http[s] protocols are special-cased
            dict = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = apply(self._call_chain, args)
        if result:
            return result

        if http_err:
            args = (dict, 'default', 'http_error_default') + orig_args
            return apply(self._call_chain, args)
584
# Note the absence of redirect and header-adding code here
585
# (AbstractHTTPHandler), and the lack of other clutter that would be
586
# here without Processors.
587
class AbstractHTTPHandler(urllib2.BaseHandler):
588
processor_order = 500
590
def __init__(self, debuglevel=0):
591
self._debuglevel = debuglevel
593
def set_http_debuglevel(self, level):
594
self._debuglevel = level
596
def do_request_(self, request):
597
host = request.get_host()
599
raise URLError('no host given')
601
if request.has_data(): # POST
602
data = request.get_data()
603
if not request.has_header('Content-type'):
604
request.add_unredirected_header(
606
'application/x-www-form-urlencoded')
607
if not request.has_header('Content-length'):
608
request.add_unredirected_header(
609
'Content-length', '%d' % len(data))
611
scheme, sel = urllib.splittype(request.get_selector())
612
sel_host, sel_path = urllib.splithost(sel)
613
if not request.has_header('Host'):
614
request.add_unredirected_header('Host', sel_host or host)
615
for name, value in self.parent.addheaders:
616
name = string.capitalize(name)
617
if not request.has_header(name):
618
request.add_unredirected_header(name, value)
622
def do_open(self, http_class, req):
623
host = req.get_host()
625
raise URLError('no host given')
627
h = http_class(host) # will parse host:port
628
h.set_debuglevel(self._debuglevel)
630
#h.putrequest(req.get_method(), req.get_selector())
632
h.putrequest('POST', req.get_selector())
634
h.putrequest('GET', req.get_selector())
636
for k, v in req.iter_headers():
639
# httplib will attempt to connect() here. be prepared
640
# to convert a socket error to a URLError.
643
except socket.error, err:
646
h.send(req.get_data())
648
code, msg, hdrs = h.getreply()
651
response = urllib.addinfourl(fp, hdrs, req.get_full_url())
657
# XXX would self.reset() work, instead of raising this exception?
class EndOfHeadError(Exception):
    """Raised by the HEAD parsers to stop parsing at the end of <head>."""
    pass
659
class AbstractHeadParser:
    """Collect META HTTP-EQUIV (name, content) pairs from an HTML head.

    Subclasses supply the actual parsing; results accumulate in
    self.http_equiv.
    """
    # only these elements are allowed in or before HEAD of document
    # NOTE(review): the middle of this tuple was reconstructed -- confirm
    # the exact element list against upstream.
    head_elems = ("html", "head",
                  "title", "base",
                  "script", "style", "meta", "link", "object")

    def __init__(self):
        self.http_equiv = []

    def start_meta(self, attrs):
        http_equiv = content = None
        for key, value in attrs:
            if key == "http-equiv":
                http_equiv = value
            elif key == "content":
                content = value
        if http_equiv is not None:
            self.http_equiv.append((http_equiv, content))

    def end_head(self):
        raise EndOfHeadError()
680
# use HTMLParser if we have it (it does XHTML), htmllib otherwise
try:
    import HTMLParser
except ImportError:
    import htmllib, formatter
    class HeadParser(AbstractHeadParser, htmllib.HTMLParser):
        """htmllib-based HEAD parser (pre-HTMLParser Pythons)."""
        def __init__(self):
            htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
            AbstractHeadParser.__init__(self)

        def handle_starttag(self, tag, method, attrs):
            if tag in self.head_elems:
                method(attrs)
            else:
                raise EndOfHeadError()

        def handle_endtag(self, tag, method):
            if tag in self.head_elems:
                method()
            else:
                raise EndOfHeadError()

    HEAD_PARSER_CLASS = HeadParser
else:
    class XHTMLCompatibleHeadParser(AbstractHeadParser,
                                    HTMLParser.HTMLParser):
        """HTMLParser-based HEAD parser; also copes with XHTML."""
        def __init__(self):
            HTMLParser.HTMLParser.__init__(self)
            AbstractHeadParser.__init__(self)

        def handle_starttag(self, tag, attrs):
            if tag not in self.head_elems:
                raise EndOfHeadError()
            try:
                method = getattr(self, 'start_' + tag)
            except AttributeError:
                try:
                    method = getattr(self, 'do_' + tag)
                except AttributeError:
                    pass  # unknown tag
                else:
                    method(attrs)
            else:
                method(attrs)

        def handle_endtag(self, tag):
            if tag not in self.head_elems:
                raise EndOfHeadError()
            try:
                method = getattr(self, 'end_' + tag)
            except AttributeError:
                pass  # unknown tag
            else:
                method()

        # handle_charref, handle_entityref and default entitydefs are taken
        # from sgmllib
        def handle_charref(self, name):
            try:
                n = int(name)
            except ValueError:
                self.unknown_charref(name)
                return
            if not 0 <= n <= 255:
                self.unknown_charref(name)
                return
            self.handle_data(chr(n))

        # Definition of entities -- derived classes may override
        entitydefs = \
            {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}

        def handle_entityref(self, name):
            table = self.entitydefs
            try:
                self.handle_data(table[name])
            except KeyError:
                self.unknown_entityref(name)

        def unknown_entityref(self, ref):
            self.handle_data("&%s;" % ref)

        def unknown_charref(self, ref):
            self.handle_data("&#%s;" % ref)

    HEAD_PARSER_CLASS = XHTMLCompatibleHeadParser
768
def parse_head(fileobj):
    """Return a list of key, value pairs."""
    hp = HEAD_PARSER_CLASS()
    while 1:
        data = fileobj.read(CHUNK)
        try:
            hp.feed(data)
        except EndOfHeadError:
            break
        if len(data) != CHUNK:
            # this should only happen if there is no HTML body, or if
            # CHUNK is big
            break
    return hp.http_equiv
784
class HTTPHandler(AbstractHTTPHandler):
    """http scheme handler using httplib.HTTP."""
    def http_open(self, req):
        return self.do_open(httplib.HTTP, req)

    http_request = AbstractHTTPHandler.do_request_
790
# only define the https handler when Python was built with SSL support
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """https scheme handler using httplib.HTTPS."""
        def https_open(self, req):
            return self.do_open(httplib.HTTPS, req)

        https_request = AbstractHTTPHandler.do_request_
797
def build_opener(*handlers):
    """Create an opener object from a list of handlers and processors.

    The opener will use several default handlers and processors, including
    support for HTTP and FTP.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.

    """
    opener = OpenerDirector()
    # NOTE(review): reconstructed default list -- confirm the commented-out
    # optional processors against upstream.
    default_classes = [
        # handlers
        urllib2.ProxyHandler,
        urllib2.UnknownHandler,
        HTTPHandler,  # from this module (derived from new AbstractHTTPHandler)
        urllib2.HTTPDefaultErrorHandler,
        HTTPRedirectHandler,  # from this module (bugfixed)
        urllib2.FTPHandler,
        urllib2.FileHandler,
        # processors
        HTTPRequestUpgradeProcessor,
        #HTTPEquivProcessor,
        #SeekableProcessor,
        HTTPCookieProcessor,
        #HTTPRefererProcessor,
        #HTTPRefreshProcessor,
        HTTPErrorProcessor,
        ]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(HTTPSHandler)
    # drop any default whose job is taken over by a passed-in handler
    skip = []
    for klass in default_classes:
        for check in handlers:
            if type(check) == types.ClassType:
                if issubclass(check, klass):
                    skip.append(klass)
            elif type(check) == types.InstanceType:
                if isinstance(check, klass):
                    skip.append(klass)
    for klass in skip:
        default_classes.remove(klass)

    for klass in default_classes:
        opener.add_handler(klass())

    for h in handlers:
        if type(h) == types.ClassType:
            h = h()
        opener.add_handler(h)

    return opener
851
urlopen_lock = _threading.Lock()
def urlopen(url, data=None):
    """Open url with the shared global opener, building it on first use.

    The lock makes lazy construction of the module-global opener safe when
    urlopen is called from multiple threads.
    """
    global _opener
    if _opener is None:
        urlopen_lock.acquire()
        try:
            # double-check: another thread may have built it meanwhile
            if _opener is None:
                _opener = build_opener()
        finally:
            urlopen_lock.release()
    return _opener.open(url, data)
863
def install_opener(opener):