~ubuntu-branches/ubuntu/saucy/python-httplib2/saucy : revision 16

1

from __future__ import generators

2

"""

3

httplib2

4

5

A caching http interface that supports ETags and gzip

6

to conserve bandwidth.

7

8

Requires Python 2.3 or later

9

10

Changelog:

11

2007-08-18, Rick: Modified so it's able to use a socks proxy if needed.

12

13

"""

14

15

# Package metadata: primary author, additional contributors, license and
# release version of this module.
__author__ = "Joe Gregorio (joe@bitworking.org)"
__contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
                    "James Antill",
                    "Xavier Verges Farrero",
                    "Jonathan Feinberg",
                    "Blair Zajac",
                    "Sam Ruby",
                    "Louis Nyffenegger"]
__license__ = "MIT"
__version__ = "0.7.0"

26

27

import re

28

import sys

29

import email

30

import email.Utils

31

import email.Message

32

import email.FeedParser

33

import StringIO

34

import gzip

35

import zlib

36

import httplib

37

import urlparse

38

import base64

39

import os

40

import copy

41

import calendar

42

import time

43

import random

44

import errno

45

# remove deprecation warning in python2.6

46

try:

47

from hashlib import sha1 as _sha, md5 as _md5

48

except ImportError:

49

import sha

50

import md5

51

_sha = sha.new

52

_md5 = md5.new

53

import hmac

54

from gettext import gettext as _

55

import socket

56

57

try:

58

from httplib2 import socks

59

except ImportError:

60

socks = None

61

62

# Build the appropriate socket wrapper for ssl

63

# Two alternative definitions of _ssl_wrap_socket are provided: one built on
# the ssl module, and a fallback used when importing ssl (or reading
# ssl.SSLError) fails. ssl_SSLError is None in the fallback case.
try:
    import ssl # python 2.6
    ssl_SSLError = ssl.SSLError
    def _ssl_wrap_socket(sock, key_file, cert_file,
                         disable_validation, ca_certs):
        """Wrap sock with ssl.wrap_socket.

        The peer certificate is required (ssl.CERT_REQUIRED) unless
        disable_validation is true, in which case ssl.CERT_NONE is used.
        key_file, cert_file and ca_certs are passed through to
        ssl.wrap_socket.
        """
        if disable_validation:
            cert_reqs = ssl.CERT_NONE
        else:
            cert_reqs = ssl.CERT_REQUIRED
        # We should be specifying SSL version 3 or TLS v1, but the ssl module
        # doesn't expose the necessary knobs. So we need to go with the default
        # of SSLv23.
        return ssl.wrap_socket(sock, keyfile=key_file, certfile=cert_file,
                               cert_reqs=cert_reqs, ca_certs=ca_certs)
except (AttributeError, ImportError):
    ssl_SSLError = None
    def _ssl_wrap_socket(sock, key_file, cert_file,
                         disable_validation, ca_certs):
        """Wrap sock using socket.ssl and httplib.FakeSocket.

        Raises CertificateValidationUnsupported unless disable_validation
        is true; ca_certs is not used by this fallback.
        """
        if not disable_validation:
            raise CertificateValidationUnsupported(
                    "SSL certificate validation is not supported without "
                    "the ssl module installed. To avoid this error, install "
                    "the ssl module, or explicity disable validation.")
        ssl_sock = socket.ssl(sock, key_file, cert_file)
        return httplib.FakeSocket(sock, ssl_sock)

88

89

90

# Use the real IRI-to-URI converter on Python 2.3 and later; on older
# interpreters fall back to a no-op that returns the URI unchanged.
if sys.version_info >= (2,3):
    from iri2uri import iri2uri
else:
    def iri2uri(uri):
        """Fallback: return uri unchanged."""
        return uri

95

96

def has_timeout(timeout): # python 2.6
    """Return True when timeout is an explicit, caller supplied timeout.

    None never counts as a timeout. When the socket module exposes the
    _GLOBAL_DEFAULT_TIMEOUT sentinel, that sentinel does not count either.
    """
    if timeout is None:
        return False
    # A private marker stands in for the sentinel on interpreters that do not
    # define it, so the identity test below is then always true.
    not_defined = object()
    global_default = getattr(socket, '_GLOBAL_DEFAULT_TIMEOUT', not_defined)
    return timeout is not global_default

100

101

# Names exported by "from httplib2 import *".
__all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error',
  'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
  'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError',
  'debuglevel', 'ProxiesUnavailableError']


# The httplib debug level, set to a non-zero value to get debug output
debuglevel = 0

109

110

111

# Python 2.3 support

112

# Only defined on interpreters older than Python 2.4.
if sys.version_info < (2,4):
    def sorted(seq):
        """Sort seq in place with seq.sort() and return the same object.

        Note that the argument itself is modified; no copy is made.
        """
        seq.sort()
        return seq

116

117

# Python 2.3 support

118

def HTTPResponse__getheaders(self):
    """Return list of (header, value) tuples."""
    message = self.msg
    if message is not None:
        return message.items()
    # No headers have been read yet.
    raise httplib.ResponseNotReady()

# Install the function above only when httplib.HTTPResponse does not already
# provide a getheaders attribute.
try:
    httplib.HTTPResponse.getheaders
except AttributeError:
    httplib.HTTPResponse.getheaders = HTTPResponse__getheaders

126

127

# All exceptions raised here derive from HttpLib2Error

128

class HttpLib2Error(Exception):
    """Root of the exception hierarchy defined in this module."""


class HttpLib2ErrorWithResponse(HttpLib2Error):
    """Error that carries the response and content it was raised for.

    Some exceptions can be caught and optionally be turned back into
    responses; the 'response' and 'content' attributes make that possible.
    """

    def __init__(self, desc, response, content):
        HttpLib2Error.__init__(self, desc)
        self.response = response
        self.content = content


class RedirectMissingLocation(HttpLib2ErrorWithResponse):
    """Response-carrying error named for a redirect without a location."""


class RedirectLimit(HttpLib2ErrorWithResponse):
    """Response-carrying error named for exceeding the redirect limit."""


class FailedToDecompressContent(HttpLib2ErrorWithResponse):
    """Response-carrying error for content that could not be decompressed."""


class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse):
    """Response-carrying error for an unsupported Digest auth option."""


class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse):
    """Response-carrying error for an unsupported HMACDigest auth option."""


class MalformedHeader(HttpLib2Error):
    """A header value could not be parsed."""


class RelativeURIError(HttpLib2Error):
    """A relative URI was supplied where an absolute one is required."""


class ServerNotFoundError(HttpLib2Error):
    """Error type named for a server that could not be found."""


class ProxiesUnavailableError(HttpLib2Error):
    """Proxy use was requested but proxy support is missing."""


class CertificateValidationUnsupported(HttpLib2Error):
    """Certificate validation was requested but cannot be performed."""


class SSLHandshakeError(HttpLib2Error):
    """Error type named for a failed SSL handshake."""


class CertificateHostnameMismatch(SSLHandshakeError):
    """Handshake error that records the 'host' and 'cert' involved."""

    def __init__(self, desc, host, cert):
        HttpLib2Error.__init__(self, desc)
        self.host, self.cert = host, cert

155

156

# Open Items:

157

# -----------

158

# Proxy support

159

160

# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)

161

162

# Pluggable cache storage (supports storing the cache in

163

# flat files by default. We need a plug-in architecture

164

# that can support Berkeley DB and Squid)

165

166

# == Known Issues ==

167

# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.

168

# Does not handle Cache-Control: max-stale

169

# Does not use Age: headers when calculating cache freshness.

170

171

172

# The number of redirections to follow before giving up.

173

# Note that only GET redirects are automatically followed.

174

# Will also honor 301 requests by saving that info and never

175

# requesting that URI again.

176

DEFAULT_MAX_REDIRECTS = 5

# Default CA certificates file bundled with httplib2.
# The path is "cacerts.txt" inside the directory that contains this module.
CA_CERTS = os.path.join(
        os.path.dirname(os.path.abspath(__file__ )), "cacerts.txt")

181

182

# Which headers are hop-by-hop headers by default

183

HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade']

def _get_end2end_headers(response):
    """Return the header names of response that are not hop-by-hop.

    A header is hop-by-hop when it appears in HOP_BY_HOP or is named in the
    comma separated 'connection' header of the response itself.
    """
    named_by_connection = [token.strip()
                           for token in response.get('connection', '').split(',')]
    excluded = HOP_BY_HOP + named_by_connection
    return [name for name in response.keys() if name not in excluded]

189

190

URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")

def parse_uri(uri):
    """Split a URI with the regular expression from Appendix B of RFC 3986.

    (scheme, authority, path, query, fragment) = parse_uri(uri)

    Components that are absent from the URI come back as None, except the
    path, which is then the empty string.
    """
    captured = URI.match(uri).groups()
    # The odd indexed groups include the delimiters (":", "//", "?", "#");
    # only the bare components are returned.
    scheme, authority, path = captured[1], captured[3], captured[4]
    query, fragment = captured[6], captured[8]
    return (scheme, authority, path, query, fragment)

199

200

def urlnorm(uri):
    """Normalize an absolute URI.

    Returns (scheme, authority, request_uri, defrag_uri) where scheme and
    authority are lower-cased, request_uri is the path (defaulting to "/")
    plus the query string if any, and defrag_uri is the URI rebuilt without
    its fragment. Raises RelativeURIError when the scheme or the authority
    is missing.
    """
    (scheme, authority, path, query, fragment) = parse_uri(uri)
    if not (scheme and authority):
        raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
    scheme = scheme.lower()
    authority = authority.lower()
    path = path or "/"
    # Could do syntax based normalization of the URI before
    # computing the digest. See Section 6.2.2 of Std 66.
    if query:
        request_uri = "?".join([path, query])
    else:
        request_uri = path
    defrag_uri = scheme + "://" + authority + request_uri
    return scheme, authority, request_uri, defrag_uri

214

215

216

# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)

217

re_url_scheme = re.compile(r'^\w+://')
re_slash = re.compile(r'[?/:|]+')

def safename(filename):
    """Return a filename suitable for the cache.

    The scheme prefix is dropped, runs of '?', '/', ':' and '|' become a
    single ',', the result is cut to at most 200 characters and the MD5 hex
    digest of the (encoded) input is appended after a ','.
    """
    try:
        if re_url_scheme.match(filename):
            # Byte strings are decoded first so both kinds of input end up
            # IDNA encoded.
            if isinstance(filename, str):
                filename = filename.decode('utf-8')
            filename = filename.encode('idna')
    except UnicodeError:
        # Keep whatever form the name had when the conversion failed.
        pass
    if isinstance(filename, unicode):
        filename = filename.encode('utf-8')
    digest = _md5(filename).hexdigest()
    stem = re_slash.sub(",", re_url_scheme.sub("", filename))

    # limit length of filename
    return ",".join((stem[:200], digest))

246

247

NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+')
def _normalize_headers(headers):
    """Return a new dict with lower-cased header names and normalized values.

    Every run of linear white space in a value (spaces/tabs, optionally
    preceded by a CRLF line fold) is replaced by a single space, and leading
    and trailing white space is stripped.

    The pattern method signature is sub(repl, string). The previous code
    passed the header value as the replacement and ' ' as the subject, so
    nothing was normalized and a value containing a backslash was parsed as
    a replacement template.
    """
    return dict([(key.lower(), NORMALIZE_SPACE.sub(' ', value).strip())
                 for (key, value) in headers.items()])

250

251

def _parse_cache_control(headers):
    """Parse the 'cache-control' header into a dict of directives.

    Directive names are stripped and lower-cased. A directive of the form
    name=value maps to its (stripped, lower-cased) value; a bare directive
    maps to 1. An absent header yields an empty dict.
    """
    directives = {}
    if 'cache-control' in headers:
        valued = []
        bare = []
        for part in headers['cache-control'].split(','):
            if part.find("=") != -1:
                valued.append(tuple([piece.strip().lower()
                                     for piece in part.split("=", 1)]))
            else:
                bare.append((part.strip().lower(), 1))
        # Bare directives come last so they win over a valued duplicate.
        directives = dict(valued + bare)
    return directives

259

260

# Whether to use a strict mode to parse WWW-Authenticate headers

261

# Might lead to bad results in case of ill-formed header value,

262

# so disabled by default, falling back to relaxed parsing.

263

# Set to true to turn on, usefull for testing servers.

264

USE_WWW_AUTH_STRICT_PARSING = 0

# In regex below:
#    [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+             matches a "token" as defined by HTTP
#    "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?"    matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space
# Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both:
#    \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?
WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$")
WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$")
UNQUOTE_PAIRS = re.compile(r'\\(.)')
def _parse_www_authenticate(headers, headername='www-authenticate'):
    """Parse an authentication header into {scheme: {param: value}}.

    Scheme and parameter names are lower-cased and backslash quoted pairs in
    values are unquoted. For the 'authentication-info' header, which carries
    no scheme token, the parameters are filed under 'digest'. A header that
    cannot be split into a scheme and parameters raises MalformedHeader.
    """
    parsed = {}
    if headername not in headers:
        return parsed
    if USE_WWW_AUTH_STRICT_PARSING:
        param_re = WWW_AUTH_STRICT
    else:
        param_re = WWW_AUTH_RELAXED
    try:
        remaining = headers[headername].strip()
        while remaining:
            # Break off the scheme at the beginning of the line
            if headername == 'authentication-info':
                auth_scheme, tail = 'digest', remaining
            else:
                auth_scheme, tail = remaining.split(" ", 1)
            # Consume key=value pairs one at a time; the pattern stops
            # matching where the next scheme begins.
            params = {}
            found = param_re.search(tail)
            while found:
                key, value, tail = found.groups()
                params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value)
                found = param_re.search(tail)
            parsed[auth_scheme.lower()] = params
            remaining = tail.strip()
    except ValueError:
        raise MalformedHeader("WWW-Authenticate")
    return parsed

302

303

304

def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    Returns one of the strings "FRESH", "STALE" or "TRANSPARENT".

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Note that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

    no-cache
    only-if-cached
    max-age
    min-fresh
    """

    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    # The branches are tried in order; the first one that applies decides.
    if request_headers.has_key('pragma') and request_headers['pragma'].lower().find('no-cache') != -1:
        retval = "TRANSPARENT"
        # Side effect: the caller's request_headers gains a cache-control
        # header when it had none.
        if 'cache-control' not in request_headers:
            request_headers['cache-control'] = 'no-cache'
    elif cc.has_key('no-cache'):
        retval = "TRANSPARENT"
    elif cc_response.has_key('no-cache'):
        retval = "STALE"
    elif cc.has_key('only-if-cached'):
        retval = "FRESH"
    elif response_headers.has_key('date'):
        # NOTE(review): parsedate_tz returns None for an unparseable date,
        # which timegm would not accept -- confirm callers guarantee a valid
        # Date header.
        date = calendar.timegm(email.Utils.parsedate_tz(response_headers['date']))
        now = time.time()
        current_age = max(0, now - date)
        # Lifetime from the response: max-age wins over Expires; an
        # unparseable value counts as 0.
        if cc_response.has_key('max-age'):
            try:
                freshness_lifetime = int(cc_response['max-age'])
            except ValueError:
                freshness_lifetime = 0
        elif response_headers.has_key('expires'):
            expires = email.Utils.parsedate_tz(response_headers['expires'])
            if None == expires:
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        # A max-age in the request overrides the lifetime derived above.
        if cc.has_key('max-age'):
            try:
                freshness_lifetime = int(cc['max-age'])
            except ValueError:
                freshness_lifetime = 0
        # min-fresh is applied by treating the entry as that much older.
        if cc.has_key('min-fresh'):
            try:
                min_fresh = int(cc['min-fresh'])
            except ValueError:
                min_fresh = 0
            current_age += min_fresh
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval

377

378

def _decompressContent(response, new_content):
    """Return new_content decoded according to response['content-encoding'].

    Only 'gzip' and 'deflate' are decoded; any other (or missing) encoding
    returns new_content unchanged. After a successful decode the response is
    updated in place: 'content-length' is set to the decoded length and
    'content-encoding' is moved to '-content-encoding'.

    Raises FailedToDecompressContent when the body cannot be decoded.
    zlib.decompress reports bad data with zlib.error, which is not an
    IOError, so both are caught here.
    """
    content = new_content
    try:
        encoding = response.get('content-encoding', None)
        if encoding in ['gzip', 'deflate']:
            if encoding == 'gzip':
                content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read()
            if encoding == 'deflate':
                content = zlib.decompress(content)
            response['content-length'] = str(len(content))
            # Record the historical presence of the encoding in a way that won't interfere.
            response['-content-encoding'] = response['content-encoding']
            del response['content-encoding']
    except (IOError, zlib.error):
        content = ""
        raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content)
    return content

395

396

def _updateCache(request_headers, response_headers, content, cache, cachekey):
    """Store, or evict, the cache entry for cachekey.

    Nothing happens when cachekey is false. If either side sent 'no-store'
    the entry is deleted. Otherwise the entry written is a 'status:' line,
    the response headers (minus status, content-encoding and
    transfer-encoding, plus '-varied-*' annotations for every header named
    in Vary) and the content. A 304 status is stored as 200.
    """
    if not cachekey:
        return
    request_cc = _parse_cache_control(request_headers)
    response_cc = _parse_cache_control(response_headers)
    if 'no-store' in request_cc or 'no-store' in response_cc:
        cache.delete(cachekey)
        return

    skipped = ['status', 'content-encoding', 'transfer-encoding']
    info = email.Message.Message()
    for name, value in response_headers.iteritems():
        if name not in skipped:
            info[name] = value

    # Add annotations to the cache to indicate what headers
    # are variant for this request.
    vary = response_headers.get('vary', None)
    if vary:
        for varied in vary.lower().replace(' ', '').split(','):
            try:
                info['-varied-%s' % varied] = request_headers[varied]
            except KeyError:
                pass

    status = response_headers.status
    if status == 304:
        status = 200

    # Make every line ending in the serialized headers a CRLF.
    header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", info.as_string())
    cache.set(cachekey, "".join(['status: %d\r\n' % status, header_str, content]))

432

433

def _cnonce():
    """Return a 16 character hexadecimal client nonce.

    The nonce is the first half of the MD5 hex digest of the current time
    joined with 20 random decimal digits. All ten digits can occur; the
    previous randrange(0, 9) call could never produce '9' and hashed the
    repr of a list rather than the digits themselves.
    """
    digits = "".join([random.choice("0123456789") for unused in range(20)])
    seed = "%s:%s" % (time.ctime(), digits)
    # The seed is pure ASCII; encoding it keeps the hash input a byte string.
    return _md5(seed.encode('utf-8')).hexdigest()[:16]

436

437

def _wsse_username_token(cnonce, iso_now, password):
    """Return the base64 encoded SHA digest of cnonce + iso_now + password."""
    material = "%s%s%s" % (cnonce, iso_now, password)
    raw_digest = _sha(material).digest()
    return base64.b64encode(raw_digest).strip()

439

440

441

# For credentials we need two things, first

442

# a pool of credentials to try (not necessarily tied to Basic, Digest, etc.)

443

# Then we also need a list of URIs that have already demanded authentication

444

# That list is tricky since sub-URIs can take the same auth, or the

445

# auth scheme may change as you descend the tree.

446

# So we also need each Auth instance to be able to tell us

447

# how close to the 'top' it is.

448

449

class Authentication(object):
    """Base class for the authentication schemes.

    An instance remembers the credentials, the host and the path of the URI
    that demanded authentication, and the http object that created it.
    """

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        # Only the path component of the request URI is kept.
        self.path = parse_uri(request_uri)[2]
        self.host = host
        self.credentials = credentials
        self.http = http

    def depth(self, request_uri):
        """Count the '/' characters of request_uri past len(self.path)."""
        # NOTE(review): the parsed components are not used here; the count is
        # taken on the raw request_uri -- confirm that is intended.
        parse_uri(request_uri)
        remainder = request_uri[len(self.path):]
        return remainder.count("/")

    def inscope(self, host, request_uri):
        """Tell whether host and request_uri fall under this instance.

        True when the host is equal and the path of request_uri starts with
        the stored path.
        """
        # XXX Should we normalize the request_uri?
        path = parse_uri(request_uri)[2]
        same_host = (host == self.host)
        return same_host and path.startswith(self.path)

    def request(self, method, request_uri, headers, content):
        """Hook for adding the Authorization header to the request headers.

        Does nothing here; sub-classes override it."""
        pass

    def response(self, response, content):
        """Hook called with the last authorized response, e.g. to pick up
        new nonces. Sub-classes override it if necessary.

        Return True if the request is to be retried, for example Digest may
        return stale=true. This base implementation always returns False.
        """
        return False

480

481

482

483

class BasicAuthentication(Authentication):
    """HTTP Basic authentication."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Set headers['authorization'] to 'Basic ' followed by the base64
        encoding of "name:password"."""
        userpass = "%s:%s" % self.credentials
        token = base64.b64encode(userpass).strip()
        headers['authorization'] = 'Basic ' + token

491

492

493

class DigestAuthentication(Authentication):
    """Only do qop='auth' and MD5, since that
    is all Apache currently implements"""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Read the Digest challenge from response['www-authenticate'].

        Raises UnimplementedDigestAuthOptionError when the server offers no
        'auth' qop or an algorithm other than MD5. That exception type takes
        (desc, response, content); it was previously raised with the
        description only, which failed with a TypeError instead.
        """
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['digest']
        qop = self.challenge.get('qop', 'auth')
        # qop-options is a comma separated list (e.g. "auth,auth-int"), so
        # split on commas as well as on white space.
        offered = [x.strip() for x in qop.replace(',', ' ').split()]
        if 'auth' in offered:
            self.challenge['qop'] = 'auth'
        else:
            self.challenge['qop'] = None
        if self.challenge['qop'] is None:
            raise UnimplementedDigestAuthOptionError(_("Unsupported value for qop: %s.") % qop, response, content)
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5').upper()
        if self.challenge['algorithm'] != 'MD5':
            raise UnimplementedDigestAuthOptionError(_("Unsupported value for algorithm: %s.") % self.challenge['algorithm'], response, content)
        self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]])
        # Nonce count, sent as 8 hex digits and incremented on every request.
        self.challenge['nc'] = 1

    def request(self, method, request_uri, headers, content, cnonce = None):
        """Modify the request headers: set headers['authorization'] to a
        Digest response and increment the nonce count. A fresh client nonce
        is generated unless cnonce is given."""
        H = lambda x: _md5(x).hexdigest()
        KD = lambda s, d: H("%s:%s" % (s, d))
        A2 = "".join([method, ":", request_uri])
        self.challenge['cnonce'] = cnonce or _cnonce()
        request_digest  = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (self.challenge['nonce'],
                    '%08x' % self.challenge['nc'],
                    self.challenge['cnonce'],
                    self.challenge['qop'], H(A2)
                    ))
        headers['authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % (
                self.credentials[0],
                self.challenge['realm'],
                self.challenge['nonce'],
                request_uri,
                self.challenge['algorithm'],
                request_digest,
                self.challenge['qop'],
                self.challenge['nc'],
                self.challenge['cnonce'],
                )
        self.challenge['nc'] += 1

    def response(self, response, content):
        """Pick up a new nonce from the response.

        Returns True (retry the request) only when there is no
        'authentication-info' header and the new challenge says stale=true.
        A 'nextnonce' from 'authentication-info' is stored for later requests
        and False is returned.
        """
        if 'authentication-info' not in response:
            challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {})
            if 'true' == challenge.get('stale'):
                self.challenge['nonce'] = challenge['nonce']
                self.challenge['nc'] = 1
                return True
        else:
            updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {})

            if 'nextnonce' in updated_challenge:
                self.challenge['nonce'] = updated_challenge['nextnonce']
                self.challenge['nc'] = 1
        return False

548

549

550

class HmacDigestAuthentication(Authentication):
    """Adapted from Robert Sayre's code and DigestAuthentication above."""
    __author__ = "Thomas Broyer (t.broyer@ltgt.net)"

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Read the HMACDigest challenge and derive the signing key.

        Raises UnimplementedHmacDigestAuthOptionError when the server nonce
        is missing or an unsupported algorithm is requested. That exception
        type takes (desc, response, content); it was previously raised with
        the description only, which failed with a TypeError instead.
        """
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['hmacdigest']
        # TODO: self.challenge['domain']
        self.challenge['reason'] = self.challenge.get('reason', 'unauthorized')
        if self.challenge['reason'] not in ['unauthorized', 'integrity']:
            self.challenge['reason'] = 'unauthorized'
        self.challenge['salt'] = self.challenge.get('salt', '')
        if not self.challenge.get('snonce'):
            raise UnimplementedHmacDigestAuthOptionError(_("The challenge doesn't contain a server nonce, or this one is empty."), response, content)
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1')
        if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']:
            raise UnimplementedHmacDigestAuthOptionError(_("Unsupported value for algorithm: %s.") % self.challenge['algorithm'], response, content)
        self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1')
        if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']:
            raise UnimplementedHmacDigestAuthOptionError(_("Unsupported value for pw-algorithm: %s.") % self.challenge['pw-algorithm'], response, content)
        if self.challenge['algorithm'] == 'HMAC-MD5':
            self.hashmod = _md5
        else:
            self.hashmod = _sha
        if self.challenge['pw-algorithm'] == 'MD5':
            self.pwhashmod = _md5
        else:
            self.pwhashmod = _sha
        # _md5 and _sha are hash constructors, not modules: call them
        # directly (they have no 'new' attribute).
        self.key = "".join([self.credentials[0], ":",
                    self.pwhashmod("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
                    ":", self.challenge['realm']
                    ])
        self.key = self.pwhashmod(self.key).hexdigest().lower()

    def request(self, method, request_uri, headers, content):
        """Modify the request headers: set headers['authorization'] to an
        HMACDigest response computed over the method, the URI, a fresh client
        nonce, the server nonce and the end-to-end header values."""
        keys = _get_end2end_headers(headers)
        keylist = "".join(["%s " % k for k in keys])
        headers_val = "".join([headers[k] for k in keys])
        created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime())
        cnonce = _cnonce()
        request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val)
        request_digest  = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
        headers['authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % (
                self.credentials[0],
                self.challenge['realm'],
                self.challenge['snonce'],
                cnonce,
                request_uri,
                created,
                request_digest,
                keylist,
                )

    def response(self, response, content):
        """Return True (retry) when the new challenge gives 'integrity' or
        'stale' as its reason, False otherwise."""
        challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {})
        if challenge.get('reason') in ['integrity', 'stale']:
            return True
        return False

610

611

612

class WsseAuthentication(Authentication):
    """This is thinly tested and should not be relied upon.
    At this time there isn't any third party server to test against.
    Blogger and TypePad implemented this algorithm at one point
    but Blogger has since switched to Basic over HTTPS and
    TypePad has implemented it wrong, by never issuing a 401
    challenge but instead requiring your client to telepathically know that
    their endpoint is expecting WSSE profile="UsernameToken"."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Set the 'authorization' header to the WSSE profile and add an
        'X-WSSE' header carrying the username token."""
        headers['authorization'] = 'WSSE profile="UsernameToken"'
        created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        nonce = _cnonce()
        username, password = self.credentials[0], self.credentials[1]
        digest = _wsse_username_token(nonce, created, password)
        token_fields = (username, digest, nonce, created)
        headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % token_fields

635

636

class GoogleLoginAuthentication(Authentication):
    """GoogleLogin authentication: the credentials are exchanged for an Auth
    token with a ClientLogin POST when the instance is created."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        from urllib import urlencode
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        service = challenge['googlelogin'].get('service', 'xapi')
        # Blogger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == 'xapi' and request_uri.find("calendar") > 0:
            service = "cl"
        # No point in guessing Base or Spreadsheet
        #elif request_uri.find("spreadsheets") > 0:
        #    service = "wise"

        form = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent'])
        resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(form), headers={'Content-Type': 'application/x-www-form-urlencoded'})
        # The reply body is a list of key=value lines.
        pairs = [tuple(line.split("=", 1)) for line in content.split('\n') if line]
        fields = dict(pairs)
        if resp.status != 403:
            self.Auth = fields['Auth']
        else:
            self.Auth = ""

    def request(self, method, request_uri, headers, content):
        """Set headers['authorization'] to the GoogleLogin token obtained at
        construction time."""
        headers['authorization'] = 'GoogleLogin Auth=' + self.Auth

663

664

665

# Maps a lower-case authentication scheme name to the class implementing it.
AUTH_SCHEME_CLASSES = {
    "basic": BasicAuthentication,
    "wsse": WsseAuthentication,
    "digest": DigestAuthentication,
    "hmacdigest": HmacDigestAuthentication,
    "googlelogin": GoogleLoginAuthentication
}

# The scheme names above as an ordered list.
# NOTE(review): presumably the order in which schemes are preferred -- confirm
# against the code that consumes this list.
AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]

674

675

class FileCache(object):
    """Uses a local directory as a store for cached files.
    Not really safe to use if multiple threads or processes are going to
    be running on the same cache.

    'cache' is the directory (created if missing) and 'safe' maps a cache
    key to a file name inside it.
    """
    def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
        self.cache = cache
        self.safe = safe
        if not os.path.exists(cache):
            os.makedirs(self.cache)

    def get(self, key):
        """Return the stored bytes for key, or None if they cannot be read."""
        retval = None
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        try:
            f = open(cacheFullPath, "rb")
            # try/finally guarantees the handle is closed even if read() fails.
            try:
                retval = f.read()
            finally:
                f.close()
        except IOError:
            # A missing or unreadable entry is simply a cache miss.
            pass
        return retval

    def set(self, key, value):
        """Write value as the entry for key, replacing any previous one."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        f = open(cacheFullPath, "wb")
        try:
            f.write(value)
        finally:
            f.close()

    def delete(self, key):
        """Remove the entry for key if it exists."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        if os.path.exists(cacheFullPath):
            os.remove(cacheFullPath)

707

708

class Credentials(object):
    """A list of (domain, name, password) entries."""

    def __init__(self):
        self.credentials = []

    def add(self, name, password, domain=""):
        """Append an entry; the domain is stored lower-cased."""
        entry = (domain.lower(), name, password)
        self.credentials.append(entry)

    def clear(self):
        """Forget every stored entry."""
        self.credentials = []

    def iter(self, domain):
        """Yield (name, password) for each entry that applies to domain.

        An entry applies when it was stored without a domain or when its
        domain equals the one asked for.
        """
        for entry in self.credentials:
            (cdomain, name, password) = entry
            if cdomain != "" and domain != cdomain:
                continue
            yield (name, password)

class KeyCerts(Credentials):
    """Identical to Credentials except that
    name/password are mapped to key/cert."""

727

728

729

class ProxyInfo(object):
    """Collect information required to use a proxy."""
    def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None):
        """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX
        constants. For example:

        p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', proxy_port=8000)
        """
        self.proxy_type = proxy_type
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.proxy_rdns = proxy_rdns
        self.proxy_user = proxy_user
        self.proxy_pass = proxy_pass

    def astuple(self):
        """Return the six settings as a tuple, in constructor order."""
        return (self.proxy_type, self.proxy_host, self.proxy_port,
                self.proxy_rdns, self.proxy_user, self.proxy_pass)

    def isgood(self):
        """Return True when both a proxy host and a proxy port are set."""
        host_known = self.proxy_host != None
        port_known = self.proxy_port != None
        return host_known and port_known

745

746

747

class HTTPConnectionWithTimeout(httplib.HTTPConnection):
    """
    HTTPConnection subclass that supports timeouts

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None):
        # timeout and proxy_info are stored on the instance and consumed by
        # connect(); the remaining arguments go to httplib.HTTPConnection.
        httplib.HTTPConnection.__init__(self, host, port, strict)
        self.timeout = timeout
        self.proxy_info = proxy_info

    def connect(self):
        """Connect to the host and port specified in __init__.

        Each address returned by getaddrinfo is tried in turn until one
        connects. Raises ProxiesUnavailableError when a proxy was requested
        but the socks module is missing, and socket.error when no address
        could be connected.
        """
        # Mostly verbatim from httplib.py.
        if self.proxy_info and socks is None:
            raise ProxiesUnavailableError(
                'Proxy support missing but proxy use was requested!')
        # Error reported if the loop below never runs at all.
        msg = "getaddrinfo returns an empty list"
        for res in socket.getaddrinfo(self.host, self.port, 0,
                socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            try:
                if self.proxy_info and self.proxy_info.isgood():
                    self.sock = socks.socksocket(af, socktype, proto)
                    self.sock.setproxy(*self.proxy_info.astuple())
                else:
                    self.sock = socket.socket(af, socktype, proto)
                    self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                # Different from httplib: support timeouts.
                if has_timeout(self.timeout):
                    self.sock.settimeout(self.timeout)
                    # End of difference from httplib.
                if self.debuglevel > 0:
                    print "connect: (%s, %s)" % (self.host, self.port)

                self.sock.connect(sa)
            except socket.error, msg:
                # Remember the error in msg, drop this socket and try the
                # next address.
                if self.debuglevel > 0:
                    print 'connect fail:', (self.host, self.port)
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        if not self.sock:
            # Every address failed: re-raise with the last error seen.
            raise socket.error, msg

797

798

class HTTPSConnectionWithTimeout(httplib.HTTPSConnection):

799

"""

800

This class allows communication via SSL.

801

802

All timeouts are in seconds. If None is passed for timeout then

803

Python's default timeout for sockets will be used. See for example

804

the docs of socket.setdefaulttimeout():

805

http://docs.python.org/library/socket.html#socket.setdefaulttimeout

806

"""

807

def __init__(self, host, port=None, key_file=None, cert_file=None,

808

strict=None, timeout=None, proxy_info=None,

809

ca_certs=None, disable_ssl_certificate_validation=False):

810

httplib.HTTPSConnection.__init__(self, host, port=port, key_file=key_file,

811

cert_file=cert_file, strict=strict)

812

self.timeout = timeout

813

self.proxy_info = proxy_info

814

if ca_certs is None:

815

ca_certs = CA_CERTS

816

self.ca_certs = ca_certs

817

self.disable_ssl_certificate_validation = \

818

disable_ssl_certificate_validation

819

820

# The following two methods were adapted from https_wrapper.py, released

821

# with the Google Appengine SDK at

822

# http://googleappengine.googlecode.com/svn-history/r136/trunk/python/google/appengine/tools/https_wrapper.py

823

# under the following license:

824

#

825

826

#

827

# Licensed under the Apache License, Version 2.0 (the "License");

828

# you may not use this file except in compliance with the License.

829

# You may obtain a copy of the License at

830

#

831

# http://www.apache.org/licenses/LICENSE-2.0

832

#

833

# Unless required by applicable law or agreed to in writing, software

834

# distributed under the License is distributed on an "AS IS" BASIS,

835

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

836

# See the License for the specific language governing permissions and

837

# limitations under the License.

838

#

839

840

def _GetValidHostsForCert(self, cert):

841

"""Returns a list of valid host globs for an SSL certificate.

842

843

Args:

844

cert: A dictionary representing an SSL certificate.

845

Returns:

846

list: A list of valid host globs.

847

"""

848

if 'subjectAltName' in cert:

849

return [x[1] for x in cert['subjectAltName']

850

if x[0].lower() == 'dns']

851

else:

852

return [x[0][1] for x in cert['subject']

853

if x[0][0].lower() == 'commonname']

854

855

def _ValidateCertificateHostname(self, cert, hostname):

856

"""Validates that a given hostname is valid for an SSL certificate.

857

858

Args:

859

cert: A dictionary representing an SSL certificate.

860

hostname: The hostname to test.

861

Returns:

862

bool: Whether or not the hostname is valid for this certificate.

863

"""

864

hosts = self._GetValidHostsForCert(cert)

865

for host in hosts:

866

host_re = host.replace('.', '\.').replace('*', '[^.]*')

867

if re.search('^%s$' % (host_re,), hostname, re.I):

868

return True

869

return False

870

871

def connect(self):

872

"Connect to a host on a given (SSL) port."

873

874

msg = "getaddrinfo returns an empty list"

875

for family, socktype, proto, canonname, sockaddr in socket.getaddrinfo(

876

self.host, self.port, 0, socket.SOCK_STREAM):

877

try:

878

if self.proxy_info and self.proxy_info.isgood():

879

sock = socks.socksocket(family, socktype, proto)

880

sock.setproxy(*self.proxy_info.astuple())

881

else:

882

sock = socket.socket(family, socktype, proto)

883

sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

884

885

if has_timeout(self.timeout):

886

sock.settimeout(self.timeout)

887

sock.connect((self.host, self.port))

888

self.sock =_ssl_wrap_socket(

889

sock, self.key_file, self.cert_file,

890

self.disable_ssl_certificate_validation, self.ca_certs)

891

if self.debuglevel > 0:

892

print "connect: (%s, %s)" % (self.host, self.port)

893

if not self.disable_ssl_certificate_validation:

894

cert = self.sock.getpeercert()

895

hostname = self.host.split(':', 0)[0]

896

if not self._ValidateCertificateHostname(cert, hostname):

897

raise CertificateHostnameMismatch(

898

'Server presented certificate that does not match '

899

'host %s: %s' % (hostname, cert), hostname, cert)

900

except ssl_SSLError, e:

901

if sock:

902

sock.close()

903

if self.sock:

904

self.sock.close()

905

self.sock = None

906

# Unfortunately the ssl module doesn't seem to provide any way

907

# to get at more detailed error information, in particular

908

# whether the error is due to certificate validation or

909

# something else (such as SSL protocol mismatch).

910

if e.errno == ssl.SSL_ERROR_SSL:

911

raise SSLHandshakeError(e)

912

else:

913

raise

914

except (socket.timeout, socket.gaierror):

915

raise

916

except socket.error, msg:

917

if self.debuglevel > 0:

918

print 'connect fail:', (self.host, self.port)

919

if self.sock:

920

self.sock.close()

921

self.sock = None

922

continue

923

break

924

if not self.sock:

925

raise socket.error, msg

926

927

# Maps a URI scheme to the connection class Http.request() instantiates
# when the caller does not supply a connection_type.
SCHEME_TO_CONNECTION = {
    'http': HTTPConnectionWithTimeout,
    'https': HTTPSConnectionWithTimeout
    }

931

932

# Use a different connection object for Google App Engine

933

try:
    from google.appengine.api.urlfetch import fetch
    from google.appengine.api.urlfetch import InvalidURLError
    from google.appengine.api.urlfetch import DownloadError
    from google.appengine.api.urlfetch import ResponseTooLargeError
    from google.appengine.api.urlfetch import SSLCertificateError


    class ResponseDict(dict):
        """Is a dictionary that also has a read() method, so
        that it can pass itself off as an httplib.HTTPResponse()."""
        def read(self):
            # Placeholder: AppEngineHttpConnection.request() installs the
            # real read() on each instance it creates.
            pass


    class AppEngineHttpConnection(object):
        """Emulates an httplib.HTTPConnection object, but actually uses the Google
        App Engine urlfetch library. This allows the timeout to be properly used on
        Google App Engine, and avoids using httplib, which on Google App Engine is
        just another wrapper around urlfetch.
        """
        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     strict=None, timeout=None, proxy_info=None, ca_certs=None,
                     disable_certificate_validation=False):
            self.host = host
            self.port = port
            self.timeout = timeout
            # Client certificates, proxies and custom CA bundles cannot be
            # expressed through the fetch() call used below.
            if key_file or cert_file or proxy_info or ca_certs:
                raise NotSupportedOnThisPlatform()
            self.response = None
            self.scheme = 'http'
            self.validate_certificate = not disable_certificate_validation
            # Http._conn_request() only calls connect() when sock is None;
            # a non-None value makes it skip that step.
            self.sock = True

        def request(self, method, url, body, headers):
            """Perform the fetch immediately and keep the result for getresponse()."""
            # Calculate the absolute URI, which fetch requires
            netloc = self.host
            if self.port:
                netloc = '%s:%s' % (self.host, self.port)
            absolute_uri = '%s://%s%s' % (self.scheme, netloc, url)
            try:
                response = fetch(absolute_uri, payload=body, method=method,
                    headers=headers, allow_truncated=False, follow_redirects=False,
                    deadline=self.timeout,
                    validate_certificate=self.validate_certificate)
                self.response = ResponseDict(response.headers)
                self.response['status'] = response.status_code
                setattr(self.response, 'read', lambda : response.content)

            # Make sure the exceptions raised match the exceptions expected.
            except InvalidURLError:
                raise socket.gaierror('')
            except (DownloadError, ResponseTooLargeError, SSLCertificateError):
                raise httplib.HTTPException()

        def getresponse(self):
            """Return the ResponseDict built by the last request(), or None."""
            return self.response

        def set_debuglevel(self, level):
            # No-op: present only for httplib.HTTPConnection compatibility.
            pass

        def connect(self):
            # No-op: the fetch happens in request().
            pass

        def close(self):
            # No-op: there is no socket to close.
            pass


    class AppEngineHttpsConnection(AppEngineHttpConnection):
        """Same as AppEngineHttpConnection, but for HTTPS URIs."""
        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     strict=None, timeout=None, proxy_info=None, ca_certs=None,
                     disable_certificate_validation=False):
            # Accept and forward the same trailing options as the base class
            # so certificate validation can be controlled for HTTPS too; both
            # have defaults, so existing callers are unaffected.
            AppEngineHttpConnection.__init__(self, host, port, key_file, cert_file,
                strict, timeout, proxy_info, ca_certs,
                disable_certificate_validation)
            self.scheme = 'https'

    # Update the connection classes to use the Google App Engine specific ones.
    SCHEME_TO_CONNECTION = {
        'http': AppEngineHttpConnection,
        'https': AppEngineHttpsConnection
        }

except ImportError:
    pass

1017

1018

1019

class Http(object):

1020

"""An HTTP client that handles:

1021

- all methods

1022

- caching

1023

- ETags

1024

- compression,

1025

- HTTPS

1026

- Basic

1027

- Digest

1028

- WSSE

1029

1030

and more.

1031

"""

1032

def __init__(self, cache=None, timeout=None, proxy_info=None,

1033

ca_certs=None, disable_ssl_certificate_validation=False):

1034

"""

1035

The value of proxy_info is a ProxyInfo instance.

1036

1037

If 'cache' is a string then it is used as a directory name for

1038

a disk cache. Otherwise it must be an object that supports the

1039

same interface as FileCache.

1040

1041

All timeouts are in seconds. If None is passed for timeout

1042

then Python's default timeout for sockets will be used. See

1043

for example the docs of socket.setdefaulttimeout():

1044

http://docs.python.org/library/socket.html#socket.setdefaulttimeout

1045

1046

ca_certs is the path of a file containing root CA certificates for SSL

1047

server certificate validation. By default, a CA cert file bundled with

1048

httplib2 is used.

1049

1050

If disable_ssl_certificate_validation is true, SSL cert validation will

1051

not be performed.

1052

"""

1053

self.proxy_info = proxy_info

1054

self.ca_certs = ca_certs

1055

self.disable_ssl_certificate_validation = \

1056

disable_ssl_certificate_validation

1057

1058

# Map domain name to an httplib connection

1059

self.connections = {}

1060

# The location of the cache, for now a directory

1061

# where cached responses are held.

1062

if cache and isinstance(cache, basestring):

1063

self.cache = FileCache(cache)

1064

else:

1065

self.cache = cache

1066

1067

# Name/password

1068

self.credentials = Credentials()

1069

1070

# Key/cert

1071

self.certificates = KeyCerts()

1072

1073

# authorization objects

1074

self.authorizations = []

1075

1076

# If set to False then no redirects are followed, even safe ones.

1077

self.follow_redirects = True

1078

1079

# Which HTTP methods do we apply optimistic concurrency to, i.e.

1080

# which methods get an "if-match:" etag header added to them.

1081

self.optimistic_concurrency_methods = ["PUT", "PATCH"]

1082

1083

# If 'follow_redirects' is True, and this is set to True then

1084

# all redirecs are followed, including unsafe ones.

1085

self.follow_all_redirects = False

1086

1087

self.ignore_etag = False

1088

1089

self.force_exception_to_status_code = False

1090

1091

self.timeout = timeout

1092

1093

def _auth_from_challenge(self, host, request_uri, headers, response, content):

1094

"""A generator that creates Authorization objects

1095

that can be applied to requests.

1096

"""

1097

challenges = _parse_www_authenticate(response, 'www-authenticate')

1098

for cred in self.credentials.iter(host):

1099

for scheme in AUTH_SCHEME_ORDER:

1100

if challenges.has_key(scheme):

1101

yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)

1102

1103

def add_credentials(self, name, password, domain=""):

1104

"""Add a name and password that will be used

1105

any time a request requires authentication."""

1106

self.credentials.add(name, password, domain)

1107

1108

def add_certificate(self, key, cert, domain):

1109

"""Add a key and cert that will be used

1110

any time a request requires authentication."""

1111

self.certificates.add(key, cert, domain)

1112

1113

def clear_credentials(self):

1114

"""Remove all the names and passwords

1115

that are used for authentication"""

1116

self.credentials.clear()

1117

self.authorizations = []

1118

1119

def _conn_request(self, conn, request_uri, method, body, headers):

1120

for i in range(2):

1121

try:

1122

if conn.sock is None:

1123

conn.connect()

1124

conn.request(method, request_uri, body, headers)

1125

except socket.timeout:

1126

raise

1127

except socket.gaierror:

1128

conn.close()

1129

raise ServerNotFoundError("Unable to find the server at %s" % conn.host)

1130

except ssl_SSLError:

1131

conn.close()

1132

raise

1133

except socket.error, e:

1134

err = 0

1135

if hasattr(e, 'args'):

1136

err = getattr(e, 'args')[0]

1137

else:

1138

err = e.errno

1139

if err == errno.ECONNREFUSED: # Connection refused

1140

raise

1141

except httplib.HTTPException:

1142

# Just because the server closed the connection doesn't apparently mean

1143

# that the server didn't send a response.

1144

if conn.sock is None:

1145

if i == 0:

1146

conn.close()

1147

conn.connect()

1148

continue

1149

else:

1150

conn.close()

1151

raise

1152

if i == 0:

1153

conn.close()

1154

conn.connect()

1155

continue

1156

pass

1157

try:

1158

response = conn.getresponse()

1159

except (socket.error, httplib.HTTPException):

1160

if i == 0:

1161

conn.close()

1162

conn.connect()

1163

continue

1164

else:

1165

raise

1166

else:

1167

content = ""

1168

if method == "HEAD":

1169

response.close()

1170

else:

1171

content = response.read()

1172

response = Response(response)

1173

if method != "HEAD":

1174

content = _decompressContent(response, content)

1175

break

1176

return (response, content)

1177

1178

1179

def _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey):

1180

"""Do the actual request using the connection object

1181

and also follow one level of redirects if necessary"""

1182

1183

auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]

1184

auth = auths and sorted(auths)[0][1] or None

1185

if auth:

1186

auth.request(method, request_uri, headers, body)

1187

1188

(response, content) = self._conn_request(conn, request_uri, method, body, headers)

1189

1190

if auth:

1191

if auth.response(response, body):

1192

auth.request(method, request_uri, headers, body)

1193

(response, content) = self._conn_request(conn, request_uri, method, body, headers )

1194

response._stale_digest = 1

1195

1196

if response.status == 401:

1197

for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):

1198

authorization.request(method, request_uri, headers, body)

1199

(response, content) = self._conn_request(conn, request_uri, method, body, headers, )

1200

if response.status != 401:

1201

self.authorizations.append(authorization)

1202

authorization.response(response, body)

1203

break

1204

1205

if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303):

1206

if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:

1207

# Pick out the location header and basically start from the beginning

1208

# remembering first to strip the ETag header and decrement our 'depth'

1209

if redirections:

1210

if not response.has_key('location') and response.status != 300:

1211

raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content)

1212

# Fix-up relative redirects (which violate an RFC 2616 MUST)

1213

if response.has_key('location'):

1214

location = response['location']

1215

(scheme, authority, path, query, fragment) = parse_uri(location)

1216

if authority == None:

1217

response['location'] = urlparse.urljoin(absolute_uri, location)

1218

if response.status == 301 and method in ["GET", "HEAD"]:

1219

response['-x-permanent-redirect-url'] = response['location']

1220

if not response.has_key('content-location'):

1221

response['content-location'] = absolute_uri

1222

_updateCache(headers, response, content, self.cache, cachekey)

1223

if headers.has_key('if-none-match'):

1224

del headers['if-none-match']

1225

if headers.has_key('if-modified-since'):

1226

del headers['if-modified-since']

1227

if response.has_key('location'):

1228

location = response['location']

1229

old_response = copy.deepcopy(response)

1230

if not old_response.has_key('content-location'):

1231

old_response['content-location'] = absolute_uri

1232

redirect_method = method

1233

if response.status in [302, 303]:

1234

redirect_method = "GET"

1235

body = None

1236

(response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1)

1237

response.previous = old_response

1238

else:

1239

raise RedirectLimit("Redirected more times than rediection_limit allows.", response, content)

1240

elif response.status in [200, 203] and method in ["GET", "HEAD"]:

1241

# Don't cache 206's since we aren't going to handle byte range requests

1242

if not response.has_key('content-location'):

1243

response['content-location'] = absolute_uri

1244

_updateCache(headers, response, content, self.cache, cachekey)

1245

1246

return (response, content)

1247

1248

def _normalize_headers(self, headers):

1249

return _normalize_headers(headers)

1250

1251

# Need to catch and rebrand some exceptions

1252

# Then need to optionally turn all exceptions into status codes

1253

# including all socket.* and httplib.* exceptions.

1254

1255

1256

def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None):

1257

""" Performs a single HTTP request.

1258

The 'uri' is the URI of the HTTP resource and can begin

1259

with either 'http' or 'https'. The value of 'uri' must be an absolute URI.

1260

1261

The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc.

1262

There is no restriction on the methods allowed.

1263

1264

The 'body' is the entity body to be sent with the request. It is a string

1265

object.

1266

1267

Any extra headers that are to be sent with the request should be provided in the

1268

'headers' dictionary.

1269

1270

The maximum number of redirect to follow before raising an

1271

exception is 'redirections. The default is 5.

1272

1273

The return value is a tuple of (response, content), the first

1274

being and instance of the 'Response' class, the second being

1275

a string that contains the response entity body.

1276

"""

1277

try:

1278

if headers is None:

1279

headers = {}

1280

else:

1281

headers = self._normalize_headers(headers)

1282

1283

if not headers.has_key('user-agent'):

1284

headers['user-agent'] = "Python-httplib2/%s (gzip)" % __version__

1285

1286

uri = iri2uri(uri)

1287

1288

(scheme, authority, request_uri, defrag_uri) = urlnorm(uri)

1289

domain_port = authority.split(":")[0:2]

1290

if len(domain_port) == 2 and domain_port[1] == '443' and scheme == 'http':

1291

scheme = 'https'

1292

authority = domain_port[0]

1293

1294

conn_key = scheme+":"+authority

1295

if conn_key in self.connections:

1296

conn = self.connections[conn_key]

1297

else:

1298

if not connection_type:

1299

connection_type = SCHEME_TO_CONNECTION[scheme]

1300

certs = list(self.certificates.iter(authority))

1301

if issubclass(connection_type, HTTPSConnectionWithTimeout):

1302

if certs:

1303

conn = self.connections[conn_key] = connection_type(

1304

authority, key_file=certs[0][0],

1305

cert_file=certs[0][1], timeout=self.timeout,

1306

proxy_info=self.proxy_info,

1307

ca_certs=self.ca_certs,

1308

disable_ssl_certificate_validation=

1309

self.disable_ssl_certificate_validation)

1310

else:

1311

conn = self.connections[conn_key] = connection_type(

1312

authority, timeout=self.timeout,

1313

proxy_info=self.proxy_info,

1314

ca_certs=self.ca_certs,

1315

disable_ssl_certificate_validation=

1316

self.disable_ssl_certificate_validation)

1317

else:

1318

conn = self.connections[conn_key] = connection_type(

1319

authority, timeout=self.timeout,

1320

proxy_info=self.proxy_info)

1321

conn.set_debuglevel(debuglevel)

1322

1323

if 'range' not in headers and 'accept-encoding' not in headers:

1324

headers['accept-encoding'] = 'gzip, deflate'

1325

1326

info = email.Message.Message()

1327

cached_value = None

1328

if self.cache:

1329

cachekey = defrag_uri

1330

cached_value = self.cache.get(cachekey)

1331

if cached_value:

1332

# info = email.message_from_string(cached_value)

1333

#

1334

# Need to replace the line above with the kludge below

1335

# to fix the non-existent bug not fixed in this

1336

# bug report: http://mail.python.org/pipermail/python-bugs-list/2005-September/030289.html

1337

try:

1338

info, content = cached_value.split('\r\n\r\n', 1)

1339

feedparser = email.FeedParser.FeedParser()

1340

feedparser.feed(info)

1341

info = feedparser.close()

1342

feedparser._parse = None

1343

except IndexError:

1344

self.cache.delete(cachekey)

1345

cachekey = None

1346

cached_value = None

1347

else:

1348

cachekey = None

1349

1350

if method in self.optimistic_concurrency_methods and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers:

1351

# http://www.w3.org/1999/04/Editing/

1352

headers['if-match'] = info['etag']

1353

1354

if method not in ["GET", "HEAD"] and self.cache and cachekey:

1355

# RFC 2616 Section 13.10

1356

self.cache.delete(cachekey)

1357

1358

# Check the vary header in the cache to see if this request

1359

# matches what varies in the cache.

1360

if method in ['GET', 'HEAD'] and 'vary' in info:

1361

vary = info['vary']

1362

vary_headers = vary.lower().replace(' ', '').split(',')

1363

for header in vary_headers:

1364

key = '-varied-%s' % header

1365

value = info[key]

1366

if headers.get(header, None) != value:

1367

cached_value = None

1368

break

1369

1370

if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:

1371

if info.has_key('-x-permanent-redirect-url'):

1372

# Should cached permanent redirects be counted in our redirection count? For now, yes.

1373

if redirections <= 0:

1374

raise RedirectLimit("Redirected more times than rediection_limit allows.", {}, "")

1375

(response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)

1376

response.previous = Response(info)

1377

response.previous.fromcache = True

1378

else:

1379

# Determine our course of action:

1380

# Is the cached entry fresh or stale?

1381

# Has the client requested a non-cached response?

1382

#

1383

# There seems to be three possible answers:

1384

# 1. [FRESH] Return the cache entry w/o doing a GET

1385

# 2. [STALE] Do the GET (but add in cache validators if available)

1386

# 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request

1387

entry_disposition = _entry_disposition(info, headers)

1388

1389

if entry_disposition == "FRESH":

1390

if not cached_value:

1391

info['status'] = '504'

1392

content = ""

1393

response = Response(info)

1394

if cached_value:

1395

response.fromcache = True

1396

return (response, content)

1397

1398

if entry_disposition == "STALE":

1399

if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:

1400

headers['if-none-match'] = info['etag']

1401

if info.has_key('last-modified') and not 'last-modified' in headers:

1402

headers['if-modified-since'] = info['last-modified']

1403

elif entry_disposition == "TRANSPARENT":

1404

pass

1405

1406

(response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)

1407

1408

if response.status == 304 and method == "GET":

1409

# Rewrite the cache entry with the new end-to-end headers

1410

# Take all headers that are in response

1411

# and overwrite their values in info.

1412

# unless they are hop-by-hop, or are listed in the connection header.

1413

1414

for key in _get_end2end_headers(response):

1415

info[key] = response[key]

1416

merged_response = Response(info)

1417

if hasattr(response, "_stale_digest"):

1418

merged_response._stale_digest = response._stale_digest

1419

_updateCache(headers, merged_response, content, self.cache, cachekey)

1420

response = merged_response

1421

response.status = 200

1422

response.fromcache = True

1423

1424

elif response.status == 200:

1425

content = new_content

1426

else:

1427

self.cache.delete(cachekey)

1428

content = new_content

1429

else:

1430

cc = _parse_cache_control(headers)

1431

if cc.has_key('only-if-cached'):

1432

info['status'] = '504'

1433

response = Response(info)

1434

content = ""

1435

else:

1436

(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)

1437

except Exception, e:

1438

if self.force_exception_to_status_code:

1439

if isinstance(e, HttpLib2ErrorWithResponse):

1440

response = e.response

1441

content = e.content

1442

response.status = 500

1443

response.reason = str(e)

1444

elif isinstance(e, socket.timeout):

1445

content = "Request Timeout"

1446

response = Response( {

1447

"content-type": "text/plain",

1448

"status": "408",

1449

"content-length": len(content)

1450

})

1451

response.reason = "Request Timeout"

1452

else:

1453

content = str(e)

1454

response = Response( {

1455

"content-type": "text/plain",

1456

"status": "400",

1457

"content-length": len(content)

1458

})

1459

response.reason = "Bad Request"

1460

else:

1461

raise

1462

1463

1464

return (response, content)

1465

1466

1467

1468

class Response(dict):

1469

"""An object more like email.Message than httplib.HTTPResponse."""

1470

1471

"""Is this response from our local cache"""

1472

fromcache = False

1473

1474

"""HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1. """

1475

version = 11

1476

1477

"Status code returned by server. "

1478

status = 200

1479

1480

"""Reason phrase returned by server."""

1481

reason = "Ok"

1482

1483

previous = None

1484

1485

def __init__(self, info):

1486

# info is either an email.Message or

1487

# an httplib.HTTPResponse object.

1488

if isinstance(info, httplib.HTTPResponse):

1489

for key, value in info.getheaders():

1490

self[key.lower()] = value

1491

self.status = info.status

1492

self['status'] = str(self.status)

1493

self.reason = info.reason

1494

self.version = info.version

1495

elif isinstance(info, email.Message.Message):

1496

for key, value in info.items():

1497

self[key] = value

1498

self.status = int(self['status'])

1499

else:

1500

for key, value in info.iteritems():

1501

self[key] = value

1502

self.status = int(self.get('status', self.status))

1503

1504

1505

def __getattr__(self, name):

1506

if name == 'dict':

1507

return self

1508

else:

1509

raise AttributeError, name