1
from __future__ import generators
5
A caching http interface that supports ETags and gzip
8
Requires Python 2.3 or later
11
2007-08-18, Rick: Modified so it's able to use a socks proxy if needed.
15
__author__ = "Joe Gregorio (joe@bitworking.org)"
16
__copyright__ = "Copyright 2006, Joe Gregorio"
17
__contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
19
"Xavier Verges Farrero",
32
import email.FeedParser
44
# Remove deprecated warning in Python 2.6.
46
from hashlib import sha1 as _sha, md5 as _md5
53
from gettext import gettext as _
61
# Build the appropriate socket wrapper for ssl
63
import ssl # python 2.6
64
_ssl_wrap_socket = ssl.wrap_socket
66
def _ssl_wrap_socket(sock, key_file, cert_file):
    """Pre-2.6 fallback: wrap 'sock' in SSL via the legacy socket.ssl API.

    Used only when the 'ssl' module (Python 2.6+) is unavailable; returns
    an httplib.FakeSocket presenting the wrapped connection.
    """
    ssl_conn = socket.ssl(sock, key_file, cert_file)
    return httplib.FakeSocket(sock, ssl_conn)
71
if sys.version_info >= (2,3):
72
from iri2uri import iri2uri
77
def has_timeout(timeout): # python 2.6
    """Return True if 'timeout' is an explicit timeout value.

    None never counts as a timeout; on Python 2.6+ the socket module's
    _GLOBAL_DEFAULT_TIMEOUT sentinel is likewise treated as "not set".
    """
    if timeout is None:
        return False
    if hasattr(socket, '_GLOBAL_DEFAULT_TIMEOUT'):
        return timeout is not socket._GLOBAL_DEFAULT_TIMEOUT
    return True
82
__all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error',
83
'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
84
'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError',
88
# The httplib debug level, set to a non-zero value to get debug output
93
if sys.version_info < (2,4):
99
def HTTPResponse__getheaders(self):
100
"""Return list of (header, value) tuples."""
102
raise httplib.ResponseNotReady()
103
return self.msg.items()
105
if not hasattr(httplib.HTTPResponse, 'getheaders'):
106
httplib.HTTPResponse.getheaders = HTTPResponse__getheaders
108
# All exceptions raised here derive from HttpLib2Error
class HttpLib2Error(Exception):
    """Base class for every exception this module raises."""
    pass

# Some exceptions can be caught and optionally
# be turned back into responses.
class HttpLib2ErrorWithResponse(HttpLib2Error):
    """An error that also carries the offending response and entity body."""
    def __init__(self, desc, response, content):
        HttpLib2Error.__init__(self, desc)
        self.response = response
        self.content = content

class RedirectMissingLocation(HttpLib2ErrorWithResponse):
    """A 3xx redirect arrived without a Location: header."""
    pass

class RedirectLimit(HttpLib2ErrorWithResponse):
    """More redirects were followed than the caller allowed."""
    pass

class FailedToDecompressContent(HttpLib2ErrorWithResponse):
    """The body claimed a compression encoding but would not decompress."""
    pass

class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse):
    """The server requested a Digest auth option we do not support."""
    pass

class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse):
    """The server requested an HMACDigest auth option we do not support."""
    pass

class RelativeURIError(HttpLib2Error):
    """A relative, rather than absolute, URI was supplied."""
    pass

class ServerNotFoundError(HttpLib2Error):
    """The host could not be resolved."""
    pass
132
# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)
134
# Pluggable cache storage (supports storing the cache in
135
# flat files by default. We need a plug-in architecture
136
# that can support Berkeley DB and Squid)
139
# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
140
# Does not handle Cache-Control: max-stale
141
# Does not use Age: headers when calculating cache freshness.
144
# The number of redirections to follow before giving up.
145
# Note that only GET redirects are automatically followed.
146
# Will also honor 301 requests by saving that info and never
147
# requesting that URI again.
148
DEFAULT_MAX_REDIRECTS = 5
150
# Which headers are hop-by-hop headers by default
151
HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade']
153
def _get_end2end_headers(response):
154
hopbyhop = list(HOP_BY_HOP)
155
hopbyhop.extend([x.strip() for x in response.get('connection', '').split(',')])
156
return [header for header in response.keys() if header not in hopbyhop]
158
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
161
"""Parses a URI using the regex given in Appendix B of RFC 3986.
163
(scheme, authority, path, query, fragment) = parse_uri(uri)
165
groups = URI.match(uri).groups()
166
return (groups[1], groups[3], groups[4], groups[6], groups[8])
169
(scheme, authority, path, query, fragment) = parse_uri(uri)
170
if not scheme or not authority:
171
raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
172
authority = authority.lower()
173
scheme = scheme.lower()
176
# Could do syntax based normalization of the URI before
177
# computing the digest. See Section 6.2.2 of Std 66.
178
request_uri = query and "?".join([path, query]) or path
179
scheme = scheme.lower()
180
defrag_uri = scheme + "://" + authority + request_uri
181
return scheme, authority, request_uri, defrag_uri
184
# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
185
re_url_scheme = re.compile(r'^\w+://')
186
re_slash = re.compile(r'[?/:|]+')
188
def safename(filename):
189
"""Return a filename suitable for the cache.
191
Strips dangerous and common characters to create a filename we
192
can use to store the cache in.
196
if re_url_scheme.match(filename):
197
if isinstance(filename,str):
198
filename = filename.decode('utf-8')
199
filename = filename.encode('idna')
201
filename = filename.encode('idna')
204
if isinstance(filename,unicode):
205
filename=filename.encode('utf-8')
206
filemd5 = _md5(filename).hexdigest()
207
filename = re_url_scheme.sub("", filename)
208
filename = re_slash.sub(",", filename)
210
# limit length of filename
211
if len(filename)>200:
212
filename=filename[:200]
213
return ",".join((filename, filemd5))
215
NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+')
216
def _normalize_headers(headers):
217
return dict([ (key.lower(), NORMALIZE_SPACE.sub(value, ' ').strip()) for (key, value) in headers.iteritems()])
219
def _parse_cache_control(headers):
221
if headers.has_key('cache-control'):
222
parts = headers['cache-control'].split(',')
223
parts_with_args = [tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=")]
224
parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")]
225
retval = dict(parts_with_args + parts_wo_args)
228
# Whether to use a strict mode to parse WWW-Authenticate headers
229
# Might lead to bad results in case of ill-formed header value,
230
# so disabled by default, falling back to relaxed parsing.
231
# Set to true to turn on, useful for testing servers.
232
USE_WWW_AUTH_STRICT_PARSING = 0
235
# [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" as defined by HTTP
236
# "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space
237
# Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both:
238
# \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?
239
WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$")
240
WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$")
241
UNQUOTE_PAIRS = re.compile(r'\\(.)')
242
def _parse_www_authenticate(headers, headername='www-authenticate'):
243
"""Returns a dictionary of dictionaries, one dict
246
if headers.has_key(headername):
247
authenticate = headers[headername].strip()
248
www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED
250
# Break off the scheme at the beginning of the line
251
if headername == 'authentication-info':
252
(auth_scheme, the_rest) = ('digest', authenticate)
254
(auth_scheme, the_rest) = authenticate.split(" ", 1)
255
# Now loop over all the key value pairs that come after the scheme,
256
# being careful not to roll into the next scheme
257
match = www_auth.search(the_rest)
260
if match and len(match.groups()) == 3:
261
(key, value, the_rest) = match.groups()
262
auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')])
263
match = www_auth.search(the_rest)
264
retval[auth_scheme.lower()] = auth_params
265
authenticate = the_rest.strip()
269
def _entry_disposition(response_headers, request_headers):
270
"""Determine freshness from the Date, Expires and Cache-Control headers.
272
We don't handle the following:
274
1. Cache-Control: max-stale
275
2. Age: headers are not used in the calculations.
277
Not that this algorithm is simpler than you might think
278
because we are operating as a private (non-shared) cache.
279
This lets us ignore 's-maxage'. We can also ignore
280
'proxy-invalidate' since we aren't a proxy.
281
We will never return a stale document as
282
fresh as a design decision, and thus the non-implementation
283
of 'max-stale'. This also lets us safely ignore 'must-revalidate'
284
since we operate as if every server has sent 'must-revalidate'.
285
Since we are private we get to ignore both 'public' and
286
'private' parameters. We also ignore 'no-transform' since
287
we don't do any transformations.
288
The 'no-store' parameter is handled at a higher level.
289
So the only Cache-Control parameters we look at are:
298
cc = _parse_cache_control(request_headers)
299
cc_response = _parse_cache_control(response_headers)
301
if request_headers.has_key('pragma') and request_headers['pragma'].lower().find('no-cache') != -1:
302
retval = "TRANSPARENT"
303
if 'cache-control' not in request_headers:
304
request_headers['cache-control'] = 'no-cache'
305
elif cc.has_key('no-cache'):
306
retval = "TRANSPARENT"
307
elif cc_response.has_key('no-cache'):
309
elif cc.has_key('only-if-cached'):
311
elif response_headers.has_key('date'):
312
date = calendar.timegm(email.Utils.parsedate_tz(response_headers['date']))
314
current_age = max(0, now - date)
315
if cc_response.has_key('max-age'):
317
freshness_lifetime = int(cc_response['max-age'])
319
freshness_lifetime = 0
320
elif response_headers.has_key('expires'):
321
expires = email.Utils.parsedate_tz(response_headers['expires'])
323
freshness_lifetime = 0
325
freshness_lifetime = max(0, calendar.timegm(expires) - date)
327
freshness_lifetime = 0
328
if cc.has_key('max-age'):
330
freshness_lifetime = int(cc['max-age'])
332
freshness_lifetime = 0
333
if cc.has_key('min-fresh'):
335
min_fresh = int(cc['min-fresh'])
338
current_age += min_fresh
339
if freshness_lifetime > current_age:
343
def _decompressContent(response, new_content):
344
content = new_content
346
encoding = response.get('content-encoding', None)
347
if encoding in ['gzip', 'deflate']:
348
if encoding == 'gzip':
349
content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read()
350
if encoding == 'deflate':
351
content = zlib.decompress(content)
352
response['content-length'] = str(len(content))
353
# Record the historical presence of the encoding in a way the won't interfere.
354
response['-content-encoding'] = response['content-encoding']
355
del response['content-encoding']
358
raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content)
361
def _updateCache(request_headers, response_headers, content, cache, cachekey):
363
cc = _parse_cache_control(request_headers)
364
cc_response = _parse_cache_control(response_headers)
365
if cc.has_key('no-store') or cc_response.has_key('no-store'):
366
cache.delete(cachekey)
368
info = email.Message.Message()
369
for key, value in response_headers.iteritems():
370
if key not in ['status','content-encoding','transfer-encoding']:
373
# Add annotations to the cache to indicate what headers
374
# are variant for this request.
375
vary = response_headers.get('vary', None)
377
vary_headers = vary.lower().replace(' ', '').split(',')
378
for header in vary_headers:
379
key = '-varied-%s' % header
381
info[key] = request_headers[header]
385
status = response_headers.status
389
status_header = 'status: %d\r\n' % response_headers.status
391
header_str = info.as_string()
393
header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
394
text = "".join([status_header, header_str, content])
396
cache.set(cachekey, text)
399
dig = _md5("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest()
402
def _wsse_username_token(cnonce, iso_now, password):
    """Compute the WSSE PasswordDigest: Base64(SHA1(nonce + created + password))."""
    raw_digest = _sha("%s%s%s" % (cnonce, iso_now, password)).digest()
    return base64.b64encode(raw_digest).strip()
406
# For credentials we need two things, first
407
# a pool of credentials to try (not necessarily tied to Basic, Digest, etc.)
408
# Then we also need a list of URIs that have already demanded authentication
409
# That list is tricky since sub-URIs can take the same auth, or the
410
# auth scheme may change as you descend the tree.
411
# So we also need each Auth instance to be able to tell us
412
# how close to the 'top' it is.
414
class Authentication(object):
415
def __init__(self, credentials, host, request_uri, headers, response, content, http):
416
(scheme, authority, path, query, fragment) = parse_uri(request_uri)
419
self.credentials = credentials
422
def depth(self, request_uri):
423
(scheme, authority, path, query, fragment) = parse_uri(request_uri)
424
return request_uri[len(self.path):].count("/")
426
def inscope(self, host, request_uri):
427
# XXX Should we normalize the request_uri?
428
(scheme, authority, path, query, fragment) = parse_uri(request_uri)
429
return (host == self.host) and path.startswith(self.path)
431
def request(self, method, request_uri, headers, content):
432
"""Modify the request headers to add the appropriate
433
Authorization header. Over-rise this in sub-classes."""
436
def response(self, response, content):
437
"""Gives us a chance to update with new nonces
438
or such returned from the last authorized response.
439
Over-rise this in sub-classes if necessary.
441
Return TRUE is the request is to be retried, for
442
example Digest may return stale=true.
448
class BasicAuthentication(Authentication):
    """RFC 2617 Basic access authentication: the name/password pair is
    sent base64-encoded on every request."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        userpass = "%s:%s" % self.credentials
        headers['authorization'] = 'Basic ' + base64.b64encode(userpass).strip()
458
class DigestAuthentication(Authentication):
459
"""Only do qop='auth' and MD5, since that
460
is all Apache currently implements"""
461
def __init__(self, credentials, host, request_uri, headers, response, content, http):
462
Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
463
challenge = _parse_www_authenticate(response, 'www-authenticate')
464
self.challenge = challenge['digest']
465
qop = self.challenge.get('qop', 'auth')
466
self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None
467
if self.challenge['qop'] is None:
468
raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop))
469
self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5').upper()
470
if self.challenge['algorithm'] != 'MD5':
471
raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
472
self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]])
473
self.challenge['nc'] = 1
475
def request(self, method, request_uri, headers, content, cnonce = None):
476
"""Modify the request headers"""
477
H = lambda x: _md5(x).hexdigest()
478
KD = lambda s, d: H("%s:%s" % (s, d))
479
A2 = "".join([method, ":", request_uri])
480
self.challenge['cnonce'] = cnonce or _cnonce()
481
request_digest = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (self.challenge['nonce'],
482
'%08x' % self.challenge['nc'],
483
self.challenge['cnonce'],
484
self.challenge['qop'], H(A2)
486
headers['Authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % (
488
self.challenge['realm'],
489
self.challenge['nonce'],
491
self.challenge['algorithm'],
493
self.challenge['qop'],
494
self.challenge['nc'],
495
self.challenge['cnonce'],
497
self.challenge['nc'] += 1
499
def response(self, response, content):
500
if not response.has_key('authentication-info'):
501
challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {})
502
if 'true' == challenge.get('stale'):
503
self.challenge['nonce'] = challenge['nonce']
504
self.challenge['nc'] = 1
507
updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {})
509
if updated_challenge.has_key('nextnonce'):
510
self.challenge['nonce'] = updated_challenge['nextnonce']
511
self.challenge['nc'] = 1
515
class HmacDigestAuthentication(Authentication):
516
"""Adapted from Robert Sayre's code and DigestAuthentication above."""
517
__author__ = "Thomas Broyer (t.broyer@ltgt.net)"
519
def __init__(self, credentials, host, request_uri, headers, response, content, http):
520
Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
521
challenge = _parse_www_authenticate(response, 'www-authenticate')
522
self.challenge = challenge['hmacdigest']
523
# TODO: self.challenge['domain']
524
self.challenge['reason'] = self.challenge.get('reason', 'unauthorized')
525
if self.challenge['reason'] not in ['unauthorized', 'integrity']:
526
self.challenge['reason'] = 'unauthorized'
527
self.challenge['salt'] = self.challenge.get('salt', '')
528
if not self.challenge.get('snonce'):
529
raise UnimplementedHmacDigestAuthOptionError( _("The challenge doesn't contain a server nonce, or this one is empty."))
530
self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1')
531
if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']:
532
raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
533
self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1')
534
if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']:
535
raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." % self.challenge['pw-algorithm']))
536
if self.challenge['algorithm'] == 'HMAC-MD5':
540
if self.challenge['pw-algorithm'] == 'MD5':
541
self.pwhashmod = _md5
543
self.pwhashmod = _sha
544
self.key = "".join([self.credentials[0], ":",
545
self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
546
":", self.challenge['realm']
548
self.key = self.pwhashmod.new(self.key).hexdigest().lower()
550
def request(self, method, request_uri, headers, content):
551
"""Modify the request headers"""
552
keys = _get_end2end_headers(headers)
553
keylist = "".join(["%s " % k for k in keys])
554
headers_val = "".join([headers[k] for k in keys])
555
created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime())
557
request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val)
558
request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
559
headers['Authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % (
561
self.challenge['realm'],
562
self.challenge['snonce'],
570
def response(self, response, content):
571
challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {})
572
if challenge.get('reason') in ['integrity', 'stale']:
577
class WsseAuthentication(Authentication):
578
"""This is thinly tested and should not be relied upon.
579
At this time there isn't any third party server to test against.
580
Blogger and TypePad implemented this algorithm at one point
581
but Blogger has since switched to Basic over HTTPS and
582
TypePad has implemented it wrong, by never issuing a 401
583
challenge but instead requiring your client to telepathically know that
584
their endpoint is expecting WSSE profile="UsernameToken"."""
585
def __init__(self, credentials, host, request_uri, headers, response, content, http):
586
Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
588
def request(self, method, request_uri, headers, content):
589
"""Modify the request headers to add the appropriate
590
Authorization header."""
591
headers['Authorization'] = 'WSSE profile="UsernameToken"'
592
iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
594
password_digest = _wsse_username_token(cnonce, iso_now, self.credentials[1])
595
headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % (
601
class GoogleLoginAuthentication(Authentication):
602
def __init__(self, credentials, host, request_uri, headers, response, content, http):
603
from urllib import urlencode
604
Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
605
challenge = _parse_www_authenticate(response, 'www-authenticate')
606
service = challenge['googlelogin'].get('service', 'xapi')
607
# Bloggger actually returns the service in the challenge
608
# For the rest we guess based on the URI
609
if service == 'xapi' and request_uri.find("calendar") > 0:
611
# No point in guessing Base or Spreadsheet
612
#elif request_uri.find("spreadsheets") > 0:
615
auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent'])
616
resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'})
617
lines = content.split('\n')
618
d = dict([tuple(line.split("=", 1)) for line in lines if line])
619
if resp.status == 403:
622
self.Auth = d['Auth']
624
def request(self, method, request_uri, headers, content):
625
"""Modify the request headers to add the appropriate
626
Authorization header."""
627
headers['authorization'] = 'GoogleLogin Auth=' + self.Auth
630
# Maps each WWW-Authenticate scheme name to the class that implements it.
AUTH_SCHEME_CLASSES = {
    "basic": BasicAuthentication,
    "wsse": WsseAuthentication,
    "digest": DigestAuthentication,
    "hmacdigest": HmacDigestAuthentication,
    "googlelogin": GoogleLoginAuthentication
}

# Strongest-first order in which challenge schemes are tried.
AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
640
class FileCache(object):
641
"""Uses a local directory as a store for cached files.
642
Not really safe to use if multiple threads or processes are going to
643
be running on the same cache.
645
def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
648
if not os.path.exists(cache):
649
os.makedirs(self.cache)
653
cacheFullPath = os.path.join(self.cache, self.safe(key))
655
f = file(cacheFullPath, "rb")
662
def set(self, key, value):
663
cacheFullPath = os.path.join(self.cache, self.safe(key))
664
f = file(cacheFullPath, "wb")
668
def delete(self, key):
669
cacheFullPath = os.path.join(self.cache, self.safe(key))
670
if os.path.exists(cacheFullPath):
671
os.remove(cacheFullPath)
673
class Credentials(object):
    """A registry of (domain, name, password) triples used to answer
    authentication challenges. An empty domain matches every host."""
    def __init__(self):
        self.credentials = []

    def add(self, name, password, domain=""):
        # Domains are stored lower-cased so matching is case-insensitive.
        self.credentials.append((domain.lower(), name, password))

    def clear(self):
        self.credentials = []

    def iter(self, domain):
        # Yield every (name, password) pair whose stored domain is the
        # wildcard "" or exactly matches the requested domain.
        for (stored_domain, name, password) in self.credentials:
            if stored_domain == "" or stored_domain == domain:
                yield (name, password)
688
class KeyCerts(Credentials):
    """Behaves exactly like Credentials, but the stored name/password
    pair is interpreted as an SSL key/cert pair."""
694
class ProxyInfo(object):
    """Collect information required to use a proxy."""
    def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None):
        """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX
        constants. For example:

        p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', proxy_port=8000)
        """
        self.proxy_type = proxy_type
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.proxy_rdns = proxy_rdns
        self.proxy_user = proxy_user
        self.proxy_pass = proxy_pass

    def astuple(self):
        # Order matches the positional arguments of socks.socksocket.setproxy().
        return (self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns,
                self.proxy_user, self.proxy_pass)

    def isgood(self):
        # Usable only when the socks module imported and both host and port were given.
        return socks and (self.proxy_host != None) and (self.proxy_port != None)
712
class HTTPConnectionWithTimeout(httplib.HTTPConnection):
713
"""HTTPConnection subclass that supports timeouts"""
715
def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None):
716
httplib.HTTPConnection.__init__(self, host, port, strict)
717
self.timeout = timeout
718
self.proxy_info = proxy_info
721
"""Connect to the host and port specified in __init__."""
722
# Mostly verbatim from httplib.py.
723
msg = "getaddrinfo returns an empty list"
724
for res in socket.getaddrinfo(self.host, self.port, 0,
726
af, socktype, proto, canonname, sa = res
728
if self.proxy_info and self.proxy_info.isgood():
729
self.sock = socks.socksocket(af, socktype, proto)
730
self.sock.setproxy(*self.proxy_info.astuple())
732
self.sock = socket.socket(af, socktype, proto)
733
# Different from httplib: support timeouts.
734
if has_timeout(self.timeout):
735
self.sock.settimeout(self.timeout)
736
# End of difference from httplib.
737
if self.debuglevel > 0:
738
print "connect: (%s, %s)" % (self.host, self.port)
740
self.sock.connect(sa)
741
except socket.error, msg:
742
if self.debuglevel > 0:
743
print 'connect fail:', (self.host, self.port)
750
raise socket.error, msg
752
class HTTPSConnectionWithTimeout(httplib.HTTPSConnection):
753
"This class allows communication via SSL."
755
def __init__(self, host, port=None, key_file=None, cert_file=None,
756
strict=None, timeout=None, proxy_info=None):
757
httplib.HTTPSConnection.__init__(self, host, port=port, key_file=key_file,
758
cert_file=cert_file, strict=strict)
759
self.timeout = timeout
760
self.proxy_info = proxy_info
763
"Connect to a host on a given (SSL) port."
765
if self.proxy_info and self.proxy_info.isgood():
766
sock = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM)
767
sock.setproxy(*self.proxy_info.astuple())
769
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
771
if has_timeout(self.timeout):
772
sock.settimeout(self.timeout)
773
sock.connect((self.host, self.port))
774
self.sock =_ssl_wrap_socket(sock, self.key_file, self.cert_file)
779
"""An HTTP client that handles:
791
def __init__(self, cache=None, timeout=None, proxy_info=None):
792
"""The value of proxy_info is a ProxyInfo instance.
794
If 'cache' is a string then it is used as a directory name
795
for a disk cache. Otherwise it must be an object that supports
796
the same interface as FileCache."""
797
self.proxy_info = proxy_info
798
# Map domain name to an httplib connection
799
self.connections = {}
800
# The location of the cache, for now a directory
801
# where cached responses are held.
802
if cache and isinstance(cache, str):
803
self.cache = FileCache(cache)
808
self.credentials = Credentials()
811
self.certificates = KeyCerts()
813
# authorization objects
814
self.authorizations = []
816
# If set to False then no redirects are followed, even safe ones.
817
self.follow_redirects = True
819
# Which HTTP methods do we apply optimistic concurrency to, i.e.
820
# which methods get an "if-match:" etag header added to them.
821
self.optimistic_concurrency_methods = ["PUT"]
823
# If 'follow_redirects' is True, and this is set to True then
824
# all redirecs are followed, including unsafe ones.
825
self.follow_all_redirects = False
827
self.ignore_etag = False
829
self.force_exception_to_status_code = False
831
self.timeout = timeout
833
def _auth_from_challenge(self, host, request_uri, headers, response, content):
834
"""A generator that creates Authorization objects
835
that can be applied to requests.
837
challenges = _parse_www_authenticate(response, 'www-authenticate')
838
for cred in self.credentials.iter(host):
839
for scheme in AUTH_SCHEME_ORDER:
840
if challenges.has_key(scheme):
841
yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)
843
def add_credentials(self, name, password, domain=""):
    """Register a name/password pair to be offered whenever a request
    (optionally restricted to 'domain') demands authentication."""
    self.credentials.add(name, password, domain)
848
def add_certificate(self, key, cert, domain):
    """Register an SSL client key/cert pair to present when 'domain'
    requires certificate authentication."""
    self.certificates.add(key, cert, domain)
853
def clear_credentials(self):
    """Forget every stored name/password pair and discard any
    Authorization handlers that were built from them."""
    self.credentials.clear()
    self.authorizations = []
859
def _conn_request(self, conn, request_uri, method, body, headers):
862
conn.request(method, request_uri, body, headers)
863
except socket.gaierror:
865
raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
866
except (socket.error, httplib.HTTPException):
867
# Just because the server closed the connection doesn't apparently mean
868
# that the server didn't send a response.
871
response = conn.getresponse()
872
except (socket.error, httplib.HTTPException):
884
content = response.read()
885
response = Response(response)
887
content = _decompressContent(response, content)
889
return (response, content)
892
def _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey):
893
"""Do the actual request using the connection object
894
and also follow one level of redirects if necessary"""
896
auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
897
auth = auths and sorted(auths)[0][1] or None
899
auth.request(method, request_uri, headers, body)
901
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
904
if auth.response(response, body):
905
auth.request(method, request_uri, headers, body)
906
(response, content) = self._conn_request(conn, request_uri, method, body, headers )
907
response._stale_digest = 1
909
if response.status == 401:
910
for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
911
authorization.request(method, request_uri, headers, body)
912
(response, content) = self._conn_request(conn, request_uri, method, body, headers, )
913
if response.status != 401:
914
self.authorizations.append(authorization)
915
authorization.response(response, body)
918
if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303):
919
if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
920
# Pick out the location header and basically start from the beginning
921
# remembering first to strip the ETag header and decrement our 'depth'
923
if not response.has_key('location') and response.status != 300:
924
raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content)
925
# Fix-up relative redirects (which violate an RFC 2616 MUST)
926
if response.has_key('location'):
927
location = response['location']
928
(scheme, authority, path, query, fragment) = parse_uri(location)
929
if authority == None:
930
response['location'] = urlparse.urljoin(absolute_uri, location)
931
if response.status == 301 and method in ["GET", "HEAD"]:
932
response['-x-permanent-redirect-url'] = response['location']
933
if not response.has_key('content-location'):
934
response['content-location'] = absolute_uri
935
_updateCache(headers, response, content, self.cache, cachekey)
936
if headers.has_key('if-none-match'):
937
del headers['if-none-match']
938
if headers.has_key('if-modified-since'):
939
del headers['if-modified-since']
940
if response.has_key('location'):
941
location = response['location']
942
old_response = copy.deepcopy(response)
943
if not old_response.has_key('content-location'):
944
old_response['content-location'] = absolute_uri
945
redirect_method = ((response.status == 303) and (method not in ["GET", "HEAD"])) and "GET" or method
946
(response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1)
947
response.previous = old_response
949
raise RedirectLimit( _("Redirected more times than rediection_limit allows."), response, content)
950
elif response.status in [200, 203] and method == "GET":
951
# Don't cache 206's since we aren't going to handle byte range requests
952
if not response.has_key('content-location'):
953
response['content-location'] = absolute_uri
954
_updateCache(headers, response, content, self.cache, cachekey)
956
return (response, content)
958
def _normalize_headers(self, headers):
    """Normalize a request-header dict for this request.

    Delegates to the module-level _normalize_headers helper; exists as a
    method so subclasses can override the normalization policy.
    """
    return _normalize_headers(headers)
961
# Need to catch and rebrand some exceptions
962
# Then need to optionally turn all exceptions into status codes
963
# including all socket.* and httplib.* exceptions.
966
def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None):
967
""" Performs a single HTTP request.
968
The 'uri' is the URI of the HTTP resource and can begin
969
with either 'http' or 'https'. The value of 'uri' must be an absolute URI.
971
The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc.
972
There is no restriction on the methods allowed.
974
The 'body' is the entity body to be sent with the request. It is a string
977
Any extra headers that are to be sent with the request should be provided in the
978
'headers' dictionary.
980
The maximum number of redirect to follow before raising an
981
exception is 'redirections. The default is 5.
983
The return value is a tuple of (response, content), the first
984
being and instance of the 'Response' class, the second being
985
a string that contains the response entity body.
991
headers = self._normalize_headers(headers)
993
if not headers.has_key('user-agent'):
994
headers['user-agent'] = "Python-httplib2/%s" % __version__
998
(scheme, authority, request_uri, defrag_uri) = urlnorm(uri)
999
domain_port = authority.split(":")[0:2]
1000
if len(domain_port) == 2 and domain_port[1] == '443' and scheme == 'http':
1002
authority = domain_port[0]
1004
conn_key = scheme+":"+authority
1005
if conn_key in self.connections:
1006
conn = self.connections[conn_key]
1008
if not connection_type:
1009
connection_type = (scheme == 'https') and HTTPSConnectionWithTimeout or HTTPConnectionWithTimeout
1010
certs = list(self.certificates.iter(authority))
1011
if scheme == 'https' and certs:
1012
conn = self.connections[conn_key] = connection_type(authority, key_file=certs[0][0],
1013
cert_file=certs[0][1], timeout=self.timeout, proxy_info=self.proxy_info)
1015
conn = self.connections[conn_key] = connection_type(authority, timeout=self.timeout, proxy_info=self.proxy_info)
1016
conn.set_debuglevel(debuglevel)
1018
if method in ["GET", "HEAD"] and 'range' not in headers and 'accept-encoding' not in headers:
1019
headers['accept-encoding'] = 'gzip, deflate'
1021
info = email.Message.Message()
1024
cachekey = defrag_uri
1025
cached_value = self.cache.get(cachekey)
1027
# info = email.message_from_string(cached_value)
1029
# Need to replace the line above with the kludge below
1030
# to fix the non-existent bug not fixed in this
1031
# bug report: http://mail.python.org/pipermail/python-bugs-list/2005-September/030289.html
1033
info, content = cached_value.split('\r\n\r\n', 1)
1034
feedparser = email.FeedParser.FeedParser()
1035
feedparser.feed(info)
1036
info = feedparser.close()
1037
feedparser._parse = None
1039
self.cache.delete(cachekey)
1045
if method in self.optimistic_concurrency_methods and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers:
1046
# http://www.w3.org/1999/04/Editing/
1047
headers['if-match'] = info['etag']
1049
if method not in ["GET", "HEAD"] and self.cache and cachekey:
1050
# RFC 2616 Section 13.10
1051
self.cache.delete(cachekey)
1053
# Check the vary header in the cache to see if this request
1054
# matches what varies in the cache.
1055
if method in ['GET', 'HEAD'] and 'vary' in info:
1057
vary_headers = vary.lower().replace(' ', '').split(',')
1058
for header in vary_headers:
1059
key = '-varied-%s' % header
1061
if headers.get(header, '') != value:
1065
if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
1066
if info.has_key('-x-permanent-redirect-url'):
1067
# Should cached permanent redirects be counted in our redirection count? For now, yes.
1068
(response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)
1069
response.previous = Response(info)
1070
response.previous.fromcache = True
1072
# Determine our course of action:
1073
# Is the cached entry fresh or stale?
1074
# Has the client requested a non-cached response?
1076
# There seems to be three possible answers:
1077
# 1. [FRESH] Return the cache entry w/o doing a GET
1078
# 2. [STALE] Do the GET (but add in cache validators if available)
1079
# 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
1080
entry_disposition = _entry_disposition(info, headers)
1082
if entry_disposition == "FRESH":
1083
if not cached_value:
1084
info['status'] = '504'
1086
response = Response(info)
1088
response.fromcache = True
1089
return (response, content)
1091
if entry_disposition == "STALE":
1092
if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
1093
headers['if-none-match'] = info['etag']
1094
if info.has_key('last-modified') and not 'last-modified' in headers:
1095
headers['if-modified-since'] = info['last-modified']
1096
elif entry_disposition == "TRANSPARENT":
1099
(response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
1101
if response.status == 304 and method == "GET":
1102
# Rewrite the cache entry with the new end-to-end headers
1103
# Take all headers that are in response
1104
# and overwrite their values in info.
1105
# unless they are hop-by-hop, or are listed in the connection header.
1107
for key in _get_end2end_headers(response):
1108
info[key] = response[key]
1109
merged_response = Response(info)
1110
if hasattr(response, "_stale_digest"):
1111
merged_response._stale_digest = response._stale_digest
1112
_updateCache(headers, merged_response, content, self.cache, cachekey)
1113
response = merged_response
1114
response.status = 200
1115
response.fromcache = True
1117
elif response.status == 200:
1118
content = new_content
1120
self.cache.delete(cachekey)
1121
content = new_content
1123
cc = _parse_cache_control(headers)
1124
if cc.has_key('only-if-cached'):
1125
info['status'] = '504'
1126
response = Response(info)
1129
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
1130
except Exception, e:
1131
if self.force_exception_to_status_code:
1132
if isinstance(e, HttpLib2ErrorWithResponse):
1133
response = e.response
1135
response.status = 500
1136
response.reason = str(e)
1137
elif isinstance(e, socket.timeout):
1138
content = "Request Timeout"
1139
response = Response( {
1140
"content-type": "text/plain",
1142
"content-length": len(content)
1144
response.reason = "Request Timeout"
1147
response = Response( {
1148
"content-type": "text/plain",
1150
"content-length": len(content)
1152
response.reason = "Bad Request"
1157
return (response, content)
1161
class Response(dict):
1162
"""An object more like email.Message than httplib.HTTPResponse."""
1164
"""Is this response from our local cache"""
1167
"""HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1. """
1170
"Status code returned by server. "
1173
"""Reason phrase returned by server."""
1178
def __init__(self, info):
1179
# info is either an email.Message or
1180
# an httplib.HTTPResponse object.
1181
if isinstance(info, httplib.HTTPResponse):
1182
for key, value in info.getheaders():
1183
self[key.lower()] = value
1184
self.status = info.status
1185
self['status'] = str(self.status)
1186
self.reason = info.reason
1187
self.version = info.version
1188
elif isinstance(info, email.Message.Message):
1189
for key, value in info.items():
1191
self.status = int(self['status'])
1193
for key, value in info.iteritems():
1195
self.status = int(self.get('status', self.status))
1198
def __getattr__(self, name):
1202
raise AttributeError, name