5
from twisted.web import client, http
6
from twisted.internet import reactor
12
def __new__(self, char):
13
token = Token.tokens.get(char)
15
Token.tokens[char] = token = str.__new__(self, char)
19
return "Token(%s)" % str.__repr__(self)
22
# Space, tab, and the punctuation characters singled out as special
# (presumably the separators the header tokenizer emits as Token
# instances -- confirm against tokenize()).
http_tokens = ' \t"()<>@,;:\\/[]?={}'
23
# Every character from 0x00 through 0x1f, followed by DEL (0x7f).
http_ctls = "".join(map(chr, range(0x20))) + "\x7f"
26
def tokenize(header, foldCase=True):
27
"""Tokenize a string according to normal HTTP header parsing rules.
30
- Whitespace is irrelevant and eaten next to special separator tokens.
31
Its existence (but not amount) is important between character strings.
32
- Quoted string support including embedded backslashes.
33
- Case is insignificant (and thus lowercased), except in quoted strings.
34
(unless foldCase=False)
35
- Multiple headers are concatenated with ','
37
NOTE: not all headers can be parsed with this function.
39
Takes a raw header value (list of strings), and
40
Returns a generator of strings and Token class instances.
45
string = ",".join(header)
58
qstring = qstring+string[start:cur-1]+x
64
yield qstring+string[start:cur]
70
yield string[start:cur].lower()
72
yield string[start:cur]
86
raise ValueError("Invalid control character: %d in header" % ord(x))
96
raise ValueError, "Missing character after '\\'"
98
raise ValueError, "Missing end quote"
102
yield string[start:cur].lower()
104
yield string[start:cur]
107
def parseWWWAuthenticate(tokenized):
110
tokenList = list(tokenized)
113
scheme = tokenList.pop(0)
119
token = tokenList.pop(0)
120
if token == Token('='):
122
challenge[last] = tokenList.pop(0)
125
elif token == Token(','):
127
if len(tokenList) > 1 and tokenList[1] != Token('='):
136
if last and scheme and not challenge and not kvChallenge:
140
headers.append((scheme, challenge))
142
if last and last not in (Token('='), Token(',')):
143
if headers[-1] == (scheme, challenge):
146
headers.append((scheme, challenge))
151
def parse(url, defaultPort=None):
153
Split the given URL into the scheme, host, port, and path.
156
@param url: A URL to parse.
158
@type defaultPort: C{int} or C{None}
159
@param defaultPort: An alternate value to use as the port if the URL does
162
@return: A four-tuple of the scheme, host, port, and path of the URL. All
163
of these are C{str} instances except for port, which is a C{int}.
166
parsed = http.urlparse(url)
168
path = urlparse.urlunparse(('','')+parsed[2:])
169
if defaultPort is None:
170
if scheme == 'https':
174
host, port = parsed[1], defaultPort
176
host, port = host.split(':')
180
return scheme, host, port, path
183
def makeGetterFactory(url, factoryFactory, contextFactory=None,
186
Create and connect an HTTP page getting factory.
188
Any additional positional or keyword arguments are used when calling
191
@param factoryFactory: Factory factory that is called with C{url}, C{args}
192
and C{kwargs} to produce the getter
194
@param contextFactory: Context factory to use when creating a secure
195
connection, defaulting to C{None}
197
@return: The factory created by C{factoryFactory}
199
scheme, host, port, path = parse(url)
200
factory = factoryFactory(url, *args, **kwargs)
201
if scheme == 'https':
202
from twisted.internet import ssl
203
if contextFactory is None:
204
contextFactory = ssl.ClientContextFactory()
205
reactor.connectSSL(host, port, factory, contextFactory)
207
reactor.connectTCP(host, port, factory)
211
def getPage(url, contextFactory=None, *args, **kwargs):
213
Download a web page as a string.
215
Download a page. Return a deferred, which will callback with a
216
page (as a string) or errback with a description of the error.
218
See HTTPClientFactory to see what extra args can be passed.
220
return makeGetterFactory(
222
client.HTTPClientFactory,
223
contextFactory=contextFactory,
229
# md5-sess is more complicated than just another algorithm. It requires
230
# H(A1) state to be remembered from the first WWW-Authenticate challenge
231
# issued and re-used to process any Authorization header in response to
232
# that WWW-Authenticate challenge. It is *not* correct to simply
233
# recalculate H(A1) each time an Authorization header is received. Read
234
# RFC 2617, section 3.2.2.2 and do not try to make DigestCredentialFactory
235
# support this unless you completely understand it. -exarkun
242
def calcHA1(pszAlg, pszUserName, pszRealm, pszPassword, pszNonce, pszCNonce,
245
Compute H(A1) from RFC 2617.
247
@param pszAlg: The name of the algorithm to use to calculate the digest.
248
Currently supported are md5, md5-sess, and sha.
249
@param pszUserName: The username
250
@param pszRealm: The realm
251
@param pszPassword: The password
252
@param pszNonce: The nonce
253
@param pszCNonce: The cnonce
255
@param preHA1: If available this is a str containing a previously
256
calculated H(A1) as a hex string. If this is given then the values for
257
pszUserName, pszRealm, and pszPassword must be C{None} and are ignored.
260
if (preHA1 and (pszUserName or pszRealm or pszPassword)):
261
raise TypeError(("preHA1 is incompatible with the pszUserName, "
262
"pszRealm, and pszPassword arguments"))
265
# We need to calculate the HA1 from the username:realm:password
266
m = algorithms[pszAlg]()
267
m.update(pszUserName)
271
m.update(pszPassword)
274
# We were given a previously calculated H(A1) as a hex string
275
HA1 = preHA1.decode('hex')
277
if pszAlg == "md5-sess":
278
m = algorithms[pszAlg]()
286
return HA1.encode('hex')
289
def calcHA2(algo, pszMethod, pszDigestUri, pszQop, pszHEntity):
291
Compute H(A2) from RFC 2617.
293
@param algo: The name of the algorithm to use to calculate the digest.
294
Currently supported are md5, md5-sess, and sha.
295
@param pszMethod: The request method.
296
@param pszDigestUri: The request URI.
297
@param pszQop: The Quality-of-Protection value.
298
@param pszHEntity: The hash of the entity body or C{None} if C{pszQop} is
300
@return: The hash of the A2 value for the calculation of the response
303
m = algorithms[algo]()
306
m.update(pszDigestUri)
307
if pszQop == "auth-int":
310
return m.digest().encode('hex')
313
def calcResponse(HA1, HA2, algo, pszNonce, pszNonceCount, pszCNonce, pszQop):
315
Compute the digest for the given parameters.
317
@param HA1: The H(A1) value, as computed by L{calcHA1}.
318
@param HA2: The H(A2) value, as computed by L{calcHA2}.
319
@param pszNonce: The challenge nonce.
320
@param pszNonceCount: The (client) nonce count value for this response.
321
@param pszCNonce: The client nonce.
322
@param pszQop: The Quality-of-Protection value.
324
m = algorithms[algo]()
329
if pszNonceCount and pszCNonce:
330
m.update(pszNonceCount)
337
respHash = m.digest().encode('hex')