~gary/python-openid/python-openid-2.2.1-patched

« back to all changes in this revision

Viewing changes to openid/consumer/parse.py

Committer: Launchpad Patch Queue Manager
Date: 2007-11-30 02:46:28 UTC
mfrom: (1.1.1 pyopenid-2.0)
Revision ID: launchpad@pqm.canonical.com-20071130024628-qktwsew3383iawmq

[rs=SteveA] upgrade to python-openid-2.0.1

files added:
LICENSE

NOTICE

admin/pythonsource

background-associations.txt

contrib

contrib/upgrade-store-1.1-to-2.0

darcs-ignore

doc/HTMLParser.HTMLParser-class.html

doc/__builtin__.object-class.html

doc/__builtin__.type-class.html

doc/exceptions.BaseException-class.html

doc/exceptions.Exception-class.html

doc/exceptions.StandardError-class.html

doc/exceptions.ValueError-class.html

doc/indices.html

doc/markupbase.ParserBase-class.html

doc/openid.association.SessionNegotiator-class.html

doc/openid.consumer.html_parse-module.html

doc/openid.cryptutil-module.html

doc/openid.extension-module.html

doc/openid.extension.Extension-class.html

doc/openid.fetchers-module.html

doc/openid.fetchers.HTTPError-class.html

doc/openid.fetchers.HTTPFetcher-class.html

doc/openid.fetchers.HTTPFetchingError-class.html

doc/openid.fetchers.HTTPResponse-class.html

doc/openid.kvform-module.html

doc/openid.message-module.html

doc/openid.message.Message-class.html

doc/openid.message.NamespaceMap-class.html

doc/openid.oidutil-module.html

doc/openid.server.server.DiffieHellmanSHA1ServerSession-class.html

doc/openid.server.server.DiffieHellmanSHA256ServerSession-class.html

doc/openid.server.server.NoReturnToError-class.html

doc/openid.server.server.VersionError-class.html

doc/openid.sreg-module.html

doc/openid.sreg.SRegRequest-class.html

doc/openid.sreg.SRegResponse-class.html

doc/openid.store.memstore-module.html

doc/openid.store.memstore.MemoryStore-class.html

doc/openid.store.memstore.ServerAssocs-class.html

doc/openid.store.nonce-module.html

doc/openid.urinorm-module.html

doc/openid.yadis-module.html

doc/openid.yadis.accept-module.html

doc/openid.yadis.constants-module.html

doc/openid.yadis.discover-module.html

doc/openid.yadis.discover.DiscoveryFailure-class.html

doc/openid.yadis.discover.DiscoveryResult-class.html

doc/openid.yadis.etxrd-module.html

doc/openid.yadis.filters-module.html

doc/openid.yadis.filters.BasicServiceEndpoint-class.html

doc/openid.yadis.filters.CompoundFilter-class.html

doc/openid.yadis.filters.IFilter-class.html

doc/openid.yadis.filters.TransformFilterMaker-class.html

doc/openid.yadis.manager-module.html

doc/openid.yadis.manager.Discovery-class.html

doc/openid.yadis.manager.YadisServiceManager-class.html

doc/openid.yadis.parsehtml-module.html

doc/openid.yadis.parsehtml.MetaNotFound-class.html

doc/openid.yadis.services-module.html

doc/openid.yadis.xri-module.html

doc/openid.yadis.xrires-module.html

doc/openid.yadis.xrires.ProxyResolver-class.html

doc/toc-openid.consumer.html_parse-module.html

doc/toc-openid.cryptutil-module.html

doc/toc-openid.extension-module.html

doc/toc-openid.fetchers-module.html

doc/toc-openid.kvform-module.html

doc/toc-openid.message-module.html

doc/toc-openid.oidutil-module.html

doc/toc-openid.sreg-module.html

doc/toc-openid.store.memstore-module.html

doc/toc-openid.store.nonce-module.html

doc/toc-openid.urinorm-module.html

doc/toc-openid.yadis-module.html

doc/toc-openid.yadis.accept-module.html

doc/toc-openid.yadis.constants-module.html

doc/toc-openid.yadis.discover-module.html

doc/toc-openid.yadis.etxrd-module.html

doc/toc-openid.yadis.filters-module.html

doc/toc-openid.yadis.manager-module.html

doc/toc-openid.yadis.parsehtml-module.html

doc/toc-openid.yadis.services-module.html

doc/toc-openid.yadis.xri-module.html

doc/toc-openid.yadis.xrires-module.html

doc/trees.html

examples/djopenid

examples/djopenid/README

examples/djopenid/__init__.py

examples/djopenid/consumer

examples/djopenid/consumer/__init__.py

examples/djopenid/consumer/models.py

examples/djopenid/consumer/urls.py

examples/djopenid/consumer/views.py

examples/djopenid/manage.py

examples/djopenid/server

examples/djopenid/server/__init__.py

examples/djopenid/server/models.py

examples/djopenid/server/tests.py

examples/djopenid/server/urls.py

examples/djopenid/server/views.py

examples/djopenid/settings.py

examples/djopenid/templates

examples/djopenid/templates/consumer

examples/djopenid/templates/consumer/index.html

examples/djopenid/templates/consumer/request_form.html

examples/djopenid/templates/index.html

examples/djopenid/templates/server

examples/djopenid/templates/server/endpoint.html

examples/djopenid/templates/server/idPage.html

examples/djopenid/templates/server/index.html

examples/djopenid/templates/server/trust.html

examples/djopenid/templates/server/xrds.html

examples/djopenid/urls.py

examples/djopenid/util.py

examples/djopenid/views.py

openid/consumer/html_parse.py

openid/extension.py

openid/fetchers.py

openid/message.py

openid/sreg.py

openid/store/memstore.py

openid/store/nonce.py

openid/test/data

openid/test/data/accept.txt

openid/test/data/example-xrds.xml

openid/test/data/openid-1.2-consumer-sqlitestore.db

openid/test/data/test1-discover.txt

openid/test/data/test1-parsehtml.txt

openid/test/data/test_discover

openid/test/data/test_discover/openid.html

openid/test/data/test_discover/openid2.html

openid/test/data/test_discover/openid2_xrds.xml

openid/test/data/test_discover/openid2_xrds_no_local_id.xml

openid/test/data/test_discover/openid_1_and_2.html

openid/test/data/test_discover/openid_1_and_2_xrds.xml

openid/test/data/test_discover/openid_1_and_2_xrds_bad_delegate.xml

openid/test/data/test_discover/openid_and_yadis.html

openid/test/data/test_discover/openid_no_delegate.html

openid/test/data/test_discover/yadis_0entries.xml

openid/test/data/test_discover/yadis_2_bad_local_id.xml

openid/test/data/test_discover/yadis_2entries_delegate.xml

openid/test/data/test_discover/yadis_2entries_idp.xml

openid/test/data/test_discover/yadis_another_delegate.xml

openid/test/data/test_discover/yadis_idp.xml

openid/test/data/test_discover/yadis_idp_delegate.xml

openid/test/data/test_discover/yadis_no_delegate.xml

openid/test/data/test_etxrd

openid/test/data/test_etxrd/README

openid/test/data/test_etxrd/delegated-20060809-r1.xrds

openid/test/data/test_etxrd/delegated-20060809-r2.xrds

openid/test/data/test_etxrd/delegated-20060809.xrds

openid/test/data/test_etxrd/no-xrd.xml

openid/test/data/test_etxrd/not-xrds.xml

openid/test/data/test_etxrd/prefixsometimes.xrds

openid/test/data/test_etxrd/ref.xrds

openid/test/data/test_etxrd/sometimesprefix.xrds

openid/test/data/test_etxrd/spoof1.xrds

openid/test/data/test_etxrd/spoof2.xrds

openid/test/data/test_etxrd/spoof3.xrds

openid/test/data/test_etxrd/status222.xrds

openid/test/data/test_etxrd/valid-populated-xrds.xml

openid/test/discoverdata.py

openid/test/support.py

openid/test/test_accept.py

openid/test/test_association.py

openid/test/test_association_response.py

openid/test/test_auth_request.py

openid/test/test_consumer.py

openid/test/test_etxrd.py

openid/test/test_examples.py

openid/test/test_fetchers.py

openid/test/test_message.py

openid/test/test_negotiation.py

openid/test/test_nonce.py

openid/test/test_parsehtml.py

openid/test/test_server.py

openid/test/test_sreg.py

openid/test/test_symbol.py

openid/test/test_urinorm.py

openid/test/test_verifydisco.py

openid/test/test_xri.py

openid/test/test_xrires.py

openid/test/test_yadis_discover.py

openid/test/urinorm.txt

openid/urinorm.py

openid/yadis

openid/yadis/__init__.py

openid/yadis/accept.py

openid/yadis/constants.py

openid/yadis/discover.py

openid/yadis/etxrd.py

openid/yadis/filters.py

openid/yadis/manager.py

openid/yadis/parsehtml.py

openid/yadis/services.py

openid/yadis/xri.py

openid/yadis/xrires.py

pylintrc

files removed:
COPYING

INSTALL

PKG-INFO

admin/combo-setup.py

doc/class-tree.html

doc/identifier-index.html

doc/module-tree.html

doc/openid-pysrc.html

doc/openid.association-pysrc.html

doc/openid.consumer-pysrc.html

doc/openid.consumer.consumer-pysrc.html

doc/openid.consumer.discover-pysrc.html

doc/openid.dh-pysrc.html

doc/openid.server-pysrc.html

doc/openid.server.server-pysrc.html

doc/openid.server.server.DiffieHellmanServerSession-class.html

doc/openid.server.trustroot-pysrc.html

doc/openid.store-pysrc.html

doc/openid.store.dumbstore-module.html

doc/openid.store.dumbstore-pysrc.html

doc/openid.store.dumbstore.DumbStore-class.html

doc/openid.store.filestore-pysrc.html

doc/openid.store.interface-pysrc.html

doc/openid.store.sqlstore-pysrc.html

doc/term-index.html

doc/toc-openid.store.dumbstore-module.html

openid/consumer/parse.py

openid/store/dumbstore.py

openid/test/_memstore.py

openid/test/association.py

openid/test/consumer.py

openid/test/server.py

files modified:
CHANGELOG

MANIFEST.in

NEWS

README

admin/makechangelog *

admin/makedoc *

admin/runtests *

admin/setversion *

admin/tagrelease *

doc/epydoc.css

doc/frames.html

doc/help.html

doc/index.html

doc/openid-module.html

doc/openid.association-module.html

doc/openid.association.Association-class.html

doc/openid.consumer-module.html

doc/openid.consumer.consumer-module.html

doc/openid.consumer.consumer.AuthRequest-class.html

doc/openid.consumer.consumer.CancelResponse-class.html

doc/openid.consumer.consumer.Consumer-class.html

doc/openid.consumer.consumer.FailureResponse-class.html

doc/openid.consumer.consumer.SetupNeededResponse-class.html

doc/openid.consumer.consumer.SuccessResponse-class.html

doc/openid.consumer.discover-module.html

doc/openid.consumer.discover.OpenIDServiceEndpoint-class.html

doc/openid.dh-module.html

doc/openid.dh.DiffieHellman-class.html

doc/openid.server-module.html

doc/openid.server.server-module.html

doc/openid.server.server.AlreadySigned-class.html

doc/openid.server.server.AssociateRequest-class.html

doc/openid.server.server.CheckAuthRequest-class.html

doc/openid.server.server.CheckIDRequest-class.html

doc/openid.server.server.Decoder-class.html

doc/openid.server.server.Encoder-class.html

doc/openid.server.server.EncodingError-class.html

doc/openid.server.server.MalformedReturnURL-class.html

doc/openid.server.server.MalformedTrustRoot-class.html

doc/openid.server.server.OpenIDRequest-class.html

doc/openid.server.server.OpenIDResponse-class.html

doc/openid.server.server.PlainTextServerSession-class.html

doc/openid.server.server.ProtocolError-class.html

doc/openid.server.server.Server-class.html

doc/openid.server.server.Signatory-class.html

doc/openid.server.server.SigningEncoder-class.html

doc/openid.server.server.UntrustedReturnURL-class.html

doc/openid.server.server.WebResponse-class.html

doc/openid.server.trustroot-module.html

doc/openid.server.trustroot.TrustRoot-class.html

doc/openid.store-module.html

doc/openid.store.filestore-module.html

doc/openid.store.filestore.FileOpenIDStore-class.html

doc/openid.store.interface-module.html

doc/openid.store.interface.OpenIDStore-class.html

doc/openid.store.sqlstore-module.html

doc/openid.store.sqlstore.MySQLStore-class.html

doc/openid.store.sqlstore.PostgreSQLStore-class.html

doc/openid.store.sqlstore.SQLStore-class.html

doc/openid.store.sqlstore.SQLiteStore-class.html

doc/toc-everything.html

doc/toc-openid-module.html

doc/toc-openid.association-module.html

doc/toc-openid.consumer-module.html

doc/toc-openid.consumer.consumer-module.html

doc/toc-openid.consumer.discover-module.html

doc/toc-openid.dh-module.html

doc/toc-openid.server-module.html

doc/toc-openid.server.server-module.html

doc/toc-openid.server.trustroot-module.html

doc/toc-openid.store-module.html

doc/toc-openid.store.filestore-module.html

doc/toc-openid.store.interface-module.html

doc/toc-openid.store.sqlstore-module.html

doc/toc.html

examples/README

examples/consumer.py

examples/server.py

openid/__init__.py

openid/association.py

openid/consumer/consumer.py

openid/consumer/discover.py

openid/cryptutil.py

openid/dh.py

openid/kvform.py

openid/oidutil.py

openid/server/server.py

openid/server/trustroot.py

openid/store/__init__.py

openid/store/filestore.py

openid/store/interface.py

openid/store/sqlstore.py

openid/test/cryptutil.py

openid/test/kvform.py

openid/test/linkparse.py

openid/test/oidutil.py

openid/test/storetest.py

openid/test/test_discover.py

openid/test/test_htmldiscover.py

openid/test/test_openidyadis.py

openid/test/trustroot.txt

setup.py

Show diffs side-by-side

added added

removed removed

openid/consumer/parse.py

"""

This module implements a VERY limited parser that finds <link> tags in

the head of HTML or XHTML documents and parses out their attributes

according to the OpenID spec. It is a liberal parser, but it requires

these things from the data in order to work:

* There must be an open <html> tag

* There must be an open <head> tag inside of the <html> tag

* Only <link>s that are found inside of the <head> tag are parsed

(this is by design)

* The parser follows the OpenID specification in resolving the

attributes of the link tags. This means that the attributes DO NOT

get resolved as they would by an XML or HTML parser. In particular,

only certain entities get replaced, and href attributes do not get

resolved relative to a base URL.

From http://openid.net/specs.bml#linkrel:

* The openid.server URL MUST be an absolute URL. OpenID consumers

MUST NOT attempt to resolve relative URLs.

* The openid.server URL MUST NOT include entities other than &amp;,

&lt;, &gt;, and &quot;.

The parser ignores SGML comments and <![CDATA[blocks]]>. Both kinds of

quoting are allowed for attributes.

The parser deals with invalid markup in these ways:

* Tag names are not case-sensitive

* The <html> tag is accepted even when it is not at the top level

* The <head> tag is accepted even when it is not a direct child of

the <html> tag, but a <html> tag must be an ancestor of the <head>

tag

* <link> tags are accepted even when they are not direct children of

the <head> tag, but a <head> tag must be an ancestor of the <link>

tag

* If there is no closing tag for an open <html> or <head> tag, the

remainder of the document is viewed as being inside of the tag. If

there is no closing tag for a <link> tag, the link tag is treated

as a short tag. Exceptions to this rule are that <html> closes

<html> and <body> or <head> closes <head>

* Attributes of the <link> tag are not required to be quoted.

* In the case of duplicated attribute names, the attribute coming

last in the tag will be the value returned.

* Any text that does not parse as an attribute within a link tag will

be ignored. (e.g. <link pumpkin rel='openid.server' /> will ignore

pumpkin)

* If there is more than one <html> or <head> tag, the parser only

looks inside of the first one.

* The contents of <script> tags are ignored entirely, except unclosed

<script> tags. Unclosed <script> tags are ignored.

* Any other invalid markup is ignored, including unclosed SGML

comments and unclosed <![CDATA[blocks.

"""

__all__ = ['parseLinkAttrs']

import re

# Flags shared by the verbose patterns in this module.
flags = (
    re.DOTALL        # Match newlines with '.'
    | re.IGNORECASE
    | re.VERBOSE     # Allow comments and whitespace in patterns
    | re.UNICODE     # Make \b respect Unicode word boundaries
)

# Stuff to remove before we start looking for tags
removed_re = re.compile(r'''
  # Comments
  <!--.*?-->

  # CDATA blocks
| <!\[CDATA\[.*?\]\]>

  # script blocks
| <script\b

  # make sure script is not an XML namespace
  (?!:)

  [^>]*>.*?</script>

''', flags)

# Template for a pattern matching one element.  It is %-formatted with a
# mapping that supplies ``tag_name`` (the element to find) and ``closers``
# (a regex fragment naming the tags that end the element).  The ``contents``
# group only participates in the match for a full (non-short) tag.
tag_expr = r'''
# Starts with the tag name at a word boundary, where the tag name is
# not a namespace
<%(tag_name)s\b(?!:)

# All of the stuff up to a ">", hopefully attributes.
(?P<attrs>[^>]*?)

(?: # Match a short tag
    />

|   # Match a full tag
    >

    (?P<contents>.*?)

    # Closed by
    (?: # One of the specified close tags
        </?%(closers)s\s*>

        # End of the string
    |   \Z

    )

)
'''

125

126

def tagMatcher(tag_name, *close_tags):
    """Compile a pattern that finds a C{tag_name} element.

    The pattern is built from the module-level C{tag_expr} template and
    compiled with the module-level C{flags}.

    @param tag_name: name of the tag to look for
    @type tag_name: str

    @param close_tags: names of additional tags that also terminate the
        element; C{tag_name} itself always terminates it
    @type close_tags: str

    @return: the compiled regular expression
    """
    if close_tags:
        # Any of the listed tags (or the tag itself) closes the element.
        options = '|'.join((tag_name,) + close_tags)
        closers = '(?:%s)' % (options,)
    else:
        closers = tag_name

    # Spell out the substitutions instead of passing locals(), so the
    # template's inputs are visible here.
    expr = tag_expr % {'tag_name': tag_name, 'closers': closers}
    return re.compile(expr, flags)

135

136

# Must contain at least an open html and an open head tag
html_find = tagMatcher('html')
head_find = tagMatcher('head', 'body')

# Locates the start of each <link> tag (but not namespaced <link:...> tags).
link_find = re.compile(r'<link\b(?!:)', flags)

# Matches either one name=value attribute, or the character that ends the
# tag (reported through the ``end_link`` group).
attr_find = re.compile(r'''
# Must start with a sequence of word-characters, followed by an equals sign
(?P<attr_name>\w+)=

# Then either a quoted or unquoted attribute
(?:

 # Match everything that\'s between matching quote marks
 (?P<qopen>["\'])(?P<q_val>.*?)(?P=qopen)
|

 # If the value is not quoted, match up to whitespace
 (?P<unq_val>(?:[^\s<>/]|/(?!>))+)
)

|

(?P<end_link>[<>])
''', flags)

160

161

# Entity replacement: the only entities that get decoded in attribute
# values, keyed by entity name.
replacements = {
    'amp': '&',
    'lt': '<',
    'gt': '>',
    'quot': '"',
}

# Matches exactly the entities listed in ``replacements`` (case-sensitive).
ent_replace = re.compile(r'&(%s);' % '|'.join(replacements.keys()))


def replaceEnt(mo):
    """Replace the entities that are specified by OpenID

    @param mo: a match produced by C{ent_replace}; group 1 is the entity
        name

    @return: the replacement character, or the matched text unchanged when
        the entity name is not in C{replacements}
    """
    return replacements.get(mo.group(1), mo.group())

173

174

def parseLinkAttrs(html):
    """Find all link tags in a string representing a HTML document and
    return a list of their attributes.

    Only link tags found inside the first <head> of the first <html>
    element are considered.

    @param html: the text to parse
    @type html: str or unicode

    @return: A list of dictionaries of attributes, one for each link tag
    @rtype: [{type(html): type(html)}]
    """
    # Drop the spans matched by removed_re so markup inside them is never
    # mistaken for tags.
    stripped = removed_re.sub('', html)

    html_mo = html_find.search(stripped)
    if html_mo is None or html_mo.start('contents') == -1:
        # No <html> tag, or a short tag (<html/>) with no contents.
        return []

    start, end = html_mo.span('contents')
    head_mo = head_find.search(stripped, start, end)
    if head_mo is None or head_mo.start('contents') == -1:
        # No <head> inside <html>, or a short <head/>.
        return []

    link_mos = link_find.finditer(stripped, head_mo.start(), head_mo.end())

    matches = []
    for link_mo in link_mos:
        # link_find matches exactly "<link", so the attributes start where
        # the match ends.
        link_attrs = {}
        for attr_mo in attr_find.finditer(stripped, link_mo.end()):
            if attr_mo.lastgroup == 'end_link':
                # Reached the "<" or ">" that terminates this link tag.
                break

            # Either q_val or unq_val must be present, but not both
            # unq_val is a True (non-empty) value if it is present
            attr_name, q_val, unq_val = attr_mo.group(
                'attr_name', 'q_val', 'unq_val')
            attr_val = ent_replace.sub(replaceEnt, unq_val or q_val)

            # A repeated attribute name overwrites the earlier value.
            link_attrs[attr_name] = attr_val

        matches.append(link_attrs)

    return matches

216

217

def relMatches(rel_attr, target_rel):
    """Does this target_rel appear in the rel_attr?

    C{rel_attr} is treated as a whitespace-separated list of relationship
    names; the comparison ignores case on both sides.

    @param rel_attr: the value of a link tag's rel attribute
    @type rel_attr: str or unicode

    @param target_rel: the relationship name to look for
    @type target_rel: str or unicode

    @return: a true value if one of the names equals C{target_rel}
    @rtype: bool
    """
    # XXX: TESTME
    wanted = target_rel.lower()
    # split() with no argument already discards surrounding whitespace.
    return any(rel.lower() == wanted for rel in rel_attr.split())

227

228

def linkHasRel(link_attrs, target_rel):
    """Does this link have target_rel as a relationship?

    @param link_attrs: the attributes of one link tag
    @type link_attrs: dict

    @param target_rel: the relationship name to look for

    @return: True if the link has a non-empty rel attribute for which
        C{relMatches} reports a match, False otherwise
    @rtype: bool
    """
    # XXX: TESTME
    rel_attr = link_attrs.get('rel')
    if not rel_attr:
        # Missing or empty rel attribute: nothing to match against.
        return False
    return bool(relMatches(rel_attr, target_rel))

233

234

def findLinksRel(link_attrs_list, target_rel):
    """Filter the list of link attributes on whether it has target_rel
    as a relationship.

    @param link_attrs_list: attribute dictionaries, one per link tag

    @param target_rel: the relationship name to look for

    @return: the entries for which C{linkHasRel} is true, in their
        original order
    @rtype: list
    """
    # XXX: TESTME
    # Build a real list: filter() is lazy on Python 3, and callers
    # truth-test and index the result.
    return [attrs for attrs in link_attrs_list
            if linkHasRel(attrs, target_rel)]

240

241

def findFirstHref(link_attrs_list, target_rel):
    """Return the value of the href attribute for the first link tag
    in the list that has target_rel as a relationship.

    @param link_attrs_list: attribute dictionaries, one per link tag

    @param target_rel: the relationship name to look for

    @return: the href of the first matching link, or None when no link
        matches or the first matching link has no href attribute
    """
    # XXX: TESTME
    # Iterate rather than index so this works whether findLinksRel hands
    # back a list or a lazy iterable.
    for first in findLinksRel(link_attrs_list, target_rel):
        return first.get('href')
    return None

250

251

class ParseError(ValueError):
    """Exception for errors in parsing the HTML text for OpenID
    settings"""

254

255

def openIDDiscover(html_text):
    """Parse OpenID settings out of the given HTML text

    @param html_text: the HTML document to inspect
    @type html_text: str or unicode

    @return: C{(delegate_url, server_url)}; C{delegate_url} is None when
        no openid.delegate href is found
    @rtype: tuple

    @raises: ParseError if no openid.server href is found
    # XXX: document interface
    # XXX: TESTME
    """
    link_attrs = parseLinkAttrs(html_text)

    server_url = findFirstHref(link_attrs, 'openid.server')
    if server_url is None:
        raise ParseError('No openid.server found')

    # The delegate is optional, so a missing one is returned as None.
    delegate_url = findFirstHref(link_attrs, 'openid.delegate')
    return delegate_url, server_url

Older »