1
# Miro - an RSS based video player application
# Copyright (C) 2009-2010 Participatory Culture Foundation
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
# In addition, as a special exception, the copyright holders give
# permission to link the code of portions of this program with the OpenSSL
# library.
#
# You must obey the GNU General Public License in all respects for all of
# the code used other than OpenSSL. If you modify file(s) with this
# exception, you may extend this exception to your version of the file(s),
# but you are not obligated to do so. If you do not wish to do so, delete
# this exception statement from your version. If you delete this exception
# statement from all source files in the program, then also delete it here.
30
"""
This file contains the RSS/Atom/OPML autodiscovery path. It used to
live in subscription.py.
"""
37
import xml.dom.minidom
38
from xml.parsers.expat import ExpatError
44
# Callable used by reflexive auto-discovery to fetch a page: it is called
# with a URL and the returned object's read() supplies the HTML to scan.
# NOTE(review): presumably kept as a module-level name so it can be swapped
# out (e.g. in tests) -- confirm.
REFLEXIVE_AUTO_DISCOVERY_OPENER = urllib2.urlopen
46
def flatten(subscriptions):
    """Take a nested subscription list, and remove the folders, putting
    everything at the root level.

    Entries whose ``'type'`` is ``'folder'`` are replaced, recursively, by
    the entries in their ``'children'`` list; every other entry is kept
    as-is, in its original order.

    :param subscriptions: iterable of subscription dicts, each with a
        ``'type'`` key (folders also carry ``'children'``)
    :returns: flat list of the non-folder subscription dicts
    """
    # NOTE(review): the two ``yield`` branches were missing from the damaged
    # listing and are restored from the docstring's contract -- confirm.
    def _flat(entries):
        for entry in entries:
            if entry['type'] == 'folder':
                # Descend into the folder; the folder itself is dropped.
                for child in _flat(entry['children']):
                    yield child
            else:
                yield entry
    return list(_flat(subscriptions))
# NOTE(review): the ``def`` line of this function was missing from the
# damaged listing; the name ``parse_file`` is inferred from the docstring
# and from the sibling ``parse_content`` -- confirm against callers.
def parse_file(path):
    """Opens the file at path, parses it into a list of subscriptions,
    and returns the list of subscriptions.  Each subscription is a
    dict.

    :param path: absolute path of the file to parse
    :returns: list of subscriptions or None
    """
    try:
        subscription_file = open(path, "r")
        try:
            content = subscription_file.read()
        finally:
            # Close the handle even when read() fails.
            subscription_file.close()
        return parse_content(content)
    except (IOError, ExpatError):
        # Unreadable file or malformed XML: report "no subscriptions".
        return None
76
def parse_content(content):
    """Parses content into a list of subscriptions, and returns the
    list of subscriptions.  Each subscription is a dict.

    The name of the document's root element selects the handler: ``rss``
    and ``feed`` documents are handed to the RSS / Atom helpers, ``opml``
    documents are imported with ``opml.Importer`` and then flattened.  Any
    other root element yields None.

    :param content: utf-8 encoded string to parse
    :returns: list of subscriptions or None
    """
    # NOTE(review): several lines of this function were missing from the
    # damaged listing (around the except clause and after the opml branch);
    # the ``return None`` fall-throughs follow the documented contract --
    # confirm nothing else (e.g. a guard around the log call or DOM cleanup)
    # was lost.
    try:
        dom = xml.dom.minidom.parseString(content)
    except (ExpatError, TypeError):
        # logging.warning replaces the deprecated logging.warn alias.
        logging.warning("Error parsing XML content...\n%s",
                        traceback.format_exc())
        return None

    root = dom.documentElement
    if root.nodeName == "rss":
        return _get_subs_from_rss_channel(root)
    elif root.nodeName == "feed":
        return _get_subs_from_atom_feed(root)
    elif root.nodeName == "opml":
        # The OPML importer works from the raw text, not from the DOM.
        subscriptions = opml.Importer().import_content(content)
        return flatten(subscriptions)
    return None
103
def _get_subs_from_rss_channel(root):
    """Find the feed(s) referenced by an RSS document.

    The last ``<channel>`` element is checked first for an Atom link
    construct.  If that gives nothing, the text of the channel's last
    ``<link>`` element is used as the page on which to run reflexive
    auto-discovery for ``application/rss+xml`` links.

    :param root: DOM element of the ``<rss>`` document root
    :returns: list of subscriptions, or None when a required element is
        missing or nothing was found
    """
    # NOTE(review): ``try:``, ``return subscriptions`` and the except body
    # were missing from the damaged listing and are restored here -- confirm.
    try:
        channel = root.getElementsByTagName("channel").pop()
        subscriptions = _get_subs_from_atom_link_construct(channel)
        if subscriptions is not None:
            return subscriptions
        link = channel.getElementsByTagName("link").pop()
        href = link.firstChild.data
        return _get_subs_from_reflexive_auto_discovery(
            href, "application/rss+xml")
    except (IndexError, AttributeError):
        # IndexError: pop() on an empty node list (element absent).
        # AttributeError: e.g. an empty <link/> whose firstChild is None.
        return None
117
def _get_subs_from_atom_feed(root):
    """Find the feed(s) referenced by an Atom document.

    The root is checked first for an Atom link construct.  If that gives
    nothing and the link returned by ``_get_atom_link`` has
    ``rel="alternate"``, its ``href`` is used as the page on which to run
    reflexive auto-discovery for ``application/atom+xml`` links.

    :param root: DOM element of the ``<feed>`` document root
    :returns: list of subscriptions, or None when nothing was found
    """
    # NOTE(review): ``try:``, ``return subscriptions`` and the except body
    # were missing from the damaged listing and are restored here -- confirm.
    try:
        subscriptions = _get_subs_from_atom_link_construct(root)
        if subscriptions is not None:
            return subscriptions
        link = _get_atom_link(root)
        if link.getAttribute("rel") == "alternate":
            href = link.getAttribute("href")
            return _get_subs_from_reflexive_auto_discovery(
                href, "application/atom+xml")
    except (IndexError, AttributeError):
        # IndexError: the document has no Atom <link> element at all.
        pass
    return None
132
def _get_subs_from_atom_link_construct(node):
    """Return the feed an Atom ``<link>`` under ``node`` points at.

    Only the link returned by ``_get_atom_link`` is examined, and it is
    accepted only when its ``rel`` attribute is ``self`` or ``start``.

    :param node: DOM element to search for Atom links
    :returns: one-element list ``[{'type': 'feed', 'url': href}]``, or None
        when there is no link or its ``rel`` does not qualify
    """
    # NOTE(review): ``try:`` and the except body were missing from the
    # damaged listing and are restored here -- confirm.
    try:
        link = _get_atom_link(node)
        if link.getAttribute("rel") in ("self", "start"):
            href = link.getAttribute("href")
            return [{'type': 'feed', 'url': href}]
    except (IndexError, AttributeError):
        # IndexError: no Atom <link> element below this node.
        pass
    return None
141
# Patterns applied to the text of a single ``<link ...>`` tag.
# Matches a literal rel="alternate" attribute (double quotes only).
ALT_RE = re.compile(r'rel="alternate"')
# Captures the value of a double-quoted href attribute in group 1.
HREF_RE = re.compile(r'href="([^"]*)"')
144
def _get_subs_from_reflexive_auto_discovery(url, ltype):
    """Fetch the page at ``url`` and collect the feeds its ``<link>`` tags
    advertise.

    A tag is accepted when its text contains ``rel="alternate"``,
    ``type="<ltype>"`` and a double-quoted ``href`` attribute.

    :param url: address of the page to fetch with
        ``REFLEXIVE_AUTO_DISCOVERY_OPENER``
    :param ltype: MIME type the link must declare, e.g.
        ``application/rss+xml``
    :returns: list of ``{'type': 'feed', 'url': href}`` dicts, one per
        accepted tag (possibly empty), or None when the page cannot be read
    """
    # NOTE(review): the lines that created and filled ``urls`` and any error
    # handling around the fetch were missing from the damaged listing.
    # Treating a failed fetch (IOError) as "nothing found" is an inference
    # -- confirm against the original.
    try:
        html = REFLEXIVE_AUTO_DISCOVERY_OPENER(url).read()
    except IOError:
        return None

    # Built once per call; only the requested MIME type varies.
    type_re = re.compile("type=\"%s\"" % re.escape(ltype))
    urls = []
    for match in re.findall("<link[^>]+>", html):
        alt_match = ALT_RE.search(match)
        type_match = type_re.search(match)
        href_match = HREF_RE.search(match)
        if None not in (alt_match, type_match, href_match):
            urls.append(href_match.group(1))
    # ``href`` (not ``url``) so the parameter is not shadowed or rebound.
    return [{'type': 'feed', 'url': href} for href in urls]
160
# XML namespace URI used to look up Atom elements.
ATOM_SPEC = "http://www.w3.org/2005/Atom"


def _get_atom_link(node):
    """Return the last Atom-namespaced ``<link>`` element below ``node``.

    :param node: DOM node to search (descendants included)
    :returns: the last matching element in document order
    :raises IndexError: when ``node`` contains no Atom ``<link>`` element
    """
    return node.getElementsByTagNameNS(ATOM_SPEC, "link").pop()