~ubuntu-branches/ubuntu/lucid/exaile/lucid

Viewing changes to plugins/podcasts/_feedparser.py

Committer: Bazaar Package Importer
Author(s): Andrew Starr-Bochicchio
Date: 2010-02-12 19:51:01 UTC
mfrom: (1.1.11 upstream)
Revision ID: james.westby@ubuntu.com-20100212195101-8jt3tculxcl92e6v

Tags: 0.3.1~b1-0ubuntu1

* New upstream release.
* Adjust exaile.install for new plugins.
* debian/control:
- Drop unneeded python-dev Build-Dep.
- Bump Standards-Version to 3.8.4
* debian/rules: No empty po files to delete.

files added:
data/images/scalable

data/images/scalable/exaile-pause.svg

data/images/scalable/exaile-play.svg

data/images/scalable/exaile.svg

data/ui

data/ui/about_dialog.ui

data/ui/appearance_prefs_pane.ui

data/ui/collection_manager.ui

data/ui/collection_panel.ui

data/ui/collection_prefs_pane.ui

data/ui/cover_prefs_pane.ui

data/ui/coverchooser.ui

data/ui/covermanager.ui

data/ui/coverwindow.ui

data/ui/device_manager.ui

data/ui/device_panel.ui

data/ui/files_panel.ui

data/ui/flatplaylist_panel.ui

data/ui/general_prefs_pane.ui

data/ui/main.ui

data/ui/osd_prefs_pane.ui

data/ui/osd_window.ui

data/ui/playback_prefs_pane.ui

data/ui/playlists_panel.ui

data/ui/playlists_prefs_pane.ui

data/ui/plugin_prefs_pane.ui

data/ui/preferences_dialog.ui

data/ui/queue_dialog.ui

data/ui/radio_panel.ui

data/ui/splash.ui

data/ui/trackproperties_dialog.ui

doc/xl/cover.rst

doc/xl/metadata.rst

doc/xl/trax.rst

plugins/alarmclock/acprefs_pane.ui

plugins/amazoncovers/amazonprefs_pane.ui

plugins/audioscrobbler/asprefs_pane.ui

plugins/awn

plugins/awn/PLUGININFO

plugins/awn/__init__.py

plugins/awn/awn_prefs.py

plugins/awn/awn_prefs_pane.ui

plugins/bookmarks/bookmarks_pane.ui

plugins/cd/cdprefs_pane.ui

plugins/contextinfo/context.ui

plugins/contextinfo/context_pane.ui

plugins/contextinfo/contextprefs.py

plugins/currentsong

plugins/currentsong/PLUGININFO

plugins/currentsong/__init__.py

plugins/daapclient

plugins/daapclient/PLUGININFO

plugins/daapclient/__init__.py

plugins/daapclient/daap.py

plugins/daapclient/daap_data.py

plugins/daapserver

plugins/daapserver/COPYING

plugins/daapserver/PLUGININFO

plugins/daapserver/README

plugins/daapserver/TODO

plugins/daapserver/__init__.py

plugins/daapserver/config.py

plugins/daapserver/daapserver_prefs.ui

plugins/daapserver/daapserverprefs.py

plugins/daapserver/server.py

plugins/daapserver/spydaap

plugins/daapserver/spydaap.py

plugins/daapserver/spydaap/__init__.py

plugins/daapserver/spydaap/cache.py

plugins/daapserver/spydaap/containers.py

plugins/daapserver/spydaap/daap.py

plugins/daapserver/spydaap/daap_data.py

plugins/daapserver/spydaap/metadata.py

plugins/daapserver/spydaap/parser

plugins/daapserver/spydaap/parser/__init__.py

plugins/daapserver/spydaap/parser/avi.py

plugins/daapserver/spydaap/parser/exaile.py

plugins/daapserver/spydaap/parser/flac.py

plugins/daapserver/spydaap/parser/mov.py

plugins/daapserver/spydaap/parser/mp3.py

plugins/daapserver/spydaap/parser/ogg.py

plugins/daapserver/spydaap/parser/vorbis.py

plugins/daapserver/spydaap/playlists.py

plugins/daapserver/spydaap/server.py

plugins/daapserver/spydaap/zeroconf.py

plugins/droptrayicon

plugins/droptrayicon/PLUGININFO

plugins/droptrayicon/__init__.py

plugins/droptrayicon/drop_target_window.ui

plugins/equalizer

plugins/equalizer/PLUGININFO

plugins/equalizer/__init__.py

plugins/equalizer/equalizer.ui

plugins/exfalso

plugins/exfalso/PLUGININFO

plugins/exfalso/__init__.py

plugins/ipconsole/ipconsole_prefs.ui

plugins/jamendo

plugins/jamendo/PLUGININFO

plugins/jamendo/__init__.py

plugins/jamendo/jamapi.py

plugins/jamendo/jamtree.py

plugins/jamendo/menu.py

plugins/jamendo/simplejson

plugins/jamendo/simplejson/__init__.py

plugins/jamendo/simplejson/_speedups.c

plugins/jamendo/simplejson/decoder.py

plugins/jamendo/simplejson/encoder.py

plugins/jamendo/simplejson/scanner.py

plugins/jamendo/simplejson/tests

plugins/jamendo/simplejson/tests/__init__.py

plugins/jamendo/simplejson/tests/test_check_circular.py

plugins/jamendo/simplejson/tests/test_decode.py

plugins/jamendo/simplejson/tests/test_default.py

plugins/jamendo/simplejson/tests/test_dump.py

plugins/jamendo/simplejson/tests/test_encode_basestring_ascii.py

plugins/jamendo/simplejson/tests/test_fail.py

plugins/jamendo/simplejson/tests/test_float.py

plugins/jamendo/simplejson/tests/test_indent.py

plugins/jamendo/simplejson/tests/test_pass1.py

plugins/jamendo/simplejson/tests/test_pass2.py

plugins/jamendo/simplejson/tests/test_pass3.py

plugins/jamendo/simplejson/tests/test_recursion.py

plugins/jamendo/simplejson/tests/test_scanstring.py

plugins/jamendo/simplejson/tests/test_separators.py

plugins/jamendo/simplejson/tests/test_unicode.py

plugins/jamendo/simplejson/tool.py

plugins/jamendo/ui

plugins/jamendo/ui/jamendo_panel.ui

plugins/karaoke

plugins/karaoke/PLUGININFO

plugins/karaoke/__init__.py

plugins/lyricsviewer

plugins/lyricsviewer/PLUGININFO

plugins/lyricsviewer/__init__.py

plugins/minimode/minimodeprefs_pane.ui

plugins/moodbar/moodbarprefs.py

plugins/moodbar/moodbarprefs_pane.ui

plugins/multialarmclock/alarmclk.ui

plugins/notify/notifyprefs_pane.ui

plugins/notifyosd/notifyosdprefs_pane.ui

plugins/podcasts/podcasts.ui

plugins/replaygain/replaygainprefs_pane.ui

plugins/streamripper/streamripper.ui

po/be.po

po/bs.po

po/csb.po

po/ml.po

po/ms.po

tests/xl

tests/xl/__init__.py

tests/xl/trax

tests/xl/trax/__init__.py

tests/xl/trax/test_data.py

tests/xl/trax/test_search.py

tests/xl/trax/test_track.py

tests/xl/trax/test_util.py

tools/.bpythonrc

tools/bpython

tools/ipshell

tools/pylint.cfg

tools/xdg-prefix-launch

xl/metadata/ape.py

xl/migrations

xl/migrations/__init__.py

xl/migrations/database

xl/migrations/database/__init__.py

xl/migrations/database/from1to2.py

xl/trax

xl/trax/__init__.py

xl/trax/search.py

xl/trax/track.py

xl/trax/trackdb.py

xl/trax/util.py

xlgui/prefs/collection_prefs.py

files removed:
data/glade

data/glade/about_dialog.glade

data/glade/appearance_prefs_pane.glade

data/glade/collection_manager.glade

data/glade/collection_panel.glade

data/glade/cover_prefs_pane.glade

data/glade/coverchooser.glade

data/glade/covermanager.glade

data/glade/coverwindow.glade

data/glade/device_manager.glade

data/glade/device_panel.glade

data/glade/files_panel.glade

data/glade/flatplaylist_panel.glade

data/glade/general_prefs_pane.glade

data/glade/main.glade

data/glade/osd_prefs_pane.glade

data/glade/osd_window.glade

data/glade/playback_prefs_pane.glade

data/glade/playlists_panel.glade

data/glade/playlists_prefs_pane.glade

data/glade/plugin_prefs_pane.glade

data/glade/preferences_dialog.glade

data/glade/queue_dialog.glade

data/glade/radio_panel.glade

data/glade/splash.glade

data/glade/trackproperties_dialog.glade

data/images/svg

data/images/svg/exaile-pause.svg

data/images/svg/exaile-play.svg

data/images/svg/exaile.svg

doc/planning

doc/planning/DEPS.rst

doc/planning/future.rst

doc/planning/index.rst

doc/xl/trackdb.rst

ipshell

plugins/alarmclock/acprefs_pane.glade

plugins/amazoncovers/amazonprefs_pane.glade

plugins/amazoncovers/test.py

plugins/audioscrobbler/asprefs_pane.glade

plugins/bookmarks/bookmarks_pane.glade

plugins/cd/cdprefs_pane.glade

plugins/contextinfo/context.glade

plugins/helloworld/test.py

plugins/ipconsole/ipconsole_prefs.glade

plugins/lastfmcovers/test.py

plugins/lastfmdynamic/test.py

plugins/lyricsfly/test.py

plugins/lyricwiki

plugins/lyricwiki/PLUGININFO

plugins/lyricwiki/__init__.py

plugins/lyricwiki/test.py

plugins/minimode/minimodeprefs_pane.glade

plugins/mpris/test.py

plugins/multialarmclock/alarmclk.glade

plugins/notify/notifyprefs_pane.glade

plugins/notify/test.py

plugins/notifyosd/notifyosdprefs_pane.glade

plugins/notifyosd/test.py

plugins/podcasts/podcasts.glade

plugins/replaygain/replaygainprefs_pane.glade

plugins/shoutcast/test.py

plugins/streamripper/streamripper.glade

plugins/tagcovers

plugins/tagcovers/PLUGININFO

plugins/tagcovers/__init__.py

po/an.po

po/br.po

po/en.po

po/mr.po

po/ps.po

tests/base.py

tests/collection.py

tests/cover.py

tests/gui

tests/gui/__init__.py

tests/gui/base.py

tests/gui/collectionpanel.py

tests/gui/main.py

tests/lyrics.py

tests/playlists.py

tools/createpot.py

tools/guitest

tools/guitest/Tkintertest.py

tools/guitest/__init__.py

tools/guitest/gtktest.py

tools/guitest/state.py

tools/guitest/tests

tools/guitest/tests/__init__.py

tools/guitest/tests/sample.glade

tools/guitest/tests/test_Tkintertest.py

tools/guitest/tests/test_gtktest.py

tools/guitest/tests/test_utils.py

tools/guitest/tests/test_wxtest.py

tools/guitest/utils.py

tools/guitest/wxtest.py

tools/runtests.py

tools/test_mems.py

xl/migration.py

xl/track.py

xl/trackdb.py

files modified:
DEPS

Makefile

data/exaile.desktop

data/migrations/migration_200907100931/__init__.py

data/migrations/migration_200907100931/olddb.py

debian/changelog

debian/control

debian/exaile.install

debian/rules

doc/conf.py

doc/index.rst

exaile.py

plugins/alarmclock/PLUGININFO

plugins/alarmclock/__init__.py

plugins/alarmclock/acprefs.py

plugins/amazoncovers/__init__.py

plugins/amazoncovers/_ecs.py

plugins/amazoncovers/amazonprefs.py

plugins/audioscrobbler/__init__.py

plugins/audioscrobbler/asprefs.py

plugins/bookmarks/__init__.py

plugins/bookmarks/bookmarksprefs.py

plugins/cd/__init__.py

plugins/cd/_cdguipanel.py

plugins/cd/cdprefs.py

plugins/cd/importer.py

plugins/contextinfo/PLUGININFO

plugins/contextinfo/__init__.py

plugins/contextinfo/pylast.py

plugins/desktopcover/prefs.py

plugins/dist_plugin.py

plugins/gnomemmkeys/__init__.py

plugins/helloworld/__init__.py

plugins/helloworld/testlib.py

plugins/ipconsole/__init__.py

plugins/ipconsole/ipconsoleprefs.py

plugins/ipconsole/ipython_view.py

plugins/ipod/__init__.py

plugins/lastfmcovers/PLUGININFO

plugins/lastfmcovers/__init__.py

plugins/lastfmdynamic/PLUGININFO

plugins/lastfmdynamic/__init__.py

plugins/librivox/__init__.py

plugins/librivox/about_window.py

plugins/librivox/librivoxsearch.py

plugins/lyricsfly/__init__.py

plugins/massstorage/__init__.py

plugins/minimode/PLUGININFO

plugins/minimode/__init__.py

plugins/minimode/minimodeprefs.py

plugins/minimode/mmwidgets.py

plugins/moodbar/PLUGININFO

plugins/moodbar/__init__.py

plugins/mpris/PLUGININFO

plugins/mpris/exaile_mpris.py

plugins/mpris/mpris_player.py

plugins/mpris/mpris_tag_converter.py

plugins/mpris/mpris_tracklist.py

plugins/multialarmclock/__init__.py

plugins/notify/__init__.py

plugins/notify/notify_cover.py

plugins/notify/notifyprefs.py

plugins/notifyosd/__init__.py

plugins/notifyosd/notifyosd_cover.py

plugins/notifyosd/notifyosdprefs.py

plugins/podcasts/__init__.py

plugins/podcasts/_feedparser.py

plugins/replaygain/__init__.py

plugins/replaygain/replaygainprefs.py

plugins/screensaverpause/__init__.py

plugins/shoutcast/__init__.py

plugins/streamripper/__init__.py

plugins/streamripper/srprefs.py

plugins/xkeys/__init__.py

po/ar.po

po/ast.po

po/bg.po

po/bn.po

po/ca.po

po/cs.po

po/cy.po

po/da.po

po/de.po

po/el.po

po/en_CA.po

po/en_GB.po

po/eo.po

po/es.po

po/et.po

po/eu.po

po/fa.po

po/fi.po

po/fr.po

po/frp.po

po/gl.po

po/gu.po

po/he.po

po/hi.po

po/hr.po

po/hu.po

po/id.po

po/it.po

po/ja.po

po/ka.po

po/kk.po

po/ko.po

po/lt.po

po/lv.po

po/messages.pot

po/mk.po

po/nb.po

po/nl.po

po/oc.po

po/pl.po

po/pt.po

po/pt_BR.po

po/ro.po

po/ru.po

po/sk.po

po/sl.po

po/sq.po

po/sr.po

po/sv.po

po/sw.po

po/ta.po

po/te.po

po/tl.po

po/tr.po

po/uk.po

po/vi.po

po/zh_CN.po

po/zh_TW.po

tests/__init__.py

tools/funcs.py

xl/__init__.py

xl/collection.py

xl/common.py

xl/cover.py

xl/devices.py

xl/dynamic.py

xl/event.py

xl/hal.py

xl/lyrics.py

xl/main.py

xl/metadata/__init__.py

xl/metadata/_apev2.py

xl/metadata/_base.py

xl/metadata/_id3.py

xl/metadata/asf.py

xl/metadata/flac.py

xl/metadata/mod.py

xl/metadata/mp3.py

xl/metadata/mp4.py

xl/metadata/mpc.py

xl/metadata/ogg.py

xl/metadata/sid.py

xl/metadata/speex.py

xl/metadata/tta.py

xl/metadata/wav.py

xl/metadata/wv.py

xl/nls.py

xl/player/__init__.py

xl/player/_base.py

xl/player/engine_normal.py

xl/player/engine_unified.py

xl/player/pipe.py

xl/player/queue.py

xl/playlist.py

xl/plugins.py

xl/providers.py

xl/radio.py

xl/settings.py

xl/transcoder.py

xl/xdg.py

xl/xldbus.py

xlgui/__init__.py

xlgui/collection.py

xlgui/commondialogs.py

xlgui/cover.py

xlgui/devices.py

xlgui/filtergui.py

xlgui/guiutil.py

xlgui/icons.py

xlgui/main.py

xlgui/menu.py

xlgui/osd.py

xlgui/panel/__init__.py

xlgui/panel/collection.py

xlgui/panel/device.py

xlgui/panel/files.py

xlgui/panel/flatplaylist.py

xlgui/panel/playlists.py

xlgui/panel/radio.py

xlgui/playlist.py

xlgui/plcolumns.py

xlgui/prefs/__init__.py

xlgui/prefs/appearance_prefs.py

xlgui/prefs/cover_prefs.py

xlgui/prefs/osd_prefs.py

xlgui/prefs/playback_prefs.py

xlgui/prefs/playlists_prefs.py

xlgui/prefs/plugin_prefs.py

xlgui/prefs/widgets.py

xlgui/progress.py

xlgui/properties.py

xlgui/queue.py

xlgui/rating.py

xlgui/tray.py

Show diffs side-by-side

added added

removed removed

plugins/podcasts/_feedparser.py

217

if not self.has_key(key):

218

self[key] = value

219

return self[key]

220

221

def has_key(self, key):

222

try:

223

return hasattr(self, key) or UserDict.has_key(self, key)

224

except AttributeError:

225

return False

226

227

def __getattr__(self, key):

228

try:

229

return self.__dict__[key]

299

'http://purl.org/atom/ns#': '',

300

'http://www.w3.org/2005/Atom': '',

301

'http://purl.org/rss/1.0/modules/rss091#': '',

302

303

'http://webns.net/mvcb/': 'admin',

304

'http://purl.org/rss/1.0/modules/aggregation/': 'ag',

305

'http://purl.org/rss/1.0/modules/annotate/': 'annotate',

353

can_contain_relative_uris = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']

354

can_contain_dangerous_markup = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']

355

html_types = ['text/html', 'application/xhtml+xml']

356

357

def __init__(self, baseuri=None, baselang=None, encoding='utf-8'):

358

if _debug: sys.stderr.write('initializing FeedParser\n')

359

if not self._matchnamespaces:

393

# normalize attrs

394

attrs = [(k.lower(), v) for k, v in attrs]

395

attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs]

396

397

# track xml:base and xml:lang

398

attrsD = dict(attrs)

399

baseuri = attrsD.get('xml:base', attrsD.get('base')) or self.baseuri

411

self.lang = lang

412

self.basestack.append(self.baseuri)

413

self.langstack.append(lang)

414

415

# track namespaces

416

for prefix, uri in attrs:

417

if prefix.startswith('xmlns:'):

449

self.intextinput = 0

450

if (not prefix) and tag not in ('title', 'link', 'description', 'url', 'href', 'width', 'height'):

451

self.inimage = 0

452

453

# call special handler (if defined) or default handler

454

methodname = '_start_' + prefix + suffix

455

try:

570

elif contentType == 'xhtml':

571

contentType = 'application/xhtml+xml'

572

return contentType

573

574

def trackNamespace(self, prefix, uri):

575

loweruri = uri.lower()

576

if (prefix, loweruri) == (None, 'http://my.netscape.com/rdf/simple/0.9/') and not self.version:

591

592

def resolveURI(self, uri):

593

return _urljoin(self.baseuri or '', uri)

594

595

def decodeEntities(self, element, data):

596

return data

597

601

def pop(self, element, stripWhitespace=1):

602

if not self.elementstack: return

603

if self.elementstack[-1][0] != element: return

604

605

element, expectingText, pieces = self.elementstack.pop()

606

output = ''.join(pieces)

607

if stripWhitespace:

616

pass

617

except binascii.Incomplete:

618

pass

619

620

# resolve relative URIs

621

if (element in self.can_be_relative_uri) and output:

622

output = self.resolveURI(output)

623

624

# decode entities within embedded markup

625

if not self.contentparams.get('base64', 0):

626

output = self.decodeEntities(element, output)

639

if self.mapContentType(self.contentparams.get('type', 'text/html')) in self.html_types:

640

if element in self.can_contain_relative_uris:

641

output = _resolveRelativeURIs(output, self.baseuri, self.encoding)

642

643

# sanitize embedded markup

644

if self.mapContentType(self.contentparams.get('type', 'text/html')) in self.html_types:

645

if element in self.can_contain_dangerous_markup:

654

# categories/tags/keywords/whatever are handled in _end_category

655

if element == 'category':

656

return output

657

658

# store output in appropriate place(s)

659

if self.inentry and not self.insource:

660

if element == 'content':

701

self.incontent -= 1

702

self.contentparams.clear()

703

return value

704

705

def _mapToStandardPrefix(self, name):

706

colonpos = name.find(':')

707

if colonpos <> -1:

710

prefix = self.namespacemap.get(prefix, prefix)

711

name = prefix + ':' + suffix

712

return name

713

714

def _getAttribute(self, attrsD, name):

715

return attrsD.get(self._mapToStandardPrefix(name))

716

738

pass

739

attrsD['href'] = href

740

return attrsD

741

742

def _save(self, key, value):

743

context = self._getContext()

744

context.setdefault(key, value)

757

self.version = 'rss20'

758

else:

759

self.version = 'rss'

760

761

def _start_dlhottitles(self, attrsD):

762

self.version = 'hotrss'

763

775

self._start_link({})

776

self.elementstack[-1][-1] = attrsD['href']

777

self._end_link()

778

779

def _start_feed(self, attrsD):

780

self.infeed = 1

781

versionmap = {'0.1': 'atom01',

792

def _end_channel(self):

793

self.infeed = 0

794

_end_feed = _end_channel

795

796

def _start_image(self, attrsD):

797

self.inimage = 1

798

self.push('image', 0)

799

context = self._getContext()

800

context.setdefault('image', FeedParserDict())

801

802

def _end_image(self):

803

self.pop('image')

804

self.inimage = 0

809

context = self._getContext()

810

context.setdefault('textinput', FeedParserDict())

811

_start_textInput = _start_textinput

812

813

def _end_textinput(self):

814

self.pop('textinput')

815

self.intextinput = 0

1000

self.popContent('subtitle')

1001

_end_tagline = _end_subtitle

1002

_end_itunes_subtitle = _end_subtitle

1003

1004

def _start_rights(self, attrsD):

1005

self.pushContent('rights', attrsD, 'text/plain', 1)

1006

_start_dc_rights = _start_rights

1094

if value:

1095

self.elementstack[-1][2].append(value)

1096

self.pop('license')

1097

1098

def _start_creativecommons_license(self, attrsD):

1099

self.push('license', 1)

1100

1118

self.push('category', 1)

1119

_start_dc_subject = _start_category

1120

_start_keywords = _start_category

1121

1122

def _end_itunes_keywords(self):

1123

for term in self.pop('itunes_keywords').split():

1124

self._addTag(term, 'http://www.itunes.com/', None)

1125

1126

def _start_itunes_category(self, attrsD):

1127

self._addTag(attrsD.get('text'), 'http://www.itunes.com/', None)

1128

self.push('category', 1)

1129

1130

def _end_category(self):

1131

value = self.pop('category')

1132

if not value: return

1142

1143

def _start_cloud(self, attrsD):

1144

self._getContext()['cloud'] = FeedParserDict(attrsD)

1145

1146

def _start_link(self, attrsD):

1147

attrsD.setdefault('rel', 'alternate')

1148

attrsD.setdefault('type', 'text/html')

1244

context = self._getContext()

1245

if context.has_key('generator_detail'):

1246

context['generator_detail']['name'] = value

1247

1248

def _start_admin_generatoragent(self, attrsD):

1249

self.push('generator', 1)

1250

value = self._getAttribute(attrsD, 'rdf:resource')

1259

if value:

1260

self.elementstack[-1][2].append(value)

1261

self.pop('errorreportsto')

1262

1263

def _start_summary(self, attrsD):

1264

context = self._getContext()

1265

if context.has_key('summary'):

1277

self.popContent(self._summaryKey or 'summary')

1278

self._summaryKey = None

1279

_end_itunes_summary = _end_summary

1280

1281

def _start_enclosure(self, attrsD):

1282

attrsD = self._itsAnHrefDamnIt(attrsD)

1283

self._getContext().setdefault('enclosures', []).append(FeedParserDict(attrsD))

1286

context = self._getContext()

1287

if not context.get('id'):

1288

context['id'] = href

1289

1290

def _start_source(self, attrsD):

1291

self.insource = 1

1292

1328

self.push('itunes_image', 0)

1329

self._getContext()['image'] = FeedParserDict({'href': attrsD.get('href')})

1330

_start_itunes_link = _start_itunes_image

1331

1332

def _end_itunes_block(self):

1333

value = self.pop('itunes_block', 0)

1334

self._getContext()['itunes_block'] = (value == 'yes') and 1 or 0

1345

_FeedParserMixin.__init__(self, baseuri, baselang, encoding)

1346

self.bozo = 0

1347

self.exc = None

1348

1349

def startPrefixMapping(self, prefix, uri):

1350

self.trackNamespace(prefix, uri)

1351

1352

def startElementNS(self, name, qname, attrs):

1353

namespace, localname = name

1354

lowernamespace = str(namespace or '').lower()

1405

def error(self, exc):

1406

self.bozo = 1

1407

self.exc = exc

1408

1409

def fatalError(self, exc):

1410

self.error(exc)

1411

raise exc

1413

class _BaseHTMLProcessor(sgmllib.SGMLParser):

1414

elements_no_end_tag = ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',

1415

'img', 'input', 'isindex', 'link', 'meta', 'param']

1416

1417

def __init__(self, encoding):

1418

self.encoding = encoding

1419

if _debug: sys.stderr.write('entering BaseHTMLProcessor, encoding=%s\n' % self.encoding)

1420

sgmllib.SGMLParser.__init__(self)

1421

1422

def reset(self):

1423

self.pieces = []

1424

sgmllib.SGMLParser.reset(self)

1429

return '<' + tag + ' />'

1430

else:

1431

return '<' + tag + '></' + tag + '>'

1432

1433

def feed(self, data):

1434

data = re.compile(r'<!((?!DOCTYPE|--|\[))', re.IGNORECASE).sub(r'&lt;!\1', data)

1435

#data = re.sub(r'<(\S+?)\s*?/>', self._shorttag_replace, data) # bug [ 1399464 ] Bad regexp for _shorttag_replace

1436

data = re.sub(r'<([^<\s]+?)\s*/>', self._shorttag_replace, data)

1436

data = re.sub(r'<([^<\s]+?)\s*/>', self._shorttag_replace, data)

1437

data = data.replace('&#39;', "'")

1438

data = data.replace('&#34;', '"')

1439

if self.encoding and type(data) == type(u''):

1473

# called for each character reference, e.g. for '&#160;', ref will be '160'

1474

# Reconstruct the original character reference.

1475

self.pieces.append('&#%(ref)s;' % locals())

1476

1477

def handle_entityref(self, ref):

1478

1479

# Reconstruct the original entity reference.

1485

# Store the original text verbatim.

1486

if _debug: sys.stderr.write('_BaseHTMLProcessor, handle_text, text=%s\n' % text)

1487

self.pieces.append(text)

1488

1489

def handle_comment(self, text):

1490

# called for each HTML comment, e.g. <!-- insert Javascript code here -->

1491

# Reconstruct the original comment.

1492

self.pieces.append('<!--%(text)s-->' % locals())

1493

1494

def handle_pi(self, text):

1495

# called for each processing instruction, e.g. <?instruction>

1496

# Reconstruct original processing instruction.

1502

# "http://www.w3.org/TR/html4/loose.dtd">

1503

# Reconstruct original DOCTYPE

1504

self.pieces.append('<!%(text)s>' % locals())

1505

1506

_new_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9:]*\s*').match

1507

def _scan_name(self, i, declstartpos):

1508

rawdata = self.rawdata

1548

data = data.replace('&quot;', '"')

1549

data = data.replace('&apos;', "'")

1550

return data

1551

1552

class _RelativeURIResolver(_BaseHTMLProcessor):

1553

relative_uris = [('a', 'href'),

1554

('applet', 'codebase'),

1582

1583

def resolveURI(self, uri):

1584

return _urljoin(self.baseuri, uri)

1585

1586

def unknown_starttag(self, tag, attrs):

1587

attrs = self.normalize_attrs(attrs)

1588

attrs = [(key, ((tag, key) in self.relative_uris) and self.resolveURI(value) or value) for key, value in attrs]

1589

_BaseHTMLProcessor.unknown_starttag(self, tag, attrs)

1590

1591

def _resolveRelativeURIs(htmlSource, baseURI, encoding):

1592

if _debug: sys.stderr.write('entering _resolveRelativeURIs\n')

1593

p = _RelativeURIResolver(baseURI, encoding)

1620

def reset(self):

1621

_BaseHTMLProcessor.reset(self)

1622

self.unacceptablestack = 0

1623

1624

def unknown_starttag(self, tag, attrs):

1625

if not tag in self.acceptable_elements:

1626

if tag in self.unacceptable_elements_with_end_tag:

1629

attrs = self.normalize_attrs(attrs)

1630

attrs = [(key, value) for key, value in attrs if key in self.acceptable_attributes]

1631

_BaseHTMLProcessor.unknown_starttag(self, tag, attrs)

1632

1633

def unknown_endtag(self, tag):

1634

if not tag in self.acceptable_elements:

1635

if tag in self.unacceptable_elements_with_end_tag:

1715

http_error_300 = http_error_302

1716

http_error_303 = http_error_302

1717

http_error_307 = http_error_302

1718

1719

def http_error_401(self, req, fp, code, msg, headers):

1720

# Check if

1721

# - server requires digest auth, AND

1820

return opener.open(request)

1821

finally:

1822

opener.close() # JohnD

1823

1824

# try to open with native open function (if url_file_stream_or_string is a filename)

1825

try:

1826

return open(url_file_stream_or_string)

1834

def registerDateHandler(func):

1835

'''Register a date handler function (takes string, returns 9-tuple date in GMT)'''

1836

_date_handlers.insert(0, func)

1837

1838

# ISO-8601 date parsing routines written by Fazal Majid.

1839

# The ISO 8601 standard is very convoluted and irregular - a full ISO 8601

1840

# parser is beyond the scope of feedparser and would be a worthwhile addition

1845

# Please note the order in templates is significant because we need a

1846

# greedy match.

1847

_iso8601_tmpl = ['YYYY-?MM-?DD', 'YYYY-MM', 'YYYY-?OOO',

1848

'YY-?MM-?DD', 'YY-?OOO', 'YYYY',

1848

'YY-?MM-?DD', 'YY-?OOO', 'YYYY',

1849

'-YY-?MM', '-OOO', '-YY',

1850

'--MM-?DD', '--MM',

1851

'---DD',

1944

# Many implementations have bugs, but we'll pretend they don't.

1945

return time.localtime(time.mktime(tm))

1946

registerDateHandler(_parse_date_iso8601)

1947

1948

# 8-bit date handling routines written by ytrewq1.

1949

_korean_year = u'\ub144' # b3e2 in euc-kr

1950

_korean_month = u'\uc6d4' # bff9 in euc-kr

2035

u'\u03a4\u03b5\u03c4': u'Wed', # d4e5f4 in iso-8859-7

2036

u'\u03a0\u03b5\u03bc': u'Thu', # d0e5ec in iso-8859-7

2037

u'\u03a0\u03b1\u03c1': u'Fri', # d0e1f1 in iso-8859-7

2038

u'\u03a3\u03b1\u03b2': u'Sat', # d3e1e2 in iso-8859-7

2038

u'\u03a3\u03b1\u03b2': u'Sat', # d3e1e2 in iso-8859-7

2039

}

2040

2041

_greek_date_format_re = \

2221

# 'ET' is equivalent to 'EST', etc.

2222

_additional_timezones = {'AT': -400, 'ET': -500, 'CT': -600, 'MT': -700, 'PT': -800}

2223

rfc822._timezones.update(_additional_timezones)

2224

registerDateHandler(_parse_date_rfc822)

2224

registerDateHandler(_parse_date_rfc822)

2225

2226

def _parse_date(dateString):

2227

'''Parses a variety of date formats into a 9-tuple in GMT'''

2244

2245

http_headers is a dictionary

2246

xml_data is a raw string (not Unicode)

2247

2248

This is so much trickier than it sounds, it's not even funny.

2249

According to RFC 3023 ('XML Media Types'), if the HTTP Content-Type

2250

is application/xml, application/*+xml,

2263

served with a Content-Type of text/* and no charset parameter

2264

must be treated as us-ascii. (We now do this.) And also that it

2265

must always be flagged as non-well-formed. (We now do this too.)

2266

2267

If Content-Type is unspecified (input was local file or non-HTTP source)

2268

or unrecognized (server just got it totally wrong), then go by the

2269

encoding given in the XML prefix of the document and default to

2270

'iso-8859-1' as per the HTTP specification (RFC 2616).

2271

2272

Then, assuming we didn't find a character encoding in the HTTP headers

2273

(and the HTTP Content-type allowed us to look in the body), we need

2274

to sniff the first few bytes of the XML data and try to determine

2374

else:

2375

true_encoding = xml_encoding or 'utf-8'

2376

return true_encoding, http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type

2377

2378

def _toUTF8(data, encoding):

2379

'''Changes an XML data stream on the fly to specify a new encoding

2380

2445

version = None

2446

data = doctype_pattern.sub('', data)

2447

return version, data

2448

2449

def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[]):

2450

'''Parse a feed from a URL, file, stream, or string'''

2451

result = FeedParserDict()

2517

bozo_message = 'no Content-type specified'

2518

result['bozo'] = 1

2519

result['bozo_exception'] = NonXMLContentType(bozo_message)

2520

2521

result['version'], data = _stripDoctype(data)

2522

2523

baseuri = http_headers.get('content-location', result.get('href'))

Older »