~ubuntu-branches/ubuntu/karmic/calibre/karmic

« back to all changes in this revision

Viewing changes to src/calibre/ebooks/lrf/web/profiles/ap.py

  • Committer: Bazaar Package Importer
  • Author(s): Martin Pitt
  • Date: 2009-07-30 12:49:41 UTC
  • mfrom: (1.3.2 upstream)
  • Revision ID: james.westby@ubuntu.com-20090730124941-qjdsmri25zt8zocn
Tags: 0.6.3+dfsg-0ubuntu1
* New upstream release. Please see http://calibre.kovidgoyal.net/new_in_6/
  for the list of new features and changes.
* remove_postinstall.patch: Update for new version.
* build_debug.patch: Does not apply any more, disable for now. Might not be
  necessary any more.
* debian/copyright: Fix reference to versionless GPL.
* debian/rules: Drop obsolete dh_desktop call.
* debian/rules: Add workaround for weird Python 2.6 setuptools behaviour of
  putting compiled .so files into src/calibre/plugins/calibre/plugins
  instead of src/calibre/plugins.
* debian/rules: Drop hal fdi moving, new upstream version does not use hal
  any more. Drop hal dependency, too.
* debian/rules: Install udev rules into /lib/udev/rules.d.
* Add debian/calibre.preinst: Remove unmodified
  /etc/udev/rules.d/95-calibre.rules on upgrade.
* debian/control: Bump Python dependencies to 2.6, since upstream needs
  it now.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
import re
2
 
from calibre.ebooks.lrf.web.profiles import DefaultProfile
3
 
 
4
 
 
5
 
class AssociatedPress(DefaultProfile):
    """Profile describing how to fetch and clean Associated Press RSS feeds.

    The class only declares configuration (title, limits, html2lrf options,
    HTML clean-up regexps) and the list of feeds; how these attributes are
    consumed is defined by ``DefaultProfile`` (not visible here).
    """

    title = 'Associated Press'
    max_recursions = 2
    max_articles_per_feed = 15
    html2lrf_options = ['--force-page-break-before-tag="chapter"']

    # List of (compiled regex, replacement callable) pairs. Every pattern is
    # compiled case-insensitively and with DOTALL so that '.*?' can span
    # line breaks in the downloaded HTML.
    # NOTE(review): presumably the base profile applies these in order to each
    # fetched page -- confirm against DefaultProfile.
    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            # Drop everything inside <HEAD>.
            (r'<HEAD>.*?</HEAD>', lambda match: '<HEAD></HEAD>'),
            # Strip the page chrome that precedes the entries. The parentheses
            # in onLoad="setup(null)" are escaped so they match literally;
            # unescaped they form a regex group and the pattern would only
            # match the text 'setupnull'.
            (r'<body class="apple-rss-no-unread-mode" onLoad="setup\(null\)">.*?<!-- start Entries -->',
             lambda match: '<body>'),
            # Strip everything after the content area.
            (r'<!-- end apple-rss-content-area -->.*?</body>', lambda match: '</body>'),
            # Remove all script elements.
            (r'<script.*?>.*?</script>', lambda match: ''),
            # Cut everything before the headline and mark it with a <chapter>
            # tag, which html2lrf_options uses for forced page breaks.
            (r'<body.*?>.*?<span class="headline">',
             lambda match: '<body><span class="headline"><chapter>'),
            # Remove the markup between the body wrapper and the first
            # story paragraph.
            (r'<tr><td><div class="body">.*?<p class="ap-story-p">',
             lambda match: '<p class="ap-story-p">'),
            # Normalise story paragraphs to plain <p>.
            (r'<p class="ap-story-p">', lambda match: '<p>'),
            # Drop the privacy-policy footer through the end of the body.
            (r'Learn more about our <a href="http://apdigitalnews.com/privacy.html">Privacy Policy</a>.*?</body>',
             lambda match: '</body>'),
        ]
    ]

    def get_feeds(self):
        """Return the AP feeds as a list of ``(title, url)`` tuples."""
        return [
            ('AP Headlines', 'http://hosted.ap.org/lineups/TOPHEADS-rss_2.0.xml?SITE=ORAST&SECTION=HOME'),
            ('AP US News', 'http://hosted.ap.org/lineups/USHEADS-rss_2.0.xml?SITE=CAVIC&SECTION=HOME'),
            ('AP World News', 'http://hosted.ap.org/lineups/WORLDHEADS-rss_2.0.xml?SITE=SCAND&SECTION=HOME'),
            ('AP Political News', 'http://hosted.ap.org/lineups/POLITICSHEADS-rss_2.0.xml?SITE=ORMED&SECTION=HOME'),
            ('AP Washington State News', 'http://hosted.ap.org/lineups/WASHINGTONHEADS-rss_2.0.xml?SITE=NYPLA&SECTION=HOME'),
            ('AP Technology News', 'http://hosted.ap.org/lineups/TECHHEADS-rss_2.0.xml?SITE=CTNHR&SECTION=HOME'),
            ('AP Health News', 'http://hosted.ap.org/lineups/HEALTHHEADS-rss_2.0.xml?SITE=FLDAY&SECTION=HOME'),
            ('AP Science News', 'http://hosted.ap.org/lineups/SCIENCEHEADS-rss_2.0.xml?SITE=OHCIN&SECTION=HOME'),
            ('AP Strange News', 'http://hosted.ap.org/lineups/STRANGEHEADS-rss_2.0.xml?SITE=WCNC&SECTION=HOME'),
        ]
 
 
b'\\ No newline at end of file'