~ubuntu-branches/ubuntu/karmic/calibre/karmic

« back to all changes in this revision

Viewing changes to src/calibre/web/feeds/recipes/recipe_wsj.py

  • Committer: Bazaar Package Importer
  • Author(s): Martin Pitt
  • Date: 2009-07-30 12:49:41 UTC
  • mfrom: (1.3.2 upstream)
  • Revision ID: james.westby@ubuntu.com-20090730124941-qjdsmri25zt8zocn
Tags: 0.6.3+dfsg-0ubuntu1
* New upstream release. Please see http://calibre.kovidgoyal.net/new_in_6/
  for the list of new features and changes.
* remove_postinstall.patch: Update for new version.
* build_debug.patch: No longer applies; disabled for now. It might not be
  necessary any more.
* debian/copyright: Fix reference to versionless GPL.
* debian/rules: Drop obsolete dh_desktop call.
* debian/rules: Add workaround for weird Python 2.6 setuptools behaviour of
  putting compiled .so files into src/calibre/plugins/calibre/plugins
  instead of src/calibre/plugins.
* debian/rules: Drop hal fdi moving, since the new upstream version does not
  use hal any more. Drop the hal dependency, too.
* debian/rules: Install udev rules into /lib/udev/rules.d.
* Add debian/calibre.preinst: Remove unmodified
  /etc/udev/rules.d/95-calibre.rules on upgrade.
* debian/control: Bump Python dependencies to 2.6, since upstream needs
  it now.

Show diffs side-by-side

added added

removed removed

Lines of Context:
7
7
 
8
8
# http://online.wsj.com/page/us_in_todays_paper.html
9
9
 
10
 
class WallStreetJournal(BasicNewsRecipe): 
11
 
    
12
 
        title = 'The Wall Street Journal' 
13
 
        __author__ = 'Kovid Goyal'
 
10
class WallStreetJournal(BasicNewsRecipe):
 
11
 
 
12
        title = 'The Wall Street Journal'
 
13
        __author__ = 'Kovid Goyal and Sujata Raman'
14
14
        description = 'News and current affairs.'
15
15
        needs_subscription = True
16
16
        language = _('English')
17
17
        max_articles_per_feed = 10
18
 
        timefmt  = ' [%a, %b %d, %Y]' 
 
18
        timefmt  = ' [%a, %b %d, %Y]'
19
19
        no_stylesheets = True
 
20
 
 
21
        extra_css      = '''h1{color:#093D72 ; font-size:large ; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; }
 
22
                        h2{color:#474537; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
 
23
                        .subhead{color:gray; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
 
24
                        .insettipUnit {color:#666666; font-family:Arial,Sans-serif;font-size:xx-small }
 
25
                        .targetCaption{ font-size:x-small; color:#333333; font-family:Arial,Helvetica,sans-serif}
 
26
                        .article{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
 
27
                        .tagline {color:#333333; font-size:xx-small}
 
28
                        .dateStamp {color:#666666; font-family:Arial,Helvetica,sans-serif}
 
29
                         h3{color:blue ;font-family:Arial,Helvetica,sans-serif; font-size:xx-small}
 
30
                         .byline{color:blue;font-family:Arial,Helvetica,sans-serif; font-size:xx-small}
 
31
                         h6{color:#333333; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small;font-style:italic; }
 
32
                        .paperLocation{color:#666666; font-size:xx-small}'''
 
33
 
20
34
        remove_tags_before = dict(name='h1')
21
35
        remove_tags = [
22
 
                       dict(id=["articleTabs_tab_article", "articleTabs_tab_comments", "articleTabs_tab_interactive"]),
23
 
                       {'class':['more_in', "insetContent", 'articleTools_bottom', 'aTools', "tooltip", "adSummary", "nav-inline"]},
 
36
                       dict(id=["articleTabs_tab_article", "articleTabs_tab_comments", "articleTabs_tab_interactive","articleTabs_tab_video","articleTabs_tab_map","articleTabs_tab_slideshow"]),
 
37
                       {'class':['footer_columns','network','insetCol3wide','interactive','video','slideshow','map','insettip','insetClose','more_in', "insetContent", 'articleTools_bottom', 'aTools', "tooltip", "adSummary", "nav-inline"]},
24
38
                       dict(rel='shortcut icon'),
25
39
                      ]
26
40
        remove_tags_after = [dict(id="article_story_body"), {'class':"article story"},]
27
41
 
28
 
        
29
 
        def get_browser(self): 
30
 
            br = BasicNewsRecipe.get_browser() 
31
 
            if self.username is not None and self.password is not None: 
32
 
                br.open('http://commerce.wsj.com/auth/login') 
33
 
                br.select_form(nr=0) 
34
 
                br['user']   = self.username 
35
 
                br['password'] = self.password 
36
 
                br.submit() 
 
42
 
 
43
        def get_browser(self):
 
44
            br = BasicNewsRecipe.get_browser()
 
45
            if self.username is not None and self.password is not None:
 
46
                br.open('http://commerce.wsj.com/auth/login')
 
47
                br.select_form(nr=0)
 
48
                br['user']   = self.username
 
49
                br['password'] = self.password
 
50
                br.submit()
37
51
            return br
38
 
        
 
52
 
39
53
        def postprocess_html(self, soup, first):
40
54
            for tag in soup.findAll(name=['table', 'tr', 'td']):
41
55
                tag.name = 'div'
 
56
 
 
57
            for tag in soup.findAll('div', dict(id=["articleThumbnail_1", "articleThumbnail_2", "articleThumbnail_3", "articleThumbnail_4", "articleThumbnail_5", "articleThumbnail_6", "articleThumbnail_7"])):
 
58
                tag.extract()
 
59
 
42
60
            return soup
43
 
        
 
61
 
44
62
        def get_article_url(self, article):
45
63
            try:
46
64
                return article.feedburner_origlink.split('?')[0]
47
65
            except AttributeError:
48
66
                return article.link.split('?')[0]
49
 
 
50
 
        def cleanup(self): 
51
 
            self.browser.open('http://online.wsj.com/logout?url=http://online.wsj.com') 
52
 
 
53
 
        feeds =  [ 
54
 
                #('Most Emailed - Day', 'http://online.wsj.com/xml/rss/3_7030.xml'), 
55
 
                #('Most Emailed - Week', 'http://online.wsj.com/xml/rss/3_7253.xml'), 
56
 
                #('Most Emailed - Month', 'http://online.wsj.com/xml/rss/3_7254.xml'), 
57
 
                (' Most Viewed - Day', 'http://online.wsj.com/xml/rss/3_7198.xml'), 
58
 
                (' Most Viewed - Week', 'http://online.wsj.com/xml/rss/3_7251.xml'), 
59
 
                # ('Most Viewed - Month', 'http://online.wsj.com/xml/rss/3_7252.xml'), 
60
 
                ('Today\'s Newspaper -  Page One', 'http://online.wsj.com/xml/rss/3_7205.xml'), 
61
 
                ('Today\'s Newspaper - Marketplace', 'http://online.wsj.com/xml/rss/3_7206.xml'), 
62
 
                ('Today\'s Newspaper - Money & Investing', 'http://online.wsj.com/xml/rss/3_7207.xml'), 
63
 
                ('Today\'s Newspaper - Personal Journal', 'http://online.wsj.com/xml/rss/3_7208.xml'), 
64
 
                ('Today\'s Newspaper - Weekend Journal', 'http://online.wsj.com/xml/rss/3_7209.xml'), 
65
 
                ('Opinion', 'http://online.wsj.com/xml/rss/3_7041.xml'), 
66
 
                ('News - U.S.: What\'s News', 'http://online.wsj.com/xml/rss/3_7011.xml'), 
67
 
                ('News - U.S. Business', 'http://online.wsj.com/xml/rss/3_7014.xml'), 
68
 
                ('News - Europe: What\'s News', 'http://online.wsj.com/xml/rss/3_7012.xml'), 
69
 
                ('News - Asia: What\'s News', 'http://online.wsj.com/xml/rss/3_7013.xml'), 
70
 
                ('News - World News', 'http://online.wsj.com/xml/rss/3_7085.xml'), 
71
 
                ('News - Economy', 'http://online.wsj.com/xml/rss/3_7086.xml'), 
72
 
                ('News - Earnings', 'http://online.wsj.com/xml/rss/3_7088.xml'), 
73
 
                ('News - Health', 'http://online.wsj.com/xml/rss/3_7089.xml'), 
74
 
                ('News - Law', 'http://online.wsj.com/xml/rss/3_7091.xml'), 
75
 
                ('News - Media & Marketing', 'http://online.wsj.com/xml/rss/3_7020.xml'), 
76
 
                ('Technology - What\'s News', 'http://online.wsj.com/xml/rss/3_7015.xml'), 
77
 
                ('Technology - Gadgets', 'http://online.wsj.com/xml/rss/3_7094.xml'), 
78
 
                ('Technology - Telecommunications', 'http://online.wsj.com/xml/rss/3_7095.xml'), 
79
 
                ('Technology - E-commerce/Media', 'http://online.wsj.com/xml/rss/3_7096.xml'), 
80
 
                ('Technology - Asia', 'http://online.wsj.com/xml/rss/3_7097.xml'), 
81
 
                ('Technology - Europe', 'http://online.wsj.com/xml/rss/3_7098.xml'), 
82
 
                ('Markets - News', 'http://online.wsj.com/xml/rss/3_7031.xml'), 
83
 
                ('Markets - Europe News', 'http://online.wsj.com/xml/rss/3_7101.xml'), 
84
 
                ('Markets - Asia News', 'http://online.wsj.com/xml/rss/3_7102.xml'), 
85
 
                ('Markets - Deals & Deal Makers', 'http://online.wsj.com/xml/rss/3_7099.xml'), 
86
 
                ('Markets - Hedge Funds', 'http://online.wsj.com/xml/rss/3_7199.xml'), 
87
 
                ('Personal Journal', 'http://online.wsj.com/xml/rss/3_7200.xml'), 
88
 
                ('Personal Journal - Money', 'http://online.wsj.com/xml/rss/3_7104.xml'), 
89
 
                ('Personal Journal - Health', 'http://online.wsj.com/xml/rss/3_7089.xml'), 
90
 
                ('Personal Journal - Autos', 'http://online.wsj.com/xml/rss/3_7092.xml'), 
91
 
                ('Personal Journal - Homes', 'http://online.wsj.com/xml/rss/3_7105.xml'), 
92
 
                ('Personal Journal - Travel', 'http://online.wsj.com/xml/rss/3_7106.xml'), 
93
 
                ('Personal Journal - Careers', 'http://online.wsj.com/xml/rss/3_7107.xml'), 
94
 
                ('Weekend & Leisure', 'http://online.wsj.com/xml/rss/3_7201.xml'), 
95
 
                ('Weekend & Leisure - Weekend Journal', 'http://online.wsj.com/xml/rss/3_7202.xml'), 
96
 
                ('Weekend & Leisure - Arts & Entertainment', 'http://online.wsj.com/xml/rss/3_7177.xml'), 
97
 
                ('Weekend & Leisure - Books', 'http://online.wsj.com/xml/rss/3_7203.xml'), 
98
 
                ('Weekend & Leisure - Sports', 'http://online.wsj.com/xml/rss/3_7204.xml'), 
 
67
 
 
68
        def cleanup(self):
 
69
            self.browser.open('http://online.wsj.com/logout?url=http://online.wsj.com')
 
70
 
 
71
        feeds =  [
 
72
                #('Most Emailed - Day', 'http://online.wsj.com/xml/rss/3_7030.xml'),
 
73
                #('Most Emailed - Week', 'http://online.wsj.com/xml/rss/3_7253.xml'),
 
74
                #('Most Emailed - Month', 'http://online.wsj.com/xml/rss/3_7254.xml'),
 
75
                (' Most Viewed - Day', 'http://online.wsj.com/xml/rss/3_7198.xml'),
 
76
                (' Most Viewed - Week', 'http://online.wsj.com/xml/rss/3_7251.xml'),
 
77
                #('Most Viewed - Month', 'http://online.wsj.com/xml/rss/3_7252.xml'),
 
78
                ('Today\'s Newspaper -  Page One', 'http://online.wsj.com/xml/rss/3_7205.xml'),
 
79
                ('Today\'s Newspaper - Marketplace', 'http://online.wsj.com/xml/rss/3_7206.xml'),
 
80
                ('Today\'s Newspaper - Money & Investing', 'http://online.wsj.com/xml/rss/3_7207.xml'),
 
81
                ('Today\'s Newspaper - Personal Journal', 'http://online.wsj.com/xml/rss/3_7208.xml'),
 
82
                ('Today\'s Newspaper - Weekend Journal', 'http://online.wsj.com/xml/rss/3_7209.xml'),
 
83
                ('Opinion', 'http://online.wsj.com/xml/rss/3_7041.xml'),
 
84
                ('News - U.S.: What\'s News', 'http://online.wsj.com/xml/rss/3_7011.xml'),
 
85
                ('News - U.S. Business', 'http://online.wsj.com/xml/rss/3_7014.xml'),
 
86
                ('News - Europe: What\'s News', 'http://online.wsj.com/xml/rss/3_7012.xml'),
 
87
                ('News - Asia: What\'s News', 'http://online.wsj.com/xml/rss/3_7013.xml'),
 
88
                ('News - World News', 'http://online.wsj.com/xml/rss/3_7085.xml'),
 
89
                ('News - Economy', 'http://online.wsj.com/xml/rss/3_7086.xml'),
 
90
                ('News - Earnings', 'http://online.wsj.com/xml/rss/3_7088.xml'),
 
91
                ('News - Health', 'http://online.wsj.com/xml/rss/3_7089.xml'),
 
92
                ('News - Law', 'http://online.wsj.com/xml/rss/3_7091.xml'),
 
93
                ('News - Media & Marketing', 'http://online.wsj.com/xml/rss/3_7020.xml'),
 
94
                ('Technology - What\'s News', 'http://online.wsj.com/xml/rss/3_7015.xml'),
 
95
                ('Technology - Gadgets', 'http://online.wsj.com/xml/rss/3_7094.xml'),
 
96
                ('Technology - Telecommunications', 'http://online.wsj.com/xml/rss/3_7095.xml'),
 
97
                ('Technology - E-commerce/Media', 'http://online.wsj.com/xml/rss/3_7096.xml'),
 
98
                ('Technology - Asia', 'http://online.wsj.com/xml/rss/3_7097.xml'),
 
99
                ('Technology - Europe', 'http://online.wsj.com/xml/rss/3_7098.xml'),
 
100
                ('Markets - News', 'http://online.wsj.com/xml/rss/3_7031.xml'),
 
101
                ('Markets - Europe News', 'http://online.wsj.com/xml/rss/3_7101.xml'),
 
102
                ('Markets - Asia News', 'http://online.wsj.com/xml/rss/3_7102.xml'),
 
103
                ('Markets - Deals & Deal Makers', 'http://online.wsj.com/xml/rss/3_7099.xml'),
 
104
                ('Markets - Hedge Funds', 'http://online.wsj.com/xml/rss/3_7199.xml'),
 
105
                ('Personal Journal', 'http://online.wsj.com/xml/rss/3_7200.xml'),
 
106
                ('Personal Journal - Money', 'http://online.wsj.com/xml/rss/3_7104.xml'),
 
107
                ('Personal Journal - Health', 'http://online.wsj.com/xml/rss/3_7089.xml'),
 
108
                ('Personal Journal - Autos', 'http://online.wsj.com/xml/rss/3_7092.xml'),
 
109
                ('Personal Journal - Homes', 'http://online.wsj.com/xml/rss/3_7105.xml'),
 
110
                ('Personal Journal - Travel', 'http://online.wsj.com/xml/rss/3_7106.xml'),
 
111
                ('Personal Journal - Careers', 'http://online.wsj.com/xml/rss/3_7107.xml'),
 
112
                ('Weekend & Leisure', 'http://online.wsj.com/xml/rss/3_7201.xml'),
 
113
                ('Weekend & Leisure - Weekend Journal', 'http://online.wsj.com/xml/rss/3_7202.xml'),
 
114
                ('Weekend & Leisure - Arts & Entertainment', 'http://online.wsj.com/xml/rss/3_7177.xml'),
 
115
                ('Weekend & Leisure - Books', 'http://online.wsj.com/xml/rss/3_7203.xml'),
 
116
                ('Weekend & Leisure - Sports', 'http://online.wsj.com/xml/rss/3_7204.xml'),
99
117
                ]
100
118