~ubuntu-branches/ubuntu/oneiric/calibre/oneiric

« back to all changes in this revision

Viewing changes to resources/recipes/honoluluadvertiser.recipe

  • Committer: Bazaar Package Importer
  • Author(s): Martin Pitt
  • Date: 2010-06-21 10:18:08 UTC
  • mfrom: (1.3.12 upstream)
  • Revision ID: james.westby@ubuntu.com-20100621101808-aue828f532tmo4zt
Tags: 0.7.2+dfsg-1
* New major upstream version. See http://calibre-ebook.com/new-in/seven for
  details.
* Refresh patches to apply cleanly.
* debian/control: Bump python-cssutils to >= 0.9.7~ to ensure the existence
  of the CSSRuleList.rulesOfType attribute. This makes epub conversion work
  again. (Closes: #584756)
* Add debian/local/calibre-mount-helper: Simple and safe replacement for
  upstream's calibre-mount-helper, using udisks --mount and eject.
  (Closes: #584915, LP: #561958)

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
#!/usr/bin/env  python
2
 
# -*- coding: cp1252 -*-
3
 
 
4
 
__license__   = 'GPL v3'
5
 
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
6
 
'''
7
 
honoluluadvertiser.com
8
 
'''
9
 
 
10
 
from calibre.web.feeds.news import BasicNewsRecipe
11
 
 
12
 
class Honoluluadvertiser(BasicNewsRecipe):
    """News recipe for The Honolulu Advertiser (honoluluadvertiser.com).

    The class is purely declarative apart from ``preprocess_html``: the
    attributes below configure the ``BasicNewsRecipe`` base class (feeds,
    which tags to keep/remove, extra CSS, conversion options).
    """

    # --- Metadata -----------------------------------------------------------
    title                 = 'Honolulu Advertiser'
    __author__            = 'Darko Miletic and Sujata Raman'
    description           = "Latest national and local Hawaii sports news from The Honolulu Advertiser."
    publisher             = 'Honolulu Advertiser'
    category              = 'news, Honolulu, Hawaii'
    language              = 'en'

    # --- Download behaviour -------------------------------------------------
    # NOTE(review): the exact semantics of these switches are defined by
    # calibre's BasicNewsRecipe API, not by this file -- confirm there.
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    remove_javascript     = True
    cover_url             = 'http://www.honoluluadvertiser.com/graphics/frontpage/frontpage.jpg'

    # --- Conversion options (built from the metadata above) -----------------
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    # --- Page clean-up ------------------------------------------------------
    # <div> elements (matched by class or by id) that are kept from a page.
    keep_only_tags = [
        dict(name='div', attrs={'class': [
            "hon_article_top",
            "article-bodytext",
            "hon_article_photo",
            "storyphoto",
            "article",
        ]}),
        dict(name='div', attrs={'id': ["storycontentleft", "article"]}),
    ]

    # Elements removed from a page: embedded objects, page tools, comment
    # widgets, tracking/advertising images, etc.
    remove_tags = [
        dict(name=['object', 'link', 'embed']),
        dict(name='div', attrs={'class': [
            "article-tools",
            "titleBar",
            "invisiblespacer",
            "articleflex-container",
            "hon_newslist",
            "categoryheader",
            "columnframe",
            "subHeadline",
            "poster-container",
        ]}),
        dict(name='div', attrs={'align': ["right"]}),
        dict(name='div', attrs={'id': ["pluckcomments"]}),
        dict(name='td', attrs={'class': ["prepsfacts"]}),
        dict(name='img', attrs={'height': ["1"]}),
        dict(name='img', attrs={'alt': ["Advertisement"]}),
        dict(name='img', attrs={'src': [
            "/gcicommonfiles/sr/graphics/common/adlabel_horz.gif",
            "/gcicommonfiles/sr/graphics/common/icon_whatsthis.gif",
        ]}),
    ]

    # Stylesheet text supplied to the base class for the generated book.
    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#000000; }
                    .hon_article_timestamp{font-family:Arial,Helvetica,sans-serif; font-size:70%; }
                    .postedStoryDate{font-family:Arial,Helvetica,sans-serif; font-size:30%; }
                    .postedDate{font-family:Arial,Helvetica,sans-serif; font-size:30%; }
                    .credit{font-family:Arial,Helvetica,sans-serif; font-size:30%; }
                    .hon_article_top{font-family:Arial,Helvetica,sans-serif; color:#666666; font-size:30%; font-weight:bold;}
                    .grayBackground{font-family:Arial,Helvetica,sans-serif; color:#666666; font-size:30%;}
                    .hon_photocaption{font-family:Arial,Helvetica,sans-serif; font-size:30%; }
                    .photoCaption{font-family:Arial,Helvetica,sans-serif; font-size:30%; }
                    .hon_photocredit{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#666666;}
                    .storyphoto{font-family:Arial,Helvetica,sans-serif; font-size:30%; color:#666666;}
                    .article-bodytext{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
                    .storycontentleft{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
                    #article{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
                    .contentarea{font-family:Arial,Helvetica,sans-serif; font-size:xx-small; }
                    .storytext{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:xx-small;}
                    .storyHeadline{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#000000; font-weight:bold;}
                    .source{font-family:Arial,Helvetica,sans-serif; color:#333333; font-style: italic; font-weight:bold; }
                '''

    # --- Feeds: (section title, RSS URL) pairs ------------------------------
    feeds = [
        (u'Breaking news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS01&MIME=XML'),
        (u'Local news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS02&MIME=XML'),
        (u'Sports', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS03&MIME=XML'),
        (u'Island life', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS05&MIME=XML'),
        (u'Entertainment', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS06&MIME=XML'),
        (u'Business', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS04&MIME=XML'),
    ]

    def preprocess_html(self, soup):
        """Clean up a parsed article page in place and return it.

        ``soup`` is the parsed document; it is only used through its
        ``findAll``, ``head`` and item-deletion interface here.

        Steps performed:
          * remove the ``style`` attribute from every element that has one;
          * insert a ``Content-Language`` meta declaration at the start of
            ``<head>`` (skipped when the page has no ``<head>``);
          * rename every ``span``, ``table`` and ``font`` element to ``div``.

        Returns the same ``soup`` object.
        """
        # Strip every inline ``style`` attribute.
        for item in soup.findAll(style=True):
            del item['style']

        # A page without a <head> gives ``soup.head is None``; guard so that
        # such a page is still processed instead of raising AttributeError.
        head = soup.head
        if head is not None:
            mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n'
            head.insert(0, mtag)

        for tag in soup.findAll(name=['span', 'table', 'font']):
            tag.name = 'div'

        return soup
90
 
 
91
 
 
92
 
   # def print_version(self, url):
93
 
   #     ubody, sep, rest = url.rpartition('/-1/')
94
 
   #     root, sep2, article_id = ubody.partition('/article/')
95
 
   #     return u'http://www.honoluluadvertiser.com/apps/pbcs.dll/article?AID=/' + article_id + '&template=printart'
96