~ubuntu-branches/ubuntu/karmic/calibre/karmic-updates

« back to all changes in this revision

Viewing changes to src/calibre/web/feeds/recipes/recipe_dnevnik_cro.py

  • Committer: Bazaar Package Importer
  • Author(s): Martin Pitt
  • Date: 2009-04-05 18:42:16 UTC
  • mfrom: (1.1.7 sid)
  • Revision ID: james.westby@ubuntu.com-20090405184216-cyb0x4edrwjcaw33
Tags: 0.5.9+dfsg-1
* New upstream release. (Closes: #525339)
* manpages-installation.patch: Encode generated manpages as UTF-8, to avoid
  UnicodeDecodeErrors when writing them out to files.
* debian/control: Demote calibre dependency of calibre-bin to Recommends:,
  which is sufficient and avoids a circular dependency. (Closes: #522059)
* debian/control: Drop build dependency help2man, current version does not
  need it any more.
* debian/control: Drop versioned build dependency on python-mechanize,
  current sid version is enough.
* debian/rules: Copy "setup.py install" command from cdbs'
  python-distutils.mk, since the current version broke this. This is a
  hackish workaround until #525436 gets fixed.
* debian/rules: Drop using $(wildcard ), use `ls`; the former does not work
  any more.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#!/usr/bin/env  python
 
2
 
 
3
__license__   = 'GPL v3'
 
4
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 
5
 
 
6
'''
 
7
dnevnik.hr
 
8
'''
 
9
 
 
10
import re
 
11
from calibre.web.feeds.recipes import BasicNewsRecipe
 
12
 
 
13
class DnevnikCro(BasicNewsRecipe):
    """News recipe for dnevnik.hr, built from the site's RSS feed.

    All behaviour is configured through class attributes read by the
    ``BasicNewsRecipe`` base class; the only hook overridden here is
    ``preprocess_html``.
    """

    # --- Publication metadata -------------------------------------------
    title = 'Dnevnik - Hr'
    __author__ = 'Darko Miletic'
    description = "Vijesti iz Hrvatske"
    publisher = 'Dnevnik.hr'
    category = 'news, politics, Croatia'
    language = _('Croatian')

    # --- Download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    delay = 4
    encoding = 'utf-8'
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True

    # Font face loaded from a device-local "res://" path and applied to the
    # body and to elements with the "article_description" class.
    extra_css = (
        '@font-face {font-family: "serif1";'
        'src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}'
        ' body{font-family: serif1, serif}'
        ' .article_description{font-family: serif1, serif}'
    )

    # --- Converter options, derived from the metadata above --------------
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]

    html2epub_options = '\n'.join([
        'publisher="' + publisher + '"',
        'comments="' + description + '"',
        'tags="' + category + '"',
        'linearize_tables=True',
    ])

    # Swap every U+0110 for U+00D0 in the downloaded markup.
    # NOTE(review): presumably a workaround for a glyph missing from the
    # target font -- confirm before changing.
    preprocess_regexps = [
        (re.compile(u'\u0110'), lambda m: u'\u00D0'),
    ]

    # --- Tag filters ------------------------------------------------------
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'article'}},
    ]

    remove_tags = [
        {'name': ['object', 'link', 'embed']},
        {'name': 'div', 'attrs': {'class': 'menu'}},
        {'name': 'div', 'attrs': {'id': 'video'}},
    ]

    remove_tags_after = {'name': 'div', 'attrs': {'id': 'content'}}

    # --- Feed list: (title, URL) pairs -------------------------------------
    feeds = [
        (u'Vijesti', u'http://rss.dnevnik.hr/index.rss'),
    ]

    def preprocess_html(self, soup):
        """Tag the page as Croatian UTF-8 and drop all inline styles.

        Sets ``lang`` on the ``<html>`` element, inserts Content-Language
        and Content-Type meta markup at the start of ``<head>``, and deletes
        the ``style`` attribute from every element that has one.

        ``soup`` is modified in place and then returned.
        """
        soup.html['lang'] = 'hr-HR'
        meta_markup = (
            '<meta http-equiv="Content-Language" content="hr-HR"/>\n'
            '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
        )
        soup.head.insert(0, meta_markup)
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup
 
60