~ubuntu-branches/debian/sid/calibre/sid

« back to all changes in this revision

Viewing changes to recipes/hindustan_times.recipe

  • Committer: Package Import Robot
  • Author(s): Martin Pitt
  • Date: 2014-05-14 18:17:50 UTC
  • mto: This revision was merged to the branch mainline in revision 75.
  • Revision ID: package-import@ubuntu.com-20140514181750-efj1wymey2vb4cao
Tags: upstream-1.36.0+dfsg
Import upstream version 1.36.0+dfsg

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
from calibre.web.feeds.news import BasicNewsRecipe
2
 
import urllib, re
3
2
 
4
3
class HindustanTimes(BasicNewsRecipe):
5
4
    title          = u'Hindustan Times'
6
5
    language       = 'en_IN'
7
6
    __author__     = 'Krittika Goyal'
8
 
    oldest_article = 1 #days
 
7
    oldest_article = 1  # days
9
8
    max_articles_per_feed = 25
10
9
    use_embedded_content = False
11
10
 
13
12
    auto_cleanup = True
14
13
 
15
14
    feeds          = [
16
 
            ('News',
17
 
            'http://feeds.hindustantimes.com/HT-NewsSectionPage-Topstories'),
18
 
            ('Views',
19
 
            'http://feeds.hindustantimes.com/HT-ViewsSectionpage-Topstories'),
20
 
            ('Cricket',
21
 
            'http://feeds.hindustantimes.com/HT-Cricket-TopStories'),
22
 
            ('Business',
23
 
            'http://feeds.hindustantimes.com/HT-BusinessSectionpage-TopStories'),
24
 
            ('Entertainment',
25
 
            'http://feeds.hindustantimes.com/HT-HomePage-Entertainment'),
26
 
            ('Lifestyle',
27
 
            'http://feeds.hindustantimes.com/HT-Homepage-LifestyleNews'),
 
15
        ('News',
 
16
         'http://feeds.hindustantimes.com/HT-HomePage-TopStories'),
 
17
        ('India',
 
18
         'http://feeds.hindustantimes.com/HT-India'),
 
19
        ('World',
 
20
         'http://feeds.hindustantimes.com/HT-World'),
 
21
        ('Business',
 
22
         'http://feeds.hindustantimes.com/HT-Business'),
 
23
        ('Fashion',
 
24
         'http://feeds.hindustantimes.com/HT-Fashion'),
 
25
        ('Sex & Relationships',
 
26
         'http://feeds.hindustantimes.com/HT-Sexandrelationships'),
 
27
        ('Travel',
 
28
         'http://feeds.hindustantimes.com/HT-Travel'),
 
29
        ('Books',
 
30
         'http://feeds.hindustantimes.com/HT-Books'),
28
31
]
29
32
 
30
33
    def get_article_url(self, article):
31
34
        '''
32
35
        HT uses a variant of the feedportal RSS ad display mechanism
33
36
        '''
34
 
        try:
35
 
            s = article.summary
36
 
            return urllib.unquote(
37
 
                re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
38
 
        except:
39
 
            pass
40
 
        url = BasicNewsRecipe.get_article_url(self, article)
41
 
        res = self.browser.open_novisit(url)
42
 
        url = res.geturl().split('/')[-2]
43
 
        encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
44
 
                '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S':
45
 
                'www.'}
46
 
        for k, v in encoding.iteritems():
47
 
            url = url.replace(k, v)
48
 
        return url
 
37
        url = article.get('feedburner_origlink', None)
 
38
        if url is not None:
 
39
            idx = url.find('0L0S')
 
40
            url = url[idx:]
 
41
            encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
 
42
                    '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S':
 
43
                    'www.'}
 
44
            for k, v in encoding.iteritems():
 
45
                url = url.replace(k, v)
 
46
            if url.endswith('/story01.htm'):
 
47
                url = url.rpartition('/')[0]
 
48
            return url
 
49
        return article.get('link', None)
49
50
 
50
51