1
1
from calibre.web.feeds.news import BasicNewsRecipe
4
3
class HindustanTimes(BasicNewsRecipe):
5
4
title = u'Hindustan Times'
7
6
__author__ = 'Krittika Goyal'
8
oldest_article = 1 #days
7
oldest_article = 1 # days
9
8
max_articles_per_feed = 25
10
9
use_embedded_content = False
13
12
auto_cleanup = True
17
'http://feeds.hindustantimes.com/HT-NewsSectionPage-Topstories'),
19
'http://feeds.hindustantimes.com/HT-ViewsSectionpage-Topstories'),
21
'http://feeds.hindustantimes.com/HT-Cricket-TopStories'),
23
'http://feeds.hindustantimes.com/HT-BusinessSectionpage-TopStories'),
25
'http://feeds.hindustantimes.com/HT-HomePage-Entertainment'),
27
'http://feeds.hindustantimes.com/HT-Homepage-LifestyleNews'),
16
'http://feeds.hindustantimes.com/HT-HomePage-TopStories'),
18
'http://feeds.hindustantimes.com/HT-India'),
20
'http://feeds.hindustantimes.com/HT-World'),
22
'http://feeds.hindustantimes.com/HT-Business'),
24
'http://feeds.hindustantimes.com/HT-Fashion'),
25
('Sex & Relationships',
26
'http://feeds.hindustantimes.com/HT-Sexandrelationships'),
28
'http://feeds.hindustantimes.com/HT-Travel'),
30
'http://feeds.hindustantimes.com/HT-Books'),
30
33
def get_article_url(self, article):
32
35
HT uses a variant of the feedportal RSS ad display mechanism
36
return urllib.unquote(
37
re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
40
url = BasicNewsRecipe.get_article_url(self, article)
41
res = self.browser.open_novisit(url)
42
url = res.geturl().split('/')[-2]
43
encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
44
'0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S':
46
for k, v in encoding.iteritems():
47
url = url.replace(k, v)
37
url = article.get('feedburner_origlink', None)
39
idx = url.find('0L0S')
41
encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
42
'0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S':
44
for k, v in encoding.iteritems():
45
url = url.replace(k, v)
46
if url.endswith('/story01.htm'):
47
url = url.rpartition('/')[0]
49
return article.get('link', None)