3
from calibre.ebooks.lrf.web.profiles import DefaultProfile
4
from calibre.ebooks.BeautifulSoup import BeautifulSoup
6
class ChristianScienceMonitor(DefaultProfile):
8
title = 'Christian Science Monitor'
10
max_articles_per_feed = 20
16
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
18
(r'<body.*?<div id="story"', lambda match : '<body><div id="story"'),
19
(r'<div class="pubdate">.*?</div>', lambda m: ''),
20
(r'Full HTML version of this story which may include photos, graphics, and related links.*</body>',
21
lambda match : '</body>'),
25
def parse_feeds(self):
26
soup = BeautifulSoup(self.browser.open('http://www.csmonitor.com/textedition'))
29
for tag in soup.findAll(['h2', 'p']):
31
title = self.tag_to_string(tag)
33
articles[title] = feed
34
elif tag.has_key('class') and tag['class'] == 'story':
36
if a is not None and a.has_key('href'):
38
'title': self.tag_to_string(a),
39
'url' : 'http://www.csmonitor.com'+a['href'],
40
'date' : time.strftime('%d %b'),
44
feed[-1]['description'] = self.tag_to_string(tag).strip()