# Copyright (c) 2009, David Rodrigues - http://sixhat.net
from calibre.web.feeds.news import BasicNewsRecipe
13
class Publico(BasicNewsRecipe):
    """News recipe built on ``BasicNewsRecipe`` for the Publico RSS feeds.

    The class only declares configuration attributes plus
    :meth:`print_version`, which maps an article URL to the site's
    print page.
    """

    __author__ = 'David Rodrigues'
    max_articles_per_feed = 30
    language = _('Portuguese')

    # Each entry is (compiled pattern, replacement callable); this single
    # rule replaces every U+FFFD (Unicode replacement character) with ''.
    preprocess_regexps = [
        (re.compile(u"\uFFFD", re.DOTALL | re.IGNORECASE), lambda match: ''),
    ]

    # (section title, RSS URL) pairs.
    # NOTE(review): the assignment line was missing from the reviewed text;
    # `feeds` is the attribute BasicNewsRecipe documents for such pairs --
    # confirm against the original file.
    feeds = [
        (u'Geral', u'http://feeds.feedburner.com/PublicoUltimaHora'),
        (u'Internacional', u'http://www.publico.clix.pt/rss.ashx?idCanal=11'),
        (u'Pol\xedtica', u'http://www.publico.clix.pt/rss.ashx?idCanal=12'),
        # Was u'Ci\xcencias' ("CiÎncias"); \xea is the intended e-circumflex.
        (u'Ci\xeancias', u'http://www.publico.clix.pt/rss.ashx?idCanal=13'),
        (u'Desporto', u'http://desporto.publico.pt/rss.ashx'),
        (u'Economia', u'http://www.publico.clix.pt/rss.ashx?idCanal=57'),
        (u'Educa\xe7\xe3o', u'http://www.publico.clix.pt/rss.ashx?idCanal=58'),
        (u'Local', u'http://www.publico.clix.pt/rss.ashx?idCanal=59'),
        (u'Media e Tecnologia', u'http://www.publico.clix.pt/rss.ashx?idCanal=61'),
        (u'Sociedade', u'http://www.publico.clix.pt/rss.ashx?idCanal=62'),
    ]

    # Tag selectors expressed as attribute dicts; presumably applied to the
    # downloaded article HTML by BasicNewsRecipe -- verify against its docs.
    remove_tags = [dict(name='script'), dict(id='linhaTitulosHeader')]
    keep_only_tags = [dict(name='div')]

    def print_version(self, url):
        """Return the print-page URL for the article at *url*.

        The first ``id=<digits>`` fragment found in *url* is appended to the
        print endpoint. If *url* contains no such fragment it is returned
        unchanged rather than raising ``IndexError``.
        """
        found = re.search(r"id=[0-9]+", url)
        if found is None:
            # No article id to build a print link from: keep the original.
            return url
        return "http://ww2.publico.clix.pt/print.aspx?" + found.group(0)