18
19
max_articles_per_feed = 100  # upper bound on articles fetched from each feed
19
20
no_stylesheets = True  # discard the site's own CSS when fetching pages
20
21
use_embedded_content = False  # don't use feed-embedded bodies; download full articles
21
remove_javascript = True  # strip JavaScript from downloaded pages
23
23
# Options forwarded to the LRF converter: propagate the recipe's
# metadata (description/category/publisher attributes defined on this
# class) as command-line style flags.
# NOTE(review): this span was garbled (diff residue, unterminated list);
# reconstructed with the closing bracket restored.
html2lrf_options = [
    '--comment', description,
    '--category', category,
    '--publisher', publisher,
]
29
# Options forwarded to the EPUB converter: recipe metadata plus a CSS
# override that removes paragraph indents and tightens image margins.
# (This assignment appeared twice in the garbled source; collapsed to one.)
html2epub_options = (
    'publisher="' + publisher + '"\n'
    'comments="' + description + '"\n'
    'tags="' + category + '"\n'
    'override_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
)
32
32
# Keep only the cached article container; everything else on the page is dropped.
keep_only_tags = [{'name': 'div', 'attrs': {'id': 'cached'}}]
34
dict(name='table', attrs={'class':'rcnt'})
35
,dict(name='table', attrs={'class':'rcnt topline'})
34
dict(name='table', attrs={'class':['rcnt','rcnt topline']})
36
35
,dict(name=['link','object','embed'])
39
38
# (title, feed URL) pairs this recipe downloads.
feeds = [
    (u"Harper's Magazine", u'http://www.harpers.org/rss/frontpage-rss20.xml'),
]
40
def preprocess_html(self, soup):
41
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
42
soup.head.insert(1,mcharset)
43
for item in soup.findAll(style=True):
45
for item in soup.findAll(xmlns=True):