3
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
6
from calibre.ebooks.lrf.web.profiles import DefaultProfile
7
from calibre.ebooks.BeautifulSoup import BeautifulSoup
8
from calibre import iswindows
9
from calibre.ebooks.chardet import xml_to_unicode
11
class AutomaticRSSProfile(DefaultProfile):
    '''
    Make downloading of RSS feeds completely automatic. Only input
    required is the URL of the feed.

    After the base profile has built the feed index, every article linked
    from it is downloaded, reduced to its ``<style>`` rules plus its
    paragraph and heading tags, saved next to the index as
    ``contentN.html``, and the index link is re-pointed at that local copy.
    '''

    # Tags copied from a downloaded article into the simplified content file.
    _CONTENT_TAGS = ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']

    def __init__(self, *args, **kwargs):
        '''
        Initialise the content-file counter and delegate to DefaultProfile.

        All positional and keyword arguments are forwarded unchanged.
        '''
        # Counter used to name content%d.html files. It is set before
        # delegating so that it exists even if the base initialiser calls
        # back into this class (the base class is not visible from here).
        self.cindex = 1
        # The unbound base method needs the instance passed explicitly.
        DefaultProfile.__init__(self, *args, **kwargs)

    @staticmethod
    def _read_bytes(path):
        '''Return the raw bytes of the file at `path`, closing it afterwards.'''
        # try/finally rather than `with` so the code does not depend on the
        # with-statement being enabled in this (Python 2 era) module.
        f = open(path, 'rb')
        try:
            return f.read()
        finally:
            f.close()

    @staticmethod
    def _write_bytes(path, data):
        '''Write the byte string `data` to `path`, closing the file afterwards.'''
        f = open(path, 'wb')
        try:
            f.write(data)
        finally:
            f.close()

    @classmethod
    def _simplify_article(cls, markup):
        '''
        Reduce an article page to its styles, paragraphs and headings.

        :param markup: The downloaded article markup.
        :return: A unicode HTML document, or None if the page has no <body>.
        '''
        soup = BeautifulSoup(markup)

        body = soup.find('body')
        if body is None:
            # Nothing usable on this page.
            return None

        # A page without <head> simply contributes no style rules.
        head = soup.find('head')
        header = []
        if head is not None:
            header = [unicode(style) for style in head('style')]

        content = [unicode(tag) for tag in body(cls._CONTENT_TAGS)]
        return '<html>\n<head>%s</head>\n<body>%s</body></html>'%('\n'.join(header), '\n'.join(content))

    def fetch_content(self, index):
        '''
        Download the articles linked from `index` and localise the links.

        Links that already point at a readable local file are processed
        recursively as nested index files. Every other link is fetched with
        self.browser, simplified, written next to `index` as
        content<N>.html, and the link is rewritten to that file. Finally
        `index` itself is rewritten in place.

        :param index: Path to an HTML index file on disk.
        '''
        import os  # local import: `os` is not among this file's visible imports

        raw = self._read_bytes(index)
        if self.encoding:
            # An explicitly configured encoding takes precedence.
            enc = self.encoding
            raw = raw.decode(enc)
        else:
            # Otherwise let xml_to_unicode detect the encoding; it returns
            # the decoded text together with the encoding it used.
            raw, enc = xml_to_unicode(raw)

        isoup = BeautifulSoup(raw)
        index_dir = os.path.dirname(index)

        for a in isoup.findAll('a', href=True):
            src = a['href']
            if src.startswith('file:'):
                # Links written by this class on Windows carry a 'file:'
                # prefix; strip it to get back a filesystem path.
                src = src[len('file:'):]

            if os.access(src, os.R_OK):
                # Already a local file: treat it as a nested index and do
                # not try to open it through the browser as well.
                self.fetch_content(src)
                continue

            try:
                markup = self.browser.open(src).read()
            except Exception:
                # Best effort: one unreachable article must not abort the
                # whole feed. The link is left pointing at the original URL.
                continue

            html = self._simplify_article(markup)
            if html is None:
                continue

            cfile = 'content%d.html'%self.cindex
            # Advance the counter so the next article gets its own file.
            self.cindex += 1
            cfile = os.path.join(index_dir, cfile)

            self._write_bytes(cfile, html.encode(enc))
            a['href'] = ('file:' if iswindows else '') + cfile

        self._write_bytes(index, unicode(isoup).encode(enc))

    def build_index(self):
        '''
        Build the feed index via DefaultProfile, then fetch linked content.

        :return: Whatever DefaultProfile.build_index returned (used here as
                 the path of the index file).
        '''
        index = DefaultProfile.build_index(self)
        self.fetch_content(index)
        # Pass the base class's result through so callers of the overridden
        # method still receive it.
        return index
b'\\ No newline at end of file'