3
3
# Based on veezh's original recipe, Kovid Goyal's New York Times recipe and Snaabs nrc Handelsblad recipe
5
5
__license__ = 'GPL v3'
6
__copyright__ = '2013, Niels Giesen'
6
__copyright__ = '2014, Niels Giesen'
11
import os, zipfile, re
12
from io import BytesIO
13
14
from calibre.web.feeds.news import BasicNewsRecipe
14
from calibre.ptempfile import PersistentTemporaryFile
15
from datetime import date, timedelta
17
18
class NRCNext(BasicNewsRecipe):
28
29
'no_default_epub_cover' : True
31
def get_browser(self):
32
br = BasicNewsRecipe.get_browser(self)
33
if self.username is not None and self.password is not None:
34
br.open('http://login.nrc.nl/login')
36
br['username'] = self.username
37
br['password'] = self.password
41
32
def build_index(self):
43
today = time.strftime("%Y%m%d")
45
domain = "http://digitaleeditie.nrc.nl"
47
url = domain + "/digitaleeditie/helekrant/epub/nn_" + today + ".epub"
51
br = self.get_browser()
54
self.report_progress(0,_('Kan niet inloggen om editie te downloaden'))
33
from calibre.web.jsbrowser.browser import Browser, ElementNotFound
35
br.visit('http://login.nrc.nl/login', timeout=60)
36
f = br.select_form('#command')
37
f['username'] = self.username
38
f['password'] = self.password
41
if '>log out<' not in raw:
42
raise ValueError('Failed to login, check username and password')
44
for today in (date.today(), date.today() - timedelta(days=1),):
45
url = 'http://digitaleeditie.nrc.nl/digitaleeditie/NN/%s/%d/%s___/downloads.html' % (today.strftime('%Y'), today.month - 1, today.strftime('%Y%m%d'))
46
self.log('Trying to download epub from:', url)
47
br.start_load(url, timeout=60)
49
epubraw = br.download_file('#CompleteDownloads .download-list .download-button')
51
except ElementNotFound:
52
self.log('%r not available yet' % url)
55
56
raise ValueError('Krant van vandaag nog niet beschikbaar')
57
tmp = PersistentTemporaryFile(suffix='.epub')
58
self.report_progress(0,_('downloading epub'))
62
if zipfile.is_zipfile(tmp):
64
zfile = zipfile.ZipFile(tmp.name, 'r')
65
zfile.extractall(self.output_dir)
66
self.report_progress(0,_('extracting epub'))
67
except zipfile.BadZipfile:
68
self.report_progress(0,_('BadZip error, continuing'))
58
zfile = zipfile.ZipFile(BytesIO(epubraw), 'r')
59
zfile.extractall(self.output_dir)
60
namelist = zfile.namelist()
61
emre = re.compile("<em(?:.*)>(.*)</em>")
64
_, ext = os.path.splitext(name);
65
if (ext == '.html') or (ext == '.ncx'):
66
fname = os.path.join(self.output_dir, name)
67
with open(fname) as f:
69
s = emre.sub(subst, s)
70
with open(fname, 'w') as f:
71
72
index = os.path.join(self.output_dir, 'metadata.opf')
73
self.report_progress(1,_('epub downloaded and extracted'))