1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
|
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# Based on veezh's original recipe, Kovid Goyal's New York Times recipe and Snaabs nrc Handelsblad recipe
__license__ = 'GPL v3'
__copyright__ = '2014, Niels Giesen'
'''
www.nrc.nl
'''
import os, zipfile, re
from io import BytesIO
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import date, timedelta
class NRCNext(BasicNewsRecipe):
    """Calibre recipe that downloads the ready-made ePub of the NRC Next ePaper.

    Instead of scraping feeds, the recipe logs in to the publisher's site,
    downloads the complete ePub edition, unpacks it into the recipe's output
    directory and hands the unpacked ``metadata.opf`` back as the index.
    """

    title = u'nrc•next'
    description = u'De ePaper-versie van nrc•next'
    language = 'nl'
    # The login form in build_index() is filled from self.username/self.password.
    needs_subscription = True
    requires_version = (1, 24, 0)
    __author__ = 'Niels Giesen'
    conversion_options = {
        'no_default_epub_cover': True,
    }

    def build_index(self):
        """Log in, fetch the newest available edition and unpack it.

        Today's edition is tried first, then yesterday's.

        Returns:
            Path of ``metadata.opf`` inside ``self.output_dir``.

        Raises:
            ValueError: if the login page does not show a "log out" link
                after submitting the credentials, or if neither today's nor
                yesterday's edition offers a download.
        """
        # Imported lazily, as in the original recipe.
        from calibre.web.jsbrowser.browser import Browser, ElementNotFound

        br = Browser()
        br.visit('http://login.nrc.nl/login', timeout=60)
        form = br.select_form('#command')
        form['username'] = self.username
        form['password'] = self.password
        br.submit()
        if '>log out<' not in br.html:
            raise ValueError('Failed to login, check username and password')

        epubraw = None
        # Evaluate "today" once so both candidates are relative to the same day.
        today = date.today()
        for edition in (today, today - timedelta(days=1)):
            # NOTE(review): the month path segment is sent as month - 1;
            # presumably the site numbers months from zero -- confirm before
            # changing.
            url = 'http://digitaleeditie.nrc.nl/digitaleeditie/NN/%s/%d/%s___/downloads.html' % (
                edition.strftime('%Y'), edition.month - 1, edition.strftime('%Y%m%d'))
            self.log('Trying to download epub from:', url)
            br.start_load(url, timeout=60)
            try:
                epubraw = br.download_file('#CompleteDownloads .download-list .download-button')
            except ElementNotFound:
                self.log('%r not available yet' % url)
                continue
            break
        if epubraw is None:
            raise ValueError('Krant van vandaag nog niet beschikbaar')

        # Close the archive deterministically once it has been unpacked.
        with zipfile.ZipFile(BytesIO(epubraw), 'r') as zfile:
            zfile.extractall(self.output_dir)
            namelist = zfile.namelist()

        self._strip_em_tags(namelist)
        return os.path.join(self.output_dir, 'metadata.opf')

    def _strip_em_tags(self, names):
        """Remove ``<em>`` wrappers (keeping their content) from extracted files.

        Only members of ``names`` ending in ``.html`` or ``.ncx`` are
        rewritten, in place, below ``self.output_dir``.
        """
        # Non-greedy and attribute-bounded, so several <em> elements on one
        # line are each unwrapped instead of everything between the first
        # "<em" and the last "</em>" being collapsed into the last element.
        # \b keeps tags that merely start with "em" (e.g. <embed>) untouched.
        em_re = re.compile(br'<em\b[^>]*>(.*?)</em>')
        for name in names:
            if os.path.splitext(name)[1] not in ('.html', '.ncx'):
                continue
            fname = os.path.join(self.output_dir, name)
            # Work on raw bytes: no newline translation or implicit decoding.
            with open(fname, 'rb') as f:
                data = f.read()
            with open(fname, 'wb') as f:
                f.write(em_re.sub(br'\1', data))
|