~ubuntu-branches/debian/sid/calibre/sid

« back to all changes in this revision

Viewing changes to recipes/nrc_next.recipe

  • Committer: Package Import Robot
  • Author(s): Martin Pitt
  • Date: 2014-05-14 18:17:50 UTC
  • mto: This revision was merged to the branch mainline in revision 75.
  • Revision ID: package-import@ubuntu.com-20140514181750-efj1wymey2vb4cao
Tags: upstream-1.36.0+dfsg
Import upstream version 1.36.0+dfsg

Show diffs side-by-side

added

removed

Lines of Context:
3
3
# Based on veezh's original recipe, Kovid Goyal's New York Times recipe and Snaab's nrc Handelsblad recipe
4
4
 
5
5
__license__   = 'GPL v3'
6
 
__copyright__ = '2013, Niels Giesen'
 
6
__copyright__ = '2014, Niels Giesen'
7
7
 
8
8
'''
9
9
www.nrc.nl
10
10
'''
11
 
import os, zipfile
12
 
import time
 
11
import os, zipfile, re
 
12
from io import BytesIO
 
13
 
13
14
from calibre.web.feeds.news import BasicNewsRecipe
14
 
from calibre.ptempfile import PersistentTemporaryFile
 
15
from datetime import date, timedelta
15
16
 
16
17
 
17
18
class NRCNext(BasicNewsRecipe):
19
20
    title = u'nrc•next'
20
21
    description = u'De ePaper-versie van nrc•next'
21
22
    language = 'nl'
22
 
    lang = 'nl-NL'
23
23
    needs_subscription = True
 
24
    requires_version = (1, 24, 0)
24
25
 
25
26
    __author__ = 'Niels Giesen'
26
27
 
28
29
        'no_default_epub_cover' : True
29
30
    }
30
31
 
31
 
    def get_browser(self):
32
 
        br = BasicNewsRecipe.get_browser(self)
33
 
        if self.username is not None and self.password is not None:
34
 
            br.open('http://login.nrc.nl/login')
35
 
            br.select_form(nr=0)
36
 
            br['username'] = self.username
37
 
            br['password'] = self.password
38
 
            br.submit()
39
 
        return br
40
 
 
41
32
    def build_index(self):
42
 
 
43
 
        today = time.strftime("%Y%m%d")
44
 
 
45
 
        domain = "http://digitaleeditie.nrc.nl"
46
 
 
47
 
        url = domain + "/digitaleeditie/helekrant/epub/nn_" + today + ".epub"
48
 
        #print url
49
 
 
50
 
        try:
51
 
            br = self.get_browser()
52
 
            f = br.open(url)
53
 
        except:
54
 
            self.report_progress(0,_('Kan niet inloggen om editie te downloaden'))
 
33
        from calibre.web.jsbrowser.browser import Browser, ElementNotFound
 
34
        br = Browser()
 
35
        br.visit('http://login.nrc.nl/login', timeout=60)
 
36
        f = br.select_form('#command')
 
37
        f['username'] = self.username
 
38
        f['password'] = self.password
 
39
        br.submit()
 
40
        raw = br.html
 
41
        if '>log out<' not in raw:
 
42
            raise ValueError('Failed to login, check username and password')
 
43
        epubraw = None
 
44
        for today in (date.today(), date.today() - timedelta(days=1),):
 
45
            url = 'http://digitaleeditie.nrc.nl/digitaleeditie/NN/%s/%d/%s___/downloads.html' % (today.strftime('%Y'), today.month - 1, today.strftime('%Y%m%d'))
 
46
            self.log('Trying to download epub from:', url)
 
47
            br.start_load(url, timeout=60)
 
48
            try:
 
49
                epubraw = br.download_file('#CompleteDownloads .download-list .download-button')
 
50
                break
 
51
            except ElementNotFound:
 
52
                self.log('%r not available yet' % url)
 
53
                continue
 
54
 
 
55
        if epubraw is None:
55
56
            raise ValueError('Krant van vandaag nog niet beschikbaar')
56
57
 
57
 
        tmp = PersistentTemporaryFile(suffix='.epub')
58
 
        self.report_progress(0,_('downloading epub'))
59
 
        tmp.write(f.read())
60
 
        f.close()
61
 
        br.close()
62
 
        if zipfile.is_zipfile(tmp):
63
 
            try:
64
 
                zfile = zipfile.ZipFile(tmp.name, 'r')
65
 
                zfile.extractall(self.output_dir)
66
 
                self.report_progress(0,_('extracting epub'))
67
 
            except zipfile.BadZipfile:
68
 
                self.report_progress(0,_('BadZip error, continuing'))
69
 
 
70
 
        tmp.close()
 
58
        zfile = zipfile.ZipFile(BytesIO(epubraw), 'r')
 
59
        zfile.extractall(self.output_dir)
 
60
        namelist = zfile.namelist()
 
61
        emre = re.compile("&lt;em(?:.*)&gt;(.*)&lt;/em&gt;")
 
62
        subst = '\\1'
 
63
        for name in namelist:
 
64
            _, ext = os.path.splitext(name);
 
65
            if (ext == '.html') or (ext == '.ncx'):
 
66
                fname = os.path.join(self.output_dir, name)
 
67
                with open(fname) as f:
 
68
                    s = f.read()
 
69
                    s = emre.sub(subst, s)
 
70
                with open(fname, 'w') as f:
 
71
                    f.write(s)
71
72
        index = os.path.join(self.output_dir, 'metadata.opf')
72
 
 
73
 
        self.report_progress(1,_('epub downloaded and extracted'))
74
 
 
75
73
        return index