~kovid/calibre/trunk

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
'''
readitlaterlist.com
'''
__license__   = 'GPL v3'
__copyright__ = '''
2011, Keith Callenberg <keithcallenberg@gmail.com>
2012, Alayn Gortazar <zutoin at gmail dot com>
'''

from contextlib import closing
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
import json
import urllib
import urllib2

class Readitlaterv2(BasicNewsRecipe):
    title                 = 'Read It Later v2'
    __author__            = 'Keith Callenberg'
    description           = '''Personalized news feeds. Go to readitlaterlist.com to
                               setup up your news. Fill in your account
                               username, and optionally you can add your password.'''
    publisher             = 'readitlaterlist.com'
    category              = 'news, custom'
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    needs_subscription    = True
    KEY                   = '8e0p5f19A74emL3a47goP87m69d4VF8b'
    INDEX                 = 'https://readitlaterlist.com/'
    LOGIN                 = INDEX + u'/l'

    articles           = []
    
    feeds = [(u'Unread articles' , INDEX)]

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None:
            br.open(self.LOGIN)
            br.select_form(nr=0)
            br['feed_id'] = self.username
            if self.password is not None:
                br['password'] = self.password
            br.submit()
        return br



    def parse_index(self):
        index = self.INDEX + 'v2/get?'
        index += 'apikey=' + self.KEY
        index += '&username=' + self.username + '&password=' + self.password 
        index += '&state=unread'
        index += '&count=' + str(self.max_articles_per_feed) 

        open_func = getattr(self.browser, 'open_novisit', self.browser.open)
        with closing(open_func(index)) as f:
            results = f.read()
        if not results:
            raise RuntimeError('Could not fetch index!')

        json_obj = json.loads(results)

        if len(json_obj['list']) > 0:
            for item in json_obj['list'].iteritems():
                dataurl = "https://readitlaterlist.com/a/x/getArticle.php?itemId=" + item[1]['item_id']
                self.articles.append({
                                 'title':item[1]['title'],
                                 'date':item[1]['time_added'],
                                 'url':dataurl,
                                 'description':item[1]['item_id'],
                                 'real_url':item[1]['url']
                            })
        return [('Unread', self.articles)]

    def preprocess_raw_html(self, raw_html, url):
        # get article and image urls from json object
        json_obj = json.loads(raw_html)
        self.images = {}
        for image in json_obj['article']['images']:
            self.images[image] = json_obj['article']['images'][image]['src']
        return json_obj['article']['article']

    def preprocess_html(self, soup):
        # Insert images on RIL_IMG_# divs
        for key, url in self.images.iteritems():
            tag = Tag(soup, 'img')
            tag['src'] = url
            div = soup.find('div', attrs={'id':'RIL_IMG_' + key})
            div.insert(0, tag)
        return soup

    def cleanup(self):
        # From a list of urls, create a human-readable JSON string
        # suitable for passing to the ReadItLater SEND::READ method.
    
        self.markAsRead(self.createMarkList(self.articles))
    

    def createMarkList(self, articles):
        urls = []
        for article in self.articles:
            urls.append(article['real_url'])
        items = ['"%d": {"url": "%s"}' % (n,u) for n,u in enumerate(urls)]
        s = '{\n %s\n}' % (',\n '.join(items),)
        return s

    def markAsRead(self, markList):
        url = self.INDEX + 'v2/send'
        values = {
            'username' : self.username,
            'password' : self.password,
            'apikey' : self.KEY,
            'read' : markList
            }
        data = urllib.urlencode(values)
    
        try:
            print 'Calling ReadItLater API...'
            request = urllib2.Request(url,data)
            response = urllib2.urlopen(request)
            the_page = response.read()
            print 'response =', response.code
        except urllib2.HTTPError as e:
            print 'The server could not fulfill the request: ', e
        except urllib2.URLError as e:
            print 'The call to ReadItLater API failed:', e