~ubuntu-branches/debian/sid/calibre/sid

1.3.24 by Martin Pitt
Import upstream version 0.8.8+dfsg
1
# -*- coding: utf-8 -*-
2
3
from __future__ import (unicode_literals, division, absolute_import, print_function)
1.5.9 by Martin Pitt
Import upstream version 1.25.0+dfsg
4
store_version = 5 # Needed for dynamic plugin loading
1.3.24 by Martin Pitt
Import upstream version 0.8.8+dfsg
5
6
__license__ = 'GPL 3'
7
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
8
__docformat__ = 'restructuredtext en'
9
1.3.39 by Martin Pitt
Import upstream version 0.9.27+dfsg
10
import re
1.3.24 by Martin Pitt
Import upstream version 0.8.8+dfsg
11
from contextlib import closing
12
from lxml import html
13
14
from PyQt4.Qt import QUrl
15
16
from calibre import browser
17
from calibre.gui2 import open_url
18
from calibre.gui2.store import StorePlugin
19
from calibre.gui2.store.search_result import SearchResult
20
1.3.38 by Martin Pitt
Import upstream version 0.9.18+dfsg
21
1.3.39 by Martin Pitt
Import upstream version 0.9.27+dfsg
22
23
class AmazonUKKindleStore(StorePlugin):
    '''
    Store plugin for the Amazon UK Kindle store.

    :meth:`open` opens the store front page, or a single book's page, using
    the affiliate redirect links defined below. :meth:`search` downloads the
    Amazon search results page and yields one ``SearchResult`` per Kindle
    book found on it.
    '''

    # Values substituted into the %(tag)s placeholders of the links below.
    aff_id = {'tag': 'calcharles-21'}
    store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
                  'location=http://www.amazon.co.uk/Kindle-eBooks/b?'
                  'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&'
                  'linkCode=ur2&camp=1634&creative=19450')
    store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
                          'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&'
                          'linkCode=ur2&camp=1634&creative=6738')
    search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='

    # Leading word stripped from the scraped author text. It is inserted
    # into a regular expression as-is.
    author_article = 'by '

    # Word joining several authors in the scraped text; replaced by ' & '.
    # It is used as a regular expression pattern as-is.
    and_word = ' and '

    # This code is copy/pasted from here to the other amazon EU plugins. Do
    # not modify it in any other amazon EU plugin. Be sure to paste it into
    # all other amazon EU plugins when modified.

    # ---- Copy from here to end

    # For comments on the implementation, please see amazon_plugin.py

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open the store, or one book's page, through ``open_url``.

        :param parent: Not used by this implementation.
        :param detail_item: ASIN of a book. When truthy, the book's details
            page is opened instead of the store front page.
        :param external: Not used by this implementation; the link is always
            handed to ``open_url``.
        '''
        if detail_item:
            # Format from a copy: aff_id is a class-level dict shared by
            # every instance, so storing the ASIN in it would leak state
            # from one call into all later ones.
            link_params = dict(self.aff_id)
            link_params['asin'] = detail_item
            store_link = self.store_link_details % link_params
        else:
            store_link = self.store_link % self.aff_id
        open_url(QUrl(store_link))

    def search(self, query, max_results=10, timeout=60):
        '''
        Search the store and yield a ``SearchResult`` for each Kindle book.

        :param query: Search text; it is escaped and appended to
            ``search_url``.
        :param max_results: Maximum number of results to yield.
        :param timeout: Timeout passed to the browser's ``open`` call.

        Nothing is yielded (and a diagnostic line is printed) when the
        results page uses a layout this plugin does not recognise.
        '''
        # NOTE(review): characters that 'backslashreplace' renders as \uXXXX
        # are not turned into percent escapes by this expression, and a
        # literal "\x" typed by the user becomes "%" -- confirm whether that
        # matters for the queries this store receives.
        url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
        br = browser()

        # Read and parse the whole page first so the response is closed
        # before any result is yielded; otherwise it would stay open for as
        # long as the caller keeps this generator suspended.
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())

        xpaths = self._result_xpaths(doc)
        if xpaths is None:
            print('unknown result table form for Amazon EU search')
            return
        (data_xpath, format_xpath, asin_xpath, cover_xpath,
         title_xpath, author_xpath, price_xpath) = xpaths

        counter = max_results
        for data in doc.xpath(data_xpath):
            if counter <= 0:
                break

            # Even though we are searching digital-text only Amazon will
            # still put in results for non Kindle books (authors pages). So
            # we need to explicitly check if the item is a Kindle book and
            # ignore it if it isn't.
            format_ = ''.join(data.xpath(format_xpath))
            if 'kindle' not in format_.lower():
                continue

            # We must have an asin otherwise we can't easily reference the
            # book later.
            asin = data.xpath(asin_xpath)
            if not asin:
                continue
            asin = asin[0]

            cover_url = ''.join(data.xpath(cover_xpath))
            title = ''.join(data.xpath(title_xpath))
            authors = self._clean_authors(''.join(data.xpath(author_xpath)))
            price = ''.join(data.xpath(price_xpath))

            counter -= 1

            s = SearchResult()
            s.cover_url = cover_url.strip()
            s.title = title.strip()
            s.author = authors.strip()
            s.price = price.strip()
            s.detail_item = asin.strip()
            s.drm = SearchResult.DRM_UNKNOWN
            s.formats = 'Kindle'

            yield s

    def _result_xpaths(self, doc):
        '''
        Pick the XPath expressions matching the layout of a results page.

        :param doc: Parsed results page (an lxml HTML element).
        :return: A tuple ``(data, format, asin, cover, title, author,
            price)`` of XPath strings, or ``None`` when the page matches
            none of the known layouts. ``data`` selects the result nodes;
            the others are evaluated relative to each result node.
        '''
        # These three expressions are the same for every known layout.
        asin_xpath = '@name'
        title_xpath = './/h3[@class="newaps"]/a//text()'
        author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'

        # The layouts are tested in this fixed order: grid, ilresults, list.
        if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
            data_xpath = '//div[contains(@class, "prod")]'
            format_xpath = (
                    './/ul[contains(@class, "rsltGridList")]'
                    '//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
            cover_xpath = './/img[contains(@class, "productImage")]/@src'
            price_xpath = (
                    './/ul[contains(@class, "rsltGridList")]'
                    '//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
        elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
            data_xpath = '//li[(@class="ilo")]'
            format_xpath = (
                    './/ul[contains(@class, "rsltGridList")]'
                    '//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
            cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
            # Results can be in a grid (table) or a column
            price_xpath = (
                    './/ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
                    '//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
        elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
            data_xpath = '//div[contains(@class, "prod")]'
            format_xpath = (
                    './/ul[contains(@class, "rsltL")]'
                    '//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
            cover_xpath = './/img[contains(@class, "productImage")]/@src'
            price_xpath = (
                    './/ul[contains(@class, "rsltL")]'
                    '//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
        else:
            return None

        return (data_xpath, format_xpath, asin_xpath, cover_xpath,
                title_xpath, author_xpath, price_xpath)

    def _clean_authors(self, authors):
        '''
        Normalise the author text scraped from one search result.

        Removes a leading ``author_article``, replaces every ``and_word``
        with ' & ', and drops a trailing part that starts with an opening
        parenthesis followed by a digit.

        :param authors: Raw author text.
        :return: The cleaned author text.
        '''
        authors = re.sub('^' + self.author_article, '', authors)
        authors = re.sub(self.and_word, ' & ', authors)
        mo = re.match(r'(.*)(\(\d.*)$', authors)
        if mo:
            authors = mo.group(1).strip()
        return authors

    def get_details(self, search_result, timeout):
        '''
        Do nothing: this plugin fetches no further details for a result.

        :param search_result: Ignored.
        :param timeout: Ignored.
        '''
        pass
1.3.38 by Martin Pitt
Import upstream version 0.9.18+dfsg
162