1.3.24
by Martin Pitt
Import upstream version 0.8.8+dfsg |
1 |
# -*- coding: utf-8 -*-
|
2 |
||
3 |
from __future__ import (unicode_literals, division, absolute_import, print_function) |
|
1.5.9
by Martin Pitt
Import upstream version 1.25.0+dfsg |
4 |
store_version = 5 # Needed for dynamic plugin loading |
1.3.24
by Martin Pitt
Import upstream version 0.8.8+dfsg |
5 |
|
6 |
__license__ = 'GPL 3' |
|
7 |
__copyright__ = '2011, John Schember <john@nachtimwald.com>' |
|
8 |
__docformat__ = 'restructuredtext en' |
|
9 |
||
1.3.39
by Martin Pitt
Import upstream version 0.9.27+dfsg |
10 |
import re |
1.3.24
by Martin Pitt
Import upstream version 0.8.8+dfsg |
11 |
from contextlib import closing |
12 |
from lxml import html |
|
13 |
||
14 |
from PyQt4.Qt import QUrl |
|
15 |
||
16 |
from calibre import browser |
|
17 |
from calibre.gui2 import open_url |
|
18 |
from calibre.gui2.store import StorePlugin |
|
19 |
from calibre.gui2.store.search_result import SearchResult |
|
20 |
||
1.3.38
by Martin Pitt
Import upstream version 0.9.18+dfsg |
21 |
|
1.3.39
by Martin Pitt
Import upstream version 0.9.27+dfsg |
22 |
|
23 |
class AmazonUKKindleStore(StorePlugin):
    '''
    Store plugin that searches the Amazon UK Kindle store and opens store
    pages using affiliate links.
    '''

    # Values substituted into the link templates below. This dict is shared
    # by every instance of the class, so it must never be modified in place.
    aff_id = {'tag': 'calcharles-21'}
    store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
                  'location=http://www.amazon.co.uk/Kindle-eBooks/b?'
                  'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&'
                  'linkCode=ur2&camp=1634&creative=19450')
    store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
                          'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&'
                          'linkCode=ur2&camp=1634&creative=6738')
    search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='

    # Prefix removed from the start of the author text. It is used as a
    # regular expression fragment in search().
    author_article = 'by '

    # Separator between author names; replaced with ' & ' in search(). It is
    # used as a regular expression in search().
    and_word = ' and '

    # This code is copy/pasted from here to the other amazon EU plugins. Do
    # not modify it in any other amazon EU plugin. Be sure to paste it into
    # all other amazon EU plugins when modified.

    # ---- Copy from here to end

    # For comments on the implementation, please see amazon_plugin.py

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open the Kindle store, or the page of a single book, via open_url.

        :param parent: Not used by this implementation.
        :param detail_item: ASIN of the book whose page should be opened.
            When it is false-y the store front page is opened instead.
        :param external: Not used by this implementation.
        '''
        if detail_item:
            # Format from a copy so that the class-level aff_id dict, which
            # is shared by all instances, is left untouched.
            link_values = dict(self.aff_id)
            link_values['asin'] = detail_item
            store_link = self.store_link_details % link_values
        else:
            store_link = self.store_link % self.aff_id
        open_url(QUrl(store_link))

    def _result_xpaths(self, doc):
        '''
        Pick the XPath expressions that match the layout of a result page.

        The layouts are tried in a fixed order (grid, "ilresults", list) and
        the first one whose marker is found in ``doc`` wins.

        :param doc: Parsed search result page.
        :return: A dict with the keys ``data``, ``format``, ``asin``,
            ``cover``, ``title``, ``author`` and ``price``, or None when no
            known layout marker is present.
        '''
        # Fragments shared between the layouts.
        title = './/h3[@class="newaps"]/a//text()'
        author = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
        product_image = './/img[contains(@class, "productImage")]/@src'
        grid_ul = './/ul[contains(@class, "rsltGridList")]'
        list_ul = './/ul[contains(@class, "rsltL")]'
        # Results can be in a grid (table) or a column
        grid_or_list_ul = './/ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
        # The "lrg" span without "bld" is used for the format text, the one
        # with "bld" for the price.
        plain_span = '//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
        bold_span = '//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'

        layouts = (
            # grid form
            ('//div[@id = "atfResults" and contains(@class, "grid")]', {
                'data': '//div[contains(@class, "prod")]',
                'format': grid_ul + plain_span,
                'asin': '@name',
                'cover': product_image,
                'title': title,
                'author': author,
                'price': grid_ul + bold_span,
            }),
            # ilo form
            ('//div[@id = "atfResults" and contains(@class, "ilresults")]', {
                'data': '//li[(@class="ilo")]',
                'format': grid_ul + plain_span,
                'asin': '@name',
                'cover': './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src',
                'title': title,
                'author': author,
                'price': grid_or_list_ul + bold_span,
            }),
            # list form
            ('//div[@id = "atfResults" and contains(@class, "list")]', {
                'data': '//div[contains(@class, "prod")]',
                'format': list_ul + plain_span,
                'asin': '@name',
                'cover': product_image,
                'title': title,
                'author': author,
                'price': list_ul + bold_span,
            }),
        )
        for marker_xpath, xpaths in layouts:
            if doc.xpath(marker_xpath):
                return xpaths
        return None

    def search(self, query, max_results=10, timeout=60):
        '''
        Search the Kindle store and yield one SearchResult per Kindle book.

        Entries whose format text does not contain "kindle", or that have no
        ASIN, are skipped and do not count towards ``max_results``. When the
        result page uses an unrecognised layout a message is printed and
        nothing is yielded.

        :param query: Search text; it is escaped and appended to search_url.
        :param max_results: Maximum number of results to yield.
        :param timeout: Passed through to the browser's open() call.
        '''
        # Escaping steps, in order: non-ASCII characters become backslash
        # escapes, literal '%' becomes '%25', the '\x' prefix of two-digit
        # escapes becomes '%' (so '\xe9' ends up as '%e9'), and spaces
        # become '+'.
        # NOTE(review): '\uXXXX' escapes and reserved characters such as '&'
        # or '#' are passed through unchanged - confirm whether they need to
        # be percent-encoded as well.
        url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
        br = browser()

        # Read and parse the whole page first so that the response is closed
        # before any result is yielded, instead of staying open for as long
        # as the caller takes to consume (or abandon) the generator.
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())

        xpaths = self._result_xpaths(doc)
        if xpaths is None:
            # URK -- whats this?
            print('unknown result table form for Amazon EU search')
            return

        counter = max_results
        for data in doc.xpath(xpaths['data']):
            if counter <= 0:
                break

            # Even though we are searching digital-text only Amazon will still
            # put in results for non Kindle books (authors pages). So we need
            # to explicitly check if the item is a Kindle book and ignore it
            # if it isn't.
            format_ = ''.join(data.xpath(xpaths['format']))
            if 'kindle' not in format_.lower():
                continue

            # We must have an asin otherwise we can't easily reference the
            # book later.
            asin = data.xpath(xpaths['asin'])
            if not asin:
                continue
            asin = asin[0]

            cover_url = ''.join(data.xpath(xpaths['cover']))
            title = ''.join(data.xpath(xpaths['title']))

            authors = ''.join(data.xpath(xpaths['author']))
            authors = re.sub('^' + self.author_article, '', authors)
            authors = re.sub(self.and_word, ' & ', authors)
            # Drop a trailing parenthesised part that starts with a digit.
            mo = re.match(r'(.*)(\(\d.*)$', authors)
            if mo:
                authors = mo.group(1).strip()

            price = ''.join(data.xpath(xpaths['price']))

            counter -= 1

            s = SearchResult()
            s.cover_url = cover_url.strip()
            s.title = title.strip()
            s.author = authors.strip()
            s.price = price.strip()
            s.detail_item = asin.strip()
            s.drm = SearchResult.DRM_UNKNOWN
            s.formats = 'Kindle'

            yield s

    def get_details(self, search_result, timeout):
        '''
        Do nothing: no additional details are fetched for a search result.

        :param search_result: Not used.
        :param timeout: Not used.
        :return: None
        '''
        pass
|
1.3.38
by Martin Pitt
Import upstream version 0.9.18+dfsg |
162 |