~ubuntu-branches/debian/experimental/calibre/experimental

Viewing changes to src/calibre/ebooks/metadata/sources/edelweiss.py

Committer: Package Import Robot
Author(s): Martin Pitt
Date: 2013-02-12 16:45:34 UTC
mfrom: (1.3.38)
Revision ID: package-import@ubuntu.com-20130212164534-4tue9c37ui3lgdsl

Tags: 0.9.18+dfsg-1

http://bugs.debian.org/699700

* New upstream release. (Closes: #699700)
* Unfuzz patches.
* Add new libqt4-private-dev build dependency, required by this version.

files added:
imgsrc/polish.svg

recipes/asco_de_vida.recipe

recipes/contemporary_argentine_writers.recipe

recipes/dobanevinosti.recipe

recipes/el_diplo.recipe

recipes/icons/libartes.png

recipes/icons/libertad_digital.png

recipes/icons/spiders_web_pl.png

recipes/la_nacion_cr.recipe

recipes/libartes.recipe

recipes/libertad_digital.recipe

recipes/lvivs_ks_ghazieta.recipe

recipes/outside_magazine.recipe

recipes/oxford_mail.recipe

recipes/schattenblick.recipe

recipes/spectator_magazine.recipe

recipes/ukraiyns_kii_tizhdien.recipe

recipes/vice_magazine_de.recipe

recipes/zaxid_net.recipe

resources/images/icon_choose.png

resources/images/polish.png

src/calibre/db/categories.py

src/calibre/db/search.py

src/calibre/devices/mtp/unix/sysfs.py

src/calibre/ebooks/metadata/sources/edelweiss.py

src/calibre/ebooks/oeb/polish

src/calibre/ebooks/oeb/polish/__init__.py

src/calibre/ebooks/oeb/polish/container.py

src/calibre/ebooks/oeb/polish/errors.py

src/calibre/ebooks/oeb/polish/font_stats.coffee

src/calibre/ebooks/oeb/polish/main.py

src/calibre/ebooks/oeb/polish/stats.py

src/calibre/ebooks/oeb/polish/subset.py

src/calibre/ebooks/pdf/render/from_html.py

src/calibre/ebooks/pdf/render/gradients.py

src/calibre/ebooks/pdf/render/graphics.py

src/calibre/ebooks/pdf/render/links.py

src/calibre/ebooks/pdf/render/qt_hack.cpp

src/calibre/ebooks/pdf/render/qt_hack.h

src/calibre/ebooks/pdf/render/qt_hack.sip

src/calibre/ebooks/pdf/render/test.py

src/calibre/gui2/actions/polish.py

src/calibre/gui2/dialogs/duplicates.py

src/calibre/gui2/metadata/pdf_covers.py

src/calibre/gui2/store/loader.py

src/calibre/gui2/store/stores/beam_ebooks_de_plugin.py

src/calibre/gui2/store/stores/nook_uk_plugin.py

src/qt-harfbuzz

src/qt-harfbuzz/AUTHORS

src/qt-harfbuzz/COPYING

src/qt-harfbuzz/src

src/qt-harfbuzz/src/harfbuzz-buffer-private.h

src/qt-harfbuzz/src/harfbuzz-buffer.h

src/qt-harfbuzz/src/harfbuzz-dump.h

src/qt-harfbuzz/src/harfbuzz-external.h

src/qt-harfbuzz/src/harfbuzz-gdef-private.h

src/qt-harfbuzz/src/harfbuzz-gdef.h

src/qt-harfbuzz/src/harfbuzz-global.h

src/qt-harfbuzz/src/harfbuzz-gpos-private.h

src/qt-harfbuzz/src/harfbuzz-gpos.h

src/qt-harfbuzz/src/harfbuzz-gsub-private.h

src/qt-harfbuzz/src/harfbuzz-gsub.h

src/qt-harfbuzz/src/harfbuzz-impl.h

src/qt-harfbuzz/src/harfbuzz-open-private.h

src/qt-harfbuzz/src/harfbuzz-open.h

src/qt-harfbuzz/src/harfbuzz-shape.h

src/qt-harfbuzz/src/harfbuzz-shaper-private.h

src/qt-harfbuzz/src/harfbuzz-shaper.h

src/qt-harfbuzz/src/harfbuzz-stream-private.h

src/qt-harfbuzz/src/harfbuzz-stream.h

src/qt-harfbuzz/src/harfbuzz.h

files removed:
.pc/use-system-feedparser.patch/recipes/economist_free.recipe

recipes/microwave_and_rf.recipe

recipes/spiders_web_pl.png

src/calibre/gui2/duplicates.py

files modified:
.pc/python_multiarch_inc.patch/setup/extensions.py

Changelog.yaml

README

debian/changelog

debian/control

debian/patches/no_updates_dialog.patch

debian/patches/python_multiarch_inc.patch

debian/patches/use-system-feedparser.patch

manual/develop.rst

manual/faq.rst

recipes/alternet.recipe

recipes/ambito_financiero.recipe

recipes/apple_daily.recipe

recipes/archeowiesci.recipe

recipes/azstarnet.recipe

recipes/baltimore_sun.recipe

recipes/barrons.recipe

recipes/big_oven.recipe

recipes/birmingham_post.recipe

recipes/borse_online.recipe

recipes/brecha.recipe

recipes/bwmagazine2.recipe

recipes/cacm.recipe

recipes/caijing.recipe

recipes/calgary_herald.recipe

recipes/chronicle_higher_ed.recipe

recipes/cnn.recipe

recipes/conowego_pl.recipe *

recipes/corriere_della_sera_it.recipe

recipes/dani.recipe

recipes/der_spiegel.recipe

recipes/dziennik_polski.recipe

recipes/economist.recipe

recipes/economist_free.recipe

recipes/edmonton_journal.recipe

recipes/el_correo.recipe

recipes/el_mundo_today.recipe

recipes/elet_es_irodalom.recipe

recipes/elmundo.recipe

recipes/espn.recipe

recipes/expansion_spanish.recipe

recipes/financial_times.recipe

recipes/financial_times_uk.recipe

recipes/fleshbot.recipe

recipes/fluter_de.recipe

recipes/folhadesaopaulo_sub.recipe

recipes/foreignaffairs.recipe

recipes/globe_and_mail.recipe

recipes/haaretz_en.recipe

recipes/harpers_full.recipe

recipes/hbr.recipe

recipes/heraldo.recipe

recipes/hindu.recipe

recipes/history_today.recipe

recipes/il_messaggero.recipe

recipes/insider.recipe

recipes/instapaper.recipe

recipes/japan_times.recipe

recipes/jbpress.recipe

recipes/johm.recipe

recipes/kidney.recipe

recipes/klip_me.recipe

recipes/korben.recipe

recipes/la_stampa.recipe

recipes/la_voce.recipe

recipes/le_monde_sub.recipe

recipes/ledevoir.recipe

recipes/leggo_it.recipe

recipes/lemonde_dip.recipe

recipes/lepoint.recipe

recipes/lexpress.recipe

recipes/liberation.recipe

recipes/liberation_sub.recipe

recipes/libero.recipe

recipes/linux_journal.recipe *

recipes/lrb_payed.recipe

recipes/lwn.recipe

recipes/lwn_weekly.recipe

recipes/mediapart.recipe

recipes/medscape.recipe

recipes/metro_uk.recipe

recipes/michellemalkin.recipe

recipes/ming_pao.recipe

recipes/ming_pao_toronto.recipe

recipes/ming_pao_vancouver.recipe

recipes/montreal_gazette.recipe

recipes/nbonline.recipe

recipes/nejm.recipe

recipes/new_scientist.recipe

recipes/new_york_review_of_books.recipe

recipes/nikkei_news.recipe

recipes/nikkei_sub.recipe

recipes/nikkei_sub_economy.recipe

recipes/nikkei_sub_industry.recipe

recipes/nikkei_sub_life.recipe

recipes/nikkei_sub_main.recipe

recipes/nikkei_sub_shakai.recipe

recipes/nikkei_sub_sports.recipe

recipes/nin.recipe

recipes/novilist_novine_hr.recipe

recipes/novistandard.recipe

recipes/nowa_fantastyka.recipe

recipes/nrc-nl-epub.recipe

recipes/nsfw_corp.recipe

recipes/nspm.recipe

recipes/nursingtimes.recipe

recipes/nytimes.recipe

recipes/nytimes_sub.recipe

recipes/nzz_webpaper.recipe

recipes/omgubuntu.recipe

recipes/oreilly_premium.recipe

recipes/ottawa_citizen.recipe

recipes/pajama.recipe

recipes/people_daily.recipe

recipes/phillosophy_now.recipe

recipes/physics_today.recipe

recipes/prospectmaguk.recipe

recipes/readitlater.recipe

recipes/real_clear.recipe

recipes/regina_leader_post.recipe

recipes/saskatoon_star_phoenix.recipe

recipes/science_aas.recipe

recipes/scmp.recipe

recipes/seattle_times.recipe

recipes/singtaohk.recipe

recipes/slate.recipe

recipes/smilezilla.recipe

recipes/smith.recipe

recipes/st_louis_post_dispatch.recipe

recipes/staradvertiser.recipe

recipes/sueddeutsche_mobil.recipe

recipes/sueddeutschezeitung.recipe

recipes/sunday_times.recipe

recipes/techtarget.recipe

recipes/the_age.recipe

recipes/the_nation.recipe

recipes/thestar.recipe

recipes/tidbits.recipe

recipes/times_online.recipe

recipes/todays_zaman.recipe

recipes/tomshardware.recipe

recipes/toyokeizai.recipe

recipes/tsn.recipe

recipes/usatoday.recipe

recipes/vancouver_provice.recipe

recipes/vancouver_province.recipe

recipes/vancouver_sun.recipe

recipes/vic_times.recipe

recipes/volksrant.recipe

recipes/vreme.recipe

recipes/windsor_star.recipe

recipes/winsupersite.recipe

recipes/wsj.recipe

recipes/ynet.recipe

recipes/zaobao.recipe

recipes/zdnet.fr.recipe

resources/builtin_recipes.xml

resources/builtin_recipes.zip

resources/compiled_coffeescript.zip

resources/content_server/browse/browse.css

resources/ebook-convert-complete.pickle

resources/images.qrc

resources/localization/locales.zip

resources/localization/stats.pickle

resources/quick_start.epub

session.vim

setup/build_environment.py

setup/check.py

setup/extensions.py

setup/iso_639/ca.po

setup/iso_639/de.po

setup/iso_639/es.po

setup/iso_639/nl.po

setup/iso_639/pt_BR.po

setup/iso_639/ru.po

setup/resources.py

setup/translations.py

src/calibre/constants.py

src/calibre/customize/__init__.py

src/calibre/customize/builtins.py

src/calibre/customize/profiles.py

src/calibre/customize/ui.py

src/calibre/db/backend.py

src/calibre/db/cache.py

src/calibre/db/fields.py

src/calibre/db/tables.py

src/calibre/db/tests/base.py

src/calibre/db/tests/metadata.db

src/calibre/db/tests/reading.py

src/calibre/devices/android/driver.py

src/calibre/devices/apple/driver.py

src/calibre/devices/eb600/driver.py

src/calibre/devices/hanvon/driver.py

src/calibre/devices/kobo/driver.py

src/calibre/devices/mtp/unix/devices.c

src/calibre/devices/mtp/unix/driver.py

src/calibre/devices/mtp/unix/libmtp.c

src/calibre/devices/prst1/driver.py

src/calibre/devices/smart_device_app/driver.py

src/calibre/devices/teclast/driver.py

src/calibre/ebooks/chm/reader.py

src/calibre/ebooks/conversion/plugins/chm_input.py

src/calibre/ebooks/conversion/plugins/epub_input.py

src/calibre/ebooks/conversion/plugins/pdf_output.py

src/calibre/ebooks/conversion/preprocess.py

src/calibre/ebooks/html/input.py

src/calibre/ebooks/lit/reader.py

src/calibre/ebooks/lrf/__init__.py

src/calibre/ebooks/metadata/epub.py

src/calibre/ebooks/metadata/mobi.py

src/calibre/ebooks/metadata/opf2.py

src/calibre/ebooks/metadata/pdf.py

src/calibre/ebooks/metadata/sources/amazon.py

src/calibre/ebooks/metadata/sources/base.py

src/calibre/ebooks/metadata/sources/test.py

src/calibre/ebooks/metadata/toc.py

src/calibre/ebooks/mobi/reader/headers.py

src/calibre/ebooks/mobi/reader/mobi6.py

src/calibre/ebooks/mobi/reader/mobi8.py

src/calibre/ebooks/mobi/writer8/exth.py

src/calibre/ebooks/mobi/writer8/main.py

src/calibre/ebooks/oeb/base.py

src/calibre/ebooks/oeb/display/indexing.coffee

src/calibre/ebooks/oeb/display/paged.coffee

src/calibre/ebooks/oeb/parse_utils.py

src/calibre/ebooks/oeb/reader.py

src/calibre/ebooks/oeb/transforms/flatcss.py

src/calibre/ebooks/oeb/transforms/metadata.py

src/calibre/ebooks/oeb/transforms/subset.py

src/calibre/ebooks/pdf/render/common.py

src/calibre/ebooks/pdf/render/engine.py

src/calibre/ebooks/pdf/render/fonts.py

src/calibre/ebooks/pdf/render/serialize.py

src/calibre/ebooks/pdf/writer.py

src/calibre/gui2/__init__.py

src/calibre/gui2/actions/add.py

src/calibre/gui2/actions/choose_library.py

src/calibre/gui2/actions/copy_to_library.py

src/calibre/gui2/actions/store.py

src/calibre/gui2/add.py

src/calibre/gui2/convert/look_and_feel.ui

src/calibre/gui2/convert/look_and_feel_ui.py

src/calibre/gui2/convert/metadata.py

src/calibre/gui2/convert/pdf_output.py

src/calibre/gui2/convert/pdf_output.ui

src/calibre/gui2/convert/pdf_output_ui.py

src/calibre/gui2/convert/single.py

src/calibre/gui2/cover_flow.py

src/calibre/gui2/custom_column_widgets.py

src/calibre/gui2/device.py

src/calibre/gui2/dialogs/add_empty_book.py

src/calibre/gui2/dialogs/metadata_bulk.ui

src/calibre/gui2/dialogs/metadata_bulk_ui.py

src/calibre/gui2/dialogs/plugin_updater.py

src/calibre/gui2/dialogs/tag_list_editor.py

src/calibre/gui2/dialogs/template_dialog.py

src/calibre/gui2/dialogs/user_profiles.ui

src/calibre/gui2/dialogs/user_profiles_ui.py

src/calibre/gui2/layout.py

src/calibre/gui2/library/delegates.py

src/calibre/gui2/library/models.py

src/calibre/gui2/library/views.py

src/calibre/gui2/metadata/basic_widgets.py

src/calibre/gui2/metadata/single.py

src/calibre/gui2/metadata/single_download.py

src/calibre/gui2/pictureflow/pictureflow.cpp

src/calibre/gui2/pictureflow/pictureflow.h

src/calibre/gui2/pictureflow/pictureflow.sip

src/calibre/gui2/preferences/coloring.py

src/calibre/gui2/preferences/ignored_devices.py

src/calibre/gui2/preferences/look_feel.py

src/calibre/gui2/preferences/look_feel.ui

src/calibre/gui2/preferences/look_feel_ui.py

src/calibre/gui2/preferences/metadata_sources_ui.py

src/calibre/gui2/preferences/server.ui

src/calibre/gui2/preferences/server_ui.py

src/calibre/gui2/store/__init__.py

src/calibre/gui2/store/config/chooser/models.py

src/calibre/gui2/store/search/search.py

src/calibre/gui2/store/stores/amazon_de_plugin.py

src/calibre/gui2/store/stores/amazon_es_plugin.py

src/calibre/gui2/store/stores/amazon_fr_plugin.py

src/calibre/gui2/store/stores/amazon_it_plugin.py

src/calibre/gui2/store/stores/amazon_plugin.py

src/calibre/gui2/store/stores/amazon_uk_plugin.py

src/calibre/gui2/store/stores/archive_org_plugin.py

src/calibre/gui2/store/stores/baen_webscription_plugin.py

src/calibre/gui2/store/stores/bewrite_plugin.py

src/calibre/gui2/store/stores/biblio_plugin.py

src/calibre/gui2/store/stores/bn_plugin.py

src/calibre/gui2/store/stores/bookoteka_plugin.py

src/calibre/gui2/store/stores/chitanka_plugin.py

src/calibre/gui2/store/stores/diesel_ebooks_plugin.py

src/calibre/gui2/store/stores/ebook_nl_plugin.py

src/calibre/gui2/store/stores/ebookpoint_plugin.py

src/calibre/gui2/store/stores/ebooks_com_plugin.py

src/calibre/gui2/store/stores/ebooksgratuits_plugin.py

src/calibre/gui2/store/stores/ebookshoppe_uk_plugin.py

src/calibre/gui2/store/stores/eharlequin_plugin.py

src/calibre/gui2/store/stores/eknigi_plugin.py

src/calibre/gui2/store/stores/empik_plugin.py

src/calibre/gui2/store/stores/escapemagazine_plugin.py

src/calibre/gui2/store/stores/feedbooks_plugin.py

src/calibre/gui2/store/stores/foyles_uk_plugin.py

src/calibre/gui2/store/stores/google_books_plugin.py

src/calibre/gui2/store/stores/gutenberg_plugin.py

src/calibre/gui2/store/stores/kobo_plugin.py

src/calibre/gui2/store/stores/legimi_plugin.py

src/calibre/gui2/store/stores/libri_de_plugin.py

src/calibre/gui2/store/stores/litres_plugin.py

src/calibre/gui2/store/stores/manybooks_plugin.py

src/calibre/gui2/store/stores/mills_boon_uk_plugin.py

src/calibre/gui2/store/stores/mobileread/mobileread_plugin.py

src/calibre/gui2/store/stores/mobileread/models.py

src/calibre/gui2/store/stores/nexto_plugin.py

src/calibre/gui2/store/stores/open_books_plugin.py

src/calibre/gui2/store/stores/ozon_ru_plugin.py

src/calibre/gui2/store/stores/pragmatic_bookshelf_plugin.py

src/calibre/gui2/store/stores/publio_plugin.py

src/calibre/gui2/store/stores/rw2010_plugin.py

src/calibre/gui2/store/stores/smashwords_plugin.py

src/calibre/gui2/store/stores/sony_plugin.py

src/calibre/gui2/store/stores/virtualo_plugin.py

src/calibre/gui2/store/stores/waterstones_uk_plugin.py

src/calibre/gui2/store/stores/weightless_books_plugin.py

src/calibre/gui2/store/stores/whsmith_uk_plugin.py

src/calibre/gui2/store/stores/woblink_plugin.py

src/calibre/gui2/store/stores/xinxii_plugin.py

src/calibre/gui2/store/stores/zixo_plugin.py

src/calibre/gui2/tag_browser/model.py

src/calibre/gui2/tag_browser/view.py

src/calibre/gui2/tools.py

src/calibre/gui2/ui.py

src/calibre/gui2/viewer/config.py

src/calibre/gui2/viewer/config.ui

src/calibre/gui2/viewer/config_ui.py

src/calibre/gui2/viewer/documentview.py

src/calibre/gui2/viewer/main.py

src/calibre/gui2/viewer/main.ui

src/calibre/gui2/viewer/main_ui.py

src/calibre/gui2/viewer/toc.py

src/calibre/gui2/wizard/__init__.py

src/calibre/gui2/wizard/send_email_ui.py

src/calibre/library/caches.py

src/calibre/library/catalogs/csv_xml.py

src/calibre/library/catalogs/epub_mobi.py

src/calibre/library/catalogs/epub_mobi_builder.py

src/calibre/library/coloring.py

src/calibre/library/custom_columns.py

src/calibre/library/database2.py

src/calibre/library/field_metadata.py

src/calibre/library/server/__init__.py

src/calibre/library/server/ajax.py

src/calibre/library/server/browse.py

src/calibre/library/server/content.py

src/calibre/library/server/mobile.py

src/calibre/library/server/opds.py

src/calibre/translations/af.po

src/calibre/translations/ar.po

src/calibre/translations/ast.po

src/calibre/translations/az.po

src/calibre/translations/ber.po

src/calibre/translations/bg.po

src/calibre/translations/bn.po

src/calibre/translations/br.po

src/calibre/translations/bs.po

src/calibre/translations/ca.po

src/calibre/translations/calibre.pot

src/calibre/translations/cs.po

src/calibre/translations/cy.po

src/calibre/translations/da.po

src/calibre/translations/de.po

src/calibre/translations/el.po

src/calibre/translations/en_AU.po

src/calibre/translations/en_CA.po

src/calibre/translations/en_GB.po

src/calibre/translations/eo.po

src/calibre/translations/es.po

src/calibre/translations/et.po

src/calibre/translations/eu.po

src/calibre/translations/fa.po

src/calibre/translations/fi.po

src/calibre/translations/fo.po

src/calibre/translations/fr.po

src/calibre/translations/fr_CA.po

src/calibre/translations/fur.po

src/calibre/translations/gl.po

src/calibre/translations/gu.po

src/calibre/translations/he.po

src/calibre/translations/hi.po

src/calibre/translations/him.po

src/calibre/translations/hr.po

src/calibre/translations/hu.po

src/calibre/translations/id.po

src/calibre/translations/is.po

src/calibre/translations/it.po

src/calibre/translations/ja.po

src/calibre/translations/jv.po

src/calibre/translations/ka.po

src/calibre/translations/kn.po

src/calibre/translations/ko.po

src/calibre/translations/ku.po

src/calibre/translations/lt.po

src/calibre/translations/ltg.po

src/calibre/translations/lv.po

src/calibre/translations/mk.po

src/calibre/translations/ml.po

src/calibre/translations/mr.po

src/calibre/translations/ms.po

src/calibre/translations/nb.po

src/calibre/translations/nds.po

src/calibre/translations/nl.po

src/calibre/translations/nn.po

src/calibre/translations/oc.po

src/calibre/translations/pa.po

src/calibre/translations/pl.po

src/calibre/translations/pt.po

src/calibre/translations/pt_BR.po

src/calibre/translations/ro.po

src/calibre/translations/ru.po

src/calibre/translations/sc.po

src/calibre/translations/si.po

src/calibre/translations/sk.po

src/calibre/translations/sl.po

src/calibre/translations/sq.po

src/calibre/translations/sr.po

src/calibre/translations/sr@latin.po

src/calibre/translations/sv.po

src/calibre/translations/ta.po

src/calibre/translations/te.po

src/calibre/translations/th.po

src/calibre/translations/tr.po

src/calibre/translations/uk.po

src/calibre/translations/ur.po

src/calibre/translations/vi.po

src/calibre/translations/wa.po

src/calibre/translations/yi.po

src/calibre/translations/zh_CN.po

src/calibre/translations/zh_HK.po

src/calibre/translations/zh_TW.po

src/calibre/utils/browser.py

src/calibre/utils/chm/chm.py

src/calibre/utils/filenames.py

src/calibre/utils/fonts/sfnt/cmap.py

src/calibre/utils/fonts/sfnt/container.py

src/calibre/utils/fonts/sfnt/gsub.py

src/calibre/utils/fonts/sfnt/metrics.py

src/calibre/utils/fonts/sfnt/subset.py

src/calibre/utils/formatter.py

src/calibre/utils/ipc/worker.py

src/calibre/utils/localization.py

src/calibre/utils/mdns.py

src/calibre/utils/pyparsing.py

src/calibre/utils/smtp.py

src/calibre/utils/speedup.c

src/calibre/web/feeds/news.py

src/calibre/web/fetch/simple.py

src/calibre/web/jsbrowser/browser.py

src/calibre/web/jsbrowser/test.py

Show diffs side-by-side

added added

removed removed

src/calibre/ebooks/metadata/sources/edelweiss.py

#!/usr/bin/env python

# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai

from __future__ import (unicode_literals, division, absolute_import,

print_function)

__license__ = 'GPL v3'

__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

__docformat__ = 'restructuredtext en'

import time, re

from threading import Thread

from Queue import Queue, Empty

from calibre import as_unicode, random_user_agent

from calibre.ebooks.metadata import check_isbn

from calibre.ebooks.metadata.sources.base import Source

def parse_html(raw):
    """Parse *raw* markup and return the root element of the resulting tree.

    The input is first passed through ``xml_to_unicode`` (encoding
    declarations stripped, entities resolved, UTF-8 assumed), then through
    ``clean_ascii_chars``, and finally parsed by html5lib with the lxml
    tree builder and without namespaced HTML elements.
    """
    import html5lib
    from calibre.ebooks.chardet import xml_to_unicode
    from calibre.utils.cleantext import clean_ascii_chars

    # xml_to_unicode returns a sequence; only its first item is used here.
    decoded = xml_to_unicode(raw, strip_encoding_pats=True,
                             resolve_entities=True, assume_utf8=True)[0]
    cleaned = clean_ascii_chars(decoded)
    tree = html5lib.parse(cleaned, treebuilder='lxml',
                          namespaceHTMLElements=False)
    return tree.getroot()

def CSSSelect(expr):
    """Translate the CSS selector *expr* to XPath and return it compiled as
    an ``lxml.etree.XPath`` object."""
    from cssselect import HTMLTranslator
    from lxml.etree import XPath

    xpath_expression = HTMLTranslator().css_to_xpath(expr)
    return XPath(xpath_expression)

def astext(node):
    """Return the text content of *node* (its tail text excluded) as a
    unicode string with surrounding whitespace stripped."""
    from lxml import etree

    serialized = etree.tostring(node, method='text', encoding=unicode,
                                with_tail=False)
    return serialized.strip()

class Worker(Thread):  # {{{

    """Daemon thread that downloads one details page, parses it into a
    Metadata object and puts that object on ``result_queue``.

    Failures while downloading or parsing are logged through ``log`` and
    nothing is put on the queue.
    """

    def __init__(self, sku, url, relevance, result_queue, br, timeout, log, plugin):
        """Store the job parameters; the thread is marked as a daemon.

        :param sku: identifier stored under the ``edelweiss`` key of the result
        :param url: address of the details page to download
        :param relevance: value assigned to ``mi.source_relevance``
        :param result_queue: queue that receives the parsed Metadata object
        :param br: browser object providing ``open_novisit``
        :param timeout: timeout passed to ``open_novisit``
        :param log: logger providing ``exception``
        :param plugin: plugin object used for caching and metadata cleanup
        """
        Thread.__init__(self)
        self.daemon = True
        self.url, self.br, self.log, self.timeout = url, br, log, timeout
        self.result_queue, self.plugin, self.sku = result_queue, plugin, sku
        self.relevance = relevance

    def run(self):
        """Download ``self.url``, parse it and queue the resulting metadata."""
        try:
            raw = self.br.open_novisit(self.url, timeout=self.timeout).read()
        except Exception:
            self.log.exception('Failed to load details page: %r'%self.url)
            return

        try:
            mi = self.parse(raw)
            mi.source_relevance = self.relevance
            self.plugin.clean_downloaded_metadata(mi)
            self.result_queue.put(mi)
        except Exception:
            self.log.exception('Failed to parse details page: %r'%self.url)

    def parse(self, raw):
        """Build and return a Metadata object from the page content *raw*.

        Extracts title, authors, ISBN, tags, publisher, publication date,
        comments and the cover URL. ISBN-to-sku and sku-to-cover mappings
        are cached on the plugin as a side effect. Raises (for example
        IndexError) when the expected page structure is missing; ``run``
        logs such failures.
        """
        from calibre.ebooks.metadata.book.base import Metadata
        from calibre.utils.date import parse_only_date, UNDEFINED_DATE

        root = parse_html(raw)
        sku = CSSSelect('div.sku.attGroup')(root)[0]
        info = sku.getparent()
        top = info.getparent().getparent()
        banner = top.find('div')
        spans = banner.findall('span')

        # The title is the first span plus any directly following spans
        # styled with '12pt' (two-part titles).
        title = ''
        for i, span in enumerate(spans):
            if i == 0 or '12pt' in span.get('style', ''):
                title += astext(span)
            else:
                break
        # The last span lists the authors, comma separated; parenthesised
        # annotations are removed from each name.
        authors = [re.sub(r'\(.*\)', '', x).strip() for x in astext(spans[-1]).split(',')]
        mi = Metadata(title.strip(), authors)

        # Identifiers
        isbns = [check_isbn(x.strip()) for x in astext(sku).split(',')]
        for isbn in isbns:
            if isbn:
                self.plugin.cache_isbn_to_identifier(isbn, self.sku)
        # Prefer the longest valid ISBN; invalid entries sort last.
        isbns = sorted(isbns, key=lambda x:len(x) if x else 0, reverse=True)
        if isbns and isbns[0]:
            mi.isbn = isbns[0]
        mi.set_identifier('edelweiss', self.sku)

        # Tags
        bisac = CSSSelect('div.bisac.attGroup')(root)
        if bisac:
            bisac = astext(bisac[0])
            mi.tags = [x.strip() for x in bisac.split(',')]
            mi.tags = [t[1:].strip() if t.startswith('&') else t for t in mi.tags]

        # Publisher
        pub = CSSSelect('div.supplier.attGroup')(root)
        if pub:
            pub = astext(pub[0])
            mi.publisher = pub

        # Pubdate
        pub = CSSSelect('div.shipDate.attGroupItem')(root)
        if pub:
            pub = astext(pub[0])
            # Use the text after the first ':' when there is any,
            # otherwise the whole text.
            parts = pub.partition(':')[0::2]
            pub = parts[1] or parts[0]
            try:
                q = parse_only_date(pub, assume_utc=True)
                # Compare year against year: comparing q.year with the
                # UNDEFINED_DATE object itself is always unequal.
                # NOTE(review): assumes UNDEFINED_DATE is a datetime - confirm.
                if q.year != UNDEFINED_DATE.year:
                    mi.pubdate = q
            except Exception:
                self.log.exception('Error parsing published date: %r'%pub)

        # Comments
        comm = ''
        general = CSSSelect('div#pd-general-overview-content')(root)
        if general:
            q = self.render_comments(general[0])
            if q != '<p>No title summary available. </p>':
                comm += q
        general = CSSSelect('div#pd-general-contributor-content')(root)
        if general:
            comm += self.render_comments(general[0])
        general = CSSSelect('div#pd-general-quotes-content')(root)
        if general:
            comm += self.render_comments(general[0])
        if comm:
            mi.comments = comm

        # Cover
        img = CSSSelect('img.title-image[src]')(root)
        if img:
            href = img[0].get('src').replace('jacket_covers/medium/',
                    'jacket_covers/flyout/')
            self.plugin.cache_identifier_to_cover_url(self.sku, href)

        mi.has_cover = self.plugin.cached_identifier_to_cover_url(self.sku) is not None

        return mi

    def render_comments(self, desc):
        """Serialize the element *desc* to sanitized comments HTML.

        ``<noscript>`` elements are removed, links are turned into plain
        ``<span>`` elements, tag attributes and HTML comments are stripped,
        and the result is passed through ``sanitize_comments_html``.
        *desc* is modified in place.
        """
        from lxml import etree
        from calibre.library.comments import sanitize_comments_html

        for c in desc.xpath('descendant::noscript'):
            c.getparent().remove(c)
        for a in desc.xpath('descendant::a[@href]'):
            del a.attrib['href']
            a.tag = 'span'
        desc = etree.tostring(desc, method='html', encoding=unicode).strip()

        # remove all attributes from tags
        desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
        # Remove comments
        desc = re.sub(r'(?s)<!--.*?-->', '', desc)
        return sanitize_comments_html(desc)
# }}}

class Edelweiss(Source):

    """Metadata source plugin that looks up books and covers on Edelweiss
    (edelweiss.abovethetreeline.com)."""

    name = 'Edelweiss'
    description = _('Downloads metadata and covers from Edelweiss - A catalog updated by book publishers')

    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset([
        'title', 'authors', 'tags', 'pubdate', 'comments', 'publisher',
        'identifier:isbn', 'identifier:edelweiss'])
    supports_gzip_transfer_encoding = True
    has_html_comments = True

    @property
    def user_agent(self):
        """Return a user agent string chosen by ``random_user_agent``."""
        # Pass in an index to random_user_agent() to test with a particular
        # user agent
        return random_user_agent()

    def _get_book_url(self, sku):
        """Return the details page URL for *sku*, or None if *sku* is empty."""
        if sku:
            return 'http://edelweiss.abovethetreeline.com/ProductDetailPage.aspx?sku=%s'%sku

    def get_book_url(self, identifiers):  # {{{
        """Return ``('edelweiss', sku, url)`` when *identifiers* contains an
        ``edelweiss`` entry, otherwise None."""
        sku = identifiers.get('edelweiss', None)
        if sku:
            return 'edelweiss', sku, self._get_book_url(sku)

    # }}}

    def get_cached_cover_url(self, identifiers):  # {{{
        """Return the cached cover URL for the book described by
        *identifiers*, looking the sku up via the ISBN cache when no
        ``edelweiss`` identifier is present."""
        sku = identifiers.get('edelweiss', None)
        if not sku:
            isbn = identifiers.get('isbn', None)
            if isbn is not None:
                sku = self.cached_isbn_to_identifier(isbn)
        return self.cached_identifier_to_cover_url(sku)
    # }}}

    def create_query(self, log, title=None, authors=None, identifiers={}):  # {{{
        """Build the catalog search URL.

        A valid ISBN in *identifiers* takes precedence; otherwise the title
        tokens and the first author's tokens are used. Returns None when
        there is nothing to search for.
        """
        from urllib import urlencode
        BASE_URL = 'http://edelweiss.abovethetreeline.com/CatalogOverview.aspx?'
        params = {
            'group':'search',
            'searchType':999,
            'searchOrgID':'',
            'dateRange':0,
            'isbn':'',
        }
        for num in (0, 1, 2, 3, 4, 5, 6, 200, 201, 202, 204):
            params['condition%d'%num] = 1
            params['keywords%d'%num] = ''
        title_key, author_key = 'keywords200', 'keywords201'

        isbn = check_isbn(identifiers.get('isbn', None))
        found = False
        if isbn is not None:
            params['isbn'] = isbn
            found = True
        elif title or authors:
            title_tokens = list(self.get_title_tokens(title))
            if title_tokens:
                params[title_key] = ' '.join(title_tokens)
                found = True
            author_tokens = self.get_author_tokens(authors,
                    only_first_author=True)
            if author_tokens:
                params[author_key] = ' '.join(author_tokens)
                found = True

        if not found:
            return None

        # Encode unicode values to UTF-8 byte strings before urlencode().
        for k in (title_key, author_key, 'isbn'):
            v = params[k]
            if isinstance(v, unicode):
                params[k] = v.encode('utf-8')

        return BASE_URL+urlencode(params)
    # }}}

    def identify(self, log, result_queue, abort, title=None, authors=None,  # {{{
            identifiers={}, timeout=30):
        """Find matching books and put their metadata on *result_queue*.

        With an ``edelweiss`` identifier the details page is fetched
        directly; otherwise a catalog search is run and up to five results
        are fetched by Worker threads. Returns an error message when the
        search request or its parsing fails, otherwise None.
        """
        from urlparse import parse_qs

        book_url = self._get_book_url(identifiers.get('edelweiss', None))
        br = self.browser
        if book_url:
            entries = [(book_url, identifiers['edelweiss'])]
        else:
            entries = []
            query = self.create_query(log, title=title, authors=authors,
                    identifiers=identifiers)
            if not query:
                log.error('Insufficient metadata to construct query')
                return
            try:
                raw = br.open_novisit(query, timeout=timeout).read()
            except Exception as e:
                log.exception('Failed to make identify query: %r'%query)
                return as_unicode(e)

            try:
                root = parse_html(raw)
            except Exception as e:
                log.exception('Failed to parse identify results')
                return as_unicode(e)

            for entry in CSSSelect('div.listRow div.listRowMain')(root):
                a = entry.xpath('descendant::a[contains(@href, "sku=") and contains(@href, "ProductDetailPage.aspx")]')
                if not a:
                    continue
                href = a[0].get('href')
                qs = href.partition('?')[2]
                sku = parse_qs(qs).get('sku', None)
                if not (sku and sku[0]):
                    continue
                sku = sku[0]

                div = CSSSelect('div.sku.attGroup')(entry)
                if div:
                    text = astext(div[0])
                    isbns = [check_isbn(x.strip()) for x in text.split(',')]
                    for isbn in isbns:
                        if isbn:
                            self.cache_isbn_to_identifier(isbn, sku)
                for img in entry.xpath('descendant::img[contains(@src, "/jacket_covers/thumbnail/")]'):
                    self.cache_identifier_to_cover_url(sku, img.get('src').replace('/thumbnail/', '/flyout/'))

                # A result row without a format block must not abort the
                # whole search with an IndexError; treat it as a book.
                div = CSSSelect('div.format.attGroup')(entry)
                if div:
                    text = astext(div[0]).lower()
                    if 'audio' in text or 'mp3' in text:  # Audio-book, ignore
                        continue
                entries.append((self._get_book_url(sku), sku))

        # Nothing found using the identifiers: retry with title and authors
        # only.
        if (not entries and identifiers and title and authors and
                not abort.is_set()):
            return self.identify(log, result_queue, abort, title=title,
                    authors=authors, timeout=timeout)

        if not entries:
            return

        workers = [Worker(sku, url, i, result_queue, br.clone_browser(), timeout, log, self)
                   for i, (url, sku) in enumerate(entries[:5])]

        for w in workers:
            w.start()
            # Don't send all requests at the same time
            time.sleep(0.1)

        # Wait until every worker has finished or an abort is requested.
        while not abort.is_set():
            a_worker_is_alive = False
            for w in workers:
                w.join(0.2)
                if abort.is_set():
                    break
                if w.is_alive():
                    a_worker_is_alive = True
            if not a_worker_is_alive:
                break

    # }}}

    def download_cover(self, log, result_queue, abort,  # {{{
            title=None, authors=None, identifiers={}, timeout=30):
        """Download the cover and put ``(self, data)`` on *result_queue*.

        When no cover URL is cached for *identifiers*, ``identify`` is run
        first and the cached URL of the best-sorted result that has one is
        used. Download failures are logged, not raised.
        """
        cached_url = self.get_cached_cover_url(identifiers)
        if cached_url is None:
            log.info('No cached cover found, running identify')
            rq = Queue()
            self.identify(log, rq, abort, title=title, authors=authors,
                    identifiers=identifiers)
            if abort.is_set():
                return
            # Drain the queue without blocking.
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(key=self.identify_results_keygen(
                title=title, authors=authors, identifiers=identifiers))
            for mi in results:
                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break
        if cached_url is None:
            log.info('No cover found')
            return

        if abort.is_set():
            return
        br = self.browser
        log('Downloading cover from:', cached_url)
        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
            result_queue.put((self, cdata))
        except Exception:
            log.exception('Failed to download cover from:', cached_url)
    # }}}

if __name__ == '__main__':
    # Self-test: run identify against a few known books and check the
    # returned metadata.
    from calibre.ebooks.metadata.sources.test import (
        test_identify_plugin, title_test, authors_test, comments_test, pubdate_test)
    tests = [
        # Multiple authors and two part title and no general description
        ({'identifiers':{'edelweiss':'0321180607'}},
         [title_test(
             "XQuery from the Experts: A Guide to the W3C XML Query Language"
             , exact=True), authors_test([
                 'Howard Katz', 'Don Chamberlin', 'Denise Draper', 'Mary Fernandez',
                 'Michael Kay', 'Jonathan Robie', 'Michael Rys', 'Jerome Simeon',
                 'Jim Tivy', 'Philip Wadler']), pubdate_test(2003, 8, 22),
          comments_test('Jérôme Siméon'), lambda mi: bool(mi.comments and 'No title summary' not in mi.comments)
         ]),

        (  # An isbn not present in edelweiss
         {'identifiers':{'isbn': '9780316044981'}, 'title':'The Heroes',
          'authors':['Joe Abercrombie']},
         [title_test('The Heroes', exact=True),
          authors_test(['Joe Abercrombie'])]
        ),

        (  # Pubdate
         {'title':'The Great Gatsby', 'authors':['F. Scott Fitzgerald']},
         [title_test('The great gatsby', exact=True),
          authors_test(['F. Scott Fitzgerald']), pubdate_test(2004, 9, 29)]
        ),

    ]
    # Adjust start/stop to run only a slice of the tests.
    start, stop = 0, len(tests)

    tests = tests[start:stop]
    test_identify_plugin(Edelweiss.name, tests)

Older »