~ubuntu-branches/debian/sid/calibre/sid

« back to all changes in this revision

Viewing changes to recipes/gosc_full.recipe

Committer: Package Import Robot
Author(s): Martin Pitt
Date: 2014-02-27 07:48:06 UTC
mto: This revision was merged to the branch mainline in revision 74.
Revision ID: package-import@ubuntu.com-20140227074806-64wdebb3ptosxhhx

Tags: upstream-1.25.0+dfsg

Import upstream version 1.25.0+dfsg

files added:
manual/templates/epub_cover_template.html

recipes/disinformatico.recipe

recipes/gosc_full.recipe

recipes/icons/gosc_full.png

recipes/les_echos.recipe

resources/images/beautify.png

src/calibre/ebooks/metadata/xmp.py

src/calibre/gui2/tweak_book/diff/highlight.py

src/calibre/gui2/tweak_book/editor/smart

src/calibre/gui2/tweak_book/editor/smart/__init__.py

src/calibre/gui2/tweak_book/editor/smart/html.py

src/calibre/gui2/viewer/gestures.py

files removed:
recipes/icons/kdefamily_pl.png

recipes/kdefamily_pl.recipe

src/calibre/gui2/store/stores/foyles_uk_plugin.py

files modified:
Changelog.yaml

manual/conf.py

manual/conversion.rst

manual/custom.py

manual/diff.rst

manual/edit.rst

manual/epub.py

manual/faq.rst

manual/templates/layout.html

recipes/abc_es.recipe

recipes/ap.recipe

recipes/cosmopolitan_uk.recipe

recipes/cumhuriyet.recipe

recipes/gosc_niedzielny.recipe

recipes/kathemerini.recipe

recipes/mac_world_uk.recipe

recipes/nrc-nl-epub.recipe

recipes/nytimesbook.recipe

recipes/wired_it.recipe

resources/builtin_recipes.xml

resources/builtin_recipes.zip

resources/compiled_coffeescript.zip

resources/ebook-convert-complete.pickle

resources/images.qrc

resources/localization/locales.zip

resources/localization/stats.pickle

resources/quick_start.epub

setup/installer/__init__.py

setup/iso_639/pl.po

setup/iso_639/ru.po

setup/plugins_mirror.py

setup/resources.py

src/calibre/__init__.py

src/calibre/constants.py

src/calibre/customize/builtins.py

src/calibre/db/cache.py

src/calibre/debug.py

src/calibre/devices/kobo/driver.py

src/calibre/ebooks/chardet.py

src/calibre/ebooks/conversion/plugins/epub_output.py

src/calibre/ebooks/conversion/plugins/pdf_output.py

src/calibre/ebooks/html/input.py

src/calibre/ebooks/metadata/__init__.py

src/calibre/ebooks/metadata/meta.py

src/calibre/ebooks/metadata/opf2.py

src/calibre/ebooks/metadata/pdf.py

src/calibre/ebooks/metadata/sources/edelweiss.py

src/calibre/ebooks/mobi/reader/mobi8.py

src/calibre/ebooks/oeb/base.py

src/calibre/ebooks/oeb/display/paged.coffee

src/calibre/ebooks/oeb/parse_utils.py

src/calibre/ebooks/oeb/polish/check/links.py

src/calibre/ebooks/oeb/polish/check/main.py

src/calibre/ebooks/oeb/polish/check/parsing.py

src/calibre/ebooks/oeb/polish/choose.coffee

src/calibre/ebooks/oeb/polish/container.py

src/calibre/ebooks/oeb/polish/css.py

src/calibre/ebooks/oeb/polish/errors.py

src/calibre/ebooks/oeb/polish/font_stats.coffee

src/calibre/ebooks/oeb/polish/parsing.py

src/calibre/ebooks/oeb/polish/preview.coffee

src/calibre/ebooks/oeb/polish/split.py

src/calibre/ebooks/oeb/polish/tests/container.py

src/calibre/ebooks/oeb/polish/tests/parsing.py

src/calibre/ebooks/oeb/polish/toc.py

src/calibre/ebooks/oeb/polish/utils.py

src/calibre/ebooks/oeb/transforms/rasterize.py

src/calibre/ebooks/oeb/transforms/split.py

src/calibre/ebooks/pdf/render/from_html.py

src/calibre/ebooks/pdf/render/graphics.py

src/calibre/ebooks/pdf/render/serialize.py

src/calibre/gui2/__init__.py

src/calibre/gui2/actions/add.py

src/calibre/gui2/actions/copy_to_library.py

src/calibre/gui2/actions/next_match.py

src/calibre/gui2/actions/preferences.py

src/calibre/gui2/actions/restart.py

src/calibre/gui2/actions/similar_books.py

src/calibre/gui2/auto_add.py

src/calibre/gui2/comments_editor.py

src/calibre/gui2/complete2.py

src/calibre/gui2/dialogs/duplicates.py

src/calibre/gui2/dialogs/message_box.py

src/calibre/gui2/dialogs/metadata_bulk.py

src/calibre/gui2/email.py

src/calibre/gui2/init.py

src/calibre/gui2/jobs.py

src/calibre/gui2/layout.py

src/calibre/gui2/preferences/email.ui

src/calibre/gui2/preferences/email_ui.py

src/calibre/gui2/preferences/emailp.py

src/calibre/gui2/preferences/tweaks.py

src/calibre/gui2/preferences/tweaks.ui

src/calibre/gui2/preferences/tweaks_ui.py

src/calibre/gui2/progress_indicator/QProgressIndicator.cpp

src/calibre/gui2/progress_indicator/QProgressIndicator.h

src/calibre/gui2/progress_indicator/QProgressIndicator.sip

src/calibre/gui2/store/stores/amazon_de_plugin.py

src/calibre/gui2/store/stores/amazon_es_plugin.py

src/calibre/gui2/store/stores/amazon_fr_plugin.py

src/calibre/gui2/store/stores/amazon_it_plugin.py

src/calibre/gui2/store/stores/amazon_uk_plugin.py

src/calibre/gui2/store/stores/cdp_plugin.py

src/calibre/gui2/store/stores/mills_boon_uk_plugin.py

src/calibre/gui2/store/stores/publio_plugin.py

src/calibre/gui2/store/stores/woblink_plugin.py

src/calibre/gui2/store/stores/wolnelektury_plugin.py

src/calibre/gui2/toc/location.py

src/calibre/gui2/toc/main.py

src/calibre/gui2/tweak_book/boss.py

src/calibre/gui2/tweak_book/diff/main.py

src/calibre/gui2/tweak_book/diff/view.py

src/calibre/gui2/tweak_book/editor/syntax/base.py

src/calibre/gui2/tweak_book/editor/syntax/html.py

src/calibre/gui2/tweak_book/editor/text.py

src/calibre/gui2/tweak_book/editor/themes.py

src/calibre/gui2/tweak_book/editor/widget.py

src/calibre/gui2/tweak_book/file_list.py

src/calibre/gui2/tweak_book/main.py

src/calibre/gui2/tweak_book/preview.py

src/calibre/gui2/tweak_book/save.py

src/calibre/gui2/tweak_book/ui.py

src/calibre/gui2/tweak_book/undo.py

src/calibre/gui2/viewer/documentview.py

src/calibre/gui2/viewer/main_ui.py

src/calibre/gui2/viewer/toc.py

src/calibre/library/catalogs/epub_mobi.py

src/calibre/library/catalogs/epub_mobi_builder.py

src/calibre/library/cli.py

src/calibre/linux.py

src/calibre/translations/af.po

src/calibre/translations/ar.po

src/calibre/translations/ast.po

src/calibre/translations/az.po

src/calibre/translations/ber.po

src/calibre/translations/bg.po

src/calibre/translations/bn.po

src/calibre/translations/br.po

src/calibre/translations/bs.po

src/calibre/translations/ca.po

src/calibre/translations/calibre.pot

src/calibre/translations/cs.po

src/calibre/translations/cy.po

src/calibre/translations/da.po

src/calibre/translations/de.po

src/calibre/translations/el.po

src/calibre/translations/en_AU.po

src/calibre/translations/en_CA.po

src/calibre/translations/en_GB.po

src/calibre/translations/eo.po

src/calibre/translations/es.po

src/calibre/translations/et.po

src/calibre/translations/eu.po

src/calibre/translations/fa.po

src/calibre/translations/fi.po

src/calibre/translations/fil.po

src/calibre/translations/fo.po

src/calibre/translations/fr.po

src/calibre/translations/fr_CA.po

src/calibre/translations/fur.po

src/calibre/translations/gl.po

src/calibre/translations/gu.po

src/calibre/translations/he.po

src/calibre/translations/hi.po

src/calibre/translations/him.po

src/calibre/translations/hr.po

src/calibre/translations/hu.po

src/calibre/translations/id.po

src/calibre/translations/is.po

src/calibre/translations/it.po

src/calibre/translations/ja.po

src/calibre/translations/jv.po

src/calibre/translations/ka.po

src/calibre/translations/kn.po

src/calibre/translations/ko.po

src/calibre/translations/ku.po

src/calibre/translations/lt.po

src/calibre/translations/ltg.po

src/calibre/translations/lv.po

src/calibre/translations/mk.po

src/calibre/translations/ml.po

src/calibre/translations/mn.po

src/calibre/translations/mr.po

src/calibre/translations/ms.po

src/calibre/translations/my.po

src/calibre/translations/nb.po

src/calibre/translations/nds.po

src/calibre/translations/nl.po

src/calibre/translations/nn.po

src/calibre/translations/oc.po

src/calibre/translations/pa.po

src/calibre/translations/pl.po

src/calibre/translations/pt.po

src/calibre/translations/pt_BR.po

src/calibre/translations/ro.po

src/calibre/translations/ru.po

src/calibre/translations/sc.po

src/calibre/translations/si.po

src/calibre/translations/sk.po

src/calibre/translations/sl.po

src/calibre/translations/sq.po

src/calibre/translations/sr.po

src/calibre/translations/sr@latin.po

src/calibre/translations/sv.po

src/calibre/translations/ta.po

src/calibre/translations/te.po

src/calibre/translations/th.po

src/calibre/translations/tr.po

src/calibre/translations/ug.po

src/calibre/translations/uk.po

src/calibre/translations/ur.po

src/calibre/translations/vi.po

src/calibre/translations/wa.po

src/calibre/translations/yi.po

src/calibre/translations/zh_CN.po

src/calibre/translations/zh_HK.po

src/calibre/translations/zh_TW.po

src/calibre/utils/config.py

src/calibre/utils/ipc/simple_worker.py

src/calibre/utils/ipython.py

src/calibre/utils/localization.py

src/calibre/utils/logging.py

src/calibre/utils/podofo/__init__.py

src/calibre/utils/podofo/doc.cpp

src/calibre/utils/terminal.py

src/calibre/web/jsbrowser/browser.py

Show diffs side-by-side

added added

removed removed

recipes/gosc_full.recipe

# -*- coding: utf-8 -*-

from __future__ import unicode_literals

__license__ = 'GPL v3'

__copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com \

2013-2014, Tomasz Długosz, tomek3d@gmail.com'

from calibre.web.feeds.news import BasicNewsRecipe

import re, time

from lxml import html

class GN(BasicNewsRecipe):
    """Calibre recipe that downloads a complete issue of "Gosc Niedzielny".

    The issue is picked from the archive listing on gosc.pl (see
    ``find_last_issue``). ``parse_index`` builds the feeds: the editorial,
    one feed per category linked from the issue overview, and a feed of
    articles that appear in the issue's full listing but in no category.
    """

    __author__ = 'Piotr Kontek, Tomasz Długosz'
    title = u'Gość Niedzielny - pełny numer'
    publisher = 'Wydawnictwo Kurii Metropolitalnej w Katowicach'
    description = 'Ogólnopolski tygodnik katolicki - pełny numer sprzed 4 tygodni'
    encoding = 'utf-8'
    no_stylesheets = True
    language = 'pl'
    remove_javascript = True

    def find_last_issue(self):
        """Return the site-relative href of the issue to download.

        Fetches the archive listing and collects the issue links with an
        XPath query. On weekdays ``'3'`` and ``'4'`` (``%w``: Wednesday and
        Thursday) the sixth link is returned, otherwise the fifth.

        NOTE(review): presumably the offset shifts mid-week because a new
        issue is added to the listing then -- confirm against the site.

        Raises:
            IndexError: if the listing contains fewer links than expected.
        """
        raw = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny/', raw=True)
        doc = html.fromstring(raw)
        issue_links = doc.xpath('//div[@class="c"]//div[@class="search-result"]/div[1]/div[2]/h1//a/@href')

        if time.strftime("%w") in ['3', '4']:
            return issue_links[5]
        return issue_links[4]

    def parse_index(self):
        """Build the list of feeds for the selected issue.

        Side effect: sets ``self.cover_url`` from the issue overview page.

        Returns:
            A list of ``(section_title, articles)`` tuples, where
            ``articles`` is a list of dicts as produced by
            ``find_articles`` (the editorial dict has no ``'date'`` key).
        """
        # Resolve the issue once: every call to find_last_issue() downloads
        # and parses the archive listing again.
        issue_path = self.find_last_issue()

        soup = self.index_to_soup('http://gosc.pl' + issue_path)
        release_box = soup.find('div', attrs={'class': 'fl-w100 release-wp'})
        cover_link = release_box.findAll('a')[-4]
        self.cover_url = 'http://www.gosc.pl' + cover_link.contents[0]['src']

        feeds = []
        # URLs already assigned to a feed; a set because it is only used
        # for membership tests.
        enlisted = set()

        # editorial:
        a = soup.find('div', attrs={'class': 'release-wp-b'})
        art = a.find('a')
        articles = [
            {'title': self.tag_to_string(art),
             'url': 'http://www.gosc.pl' + art['href'],
             'description': self.tag_to_string(a.find('p', attrs={'class': 'b lead'}))
             }]
        feeds.append((u'Wstępniak', articles))
        enlisted.add(articles[0].get('url'))

        # columns:
        for addr in soup.findAll('a', attrs={'href': re.compile('kategoria')}):
            # Skip the "all articles from this category" links; only the
            # category title links are followed.
            if addr.string != u'wszystkie artyku\u0142y z tej kategorii \xbb':
                main_block = self.index_to_soup('http://www.gosc.pl' + addr['href'])
                articles = list(self.find_articles(main_block))
                if articles:
                    section = addr.string
                    feeds.append((section, articles))
                    enlisted.update(article.get('url') for article in articles)

        # not assigned content:
        listing_path = issue_path.replace('przeglad', 'wszystko')
        page = 1
        not_assigned = []
        while True:
            soup = self.index_to_soup('http://gosc.pl' + listing_path + '/' + str(page))
            articles = list(self.find_articles(soup))
            not_assigned.extend([x for x in articles if x.get('url') not in enlisted])
            page += 1
            pages = soup.find('span', attrs={'class': 'pgr_nrs'})
            # No pager element means there is nothing more to fetch.
            if pages is None:
                break
            # NOTE(review): only the second character of each pager link's
            # text is compared -- presumably the label carries one leading
            # character; this also limits matching to single-digit pages.
            if str(page) not in [self.tag_to_string(x)[1] for x in pages.findAll('a')]:
                break

        # Placed directly after the editorial.
        feeds.insert(1, (u'Nieprzypisane', not_assigned))
        return feeds

    def find_articles(self, main_block):
        """Yield an article dict for every article teaser in ``main_block``.

        Teasers are ``div`` elements with class ``prev_doc2`` or
        ``sr-document``. Each yielded dict has the keys ``'title'``,
        ``'url'``, ``'date'`` and ``'description'``.
        """
        for a in main_block.findAll('div', attrs={'class': ['prev_doc2', 'sr-document']}):
            art = a.find('a')
            yield {
                'title': self.tag_to_string(art),
                'url': 'http://www.gosc.pl' + art['href'],
                'date': self.tag_to_string(a.find('p', attrs={'class': 'sr-date'})),
                'description': self.tag_to_string(a.find('p', attrs={'class': 'sr-lead'}))
            }

    def append_page(self, soup, appendtag):
        """Append the content of an article's follow-up pages to ``appendtag``.

        For every link inside the ``pgr_nrs`` pager found in ``appendtag``,
        the linked page is fetched and its first element with class
        ``intextAd`` is appended to the end of ``appendtag``. ``soup`` is
        not used.
        """
        chpage = appendtag.find(attrs={'class': 'pgr_nrs'})
        if chpage is None:
            return
        for page in chpage.findAll('a'):
            soup2 = self.index_to_soup('http://gosc.pl' + page['href'])
            pagetext = soup2.find(attrs={'class': 'intextAd'})
            # Skip pages without the expected element instead of trying to
            # insert None into the tree.
            if pagetext is None:
                continue
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)

    def preprocess_html(self, soup):
        """Merge follow-up pages and point the lightbox image at its target.

        The first child of the ``lightbox`` element gets its ``src`` set to
        the element's own ``href``. Returns the modified ``soup``.
        """
        self.append_page(soup, soup.body)
        r = soup.find(attrs={'class': 'lightbox'})
        if r:
            r.contents[0]['src'] = r['href']
        return soup

    def postprocess_html(self, soup, first_fetch):
        """Clean up the downloaded article markup and return ``soup``.

        Removes the ``pgr`` elements, drops inline ``style`` from image
        containers, and wraps ``cm-i-a`` elements in clearing ``div``s.
        ``first_fetch`` is not used.
        """
        for r in soup.findAll(attrs={'class': 'pgr'}):
            r.extract()
        for r in soup.findAll(attrs={'class': ['di_dr', 'doc_image']}):
            del r['style']
        for r in soup.findAll(attrs={'class': 'cm-i-a'}):
            r.replaceWith('<div style="clear:both"></div>' + r.prettify() + '<div style="clear:both"></div>')
        return soup

    keep_only_tags = [
        dict(name='div', attrs={'class':'cf txt'})
    ]

    remove_tags = [
        dict(name='p', attrs={'class':['r tr', 'l l-2', 'wykop', 'tags']}),
        dict(name='div', attrs={'class':['doc_actions', 'cf', 'fr1_cl']}),
        dict(name='div', attrs={'id':'vote'}),
        dict(name='link'),
        dict(name='a', attrs={'class':'img_enlarge'})
    ]

    extra_css = '''
        h1 {font-size:150%}
        p.limiter {font-size:150%; font-weight: bold}
        span.cm-i-a {text-transform:uppercase;font-size:50%}
        span.cm-i-p {font-style:italic; font-size:70%;text-align:right}
    '''

Older »