1
# -*- coding: utf-8 -*-
4
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
5
__docformat__ = 'restructuredtext en'
8
Transform OEB content into FB2 markup
12
from base64 import b64encode
14
from lxml import etree
16
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
17
from calibre.ebooks.oeb.stylizer import Stylizer
18
from calibre.ebooks.oeb.base import OEB_IMAGES
19
from calibre.constants import __appname__, __version__
29
('font-weight', {'bold' : 'strong', 'bolder' : 'strong'}),
30
('font-style', {'italic' : 'emphasis'}),
33
class FB2MLizer(object):
34
def __init__(self, log):
37
def extract_content(self, oeb_book, opts):
38
self.log.info('Converting XHTML to FB2 markup...')
39
self.oeb_book = oeb_book
41
return self.fb2mlize_spine()
43
def fb2mlize_spine(self):
    """Convert the whole book into one FB2 document.

    The pieces are produced in this order: FB2 header, the guide's title
    page (only when its ``spine_position`` is None), every spine item,
    the body footer, the embedded images and the document footer.  The
    joined markup is run through ``clean_text`` and then parsed and
    re-serialised by lxml with pretty printing.

    Returns:
        The serialised FB2 document, prefixed with an XML declaration.

    Note:
        The markup is parsed with ``etree.fromstring`` before being
        returned, so markup that is not well-formed XML makes this
        method raise instead of returning a broken document.
    """
    def convert(item):
        # Turn one manifest item into FB2 markup for its XHTML <body>.
        # dump_text declares ``tag_stack=[]`` as a mutable default, which
        # is shared between calls; handing it a fresh list keeps every
        # item independent even if an earlier conversion left tags behind.
        stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
        return self.dump_text(item.data.find(XHTML('body')), stylizer, [])

    # Collect the pieces and join once instead of growing a string with +=.
    parts = [self.fb2_header()]

    if 'titlepage' in self.oeb_book.guide:
        self.log.debug('Generating cover page...')
        href = self.oeb_book.guide['titlepage'].href
        item = self.oeb_book.manifest.hrefs[href]
        # NOTE(review): spine_position being None presumably means the
        # title page is not part of the spine, so the loop below would not
        # emit it -- confirm against the manifest item implementation.
        if item.spine_position is None:
            parts.append(convert(item))

    for item in self.oeb_book.spine:
        self.log.debug('Converting %s to FictionBook2 XML' % item.href)
        parts.append(convert(item))

    parts.append(self.fb2_body_footer())
    parts.append(self.fb2mlize_images())
    parts.append(self.fb2_footer())

    output = self.clean_text(u''.join(parts))
    # Round-trip through lxml to validate and pretty-print the markup.
    return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)
63
return u'<FictionBook xmlns:xlink="http://www.w3.org/1999/xlink" ' \
64
'xmlns="http://www.gribuser.ru/xml/fictionbook/2.0">\n' \
65
'<description>\n<title-info><book-title>%s</book-title> ' \
66
'</title-info><document-info> ' \
67
'<program-used>%s - %s</program-used></document-info>\n' \
68
'</description>\n<body>\n<section>' % (self.oeb_book.metadata.title[0].value, __appname__, __version__)
70
def fb2_body_footer(self):
    """Return the markup that closes the ``<section>`` and ``<body>`` elements."""
    closing_markup = u'\n</section>\n</body>'
    return closing_markup
74
return u'</FictionBook>'
76
def fb2mlize_images(self):
78
for item in self.oeb_book.manifest:
79
if item.media_type in OEB_IMAGES:
80
raw_data = b64encode(item.data)
81
# Don't put the encoded image on a single line.
90
images += '<binary id="%s" content-type="%s">%s\n</binary>' % (os.path.basename(item.href), item.media_type, data)
93
def clean_text(self, text):
94
text = text.replace('&', '')
98
def dump_text(self, elem, stylizer, tag_stack=[]):
99
if not isinstance(elem.tag, basestring) \
100
or namespace(elem.tag) != XHTML_NS:
104
style = stylizer.style(elem)
106
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
107
or style['visibility'] == 'hidden':
110
tag = barename(elem.tag)
114
fb2_text += '<image xlink:href="#%s" />' % os.path.basename(elem.attrib['src'])
117
fb2_tag = TAG_MAP.get(tag, None)
118
if fb2_tag and fb2_tag not in tag_stack:
120
fb2_text += '<%s>' % fb2_tag
121
tag_stack.append(fb2_tag)
124
# Processes style information
126
style_tag = s[1].get(style[s[0]], None)
129
fb2_text += '<%s>' % style_tag
130
tag_stack.append(style_tag)
132
if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
133
fb2_text += elem.text
136
fb2_text += self.dump_text(item, stylizer, tag_stack)
139
for i in range(0, tag_count):
140
close_tag_list.insert(0, tag_stack.pop())
141
fb2_text += self.close_tags(close_tag_list)
143
if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
144
if 'p' not in tag_stack:
145
fb2_text += '<p>%s</p>' % elem.tail
147
fb2_text += elem.tail
151
def close_tags(self, tags):
153
for i in range(0, len(tags)):
155
fb2_text += '</%s>' % fb2_tag