1
# -*- coding: utf-8 -*-
4
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
5
__docformat__ = 'restructuredtext en'
8
Transform OEB content into RTF markup
22
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace, \
24
from calibre.ebooks.oeb.stylizer import Stylizer
25
from calibre.ebooks.metadata import authors_to_string
30
# NOTE(review): the entries below come from module-level lookup tables whose
# opening lines are not visible in this extract. dump_text reads tables named
# TAGS, SINGLE_TAGS and SINGLE_TAGS_END; which entries belong to which table
# must be confirmed against the full file.
# All six heading levels map to the same RTF control-word sequence.
'h1': '\\b \\par \\pard \\hyphpar',
31
'h2': '\\b \\par \\pard \\hyphpar',
32
'h3': '\\b \\par \\pard \\hyphpar',
33
'h4': '\\b \\par \\pard \\hyphpar',
34
'h5': '\\b \\par \\pard \\hyphpar',
35
'h6': '\\b \\par \\pard \\hyphpar',
36
# List items and paragraphs share one sequence, which ends in a literal tab.
'li': '\\par \\pard \\hyphpar \t',
37
'p': '\\par \\pard \\hyphpar \t',
44
# 'br' and 'div' both emit an RTF {\line } group wrapped in newlines.
'br': '\n{\\line }\n',
45
'div': '\n{\\line }\n',
49
# NOTE(review): second 'div' entry with the same value; presumably it belongs
# to a different table than the one above -- confirm against the full file.
'div': '\n{\\line }\n',
53
# (CSS property, {property value: RTF control word}) pairs. dump_text looks up
# style[s[0]] in s[1] for pairs of this shape -- presumably these ones.
('display', {'block': '\\par \\pard \\hyphpar'}),
54
('font-weight', {'bold': '\\b', 'bolder': '\\b'}),
55
('font-style', {'italic': '\\i'}),
56
('text-align', {'center': '\\qc', 'left': '\\ql', 'right': '\\qr'}),
57
('text-decoration', {'line-through': '\\strike', 'underline': '\\ul'}),
80
class RTFMLizer(object):
82
# Constructor taking a logger-like object.
# NOTE(review): the body is not visible in this extract. Other methods call
# self.log.info() and self.log.debug(), so it presumably stores `log` on the
# instance -- confirm against the full file.
def __init__(self, log):
85
# Entry point for conversion: logs the start of the conversion, stores the
# book on the instance and returns whatever mlize_spine() produces.
# NOTE(review): `opts` is not used on any visible line, yet mlize_spine reads
# self.opts.output_profile; the assignment is presumably on a line missing
# from this extract -- confirm.
def extract_content(self, oeb_book, opts):
86
self.log.info('Converting XHTML to RTF markup...')
87
self.oeb_book = oeb_book
89
return self.mlize_spine()
91
# Builds the RTF text for the whole book: header, optional title page, every
# spine item, footer, then image substitution and whitespace clean-up.
# NOTE(review): indentation and the final return are not visible in this
# extract, so the exact nesting of the statements below must be confirmed.
def mlize_spine(self):
92
output = self.header()
93
# A title page referenced from the guide is handled separately from the spine.
if 'titlepage' in self.oeb_book.guide:
94
href = self.oeb_book.guide['titlepage'].href
95
item = self.oeb_book.manifest.hrefs[href]
96
# Checks that the title page has no spine position, i.e. the spine loop below
# would not render it.
if item.spine_position is None:
97
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
98
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
99
# RTF page-break group appended to the output (presumably only after a
# rendered title page -- nesting not visible here).
output += '{\\page } '
100
# Convert each spine item's <body> with a Stylizer built for that item.
for item in self.oeb_book.spine:
101
self.log.debug('Converting %s to RTF markup...' % item.href)
102
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
103
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
104
output += self.footer()
105
# Replace the image placeholders emitted by dump_text, then tidy whitespace.
output = self.insert_images(output)
106
output = self.clean_text(output)
111
# NOTE(review): the enclosing `def` line is not visible in this extract;
# mlize_spine calls self.header(), so this is presumably that method's body.
# Returns the opening of the RTF document, with an \info group holding the
# first title in the book metadata and the creators joined by
# authors_to_string(). The values are interpolated as-is on this line.
return u'{\\rtf1{\\info{\\title %s}{\\author %s}}\\ansi\\ansicpg1252\\deff0\\deflang1033' % (self.oeb_book.metadata.title[0].value, authors_to_string([x.value for x in self.oeb_book.metadata.creator]))
116
# Replaces each 'SPECIAL_IMAGE-<basename>-REPLACE_ME' placeholder in `text`
# with an RTF \pict group carrying the image as JPEG hex data.
# NOTE(review): the return statement is not visible in this extract;
# mlize_spine assigns the result, so the updated text is presumably returned.
def insert_images(self, text):
117
for item in self.oeb_book.manifest:
118
if item.media_type in OEB_IMAGES:
119
# Placeholders are keyed by file basename, matching what dump_text writes
# for <img> elements.
src = os.path.basename(item.href)
120
data, width, height = self.image_to_hexstring(item.data)
121
text = text.replace('SPECIAL_IMAGE-%s-REPLACE_ME' % src, '\n\n{\\*\\shppict{\\pict\\picw%i\\pich%i\\jpegblip \n%s\n}}\n\n' % (width, height, data))
124
# Re-encodes raw image data as JPEG and returns a tuple of
# (hex text, im.size[0], im.size[1]); insert_images unpacks the last two as
# width and height.
# NOTE(review): the loop header, the raw_hex initialisation and the code that
# builds hex_string are not visible in this extract.
def image_to_hexstring(self, data):
125
im = Image.open(cStringIO.StringIO(data))
126
# `data` is rebound to an in-memory buffer that receives the JPEG bytes.
data = cStringIO.StringIO()
127
im.save(data, 'JPEG')
128
data = data.getvalue()
132
# Each character becomes two lowercase hex digits, left-padded with '0'.
raw_hex += hex(ord(char)).replace('0x', '').rjust(2, '0')
134
# Images must be broken up so that they are no longer than 129 chars
145
return (hex_string, im.size[0], im.size[1])
147
# Normalises whitespace in the generated RTF text.
# NOTE(review): the return statement is not visible in this extract;
# mlize_spine assigns the result, so the cleaned text is presumably returned.
def clean_text(self, text):
148
# Remove excess spaces at beginning and end of lines
149
text = re.sub('(?m)^[ ]+', '', text)
150
text = re.sub('(?m)[ ]+$', '', text)
152
# Remove excessive newlines
153
#text = re.sub('%s{1,1}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text)
154
# NOTE(review): the {3,} quantifier binds only to the last character of
# os.linesep. When os.linesep is '\r\n' this matches '\r' followed by three
# or more '\n', not three or more line separators -- confirm intent.
text = re.sub('%s{3,}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text)
156
# Remove excessive spaces
157
text = re.sub('[ ]{2,}', ' ', text)
159
# Collapse runs of three or more {\line } groups down to two.
text = re.sub(r'(\{\\line \}\s*){3,}', r'{\\line }{\\line }', text)
160
#text = re.compile(r'(\{\\line \}\s*)+(?P<brackets>}*)\s*\{\\par').sub(lambda mo: r'%s{\\par' % mo.group('brackets'), text)
162
# Replace non-breaking spaces with regular spaces
163
text = text.replace(u'\xa0', ' ')
164
# NOTE(review): this matches a newline followed by a carriage return
# ('\n\r'), not the '\r\n' sequence -- confirm which was intended.
text = text.replace('\n\r', '\n')
168
# Recursively converts an element and its children to RTF markup, using
# `tag_stack` to track which RTF groups (and the 'block' marker) are open.
# NOTE(review): this extract omits several lines of this method (the
# initialisation of `text` and `tag_count`, loop headers, early returns and
# the final return), and indentation is lost; confirm nesting in the full file.
# NOTE(review): `tag_stack=[]` is a mutable default, so the same list is
# shared by every call that omits the argument (mlize_spine does). It looks
# as though each call pops what it pushed, but confirm the stack can never be
# left non-empty, e.g. by an exception.
def dump_text(self, elem, stylizer, tag_stack=[]):
169
if not isinstance(elem.tag, basestring) \
170
or namespace(elem.tag) != XHTML_NS:
174
style = stylizer.style(elem)
176
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
177
or style['visibility'] == 'hidden':
180
tag = barename(elem.tag)
183
# Are we in a paragraph block?
184
if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
185
if 'block' not in tag_stack:
187
# 'block' is a marker entry, not an RTF control word; the closing loop below
# tests for it when popping.
tag_stack.append('block')
189
# Process tags that need special processing and that do not have inner
190
# text. Usually these require an argument
192
src = os.path.basename(elem.get('src'))
195
if 'block' not in tag_stack:
196
block_start = '{\\par \\pard \\hyphpar '
198
# Placeholder that insert_images later swaps for the hex-encoded picture.
text += '%s SPECIAL_IMAGE-%s-REPLACE_ME %s' % (block_start, src, block_end)
200
single_tag = SINGLE_TAGS.get(tag, None)
204
rtf_tag = TAGS.get(tag, None)
# Open a group for the tag only if the same control sequence is not already
# on the stack.
205
if rtf_tag and rtf_tag not in tag_stack:
207
text += '{%s\n' % rtf_tag
208
tag_stack.append(rtf_tag)
210
# Processes style information
212
# `s` is a (property name, {value: control word}) pair; the loop that binds
# it is not visible in this extract.
style_tag = s[1].get(style[s[0]], None)
213
if style_tag and style_tag not in tag_stack:
215
text += '{%s\n' % style_tag
216
tag_stack.append(style_tag)
218
# Process tags that contain text.
219
if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
220
text += '%s' % elem.text
223
# Recurse into a child element, passing the same tag_stack list (the loop
# that binds `item` is not visible in this extract).
text += self.dump_text(item, stylizer, tag_stack)
225
# Pop `tag_count` entries off the stack, one per loop iteration.
for i in range(0, tag_count):
226
end_tag = tag_stack.pop()
227
if end_tag != 'block':
230
single_tag_end = SINGLE_TAGS_END.get(tag, None)
232
text += single_tag_end
234
# Tail text (text following this element's end tag): appended as-is when a
# block is open, otherwise wrapped in its own paragraph group.
if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
235
if 'block' in tag_stack:
236
text += '%s ' % elem.tail
238
text += '{\\par \\pard \\hyphpar %s}' % elem.tail