2
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
3
import os, sys, shutil, logging, glob
7
from calibre.ebooks.lrf import option_parser as lrf_option_parser
8
from calibre.ebooks.metadata.meta import get_metadata
9
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
10
from calibre import setup_cli_handlers
11
from calibre.libwand import convert, WandException
12
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
13
from calibre.ebooks.lrf.rtf.xsl import xhtml
14
from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
15
from calibre.ptempfile import PersistentTemporaryDirectory
16
from calibre.ebooks.metadata import MetaInformation
17
from calibre.ebooks.metadata.opf import OPFCreator
20
# Build the command-line option parser for the RTF converter on top of the
# generic LRF option parser, passing it the (translatable) usage text, then
# register the extra --keep-intermediate-files boolean flag (off by default).
# NOTE(review): the enclosing `def` line, the closing parenthesis of the call
# and any `return parser` are not visible in this view; main() calls
# option_parser(), so presumably this is that function's body -- confirm.
parser = lrf_option_parser(
21
_('''%prog [options] mybook.rtf
24
%prog converts mybook.rtf to mybook.lrf''')
26
parser.add_option('--keep-intermediate-files', action='store_true', default=False)
29
# Handle the WMF images that sit in the current working directory: compute a
# .jpg counterpart for each one and rewrite the references in `html`.
#
# html   -- HTML markup as a string; image base names are substituted in it.
# logger -- logger that receives a warning when an image cannot be converted.
#
# NOTE(review): the loop over `wmfs`, the `try:` line, the actual conversion
# call and the return statement are not visible in this view. Presumably
# `convert` from calibre.libwand is invoked per file and the updated `html` is
# returned -- confirm against the full source.
def convert_images(html, logger):
30
# The pattern is relative, so only the current working directory is searched;
# both lower- and upper-case extensions are collected.
wmfs = glob.glob('*.wmf') + glob.glob('*.WMF')
32
# Same directory and base name as the WMF file, with a .jpg extension.
target = os.path.join(os.path.dirname(wmf), os.path.splitext(os.path.basename(wmf))[0]+'.jpg')
35
# References are matched by base name only, not by full path.
html = html.replace(os.path.basename(wmf), os.path.basename(target))
36
except WandException, err:
37
# A failed conversion is reported as a warning rather than re-raised here.
logger.warning(u'Unable to convert image %s with error: %s'%(wmf, unicode(err)))
41
# Convert the RTF file at `path`: generate intermediate HTML in a temporary
# directory, fill in missing metadata options, and hand the HTML to the
# HTML->LRF converter (html_process_file).
#
# path    -- path of the input RTF file; '~' is expanded and it is made absolute.
# options -- parsed options object. Attributes read or written here: verbose,
#            output, lrs, title, author, category, freetext and (optionally)
#            keep_intermediate_files.
# logger  -- logger to use.
#
# NOTE(review): several lines are missing from this view (the guard around the
# logger setup, the open/close of the input file, any try/finally and cleanup
# of the temporary directory). Comments below describe only what is visible.
def process_file(path, options, logger=None):
43
# Verbose mode lowers the log threshold to DEBUG.
level = logging.DEBUG if options.verbose else logging.INFO
44
# NOTE(review): presumably only done when no logger was passed in -- the
# condition is not visible here.
logger = logging.getLogger('rtf2lrf')
45
setup_cli_handlers(logger, level)
46
rtf = os.path.abspath(os.path.expanduser(path))
48
# NOTE(review): `f` is not defined in the visible lines; presumably an open
# file object for `rtf` -- confirm.
mi = get_metadata(f, 'rtf')
50
# Directory that holds the intermediate files (see the debug message below).
tdir = PersistentTemporaryDirectory('_rtf2lrf')
51
html = generate_html(rtf, tdir)
54
# No explicit output: use the input's base name with .lrs or .lrf, resolved
# against the current working directory.
if not options.output:
55
ext = '.lrs' if options.lrs else '.lrf'
56
options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
57
options.output = os.path.abspath(os.path.expanduser(options.output))
59
# Title taken from the input file name without its extension.
# NOTE(review): presumably guarded by a "metadata has no title" check that is
# not visible here.
mi.title = os.path.splitext(os.path.basename(rtf))[0]
60
# For each field, a value read from the file's metadata is used only when the
# option is unset or still the placeholder 'Unknown'.
if (not options.title or options.title == 'Unknown'):
61
options.title = mi.title
62
if (not options.author or options.author == 'Unknown') and mi.author:
63
options.author = mi.author
64
if (not options.category or options.category == 'Unknown') and mi.category:
65
options.category = mi.category
66
if (not options.freetext or options.freetext == 'Unknown') and mi.comments:
67
options.freetext = mi.comments
69
html_process_file(html, options, logger)
72
# hasattr() guards against option objects that lack the
# keep_intermediate_files attribute.
if hasattr(options, 'keep_intermediate_files') and options.keep_intermediate_files:
73
logger.debug('Intermediate files in '+ tdir)
77
# Command-line entry point: parse the arguments and convert the given RTF file.
#
# args   -- argument list, defaulting to sys.argv; element 1 is used as the
#           input file, so element 0 is expected to be the program name.
# logger -- passed through unchanged to process_file().
#
# NOTE(review): option_parser() is not defined in the visible lines, and the
# condition that leads to the 'No rtf file specified' message as well as any
# return value are missing from this view.
def main(args=sys.argv, logger=None):
78
parser = option_parser()
79
options, args = parser.parse_args(args)
83
print 'No rtf file specified'
85
process_file(args[1], options, logger)
89
# Prepare the RTF -> XML conversion of `rtfpath`, with `index.xml` inside
# `tdir` as the output path.
#
# rtfpath -- path of the input RTF file; made absolute below.
# tdir    -- directory in which index.xml is placed.
#
# NOTE(review): the ParseRtf(...) call itself, the parse step and the return
# statement are not visible in this view. The `name = value,` lines below are
# presumably keyword arguments to that call, and several of the explanatory
# comments are cut off mid-sentence -- confirm against the full source.
# generate_html() passes this function's result to os.path.dirname() and
# etree.parse(), so it presumably returns the path of the XML file.
def generate_xml(rtfpath, tdir):
90
# Imported inside the function rather than at module level.
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
91
ofile = os.path.join(tdir, 'index.xml')
94
rtfpath = os.path.abspath(rtfpath)
99
# Convert symbol fonts to unicode equivalents. Default
103
# Convert Zapf fonts to unicode equivalents. Default
107
# Convert Wingding fonts to unicode equivalents.
109
convert_wingdings = 1,
111
# Convert RTF caps to real caps.
115
# Indent resulting XML.
116
# Default is 0 (no indent).
119
# Form lists from RTF. Default is 1.
122
# Convert headings to sections. Default is 0.
# (Set to 1 here, i.e. not the stated default.)
123
headings_to_sections = 1,
125
# Group paragraphs with the same style name. Default is 1.
128
# Group borders. Default is 1.
131
# Write or do not write paragraphs. Default is 0.
132
empty_paragraphs = 0,
140
# Convert the RTF file to HTML in three visible stages -- RTF -> XML (via
# generate_xml), XML parsing, and an XSLT transform -- then build OPF metadata
# for the resulting index.html. Progress messages are printed to stdout.
#
# rtfpath -- path of the input RTF file; made absolute below.
# tdir    -- working directory handed to generate_xml(); afterwards rebound to
#            the directory that contains the generated XML file.
#
# NOTE(review): the `try:` / bare `except` lines, the import of `etree`, the
# code that writes `res` to index.html and the return statement are not
# visible in this view. process_file() passes this function's result to
# html_process_file(), so it presumably returns the HTML path -- confirm.
def generate_html(rtfpath, tdir):
141
print 'Converting RTF to XML...'
142
rtfpath = os.path.abspath(rtfpath)
144
xml = generate_xml(rtfpath, tdir)
145
# Unsupported RTF constructs are re-raised as a generic Exception carrying a
# translatable message that suggests going through HTML instead.
except RtfInvalidCodeException:
146
raise Exception(_('This RTF file has a feature calibre does not support. Convert it to HTML and then convert it.'))
147
tdir = os.path.dirname(xml)
151
print 'Parsing XML...'
152
# recover=True asks the parser to keep going on malformed input;
# no_network=True disables network access during parsing.
parser = etree.XMLParser(recover=True, no_network=True)
154
doc = etree.parse(xml, parser)
157
# Fallback: run the raw file through BeautifulStoneSoup and parse its
# serialised output instead.
# NOTE(review): the handler that selects this path is not visible here.
print 'Parsing failed. Trying to clean up XML...'
158
soup = BeautifulStoneSoup(open(xml, 'rb').read())
159
doc = etree.fromstring(str(soup))
160
print 'Converting XML to HTML...'
161
# `xhtml` is the stylesheet imported from calibre.ebooks.lrf.rtf.xsl.
styledoc = etree.fromstring(xhtml)
163
transform = etree.XSLT(styledoc)
164
result = transform(doc)
165
tdir = os.path.dirname(xml)
166
html = os.path.join(tdir, 'index.html')
168
res = transform.tostring(result)
169
# Rewrite 'xmlns:html' to 'xmlns', but only within the first 100 characters
# of the output; the remainder is left untouched.
res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
173
mi = get_metadata(open(rtfpath, 'rb'), 'rtf')
175
# Empty metadata object.
# NOTE(review): presumably the fallback when reading metadata fails -- the
# try/except around these two assignments is not visible.
mi = MetaInformation(None, None)
177
# Title from the input file name without extension.
# NOTE(review): any guarding condition for this and for the author default
# below is not visible.
mi.title = os.path.splitext(os.path.basename(rtfpath))[0]
179
mi.authors = [_('Unknown')]
180
# OPF describing a single-document book: index.html is both the only manifest
# entry and the only spine item.
opf = OPFCreator(tdir, mi)
181
opf.create_manifest([('index.html', None)])
182
opf.create_spine(['index.html'])
183
# 'metadata.opf' is a relative path, so it is created in the current working
# directory; the file object is passed straight to render() and is not
# explicitly closed in the visible code.
# NOTE(review): whether the working directory equals `tdir` at this point
# cannot be determined from the visible lines.
opf.render(open('metadata.opf', 'wb'))
188
if __name__ == '__main__':
b'\\ No newline at end of file'