1
from __future__ import with_statement
3
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
4
__docformat__ = 'restructuredtext en'
7
'''Convert an ODT file into an Open Ebook'''
10
import os

from odf.odf2xhtml import ODF2XHTML
12
from calibre import CurrentDir, walk
13
from calibre.customize.conversion import InputFormatPlugin
15
class Extract(ODF2XHTML):
    '''
    ODF to XHTML converter that unpacks an ODT document into a directory.

    Calling an instance writes ``index.xhtml``, the embedded pictures and
    ``metadata.opf`` into the requested output directory.
    '''

    def extract_pictures(self, zf):
        '''
        Write every archive member whose name starts with ``Pictures`` to
        the current working directory, under its archive-relative name.

        :param zf: Open zip archive providing ``namelist()`` and ``read()``.
        '''
        if not os.path.exists('Pictures'):
            os.makedirs('Pictures')
        for name in zf.namelist():
            if not name.startswith('Pictures'):
                continue
            # Directory entries have no payload and cannot be opened as files
            if name.endswith('/'):
                continue
            # Member names come from the archive; never follow ``..``
            # components out of the output directory
            if '..' in name.replace('\\', '/').split('/'):
                continue
            parent = os.path.dirname(name)
            if parent and not os.path.exists(parent):
                os.makedirs(parent)
            data = zf.read(name)
            with open(name, 'wb') as f:
                f.write(data)

    def __call__(self, stream, odir):
        '''
        Convert the ODT document in ``stream`` and store the result in
        ``odir``.

        :param stream: File-like object holding the ODT file. It is read by
                       the converter, the zip reader and the metadata
                       reader in turn (assumed seekable -- TODO confirm).
        :param odir: Output directory; created when it does not exist.
        :return: Absolute path of the generated ``metadata.opf``.
        '''
        from calibre.utils.zipfile import ZipFile
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.metadata.opf2 import OPFCreator

        if not os.path.exists(odir):
            os.makedirs(odir)
        with CurrentDir(odir):
            # Parenthesised single argument prints identically on Python 2
            print('Extracting ODT file...')
            html = self.odf2xhtml(stream)
            with open('index.xhtml', 'wb') as f:
                f.write(html.encode('utf-8'))
            zf = ZipFile(stream, 'r')
            self.extract_pictures(zf)
            # The zip reader moved the stream position; rewind before the
            # metadata reader consumes the same stream
            stream.seek(0)
            mi = get_metadata(stream, 'odt')
            # Only fall back to placeholders when the document has no
            # metadata of its own. ``_`` is not defined in this file;
            # presumably the translation builtin -- verify.
            if not mi.title:
                mi.title = _('Unknown')
            if not mi.authors:
                mi.authors = [_('Unknown')]
            opf = OPFCreator(os.path.abspath(os.getcwdu()), mi)
            opf.create_manifest(
                [(os.path.abspath(path), None) for path in walk(os.getcwd())])
            opf.create_spine([os.path.abspath('index.xhtml')])
            with open('metadata.opf', 'wb') as f:
                opf.render(f)
            return os.path.abspath('metadata.opf')
55
class ODTInput(InputFormatPlugin):
    '''Input plugin that handles files with the ``odt`` extension.'''

    # NOTE(review): the name line is not visible in this file; calibre
    # plugins must define one -- confirm the registered name.
    name        = 'ODT Input'
    author      = 'Kovid Goyal'
    description = 'Convert ODT (OpenOffice) files to HTML'
    file_types  = set(['odt'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert the ODT document in ``stream`` into the current directory.

        Only ``stream`` is used; the remaining arguments are accepted to
        match the plugin interface.

        :return: Whatever ``Extract.__call__`` returns for this stream.
        '''
        return Extract()(stream, '.')

    def postprocess_book(self, oeb, opts, log):
        '''
        Retag every ``<p>`` that directly contains a ``<div>`` as a
        ``<div>`` in each spine item of ``oeb``.
        '''
        # Fix <p><div> constructs as the asinine epubchecker complains
        # about them
        from calibre.ebooks.oeb.base import XPath, XHTML
        path = XPath('//h:p/h:div')
        for item in oeb.spine:
            root = item.data
            # Skip items whose data is not a parsed tree
            if not hasattr(root, 'xpath'):
                continue
            for div in path(root):
                div.getparent().tag = XHTML('div')