1
from __future__ import with_statement
3
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
4
'''Convert any ebook file into a LRF file.'''
6
import sys, os, logging, shutil, tempfile, re
8
from calibre.ebooks import UnknownFormatError
9
from calibre.ebooks.lrf import option_parser as _option_parser
10
from calibre import __appname__, setup_cli_handlers, extract
11
from calibre.ptempfile import TemporaryDirectory
12
from calibre.ebooks.lrf.lit.convert_from import process_file as lit2lrf
13
from calibre.ebooks.lrf.pdf.convert_from import process_file as pdf2lrf
14
from calibre.ebooks.lrf.rtf.convert_from import process_file as rtf2lrf
15
from calibre.ebooks.lrf.txt.convert_from import process_file as txt2lrf
16
from calibre.ebooks.lrf.html.convert_from import process_file as html2lrf
17
from calibre.ebooks.lrf.epub.convert_from import process_file as epub2lrf
18
from calibre.ebooks.lrf.mobi.convert_from import process_file as mobi2lrf
19
from calibre.ebooks.lrf.fb2.convert_from import process_file as fb22lrf
21
from calibre.customize.ui import run_plugins_on_postprocess, run_plugins_on_preprocess
23
def largest_file(files):
    '''
    Return the path of the largest file among ``files``.

    :param files: Iterable of filesystem paths; each one is passed to
                  ``os.stat``.
    :return: The path with the greatest ``st_size``. ``None`` is returned
             when ``files`` is empty or when no file is larger than zero
             bytes, because the running maximum starts at 0 and is only
             replaced by a strictly larger size. On a tie the first path
             encountered wins.
    :raises OSError: If one of the paths cannot be stat'ed.
    '''
    # ``largest`` rather than ``file`` so the Python 2 builtin is not shadowed.
    maxsize, largest = 0, None
    for f in files:
        size = os.stat(f).st_size
        if size > maxsize:
            maxsize, largest = size, f
    return largest
32
def find_htmlfile(dir):
    '''
    Pick the HTML file below ``dir`` that should be used as the book's
    starting point.

    Every file whose extension begins with ``.htm``/``.html``/``.xhtm``/
    ``.xhtml`` (case-insensitive) is collected and sorted into one of three
    groups: names containing "toc", names containing "index", and the rest.
    The first non-empty group, in that order, is handed to
    ``largest_file``.

    :param dir: Directory to search recursively.
    :return: Absolute path of the chosen file, or ``None`` when the tree
             contains no HTML file.
    '''
    ext_pat = re.compile(r'\.(x){0,1}htm(l){0,1}', re.IGNORECASE)
    toc_pat = re.compile(r'toc', re.IGNORECASE)
    index_pat = re.compile(r'index', re.IGNORECASE)
    toc_files, index_files, files = [], [], []
    # The toc/index patterns are applied to the part of the path below the
    # search root only. Searching the full absolute path would let an
    # unrelated parent directory name (e.g. ".../indexes/tmp123/") put every
    # file into the same group.
    top = os.path.abspath(dir)

    for root, dirs, _files in os.walk(dir):
        for name in _files:
            f = os.path.abspath(os.path.join(root, name))
            ext = os.path.splitext(f)[1]
            if not (ext and ext_pat.match(ext)):
                continue
            rel = f[len(top):]
            if toc_pat.search(rel):
                toc_files.append(f)
            elif index_pat.search(rel):
                index_files.append(f)
            else:
                files.append(f)

    # Preference order: table of contents, then index pages, then anything.
    a = toc_files or index_files or files
    if a:
        return largest_file(a)
    return None
50
def number_of_unhidden_files(base, listing):
    '''
    Count the visible, non-directory entries of a directory listing.

    :param base: Directory the names in ``listing`` are relative to.
    :param listing: Iterable of entry names, e.g. ``os.listdir(base)``.
    :return: Number of entries that are neither a directory nor have a
             name starting with ``.``.
    '''
    count = 0
    for name in listing:
        full = os.path.join(base, name)
        # Directories and dot-files do not count as content.
        if os.path.isdir(full) or os.path.basename(full).startswith('.'):
            continue
        count += 1
    return count
59
def unhidden_directories(base, listing):
    '''
    Return the names in ``listing`` that are visible sub-directories.

    :param base: Directory the names in ``listing`` are relative to.
    :param listing: Iterable of entry names, e.g. ``os.listdir(base)``.
    :return: List of names (not full paths), in listing order, that are
             directories under ``base`` and whose name starts with neither
             ``__`` nor ``.``.
    '''
    return [name for name in listing
            if os.path.isdir(os.path.join(base, name))
            and not name.startswith(('__', '.'))]
67
def traverse_subdirs(tdir):
    '''
    Descend from ``tdir`` to the first directory that actually holds files.

    While the current directory contains no visible regular files, step
    into its first visible sub-directory (as reported by
    ``unhidden_directories``).

    :param tdir: Directory to start from.
    :return: Path of the first directory on that chain that contains at
             least one visible file, or the deepest directory reached when
             there is nothing further to descend into.
    '''
    while True:
        temp = os.listdir(tdir)
        if number_of_unhidden_files(tdir, temp) != 0:
            return tdir
        subdirs = unhidden_directories(tdir, temp)
        if not subdirs:
            # Nothing visible at all: stay where we are instead of failing
            # on an empty list.
            return tdir
        tdir = os.path.join(tdir, subdirs[0])
77
# Locate the e-book among an archive's contents, using a freshly created
# temporary directory as the working area.
#
# NOTE(review): the bare integers between the statements are line-number
# residue from the listing this file was recovered from, and the numbering
# has gaps. The statement that fills ``tdir`` from ``path``, the
# initialisation of ``files``, the loop headers binding ``f`` and ``ext``,
# and the return statement are not visible here. The caller in
# process_file() unpacks the result as ``tdir, newpath``.
def handle_archive(path):
78
# Working directory; its name starts with "<appname>_archive_".
tdir = tempfile.mkdtemp(prefix=__appname__+'_'+'archive_')
81
# Directory whose direct entries are examined below, as chosen by
# traverse_subdirs().
cdir = traverse_subdirs(tdir)
83
# E-book extensions that are looked for directly in cdir.
exts = ['lit', 'rtf', 'fb2','pdf', 'txt', 'epub', 'mobi', 'prc']
84
# Full paths of every entry directly inside cdir.
candidates = map(lambda x:os.path.join(cdir, x), os.listdir(cdir))
87
# Case-insensitive extension test on a candidate path.
if f.lower().endswith('.'+ext):
89
# Of the files collected in ``files``, the largest one is used.
file = largest_file(files)
91
# HTML lookup over the whole cdir tree.
# NOTE(review): presumably only a fallback when nothing was found above;
# the guarding condition (listing line 90) is not visible - confirm.
file = find_htmlfile(cdir)
92
# A byte-string path is decoded with the filesystem encoding (Python 2).
if isinstance(file, str):
93
file = file.decode(sys.getfilesystemencoding())
96
# Convert an ODT document to LRF by running calibre's ODT ``Extract`` into a
# temporary directory and handing the resulting HTML to the HTML->LRF
# converter.
#
# path    - path of the input file, passed to Extract().
# options - conversion options; ``verbose`` is read, ``use_spine`` and
#           ``encoding`` are overwritten before the HTML conversion.
# logger  - logger passed on to the HTML converter.
#
# NOTE(review): the bare integers between the statements are line-number
# residue from the listing this file was recovered from. The numbering skips
# 99-100, so a statement may be missing before the logger setup (possibly a
# check that only creates the logger when none was passed) - confirm.
def odt2lrf(path, options, logger):
97
# Imported here rather than at module level.
from calibre.ebooks.odt.to_oeb import Extract
98
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
101
# DEBUG output when options.verbose is set, INFO otherwise.
level = logging.DEBUG if options.verbose else logging.INFO
102
logger = logging.getLogger('odt2lrf')
103
setup_cli_handlers(logger, level)
105
# The temporary directory only lives for the duration of the conversion.
with TemporaryDirectory('_odt2lrf') as tdir:
106
# NOTE(review): presumably returns the path of a generated metadata.opf;
# it is used as a string below - confirm against Extract.
opf = Extract()(path, tdir)
107
options.use_spine = True
108
options.encoding = 'utf-8'
109
# The HTML converter is pointed at index.html, derived from the OPF path by
# replacing 'metadata.opf' in it.
html_process_file(opf.replace('metadata.opf', 'index.html'), options, logger)
111
# Convert the e-book at ``path`` to LRF (or LRS), choosing the converter from
# the file extension. Archives are searched for an e-book first.
#
# path    - input file; ``~`` is expanded and the path made absolute.
# options - conversion options; ``verbose``, ``output`` and ``lrs`` are read
#           here and ``output`` is filled in / made absolute.
# logger  - logger handed to the converter (default None).
#
# Raises UnknownFormatError when no e-book is found in an archive or when the
# extension has no converter.
#
# NOTE(review): the bare integers between the statements are line-number
# residue from the listing this file was recovered from, and the numbering
# has large gaps. Among the statements not visible here are the
# initialisation of ``tdir``, whatever follows the two logger.critical()
# calls, the try/except structure around handle_archive(), most of the
# converter selection chain (listing lines 144-163) and the body of the final
# ``tdir`` check.
def process_file(path, options, logger=None):
112
path = os.path.abspath(os.path.expanduser(path))
113
# Preprocess plugins may substitute a different file; their return value is
# the path that is converted.
path = run_plugins_on_preprocess(path)
116
# Logger named 'any2lrf': DEBUG when options.verbose is set, INFO otherwise.
# NOTE(review): listing lines 114-115 are missing; presumably this setup is
# guarded so it only runs for the logger=None default - confirm.
level = logging.DEBUG if options.verbose else logging.INFO
117
logger = logging.getLogger('any2lrf')
118
setup_cli_handlers(logger, level)
119
# The input must be readable.
if not os.access(path, os.R_OK):
120
logger.critical('Cannot read from %s', path)
122
ext = os.path.splitext(path)[1]
123
# No extension (or a lone trailing dot) means the type cannot be determined.
if not ext or ext == '.':
124
logger.critical('Unknown file type: %s', path)
126
# Extension without the leading dot, lower-cased.
ext = ext[1:].lower()
128
# Default output name: base name of the input plus '.lrs' or '.lrf'
# depending on options.lrs.
if not options.output:
129
fmt = '.lrs' if options.lrs else '.lrf'
130
options.output = os.path.splitext(os.path.basename(path))[0] + fmt
131
options.output = os.path.abspath(os.path.expanduser(options.output))
132
# Archive inputs: handle_archive() yields the temporary directory and the
# path of the e-book found inside it.
if ext in ['zip', 'rar', 'oebzip']:
135
tdir, newpath = handle_archive(path)
137
# Logs the active exception with traceback.
# NOTE(review): the enclosing except clause is not visible.
logger.exception(' ')
139
raise UnknownFormatError('Could not find ebook in archive')
141
logger.info('Found ebook in archive: %s', repr(path))
143
# The extension is derived again from ``path`` at this point.
ext = os.path.splitext(path)[1][1:].lower()
157
# Kindle-family extensions share one branch of the converter selection.
elif ext in ['mobi', 'prc', 'azw']:
164
# Raised for an extension without a converter; the message is translated
# via _().
raise UnknownFormatError(_('Converting from %s to LRF is not supported.')%ext)
165
# The selected converter is called with (path, options, logger).
convertor(path, options, logger)
169
# NOTE(review): presumably cleanup of the archive's temporary directory; the
# body of this check is not visible - confirm.
if tdir and os.path.exists(tdir):
174
def option_parser(gui_mode=False):
    '''
    Build the command-line option parser for any2lrf.

    Delegates to the shared LRF option parser (imported in this module as
    ``_option_parser``) and supplies the any2lrf usage text.

    :param gui_mode: Passed through unchanged to the shared parser.
    :return: Whatever ``_option_parser`` returns.
    '''
    # The usage text is a translatable message: the stray line-number lines
    # that had ended up inside the literal are removed, the wording itself is
    # untouched.
    # NOTE(review): confirm the blank line and trailing whitespace against
    # the translation catalogue, since the text is looked up verbatim.
    return _option_parser(usage=_('''\
any2lrf [options] myfile

Convert any ebook format into LRF. Supported formats are:
LIT, RTF, TXT, HTML, EPUB, MOBI, PRC and PDF. any2lrf will also process a RAR or
ZIP archive, looking for an ebook inside the archive.
'''), gui_mode=gui_mode)
184
# Command-line entry point: parse ``args`` with the any2lrf option parser and
# convert the selected source file via process_file().
#
# args     - argument list (defaults to sys.argv).
# logger   - passed through to process_file().
# gui_mode - passed through to option_parser().
#
# Returns the result of process_file() on the conversion path.
#
# NOTE(review): the bare integers between the statements are line-number
# residue from the listing this file was recovered from. The numbering skips
# 187-189 and 191-193, so the argument check guarding the message below and
# the assignment of ``src`` are not visible here.
def main(args=sys.argv, logger=None, gui_mode=False):
185
parser = option_parser(gui_mode)
186
# ``args`` is rebound to the positional arguments left after option parsing.
options, args = parser.parse_args(args)
190
# Python 2 print statement; the message is translated via _().
print _('No file to convert specified.')
194
# A byte-string path is decoded with the filesystem encoding so that
# process_file() receives unicode (Python 2).
if not isinstance(src, unicode):
195
src = src.decode(sys.getfilesystemencoding())
196
return process_file(src, options, logger)
198
if __name__ == '__main__':