2
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
"""
This package contains logic to read and write LRF files.
The LRF file format is documented at U{http://www.sven.de/librie/Librie/LrfFormat}.
"""
8
from optparse import OptionValueError
9
from htmlentitydefs import name2codepoint
10
from uuid import uuid4
12
from calibre.ebooks.lrf.pylrs.pylrs import Book as _Book
13
from calibre.ebooks.lrf.pylrs.pylrs import TextBlock, Header, PutObj, \
14
Paragraph, TextStyle, BlockStyle
15
from calibre.ebooks.lrf.fonts import FONT_FILE_MAP
16
from calibre.ebooks import ConversionError
17
from calibre import __appname__, __version__, __author__, iswindows
18
from calibre.utils.config import OptionParser
20
__docformat__ = "epytext"
22
preferred_source_formats = [
39
class LRFParseError(Exception):
43
# Device profile: a plain namespace of layout constants that Book() reads
# (font_size, parindent, line_space, header_font_size, header_height,
# default_fonts).
# NOTE(review): Book() also reads profile.screen_width, profile.screen_height
# and profile.fudge, and the profile lookup table is keyed on
# PRS500_PROFILE.name; none of those attributes is visible in this listing --
# confirm they are defined on the class.
class PRS500_PROFILE(object):
47
# Number of pixels to subtract from screen_height when calculating height of text area
49
font_size = 10 #: Default (in pt)
50
parindent = 10 #: Default (in pt)
51
line_space = 1.2 #: Default (in pt)
52
header_font_size = 6 #: In pt
53
header_height = 30 #: In px
54
# Face names Book() falls back to for any family without a custom font.
default_fonts = { 'sans': "Swis721 BT Roman", 'mono': "Courier10 BT Roman",
55
'serif': "Dutch801 Rm BT Roman"}
60
PRS500_PROFILE.name : PRS500_PROFILE,
63
def profile_from_string(option, opt_str, value, parser):
    """
    optparse callback that turns a profile name into its profile object.

    Looks C{value} up in C{profile_map} and stores the result on
    C{parser.values} under C{option.dest}.

    @param option: The Option instance that triggered the callback.
    @param opt_str: The option string seen on the command line (unused).
    @param value: The profile name supplied by the user.
    @param parser: The OptionParser whose C{values} object receives the profile.
    @raise OptionValueError: If C{value} is not a key of C{profile_map}.
    """
    # Only the lookup can legitimately raise KeyError, so keep the try narrow.
    try:
        profile = profile_map[value]
    except KeyError:
        # list(...) keeps the message a plain list on every Python version.
        raise OptionValueError('Profile: '+value+' is not implemented. Implemented profiles: %s'%(list(profile_map),))
    setattr(parser.values, option.dest, profile)
70
# Build the command-line option parser for LRF conversion.
#
# `usage` and `gui_mode` are passed straight through to OptionParser.  Options
# are organised into the groups METADATA OPTIONS, LOOK AND FEEL, PAGE OPTIONS,
# LINK PROCESSING OPTIONS, CHAPTER OPTIONS, PREPROCESSING OPTIONS,
# FONT FAMILIES and DEBUG OPTIONS; a few are added to the parser directly.
#
# NOTE(review): several continuation lines (closing arguments of some
# add_option calls) and the function's final return appear to be missing from
# this listing, and indentation has been lost -- confirm against the original
# file before relying on it.
def option_parser(usage, gui_mode=False):
71
parser = OptionParser(usage=usage, gui_mode=gui_mode)
72
# Options describing the book's metadata: title, author, comment, category,
# sort keys, publisher and cover.
metadata = parser.add_option_group('METADATA OPTIONS')
73
metadata.add_option("-t", "--title", action="store", type="string", default=None,\
74
dest="title", help=_("Set the title. Default: filename."))
75
metadata.add_option("-a", "--author", action="store", type="string", \
76
dest="author", help=_("Set the author(s). Multiple authors should be set as a comma separated list. Default: %default"),
78
metadata.add_option("--comment", action="store", type="string", \
79
dest="freetext", help=_("Set the comment."), default=_('Unknown'))
80
metadata.add_option("--category", action="store", type="string", \
81
dest="category", help=_("Set the category"), default=_('Unknown'))
82
metadata.add_option('--title-sort', action='store', default='', dest='title_sort',
83
help=_('Sort key for the title'))
84
metadata.add_option('--author-sort', action='store', default='', dest='author_sort',
85
help=_('Sort key for the author'))
86
metadata.add_option('--publisher', action='store', default=_('Unknown'), dest='publisher',
88
metadata.add_option('--cover', action='store', dest='cover', default=None, \
89
help=_('Path to file containing image to be used as cover'))
90
metadata.add_option('--use-metadata-cover', action='store_true', default=False,
91
help=_('If there is a cover graphic detected in the source file, use that instead of the specified cover.'))
93
# These two options are added to the parser itself rather than to a group.
parser.add_option('-o', '--output', action='store', default=None, \
94
help=_('Output file name. Default is derived from input filename'))
95
parser.add_option('--ignore-tables', action='store_true', default=False, dest='ignore_tables',
96
help=_('Render HTML tables as blocks of text instead of actual tables. This is neccessary if the HTML contains very large or complex tables.'))
97
# Look-and-feel options: font sizes, spacing, page header, CSS override,
# spine ordering and colours.
laf = parser.add_option_group('LOOK AND FEEL')
98
laf.add_option('--base-font-size', action='store', type='float', default=10.,
99
help=_('''Specify the base font size in pts. All fonts are rescaled accordingly. This option obsoletes the --font-delta option and takes precedence over it. To use --font-delta, set this to 0. Default: %defaultpt'''))
100
laf.add_option('--enable-autorotation', action='store_true', default=False,
101
help=_('Enable autorotation of images that are wider than the screen width.'),
103
laf.add_option('--wordspace', dest='wordspace', default=2.5, type='float',
104
help=_('Set the space between words in pts. Default is %default'))
105
laf.add_option('--blank-after-para', action='store_true', default=False,
106
dest='blank_after_para', help=_('Separate paragraphs by blank lines.'))
107
laf.add_option('--header', action='store_true', default=False, dest='header',
108
help=_('Add a header to all the pages with title and author.'))
109
laf.add_option('--headerformat', default="%t by %a", dest='headerformat', type='string',
110
help=_('Set the format of the header. %a is replaced by the author and %t by the title. Default is %default'))
111
laf.add_option('--header-separation', default=0, type='int',
112
help=_('Add extra spacing below the header. Default is %default px.'))
113
laf.add_option('--override-css', default=None, dest='_override_css', type='string',
114
help=_('Override the CSS. Can be either a path to a CSS stylesheet or a string. If it is a string it is interpreted as CSS.'))
115
laf.add_option('--use-spine', default=False, dest='use_spine', action='store_true',
116
help=_('Use the <spine> element from the OPF file to determine the order in which the HTML files are appended to the LRF. The .opf file must be in the same directory as the base HTML file.'))
117
laf.add_option('--minimum-indent', default=0, type='float',
118
help=_('Minimum paragraph indent (the indent of the first line of a paragraph) in pts. Default: %default'))
119
laf.add_option('--font-delta', action='store', type='float', default=0., \
120
help=_("""Increase the font size by 2 * FONT_DELTA pts and """
121
'''the line spacing by FONT_DELTA pts. FONT_DELTA can be a fraction.'''
122
"""If FONT_DELTA is negative, the font size is decreased."""),
124
laf.add_option('--ignore-colors', action='store_true', default=False, dest='ignore_colors',
125
help=_('Render all content as black on white instead of the colors specified by the HTML or CSS.'))
127
# Page geometry (margins, table rendering) and the target-device profile.
page = parser.add_option_group('PAGE OPTIONS')
128
profiles = profile_map.keys()
129
# The profile name given on the command line is resolved to a profile object
# by the profile_from_string callback; the default is the class itself.
page.add_option('-p', '--profile', default=PRS500_PROFILE, dest='profile', type='choice',
130
choices=profiles, action='callback', callback=profile_from_string,
131
help=_('''Profile of the target device for which this LRF is '''
132
'''being generated. The profile determines things like the '''
133
'''resolution and screen size of the target device. '''
134
'''Default: %s Supported profiles: ''')%(PRS500_PROFILE.name,)+\
136
page.add_option('--left-margin', default=20, dest='left_margin', type='int',
137
help=_('''Left margin of page. Default is %default px.'''))
138
page.add_option('--right-margin', default=20, dest='right_margin', type='int',
139
help=_('''Right margin of page. Default is %default px.'''))
140
page.add_option('--top-margin', default=10, dest='top_margin', type='int',
141
help=_('''Top margin of page. Default is %default px.'''))
142
page.add_option('--bottom-margin', default=0, dest='bottom_margin', type='int',
143
help=_('''Bottom margin of page. Default is %default px.'''))
144
page.add_option('--render-tables-as-images', default=False, action='store_true',
145
help=_('Render tables in the HTML as images (useful if the document has large or complex tables)'))
146
page.add_option('--text-size-multiplier-for-rendered-tables', type='float', default=1.0,
147
help=_('Multiply the size of text in rendered tables by this factor. Default is %default'))
149
# Options controlling how <a> links are followed and listed in the TOC.
link = parser.add_option_group('LINK PROCESSING OPTIONS')
150
# NOTE(review): sys.maxint is used here but no `import sys` is visible in this
# listing -- presumably it is on one of the missing lines; confirm.
link.add_option('--link-levels', action='store', type='int', default=sys.maxint, \
152
help=_(r'''The maximum number of levels to recursively process '''
153
'''links. A value of 0 means thats links are not followed. '''
154
'''A negative value means that <a> tags are ignored.'''))
155
link.add_option('--link-exclude', dest='link_exclude', default='@',
156
help=_('''A regular expression. <a> tags whose href '''
157
'''matches will be ignored. Defaults to %default'''))
158
link.add_option('--no-links-in-toc', action='store_true', default=False,
159
dest='no_links_in_toc',
160
help=_('''Don't add links to the table of contents.'''))
161
# Chapter detection and page-break placement.
chapter = parser.add_option_group('CHAPTER OPTIONS')
162
chapter.add_option('--disable-chapter-detection', action='store_true',
163
default=False, dest='disable_chapter_detection',
164
help=_('''Prevent the automatic detection chapters.'''))
165
chapter.add_option('--chapter-regex', dest='chapter_regex',
166
default='chapter|book|appendix',
167
help=_('''The regular expression used to detect chapter titles.'''
168
''' It is searched for in heading tags (h1-h6). Defaults to %default'''))
169
chapter.add_option('--chapter-attr', default='$,,$',
170
help=_('Detect a chapter beginning at an element having the specified attribute. The format for this option is tagname regexp,attribute name,attribute value regexp. For example to match all heading tags that have the attribute class="chapter" you would use "h\d,class,chapter". You can set the attribute to "none" to match only on tag names. So for example, to match all h2 tags, you would use "h2,none,". Default is %default'''))
171
chapter.add_option('--page-break-before-tag', dest='page_break', default='h[12]',
172
help=_('''If html2lrf does not find any page breaks in the '''
173
'''html file and cannot detect chapter headings, it will '''
174
'''automatically insert page-breaks before the tags whose '''
175
'''names match this regular expression. Defaults to %default. '''
176
'''You can disable it by setting the regexp to "$". '''
177
'''The purpose of this option is to try to ensure that '''
178
'''there are no really long pages as this degrades the page '''
179
'''turn performance of the LRF. Thus this option is ignored '''
180
'''if the current page has only a few elements.'''))
181
chapter.add_option('--force-page-break-before-tag', dest='force_page_break',
182
default='$', help=_('Force a page break before tags whose names match this regular expression.'))
183
chapter.add_option('--force-page-break-before-attr', dest='force_page_break_attr',
184
default='$,,$', help=_('Force a page break before an element having the specified attribute. The format for this option is tagname regexp,attribute name,attribute value regexp. For example to match all heading tags that have the attribute class="chapter" you would use "h\d,class,chapter". Default is %default'''))
185
chapter.add_option('--add-chapters-to-toc', action='store_true',
186
default=False, dest='add_chapters_to_toc',
187
help=_('''Add detected chapters to the table of contents.'''))
188
# Source-specific preprocessing switches (Baen, pdftohtml, Book Designer).
prepro = parser.add_option_group('PREPROCESSING OPTIONS')
189
prepro.add_option('--baen', action='store_true', default=False, dest='baen',
190
help=_('''Preprocess Baen HTML files to improve generated LRF.'''))
191
prepro.add_option('--pdftohtml', action='store_true', default=False, dest='pdftohtml',
192
help=_('''You must add this option if processing files generated by pdftohtml, otherwise conversion will fail.'''))
193
prepro.add_option('--book-designer', action='store_true', default=False, dest='book_designer',
194
help=_('''Use this option on html0 files from Book Designer.'''))
196
# Font families to embed; these are the options find_custom_fonts() reads
# (serif_family, sans_family, mono_family).
fonts = parser.add_option_group('FONT FAMILIES',
197
_('''Specify trutype font families for serif, sans-serif and monospace fonts. '''
198
'''These fonts will be embedded in the LRF file. Note that custom fonts lead to '''
199
'''slower page turns. '''
201
'''--serif-family "Times New Roman"
203
fonts.add_option('--serif-family',
204
default=None, dest='serif_family', type='string',
205
help=_('The serif family of fonts to embed'))
206
fonts.add_option('--sans-family',
207
default=None, dest='sans_family', type='string',
208
help=_('The sans-serif family of fonts to embed'))
209
fonts.add_option('--mono-family',
210
default=None, dest='mono_family', type='string',
211
help=_('The monospace family of fonts to embed'))
213
debug = parser.add_option_group('DEBUG OPTIONS')
214
debug.add_option('--verbose', dest='verbose', action='store_true', default=False,
215
help=_('''Be verbose while processing'''))
216
debug.add_option('--lrs', action='store_true', dest='lrs', \
217
help=_('Convert to LRS'), default=False)
218
parser.add_option('--minimize-memory-usage', action='store_true', default=False,
219
help=_('Minimize memory usage at the cost of longer processing times. Use this option if you are on a memory constrained machine.'))
220
parser.add_option('--encoding', default=None,
221
help=_('Specify the character encoding of the source file. If the output LRF file contains strange characters, try changing this option. A common encoding for files from windows computers is cp-1252. Another common choice is utf-8. The default is to try and guess the encoding.'))
225
def find_custom_fonts(options, logger):
    """
    Locate the font files for the custom families requested in C{options}.

    For each of the C{serif}, C{sans} and C{mono} kinds, the matching
    C{options.<kind>_family} value (if set) is reduced to a family name and
    passed to C{files_for_family}. A warning is logged when nothing is found.

    @param options: Parsed options providing C{serif_family}, C{sans_family}
                    and C{mono_family}.
    @param logger: Logger used to report families that could not be found.
    @return: A dict with the keys C{'serif'}, C{'sans'} and C{'mono'}; each
             value is whatever C{files_for_family} returned, or C{None} when
             that family was not requested.
    """
    from calibre.utils.fontconfig import files_for_family

    def family(cmd):
        # Only the last comma-separated component is used as the family name.
        return cmd.split(',')[-1].strip()

    fonts = {'serif' : None, 'sans' : None, 'mono' : None}
    # Same lookup for every kind; the option attribute and the warning text
    # differ only by the kind name.
    for kind in ('serif', 'sans', 'mono'):
        requested = getattr(options, kind + '_family')
        if not requested:
            continue
        f = family(requested)
        fonts[kind] = files_for_family(f)
        if not fonts[kind]:
            logger.warn('Unable to find %s family %s'%(kind, f))
    return fonts
248
# Build the pylrs book object (`_Book`) for a conversion.
#
# Page-style values are collected in `ps` from the margins in `options` and
# the screen size in `profile`; text-style defaults are collected in `tsd`.
# Custom fonts returned by find_custom_fonts() are embedded in the book and
# recorded in FONT_FILE_MAP; families without a custom font fall back to
# profile.default_fonts.
#
# NOTE(review): this listing appears to be missing lines (where `ps` is
# created, the branch that uses `header`, the end of the `_Book(...)` call and
# the final return) and has lost its indentation -- confirm against the
# original file.
def Book(options, logger, font_delta=0, header=None,
249
profile=PRS500_PROFILE, **settings):
251
# Margins come from the options; the text area is what is left of the
# profile's screen after subtracting them.
ps['topmargin'] = options.top_margin
252
ps['evensidemargin'] = options.left_margin
253
ps['oddsidemargin'] = options.left_margin
254
ps['textwidth'] = profile.screen_width - (options.left_margin + options.right_margin)
255
ps['textheight'] = profile.screen_height - (options.top_margin + options.bottom_margin) \
259
hb = TextBlock(textStyle=TextStyle(align='foot',
260
fontsize=int(profile.header_font_size*10)),
261
blockStyle=BlockStyle(blockwidth=ps['textwidth']))
264
# Header geometry; textheight is then recomputed to leave room for it.
ps['headheight'] = profile.header_height
265
ps['headsep'] = options.header_separation
268
ps['textheight'] = profile.screen_height - (options.bottom_margin + ps['topmargin']) \
269
- ps['headheight'] - ps['headsep'] - profile.fudge
271
# Profile sizes (pt) are scaled by 10; font_delta*20 matches the --font-delta
# help text ("2 * FONT_DELTA pts").
fontsize = int(10*profile.font_size+font_delta*20)
272
baselineskip = fontsize + 20
273
fonts = find_custom_fonts(options, logger)
274
tsd = dict(fontsize=fontsize,
275
parindent=int(10*profile.parindent),
276
linespace=int(10*profile.line_space),
277
baselineskip=baselineskip,
278
wordspace=10*options.wordspace)
279
# When a custom serif family with a 'normal' variant was found, element [1] of
# that font tuple becomes the default face name.
if fonts['serif'] and fonts['serif'].has_key('normal'):
280
tsd['fontfacename'] = fonts['serif']['normal'][1]
282
book = _Book(textstyledefault=tsd,
284
blockstyledefault=dict(blockwidth=ps['textwidth']),
287
# Embed each custom font and record it in FONT_FILE_MAP (font[1] -> font[0]).
for family in fonts.keys():
289
for font in fonts[family].values():
290
book.embed_font(*font)
291
FONT_FILE_MAP[font[1]] = font[0]
293
# Families with no custom font fall back to the profile's default face; a
# custom family that lacks a 'normal' variant is treated as an error.
for family in ['serif', 'sans', 'mono']:
294
if not fonts[family]:
295
fonts[family] = { 'normal' : (None, profile.default_fonts[family]) }
296
elif not fonts[family].has_key('normal'):
297
# Python 2 raise syntax.
raise ConversionError, 'Could not find the normal version of the ' + family + ' font'
300
from calibre import entity_to_unicode