3
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
4
__docformat__ = 'restructuredtext en'
7
Font size rationalization. See :meth:`Rationalizer.rationalize`.
10
import logging, re, operator, functools, collections, unittest, copy, sys
11
from xml.dom import SyntaxErr
13
from lxml.cssselect import CSSSelector
14
from lxml import etree
15
from lxml.html import HtmlElement
17
from calibre.ebooks.html import fromstring
18
from calibre.ebooks.epub import rules
19
from cssutils import CSSParser
21
# Regular expression fragments used to recognise CSS font-size and
# line-height values.

# A signed integer or a signed decimal number.
num = r'[-]?\d+|[-]?\d*\.\d+'
# A number followed by a unit, or a bare zero. The number+unit alternative
# must be tried first: otherwise the leading "0" of a value such as "0.5em"
# is consumed by the bare-zero alternative and the value is read as zero.
length = r'(?P<num>{num})(?P<unit>%|em|ex|px|in|cm|mm|pt|pc)|(?P<zero>0)'.replace('{num}', num)
# Keyword sizes: xx-small ... xx-large, and medium.
absolute_size = r'(?P<abs>(x?x-)?(small|large)|medium)'
# "smaller"/"larger" must be tried before the absolute keywords, because
# "small" and "large" are prefixes of them.
relative_size = r'(?P<rel>smaller|larger)'

# Matches any font size specification; exactly one of the named groups
# rel, abs, num/unit or zero is set on a match.
font_size_pat = re.compile('|'.join((relative_size, absolute_size, length)), re.I)
# Matches a line-height given in absolute units. Group 1 is the number and
# group 2 the unit. CSS units are case-insensitive, so match them that way
# (consumers lower-case group 2 before looking the unit up).
line_height_pat = re.compile(r'({num})(px|in|cm|mm|pt|pc)'.replace('{num}', num), re.I)
37
# Font size assigned to the root element before the font sizes of its
# descendants are computed (presumably in points -- TODO confirm).
DEFAULT_FONT_SIZE = 12
39
class Rationalizer(object):
42
def specificity(cls, s):
    '''
    Map a CSS specificity tuple to a single integer.

    Each component is weighted by a power of ten that decreases with its
    position (10**4 for the first component, 10**3 for the second, ...), so
    an earlier component outweighs all later ones as long as every component
    is below 10.

    :param s: Iterable of numeric specificity components, most significant first.
    :return: The weighted sum of the components (0 for an empty iterable).
    '''
    # The weight has to be multiplied in. Adding it (as was done before) made
    # the result depend only on the plain sum of the components, so that
    # (1, 0, 0, 0) and (0, 0, 0, 1) mapped to the same integer.
    return sum(10**(4-i) * x for i, x in enumerate(s))
47
def compute_font_size(cls, elem):
49
Calculate the effective font size of an element traversing its ancestors as far as
52
cfs = elem.computed_font_size
55
sfs = elem.specified_font_size
57
parent = elem.getparent()
58
cls.compute_font_size(parent)
59
elem.computed_font_size = sfs(parent.computed_font_size)
61
elem.computed_font_size = sfs
64
def calculate_font_size(cls, style):
65
'Return font size in pts from style object. For relative units returns a callable'
66
match = font_size_pat.search(style.font)
73
match = font_size_pat.search(fs)
76
match = match.groupdict()
77
unit = match.get('unit', '')
78
if unit: unit = unit.lower()
79
if unit in PTU.keys():
80
return PTU[unit] * float(match['num'])
81
if unit in ('em', 'ex'):
82
return functools.partial(operator.mul, float(match['num']))
84
return functools.partial(operator.mul, float(match['num'])/100.)
85
abs = match.get('abs', '')
86
if abs: abs = abs.lower()
88
x = (1.2)**(abs.count('x') * (-1 if 'small' in abs else 1))
90
if match.get('zero', False):
92
return functools.partial(operator.mul, 1.2) if 'larger' in fs.lower() else functools.partial(operator.mul, 0.8)
95
def resolve_rules(cls, stylesheets):
96
for sheet in stylesheets:
97
if hasattr(sheet, 'fs_rules'):
102
if r.type == r.STYLE_RULE:
103
font_size = cls.calculate_font_size(r.style)
104
if font_size is not None:
105
for s in r.selectorList:
106
sheet.fs_rules.append([CSSSelector(s.selectorText), font_size])
107
orig = line_height_pat.search(r.style.lineHeight)
109
for s in r.selectorList:
110
sheet.lh_rules.append([CSSSelector(s.selectorText), float(orig.group(1)) * PTU[orig.group(2).lower()]])
114
def apply_font_size_rules(cls, stylesheets, root):
115
'Add a ``specified_font_size`` attribute to every element that has a specified font size'
116
cls.resolve_rules(stylesheets)
117
for sheet in stylesheets:
118
for selector, font_size in sheet.fs_rules:
119
elems = selector(root)
121
elem.specified_font_size = font_size
124
def remove_font_size_information(cls, stylesheets):
125
for r in rules(stylesheets):
126
r.style.removeProperty('font-size')
128
new = font_size_pat.sub('', r.style.font).strip()
132
r.style.removeProperty('font')
134
r.style.removeProperty('font')
135
if line_height_pat.search(r.style.lineHeight) is not None:
136
r.style.removeProperty('line-height')
139
def compute_font_sizes(cls, root, stylesheets, base=12):
140
stylesheets = [s for s in stylesheets if hasattr(s, 'cssText')]
141
cls.apply_font_size_rules(stylesheets, root)
143
# Compute the effective font size of all tags
144
root.computed_font_size = DEFAULT_FONT_SIZE
145
for elem in root.iter(etree.Element):
146
cls.compute_font_size(elem)
150
# Calculate the "base" (i.e. most common) font size
151
font_sizes = collections.defaultdict(lambda : 0)
152
body = root.xpath('//body')[0]
153
IGNORE = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
154
for elem in body.iter(etree.Element):
155
if elem.tag not in IGNORE:
156
t = getattr(elem, 'text', '')
159
font_sizes[elem.computed_font_size] += len(t)
161
t = getattr(elem, 'tail', '')
164
parent = elem.getparent()
165
if parent.tag not in IGNORE:
166
font_sizes[parent.computed_font_size] += len(t)
169
most_common = max(font_sizes.items(), key=operator.itemgetter(1))[0]
170
scale = base/most_common if most_common > 0 else 1.
174
# rescale absolute line-heights
176
for sheet in stylesheets:
177
for selector, lh in sheet.lh_rules:
178
for elem in selector(root):
179
elem.set('id', elem.get('id', 'cfs_%d'%counter))
181
if not extra_css.has_key(elem.get('id')):
182
extra_css[elem.get('id')] = []
183
extra_css[elem.get('id')].append('line-height:%fpt'%(lh*scale))
187
# Rescale all computed font sizes
188
for elem in body.iter(etree.Element):
189
if isinstance(elem, HtmlElement):
190
elem.computed_font_size *= scale
192
# Remove all font size specifications from the last stylesheet
193
cls.remove_font_size_information(stylesheets[-1:])
195
# Create the CSS to implement the rescaled font sizes
196
for elem in body.iter(etree.Element):
197
cfs, pcfs = map(operator.attrgetter('computed_font_size'), (elem, elem.getparent()))
198
if abs(cfs-pcfs) > 1/12. and abs(pcfs) > 1/12.:
199
elem.set('id', elem.get('id', 'cfs_%d'%counter))
201
if not extra_css.has_key(elem.get('id')):
202
extra_css[elem.get('id')] = []
203
extra_css[elem.get('id')].append('font-size: %f%%'%(100*(cfs/pcfs)))
205
css = CSSParser(loglevel=logging.ERROR).parseString('')
206
for id, r in extra_css.items():
207
css.add('#%s {%s}'%(id, ';'.join(r)))
211
def rationalize(cls, stylesheets, root, opts):
212
logger = logging.getLogger('html2epub')
213
logger.info('\t\tRationalizing fonts...')
215
if opts.base_font_size2 > 0:
217
extra_css = cls.compute_font_sizes(root, stylesheets, base=opts.base_font_size2)
219
logger.warning('Failed to rationalize font sizes.')
223
root.remove_font_size_information()
224
logger.debug('\t\tDone rationalizing')
227
################################################################################
228
############## Testing
229
################################################################################
231
class FontTest(unittest.TestCase):
234
from calibre.ebooks.epub import config
235
self.opts = config(defaults='').parse()
239
<title>Test document</title>
244
<p id="p1">Some <b>text</b></p>
246
<p id="p2">Some other <span class="it">text</span>.</p>
247
<p id="longest">The longest piece of single font size text in this entire file. Used to test resizing.</p>
251
self.root = fromstring(self.html)
253
def do_test(self, css, base=DEFAULT_FONT_SIZE, scale=1):
    '''
    Run ``Rationalizer.compute_font_sizes`` on a copy of the test document
    with `css`, then recompute the font sizes of a second copy using only
    the stylesheet that call returned, and assert that both trees agree.

    Returns the ``cssText`` of the returned stylesheet.
    '''
    original = copy.deepcopy(self.root)
    original.computed_font_size = DEFAULT_FONT_SIZE
    source_sheet = CSSParser(loglevel=logging.ERROR).parseString(css)
    generated = Rationalizer.compute_font_sizes(original, [source_sheet], base)

    # Second tree: drop the existing size information and rebuild it from
    # the generated stylesheet alone.
    processed = copy.deepcopy(original)
    processed.remove_font_size_information()
    processed.computed_font_size = DEFAULT_FONT_SIZE
    Rationalizer.apply_font_size_rules([generated], processed)
    for node in processed.iter(etree.Element):
        Rationalizer.compute_font_size(node)

    body_before = original.xpath('//body')[0]
    body_after = processed.xpath('//body')[0]
    pairs = zip(body_before.iter(etree.Element), body_after.iter(etree.Element))
    for before, after in pairs:
        location = original.getroottree().getpath(before)
        details = (location, before.computed_font_size, after.computed_font_size)
        self.assertAlmostEqual(
            before.computed_font_size, after.computed_font_size,
            msg='Computed font sizes for %s not equal. Original: %f Processed: %f' % details)
    return generated.cssText
270
def testStripping(self):
    'Check that the original font size declarations are stripped from the CSS'
    source = 'p { font: bold 10px italic smaller; font-size: x-large} \na { font-size: 0 }'
    sheet = CSSParser(loglevel=logging.ERROR).parseString(source)
    document = copy.deepcopy(self.root)
    Rationalizer.compute_font_sizes(document, [sheet])
    # Compare with whitespace removed so serializer formatting does not matter
    compact = sheet.cssText.replace(' ', '').replace('\n', '')
    self.assertEqual(compact, 'p{font:bolditalic}')
278
def testIdentity(self):
    'Check that no unnecessary font size changes are made'
    source = 'div {font-size:12pt} \nspan {font-size:100%}'
    generated = self.do_test(source)
    stripped = generated.strip()
    self.assertEqual(stripped, '')
283
def testRelativization(self):
    'Check the conversion of absolute sizes to relative ones'
    source = '#p1 {font: 24pt} b {font: 12pt} .it {font: 48pt} #p2 {font: 100%}'
    self.do_test(source)
287
def testResizing(self):
    'Check the resizing of fonts'
    source = '#longest {font: 24pt} .it {font:20pt; line-height:22pt}'
    self.do_test(source)
293
return unittest.TestLoader().loadTestsFromTestCase(FontTest)
296
unittest.TextTestRunner(verbosity=2).run(suite())
298
if __name__ == '__main__':
b'\\ No newline at end of file'