1
# -*- coding: utf-8 -*-
3
pygments.formatters.html
4
~~~~~~~~~~~~~~~~~~~~~~~~
6
Formatter for HTML output.
8
:copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS.
9
:license: BSD, see LICENSE for details.
16
from pygments.formatter import Formatter
17
from pygments.token import Token, Text, STANDARD_TYPES
18
from pygments.util import get_bool_opt, get_int_opt, get_list_opt, bytes
21
__all__ = ['HtmlFormatter']
24
def escape_html(text):
    """Escape &, <, > as well as single and double quotes for HTML.

    :param text: the string to escape.
    :return: a copy of ``text`` in which the five characters above are
        replaced by HTML character references, so the result can be
        embedded in element content or in a quoted attribute value.
    """
    # The ampersand has to be handled first; otherwise the '&' introduced by
    # the later replacements would itself be escaped a second time.
    return (text.replace('&', '&amp;')
                .replace('<', '&lt;')
                .replace('>', '&gt;')
                .replace('"', '&quot;')
                .replace("'", '&#39;'))
34
"""Return a random id for javascript fields."""
35
from random import random
38
from hashlib import sha1 as sha
42
return sha('%s|%s' % (random(), time())).hexdigest()
45
def _get_ttype_class(ttype):
46
fname = STANDARD_TYPES.get(ttype)
51
aname = '-' + ttype[-1] + aname
53
fname = STANDARD_TYPES.get(ttype)
57
CSSFILE_TEMPLATE = '''\
58
td.linenos { background-color: #f0f0f0; padding-right: 10px; }
59
span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; }
60
pre { line-height: 125%%; }
65
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
66
"http://www.w3.org/TR/html4/strict.dtd">
70
<title>%(title)s</title>
71
<meta http-equiv="content-type" content="text/html; charset=%(encoding)s">
72
<style type="text/css">
73
''' + CSSFILE_TEMPLATE + '''
81
DOC_HEADER_EXTERNALCSS = '''\
82
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
83
"http://www.w3.org/TR/html4/strict.dtd">
87
<title>%(title)s</title>
88
<meta http-equiv="content-type" content="text/html; charset=%(encoding)s">
89
<link rel="stylesheet" href="%(cssfile)s" type="text/css">
102
class HtmlFormatter(Formatter):
104
Format tokens as HTML 4 ``<span>`` tags within a ``<pre>`` tag, wrapped
105
in a ``<div>`` tag. The ``<div>``'s CSS class can be set by the `cssclass`
108
If the `linenos` option is set to ``"table"``, the ``<pre>`` is
109
additionally wrapped inside a ``<table>`` which has one row and two
110
cells: one containing the line numbers and one containing the code.
115
<div class="highlight" >
117
<td class="linenos" title="click to toggle"
118
onclick="with (this.firstChild.style)
119
{ display = (display == '') ? 'none' : '' }">
124
<pre><span class="Ke">def </span><span class="NaFu">foo</span>(bar):
125
<span class="Ke">pass</span>
130
(whitespace added to improve clarity).
132
Wrapping can be disabled using the `nowrap` option.
134
A list of lines can be specified using the `hl_lines` option to make these
135
lines highlighted (as of Pygments 0.11).
137
With the `full` option, a complete HTML 4 document is output, including
138
the style definitions inside a ``<style>`` tag, or in a separate file if
139
the `cssfile` option is given.
141
The `get_style_defs(arg='')` method of a `HtmlFormatter` returns a string
142
containing CSS rules for the CSS classes used by the formatter. The
143
argument `arg` can be used to specify additional CSS selectors that
144
are prepended to the classes. A call `fmter.get_style_defs('td .code')`
145
would result in the following CSS classes:
149
td .code .kw { font-weight: bold; color: #00FF00 }
150
td .code .cm { color: #999999 }
153
If you have Pygments 0.6 or higher, you can also pass a list or tuple to the
154
`get_style_defs()` method to request multiple prefixes for the tokens:
156
.. sourcecode:: python
158
formatter.get_style_defs(['div.syntax pre', 'pre.syntax'])
160
The output would then look like this:
165
pre.syntax .kw { font-weight: bold; color: #00FF00 }
167
pre.syntax .cm { color: #999999 }
170
Additional options accepted:
173
If set to ``True``, don't wrap the tokens at all, not even inside a ``<pre>``
174
tag. This disables most other options (default: ``False``).
177
Tells the formatter to output a "full" document, i.e. a complete
178
self-contained document (default: ``False``).
181
If `full` is true, the title that should be used to caption the
182
document (default: ``''``).
185
The style to use, can be a string or a Style subclass (default:
186
``'default'``). This option has no effect if the `cssfile`
187
and `noclobber_cssfile` options are given and the file specified in
191
If set to true, token ``<span>`` tags will not use CSS classes, but
192
inline styles. This is not recommended for larger pieces of code since
193
it increases output size by quite a bit (default: ``False``).
196
Since the token types use relatively short class names, they may clash
197
with some of your own class names. In this case you can use the
198
`classprefix` option to give a string to prepend to all Pygments-generated
199
CSS class names for token types.
200
Note that this option also affects the output of `get_style_defs()`.
203
CSS class for the wrapping ``<div>`` tag (default: ``'highlight'``).
204
If you set this option, the default selector for `get_style_defs()`
207
*New in Pygments 0.9:* If you select the ``'table'`` line numbers, the
208
wrapping table will have a CSS class of this string plus ``'table'``,
209
the default is accordingly ``'highlighttable'``.
212
Inline CSS styles for the wrapping ``<div>`` tag (default: ``''``).
215
Inline CSS styles for the ``<pre>`` tag (default: ``''``). *New in
219
If the `full` option is true and this option is given, it must be the
220
name of an external file. If the filename does not include an absolute
221
path, the file's path will be assumed to be relative to the main output
222
file's path, if the latter can be found. The stylesheet is then written
223
to this file instead of the HTML file. *New in Pygments 0.6.*
226
If `cssfile` is given and the specified file exists, the css file will
227
not be overwritten. This allows the use of the `full` option in
228
combination with a user specified css file. Default is ``False``.
229
*New in Pygments 1.1.*
232
If set to ``'table'``, output line numbers as a table with two cells,
233
one containing the line numbers, the other the whole code. This is
234
copy-and-paste-friendly, but may cause alignment problems with some
235
browsers or fonts. If set to ``'inline'``, the line numbers will be
236
integrated in the ``<pre>`` tag that contains the code (that setting
237
is *new in Pygments 0.8*).
239
For compatibility with Pygments 0.7 and earlier, every true value
240
except ``'inline'`` means the same as ``'table'`` (in particular, that
241
means also ``True``).
243
The default value is ``False``, which means no line numbers at all.
245
**Note:** with the default ("table") line number mechanism, the line
246
numbers and code can have different line heights in Internet Explorer
247
unless you give the enclosing ``<pre>`` tags an explicit ``line-height``
248
CSS property (you get the default line spacing with ``line-height:
252
Specify a list of lines to be highlighted. *New in Pygments 0.11.*
255
The line number for the first line (default: ``1``).
258
If set to a number n > 1, only every nth line number is printed.
261
If set to a number n > 0, every nth line number is given the CSS
262
class ``"special"`` (default: ``0``).
265
If set to ``True``, the formatter won't output the background color
266
for the wrapping element (this automatically defaults to ``False``
267
when there is no wrapping element [eg: no argument for the
268
`get_style_defs` method given]) (default: ``False``). *New in
272
This string is output between lines of code. It defaults to ``"\n"``,
273
which is enough to break a line inside ``<pre>`` tags, but you can
274
e.g. set it to ``"<br>"`` to get HTML line breaks. *New in Pygments
278
If set to a nonempty string, e.g. ``foo``, the formatter will wrap each
279
output line in an anchor tag with a ``name`` of ``foo-linenumber``.
280
This allows easy linking to certain lines. *New in Pygments 0.9.*
283
If set to `True`, will wrap line numbers in <a> tags. Used in
284
combination with `linenos` and `lineanchors`.
287
**Subclassing the HTML formatter**
289
*New in Pygments 0.7.*
291
The HTML formatter is now built in a way that allows easy subclassing, thus
292
customizing the output HTML code. The `format()` method calls
293
`self._format_lines()` which returns a generator that yields tuples of ``(1,
294
line)``, where the ``1`` indicates that the ``line`` is a line of the
295
formatted source code.
297
If the `nowrap` option is set, the generator is then iterated over and the
298
resulting HTML is output.
300
Otherwise, `format()` calls `self.wrap()`, which wraps the generator with
301
other generators. These may add some HTML code to the one generated by
302
`_format_lines()`, either by modifying the lines generated by the latter,
303
then yielding them again with ``(1, line)``, and/or by yielding other HTML
304
code before or after the lines, with ``(0, html)``. The distinction between
305
source lines and other code makes it possible to wrap the generator multiple
308
The default `wrap()` implementation adds a ``<div>`` and a ``<pre>`` tag.
310
A custom `HtmlFormatter` subclass could look like this:
312
.. sourcecode:: python
314
class CodeHtmlFormatter(HtmlFormatter):
316
def wrap(self, source, outfile):
317
return self._wrap_code(source)
319
def _wrap_code(self, source):
323
# it's a line of formatted code
328
This results in wrapping the formatted lines with a ``<code>`` tag, where the
329
source lines are broken using ``<br>`` tags.
331
After calling `wrap()`, the `format()` method also adds the "line numbers"
332
and/or "full document" wrappers if the respective options are set. Then, all
333
HTML yielded by the wrapped generator is output.
338
filenames = ['*.html', '*.htm']
340
def __init__(self, **options):
341
Formatter.__init__(self, **options)
342
self.title = self._decodeifneeded(self.title)
343
self.nowrap = get_bool_opt(options, 'nowrap', False)
344
self.noclasses = get_bool_opt(options, 'noclasses', False)
345
self.classprefix = options.get('classprefix', '')
346
self.cssclass = self._decodeifneeded(options.get('cssclass', 'highlight'))
347
self.cssstyles = self._decodeifneeded(options.get('cssstyles', ''))
348
self.prestyles = self._decodeifneeded(options.get('prestyles', ''))
349
self.cssfile = self._decodeifneeded(options.get('cssfile', ''))
350
self.noclobber_cssfile = get_bool_opt(options, 'noclobber_cssfile', False)
352
linenos = options.get('linenos', False)
353
if linenos == 'inline':
356
# compatibility with <= 0.7
360
self.linenostart = abs(get_int_opt(options, 'linenostart', 1))
361
self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
362
self.linenospecial = abs(get_int_opt(options, 'linenospecial', 0))
363
self.nobackground = get_bool_opt(options, 'nobackground', False)
364
self.lineseparator = options.get('lineseparator', '\n')
365
self.lineanchors = options.get('lineanchors', '')
366
self.anchorlinenos = options.get('anchorlinenos', False)
367
self.hl_lines = set()
368
for lineno in get_list_opt(options, 'hl_lines', []):
370
self.hl_lines.add(int(lineno))
374
self._class_cache = {}
375
self._create_stylesheet()
377
def _get_css_class(self, ttype):
    """Return the css class of this token type prefixed with
    the classprefix option.

    The result is memoized in ``self._class_cache`` so that the class
    name for a given token type is only computed once per formatter.
    """
    try:
        return self._class_cache[ttype]
    except KeyError:
        # First request for this token type: compute the class name and
        # remember it, otherwise the cache lookup above could never hit.
        css_class = self.classprefix + _get_ttype_class(ttype)
        self._class_cache[ttype] = css_class
        return css_class
384
def _create_stylesheet(self):
385
t2c = self.ttype2class = {Token: ''}
386
c2s = self.class2style = {}
387
cp = self.classprefix
388
for ttype, ndef in self.style:
389
name = cp + _get_ttype_class(ttype)
392
style += 'color: #%s; ' % ndef['color']
394
style += 'font-weight: bold; '
396
style += 'font-style: italic; '
397
if ndef['underline']:
398
style += 'text-decoration: underline; '
400
style += 'background-color: #%s; ' % ndef['bgcolor']
402
style += 'border: 1px solid #%s; ' % ndef['border']
405
# save len(ttype) to enable ordering the styles by
406
# hierarchy (necessary for CSS cascading rules!)
407
c2s[name] = (style[:-2], ttype, len(ttype))
409
def get_style_defs(self, arg=None):
411
Return CSS style definitions for the classes produced by the current
412
highlighting style. ``arg`` can be a string or list of selectors to
413
insert before the token type classes.
416
arg = ('cssclass' in self.options and '.'+self.cssclass or '')
417
if isinstance(arg, basestring):
427
tmp.append((arg and arg + ' ' or '') + cls)
428
return ', '.join(tmp)
430
styles = [(level, ttype, cls, style)
431
for cls, (style, ttype, level) in self.class2style.iteritems()
434
lines = ['%s { %s } /* %s */' % (prefix(cls), style, repr(ttype)[6:])
435
for (level, ttype, cls, style) in styles]
436
if arg and not self.nobackground and \
437
self.style.background_color is not None:
439
if Text in self.ttype2class:
440
text_style = ' ' + self.class2style[self.ttype2class[Text]][0]
441
lines.insert(0, '%s { background: %s;%s }' %
442
(prefix(''), self.style.background_color, text_style))
443
if self.style.highlight_color is not None:
444
lines.insert(0, '%s.hll { background-color: %s }' %
445
(prefix(''), self.style.highlight_color))
446
return '\n'.join(lines)
448
def _decodeifneeded(self, value):
449
if isinstance(value, bytes):
451
return value.decode(self.encoding)
452
return value.decode()
455
def _wrap_full(self, inner, outfile):
457
if os.path.isabs(self.cssfile):
458
# it's an absolute filename
459
cssfilename = self.cssfile
462
filename = outfile.name
463
if not filename or filename[0] == '<':
464
# pseudo files, e.g. name == '<fdopen>'
466
cssfilename = os.path.join(os.path.dirname(filename),
468
except AttributeError:
469
print >>sys.stderr, 'Note: Cannot determine output file name, ' \
470
'using current directory as base for the CSS file name'
471
cssfilename = self.cssfile
472
# write CSS file only if noclobber_cssfile isn't given as an option.
474
if not os.path.exists(cssfilename) or not self.noclobber_cssfile:
475
cf = open(cssfilename, "w")
476
cf.write(CSSFILE_TEMPLATE %
477
{'styledefs': self.get_style_defs('body')})
480
err.strerror = 'Error writing CSS file: ' + err.strerror
483
yield 0, (DOC_HEADER_EXTERNALCSS %
484
dict(title = self.title,
485
cssfile = self.cssfile,
486
encoding = self.encoding))
488
yield 0, (DOC_HEADER %
489
dict(title = self.title,
490
styledefs = self.get_style_defs('body'),
491
encoding = self.encoding))
493
for t, line in inner:
497
def _wrap_tablelinenos(self, inner):
498
dummyoutfile = StringIO.StringIO()
500
for t, line in inner:
503
dummyoutfile.write(line)
505
fl = self.linenostart
506
mw = len(str(lncount + fl - 1))
507
sp = self.linenospecial
509
la = self.lineanchors
510
aln = self.anchorlinenos
514
for i in range(fl, fl+lncount):
518
lines.append('<a href="#%s-%d" class="special">%*d</a>' %
521
lines.append('<span class="special">%*d</span>' % (mw, i))
524
lines.append('<a href="#%s-%d">%*d</a>' % (la, i, mw, i))
526
lines.append('%*d' % (mw, i))
529
ls = '\n'.join(lines)
532
for i in range(fl, fl+lncount):
535
lines.append('<a href="#%s-%d">%*d</a>' % (la, i, mw, i))
537
lines.append('%*d' % (mw, i))
540
ls = '\n'.join(lines)
542
# in case you wonder about the seemingly redundant <div> here: since the
543
# content in the other cell also is wrapped in a div, some browsers in
544
# some configurations seem to mess up the formatting...
545
yield 0, ('<table class="%stable">' % self.cssclass +
546
'<tr><td class="linenos"><div class="linenodiv"><pre>' +
547
ls + '</pre></div></td><td class="code">')
548
yield 0, dummyoutfile.getvalue()
549
yield 0, '</td></tr></table>'
551
def _wrap_inlinelinenos(self, inner):
552
# need a list of lines since we need the width of a single number :(
554
sp = self.linenospecial
556
num = self.linenostart
557
mw = len(str(len(lines) + num - 1))
560
for t, line in lines:
561
yield 1, '<span class="lineno%s">%*s</span> ' % (
562
num%sp == 0 and ' special' or '', mw,
563
(num%st and ' ' or num)) + line
566
for t, line in lines:
567
yield 1, '<span class="lineno">%*s</span> ' % (
568
mw, (num%st and ' ' or num)) + line
571
def _wrap_lineanchors(self, inner):
574
for t, line in inner:
577
yield 1, '<a name="%s-%d"></a>' % (s, i) + line
581
def _wrap_div(self, inner):
583
if (self.noclasses and not self.nobackground and
584
self.style.background_color is not None):
585
style.append('background: %s' % (self.style.background_color,))
587
style.append(self.cssstyles)
588
style = '; '.join(style)
590
yield 0, ('<div' + (self.cssclass and ' class="%s"' % self.cssclass)
591
+ (style and (' style="%s"' % style)) + '>')
596
def _wrap_pre(self, inner):
599
style.append(self.prestyles)
601
style.append('line-height: 125%')
602
style = '; '.join(style)
604
yield 0, ('<pre' + (style and ' style="%s"' % style) + '>')
609
def _format_lines(self, tokensource):
611
Just format the tokens, without any wrapping tags.
612
Yield individual lines.
614
nocls = self.noclasses
615
lsep = self.lineseparator
616
# for <span style=""> lookup only
617
getcls = self.ttype2class.get
618
c2s = self.class2style
622
for ttype, value in tokensource:
624
cclass = getcls(ttype)
625
while cclass is None:
627
cclass = getcls(ttype)
628
cspan = cclass and '<span style="%s">' % c2s[cclass][0] or ''
630
cls = self._get_css_class(ttype)
631
cspan = cls and '<span class="%s">' % cls or ''
633
parts = escape_html(value).split('\n')
635
# for all but the last line
636
for part in parts[:-1]:
639
line += (lspan and '</span>') + cspan + part + \
640
(cspan and '</span>') + lsep
641
else: # both are the same
642
line += part + (lspan and '</span>') + lsep
646
yield 1, cspan + part + (cspan and '</span>') + lsep
650
if line and parts[-1]:
652
line += (lspan and '</span>') + cspan + parts[-1]
657
line = cspan + parts[-1]
659
# else we neither have to open a new span nor set lspan
662
yield 1, line + (lspan and '</span>') + lsep
664
def _highlight_lines(self, tokensource):
666
Highlight the lines specified in the `hl_lines` option by
667
post-processing the token stream coming from `_format_lines`.
671
for i, (t, value) in enumerate(tokensource):
674
if i + 1 in hls: # i + 1 because Python indexes start at 0
677
if self.style.highlight_color is not None:
678
style = (' style="background-color: %s"' %
679
(self.style.highlight_color,))
680
yield 1, '<span%s>%s</span>' % (style, value)
682
yield 1, '<span class="hll">%s</span>' % value
686
def wrap(self, source, outfile):
    """
    Wrap the ``source``, which is a generator yielding
    individual lines, in custom generators. See the docstring
    of `format_unencoded` for the protocol the generators follow.
    Can be overridden.

    The default implementation nests ``source`` inside `_wrap_pre` and
    then inside `_wrap_div`; ``outfile`` is not used here and is only
    part of the signature for overriding subclasses.
    """
    pre_wrapped = self._wrap_pre(source)
    return self._wrap_div(pre_wrapped)
694
def format_unencoded(self, tokensource, outfile):
696
The formatting process uses several nested generators; which of
697
them are used is determined by the user's options.
699
Each generator should take at least one argument, ``inner``,
700
and wrap the pieces of text generated by this.
702
Always yield 2-tuples: (code, text). If "code" is 1, the text
703
is part of the original tokensource being highlighted, if it's
704
0, the text is some piece of wrapping. This makes it possible to
705
use several different wrappers that process the original source
706
linewise, e.g. line number generators.
708
source = self._format_lines(tokensource)
710
source = self._highlight_lines(source)
712
if self.linenos == 2:
713
source = self._wrap_inlinelinenos(source)
715
source = self._wrap_lineanchors(source)
716
source = self.wrap(source, outfile)
717
if self.linenos == 1:
718
source = self._wrap_tablelinenos(source)
720
source = self._wrap_full(source, outfile)
722
for t, piece in source: