1
# $Id: states.py 7640 2013-03-25 20:57:52Z milde $
2
# Author: David Goodger <goodger@python.org>
3
# Copyright: This module has been placed in the public domain.
6
This is the ``docutils.parsers.rst.states`` module, the core of
7
the reStructuredText parser. It defines the following:
10
- `RSTStateMachine`: reStructuredText parser's entry point.
11
- `NestedStateMachine`: recursive StateMachine.
12
- `RSTState`: reStructuredText State superclass.
13
- `Inliner`: For parsing inline markup.
14
- `Body`: Generic classifier of the first line of a block.
15
- `SpecializedBody`: Superclass for compound element members.
16
- `BulletList`: Second and subsequent bullet_list list_items
17
- `DefinitionList`: Second+ definition_list_items.
18
- `EnumeratedList`: Second+ enumerated_list list_items.
19
- `FieldList`: Second+ fields.
20
- `OptionList`: Second+ option_list_items.
21
- `RFC2822List`: Second+ RFC2822-style fields.
22
- `ExtensionOptions`: Parses directive option fields.
23
- `Explicit`: Second+ explicit markup constructs.
24
- `SubstitutionDef`: For embedded directives in substitution definitions.
25
- `Text`: Classifier of second line of a text block.
26
- `SpecializedText`: Superclass for continuation lines of Text-variants.
27
- `Definition`: Second line of potential definition_list_item.
28
- `Line`: Second line of overlined section title or transition marker.
29
- `Struct`: An auxiliary collection class.
37
- `escape2null()`: Return a string, escape-backslashes converted to nulls.
38
- `unescape()`: Return a string, nulls removed or restored to backslashes.
41
- `state_classes`: set of State classes used with `RSTStateMachine`.
46
The reStructuredText parser is implemented as a recursive state machine,
47
examining its input one line at a time. To understand how the parser works,
48
please first become familiar with the `docutils.statemachine` module. In the
49
description below, references are made to classes defined in this module;
50
please see the individual classes for details.
52
Parsing proceeds as follows:
54
1. The state machine examines each line of input, checking each of the
55
transition patterns of the state `Body`, in order, looking for a match.
56
The implicit transitions (blank lines and indentation) are checked before
57
any others. The 'text' transition is a catch-all (matches anything).
59
2. The method associated with the matched transition pattern is called.
61
A. Some transition methods are self-contained, appending elements to the
62
document tree (`Body.doctest` parses a doctest block). The parser's
63
current line index is advanced to the end of the element, and parsing
64
continues with step 1.
66
B. Other transition methods trigger the creation of a nested state machine,
67
whose job is to parse a compound construct ('indent' does a block quote,
68
'bullet' does a bullet list, 'overline' does a section [first checking
69
for a valid section header], etc.).
71
- In the case of lists and explicit markup, a one-off state machine is
72
created and run to parse contents of the first item.
74
- A new state machine is created and its initial state is set to the
75
appropriate specialized state (`BulletList` in the case of the
76
'bullet' transition; see `SpecializedBody` for more detail). This
77
state machine is run to parse the compound element (or series of
78
explicit markup elements), and returns as soon as a non-member element
79
is encountered. For example, the `BulletList` state machine ends as
80
soon as it encounters an element which is not a list item of that
81
bullet list. The optional omission of inter-element blank lines is
82
enabled by this nested state machine.
84
- The current line index is advanced to the end of the elements parsed,
85
and parsing continues with step 1.
87
C. The result of the 'text' transition depends on the next line of text.
88
The current state is changed to `Text`, under which the second line is
89
examined. If the second line is:
91
- Indented: The element is a definition list item, and parsing proceeds
92
similarly to step 2.B, using the `DefinitionList` state.
94
- A line of uniform punctuation characters: The element is a section
95
header; again, parsing proceeds as in step 2.B, and `Body` is still
98
- Anything else: The element is a paragraph, which is examined for
99
inline markup and appended to the parent element. Processing
100
continues with step 1.
103
__docformat__ = 'reStructuredText'
108
from types import FunctionType, MethodType
110
from docutils import nodes, statemachine, utils
111
from docutils import ApplicationError, DataError
112
from docutils.statemachine import StateMachineWS, StateWS
113
from docutils.nodes import fully_normalize_name as normalize_name
114
from docutils.nodes import whitespace_normalize_name
115
import docutils.parsers.rst
116
from docutils.parsers.rst import directives, languages, tableparser, roles
117
from docutils.parsers.rst.languages import en as _fallback_language_module
118
from docutils.utils import escape2null, unescape, column_width
119
from docutils.utils import punctuation_chars, roman, urischemes
121
class MarkupError(DataError): pass  # NOTE(review): DataError subclass; presumably raised for invalid reST markup — raise sites not in this view, confirm
122
class UnknownInterpretedRoleError(DataError): pass  # NOTE(review): presumably raised when an interpreted-text role name is unregistered — confirm at raise sites
123
class InterpretedRoleNotImplementedError(DataError): pass  # NOTE(review): presumably raised when a recognized role has no implementation — confirm at raise sites
124
class ParserError(ApplicationError): pass  # ApplicationError subclass (application-level, not data-level); raise sites not in this view
125
class MarkupMismatch(Exception): pass  # plain Exception, deliberately outside the DataError/ApplicationError hierarchy; usage not in this view
130
"""Stores data attributes for dotted-attribute access."""
132
def __init__(self, **keywordargs):
    """Record every keyword argument as a same-named attribute on self."""
    for attr_name, attr_value in keywordargs.items():
        setattr(self, attr_name, attr_value)
136
class RSTStateMachine(StateMachineWS):
139
reStructuredText's master StateMachine.
141
The entry point to reStructuredText parsing is the `run()` method.
144
def run(self, input_lines, document, input_offset=0, match_titles=True,
147
Parse `input_lines` and modify the `document` node in place.
149
Extend `StateMachineWS.run()`: set up parse-global data and
150
run the StateMachine.
152
self.language = languages.get_language(
153
document.settings.language_code)
154
self.match_titles = match_titles
157
inliner.init_customizations(document.settings)
158
self.memo = Struct(document=document,
159
reporter=document.reporter,
160
language=self.language,
163
section_bubble_up_kludge=False,
165
self.document = document
166
self.attach_observer(document.note_source)
167
self.reporter = self.memo.reporter
169
results = StateMachineWS.run(self, input_lines, input_offset,
170
input_source=document['source'])
171
assert results == [], 'RSTStateMachine.run() results should be empty!'
172
self.node = self.memo = None # remove unneeded references
175
class NestedStateMachine(StateMachineWS):
178
StateMachine run from within other StateMachine runs, to parse nested
182
def run(self, input_lines, input_offset, memo, node, match_titles=True):
184
Parse `input_lines` and populate a `docutils.nodes.document` instance.
186
Extend `StateMachineWS.run()`: set up document-wide data.
188
self.match_titles = match_titles
190
self.document = memo.document
191
self.attach_observer(self.document.note_source)
192
self.reporter = memo.reporter
193
self.language = memo.language
195
results = StateMachineWS.run(self, input_lines, input_offset)
196
assert results == [], ('NestedStateMachine.run() results should be '
201
class RSTState(StateWS):
204
reStructuredText State superclass.
206
Contains methods used by all State subclasses.
209
nested_sm = NestedStateMachine
212
def __init__(self, state_machine, debug=False):
213
self.nested_sm_kwargs = {'state_classes': state_classes,
214
'initial_state': 'Body'}
215
StateWS.__init__(self, state_machine, debug)
217
def runtime_init(self):
218
StateWS.runtime_init(self)
219
memo = self.state_machine.memo
221
self.reporter = memo.reporter
222
self.inliner = memo.inliner
223
self.document = memo.document
224
self.parent = self.state_machine.node
225
# enable the reporter to determine source and source-line
226
if not hasattr(self.reporter, 'get_source_and_line'):
227
self.reporter.get_source_and_line = self.state_machine.get_source_and_line
228
# print "adding get_source_and_line to reporter", self.state_machine.input_offset
231
def goto_line(self, abs_line_offset):
233
Jump to input line `abs_line_offset`, ignoring jumps past the end.
236
self.state_machine.goto_line(abs_line_offset)
240
def no_match(self, context, transitions):
242
Override `StateWS.no_match` to generate a system message.
244
This code should never be run.
246
self.reporter.severe(
247
'Internal error: no transition pattern match. State: "%s"; '
248
'transitions: %s; context: %s; current line: %r.'
249
% (self.__class__.__name__, transitions, context,
250
self.state_machine.line))
251
return context, None, []
253
def bof(self, context):
254
"""Called at beginning of file."""
257
def nested_parse(self, block, input_offset, node, match_titles=False,
258
state_machine_class=None, state_machine_kwargs=None):
260
Create a new StateMachine rooted at `node` and run it over the input
264
if state_machine_class is None:
265
state_machine_class = self.nested_sm
267
if state_machine_kwargs is None:
268
state_machine_kwargs = self.nested_sm_kwargs
270
block_length = len(block)
275
state_machine = self.nested_sm_cache.pop()
278
if not state_machine:
279
state_machine = state_machine_class(debug=self.debug,
280
**state_machine_kwargs)
281
state_machine.run(block, input_offset, memo=self.memo,
282
node=node, match_titles=match_titles)
284
self.nested_sm_cache.append(state_machine)
286
state_machine.unlink()
287
new_offset = state_machine.abs_line_offset()
288
# No `block.parent` implies disconnected -- lines aren't in sync:
289
if block.parent and (len(block) - block_length) != 0:
290
# Adjustment for block if modified in nested parse:
291
self.state_machine.next_line(len(block) - block_length)
294
def nested_list_parse(self, block, input_offset, node, initial_state,
296
blank_finish_state=None,
299
state_machine_class=None,
300
state_machine_kwargs=None):
302
Create a new StateMachine rooted at `node` and run it over the input
303
`block`. Also keep track of optional intermediate blank lines and the
306
if state_machine_class is None:
307
state_machine_class = self.nested_sm
308
if state_machine_kwargs is None:
309
state_machine_kwargs = self.nested_sm_kwargs.copy()
310
state_machine_kwargs['initial_state'] = initial_state
311
state_machine = state_machine_class(debug=self.debug,
312
**state_machine_kwargs)
313
if blank_finish_state is None:
314
blank_finish_state = initial_state
315
state_machine.states[blank_finish_state].blank_finish = blank_finish
316
for key, value in extra_settings.items():
317
setattr(state_machine.states[initial_state], key, value)
318
state_machine.run(block, input_offset, memo=self.memo,
319
node=node, match_titles=match_titles)
320
blank_finish = state_machine.states[blank_finish_state].blank_finish
321
state_machine.unlink()
322
return state_machine.abs_line_offset(), blank_finish
324
def section(self, title, source, style, lineno, messages):
325
"""Check for a valid subsection and create one if it checks out."""
326
if self.check_subsection(source, style, lineno):
327
self.new_subsection(title, lineno, messages)
329
def check_subsection(self, source, style, lineno):
331
Check for a valid subsection header. Return 1 (true) or None (false).
333
When a new section is reached that isn't a subsection of the current
334
section, back up the line count (use ``previous_line(-x)``), then
335
``raise EOFError``. The current StateMachine will finish, then the
336
calling StateMachine can re-examine the title. This will work its way
337
back up the calling chain until the correct section level is reached.
339
@@@ Alternative: Evaluate the title, store the title info & level, and
340
back up the chain until that level is reached. Store in memo? Or
343
:Exception: `EOFError` when a sibling or supersection encountered.
346
title_styles = memo.title_styles
347
mylevel = memo.section_level
348
try: # check for existing title style
349
level = title_styles.index(style) + 1
350
except ValueError: # new title style
351
if len(title_styles) == memo.section_level: # new subsection
352
title_styles.append(style)
354
else: # not at lowest level
355
self.parent += self.title_inconsistent(source, lineno)
357
if level <= mylevel: # sibling or supersection
358
memo.section_level = level # bubble up to parent section
360
memo.section_bubble_up_kludge = True
361
# back up 2 lines for underline title, 3 for overline title
362
self.state_machine.previous_line(len(style) + 1)
363
raise EOFError # let parent section re-evaluate
364
if level == mylevel + 1: # immediate subsection
366
else: # invalid subsection
367
self.parent += self.title_inconsistent(source, lineno)
370
def title_inconsistent(self, sourcetext, lineno):
371
error = self.reporter.severe(
372
'Title level inconsistent:', nodes.literal_block('', sourcetext),
376
def new_subsection(self, title, lineno, messages):
377
"""Append new subsection to document tree. On return, check level."""
379
mylevel = memo.section_level
380
memo.section_level += 1
381
section_node = nodes.section()
382
self.parent += section_node
383
textnodes, title_messages = self.inline_text(title, lineno)
384
titlenode = nodes.title(title, '', *textnodes)
385
name = normalize_name(titlenode.astext())
386
section_node['names'].append(name)
387
section_node += titlenode
388
section_node += messages
389
section_node += title_messages
390
self.document.note_implicit_target(section_node, section_node)
391
offset = self.state_machine.line_offset + 1
392
absoffset = self.state_machine.abs_line_offset() + 1
393
newabsoffset = self.nested_parse(
394
self.state_machine.input_lines[offset:], input_offset=absoffset,
395
node=section_node, match_titles=True)
396
self.goto_line(newabsoffset)
397
if memo.section_level <= mylevel: # can't handle next section?
398
raise EOFError # bubble up to supersection
399
# reset section_level; next pass will detect it properly
400
memo.section_level = mylevel
402
def paragraph(self, lines, lineno):
404
Return a list (paragraph & messages) & a boolean: literal_block next?
406
data = '\n'.join(lines).rstrip()
407
if re.search(r'(?<!\\)(\\\\)*::$', data):
410
elif data[-3] in ' \n':
411
text = data[:-3].rstrip()
418
textnodes, messages = self.inline_text(text, lineno)
419
p = nodes.paragraph(data, '', *textnodes)
420
p.source, p.line = self.state_machine.get_source_and_line(lineno)
421
return [p] + messages, literalnext
423
def inline_text(self, text, lineno):
425
Return 2 lists: nodes (text and inline elements), and system_messages.
427
return self.inliner.parse(text, lineno, self.memo, self.parent)
429
def unindent_warning(self, node_name):
430
# the actual problem is one line below the current line
431
lineno = self.state_machine.abs_line_number()+1
432
return self.reporter.warning('%s ends without a blank line; '
433
'unexpected unindent.' % node_name,
437
def build_regexp(definition, compile=True):
    """
    Build, compile and return a regular expression based on `definition`.

    :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
        where "parts" is a list of regular expressions and/or regular
        expression definitions to be joined into an or-group.

    If `compile` is true, return a compiled pattern object; otherwise
    return the pattern string (used for nested definitions below).
    Note: the parameter name `compile` shadows the builtin, but it is kept
    for backward compatibility with keyword callers.
    """
    name, prefix, suffix, parts = definition
    part_strings = []
    for part in parts:
        if type(part) is tuple:
            # Nested definition: expand recursively to its pattern string
            # (pass a false `compile` so the recursion returns text).
            part_strings.append(build_regexp(part, None))
        else:
            part_strings.append(part)
    or_group = '|'.join(part_strings)
    regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
    if compile:
        return re.compile(regexp, re.UNICODE)
    else:
        return regexp
463
Parse inline markup; call the `parse()` method.
467
self.implicit_dispatch = [(self.patterns.uri, self.standalone_uri),]
468
"""List of (pattern, bound method) tuples, used by
469
`self.implicit_inline`."""
471
def init_customizations(self, settings):
472
"""Setting-based customizations; run when parsing begins."""
473
if settings.pep_references:
474
self.implicit_dispatch.append((self.patterns.pep,
476
if settings.rfc_references:
477
self.implicit_dispatch.append((self.patterns.rfc,
480
def parse(self, text, lineno, memo, parent):
481
# Needs to be refactored for nested inline markup.
482
# Add nested_parse() method?
484
Return 2 lists: nodes (text and inline elements), and system_messages.
486
Using `self.patterns.initial`, a pattern which matches start-strings
487
(emphasis, strong, interpreted, phrase reference, literal,
488
substitution reference, and inline target) and complete constructs
489
(simple reference, footnote reference), search for a candidate. When
490
one is found, check for validity (e.g., not a quoted '*' character).
491
If valid, search for the corresponding end string if applicable, and
492
check it for validity. If not found or invalid, generate a warning
493
and ignore the start-string. Implicit inline markup (e.g. standalone
496
self.reporter = memo.reporter
497
self.document = memo.document
498
self.language = memo.language
500
pattern_search = self.patterns.initial.search
501
dispatch = self.dispatch
502
remaining = escape2null(text)
507
match = pattern_search(remaining)
509
groups = match.groupdict()
510
method = dispatch[groups['start'] or groups['backquote']
511
or groups['refend'] or groups['fnend']]
512
before, inlines, remaining, sysmessages = method(self, match,
514
unprocessed.append(before)
515
messages += sysmessages
517
processed += self.implicit_inline(''.join(unprocessed),
523
remaining = ''.join(unprocessed) + remaining
525
processed += self.implicit_inline(remaining, lineno)
526
return processed, messages
528
# Inline object recognition
529
# -------------------------
530
# lookahead and look-behind expressions for inline markup rules
531
start_string_prefix = (u'(^|(?<=\\s|[%s%s]))' %
532
(punctuation_chars.openers,
533
punctuation_chars.delimiters))
534
end_string_suffix = (u'($|(?=\\s|[\x00%s%s%s]))' %
535
(punctuation_chars.closing_delimiters,
536
punctuation_chars.delimiters,
537
punctuation_chars.closers))
538
# print start_string_prefix.encode('utf8')
539
# TODO: support non-ASCII whitespace in the following 4 patterns?
540
non_whitespace_before = r'(?<![ \n])'
541
non_whitespace_escape_before = r'(?<![ \n\x00])'
542
non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[ \n\x00])'
543
non_whitespace_after = r'(?![ \n])'
544
# Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
545
simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
546
# Valid URI characters (see RFC 2396 & RFC 2732);
547
# final \x00 allows backslash escapes in URIs:
548
uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
549
# Delimiter indicating the end of a URI (not part of the URI):
550
uri_end_delim = r"""[>]"""
551
# Last URI character; same as uric but no punctuation:
552
urilast = r"""[_~*/=+a-zA-Z0-9]"""
553
# End of a URI (either 'urilast' or 'uric followed by a
555
uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
556
emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
558
%(emailc)s+(?:\.%(emailc)s+)* # name
560
%(emailc)s+(?:\.%(emailc)s*)* # host
561
%(uri_end)s # final URI char
563
parts = ('initial_inline', start_string_prefix, '',
564
[('start', '', non_whitespace_after, # simple start-strings
566
r'\*(?!\*)', # emphasis but not strong
568
r'_`', # inline internal target
569
r'\|(?!\|)'] # substitution reference
571
('whole', '', end_string_suffix, # whole constructs
572
[# reference name & end-string
573
r'(?P<refname>%s)(?P<refend>__?)' % simplename,
574
('footnotelabel', r'\[', r'(?P<fnend>\]_)',
575
[r'[0-9]+', # manually numbered
576
r'\#(%s)?' % simplename, # auto-numbered (w/ label?)
578
r'(?P<citationlabel>%s)' % simplename] # citation reference
582
('backquote', # interpreted text or phrase reference
583
'(?P<role>(:%s:)?)' % simplename, # optional role
584
non_whitespace_after,
585
['`(?!`)'] # but not literal
590
initial=build_regexp(parts),
591
emphasis=re.compile(non_whitespace_escape_before
592
+ r'(\*)' + end_string_suffix, re.UNICODE),
593
strong=re.compile(non_whitespace_escape_before
594
+ r'(\*\*)' + end_string_suffix, re.UNICODE),
595
interpreted_or_phrase_ref=re.compile(
597
%(non_unescaped_whitespace_escape_before)s
601
(?P<role>:%(simplename)s:)?
605
%(end_string_suffix)s
606
""" % locals(), re.VERBOSE | re.UNICODE),
607
embedded_link=re.compile(
610
(?:[ \n]+|^) # spaces or beginning of line/string
612
%(non_whitespace_after)s
613
([^<>\x00]+(\x00_)?) # anything but angle brackets & nulls
614
# except escaped trailing low line
615
%(non_whitespace_before)s
616
> # close bracket w/o whitespace before
619
""" % locals(), re.VERBOSE | re.UNICODE),
620
literal=re.compile(non_whitespace_before + '(``)'
621
+ end_string_suffix),
622
target=re.compile(non_whitespace_escape_before
623
+ r'(`)' + end_string_suffix),
624
substitution_ref=re.compile(non_whitespace_escape_before
626
+ end_string_suffix),
627
email=re.compile(email_pattern % locals() + '$',
628
re.VERBOSE | re.UNICODE),
631
%(start_string_prefix)s
633
(?P<absolute> # absolute URI
634
(?P<scheme> # scheme (http, ftp, mailto)
635
[a-zA-Z][a-zA-Z0-9.+-]*
640
(//?)? # hierarchical URI
641
%(uric)s* # URI characters
642
%(uri_end)s # final URI char
648
( # optional fragment
655
(?P<email> # email address
656
""" + email_pattern + r"""
659
%(end_string_suffix)s
660
""") % locals(), re.VERBOSE | re.UNICODE),
663
%(start_string_prefix)s
665
(pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
667
(PEP\s+(?P<pepnum2>\d+)) # reference by name
669
%(end_string_suffix)s""" % locals(), re.VERBOSE | re.UNICODE),
672
%(start_string_prefix)s
673
(RFC(-|\s+)?(?P<rfcnum>\d+))
674
%(end_string_suffix)s""" % locals(), re.VERBOSE | re.UNICODE))
676
def quoted_start(self, match):
677
"""Test if inline markup start-string is 'quoted'.
679
'Quoted' in this context means the start-string is enclosed in a pair
680
of matching opening/closing delimiters (not necessarily quotes)
681
or at the end of the match.
683
string = match.string
684
start = match.start()
685
if start == 0: # start-string at beginning of text
687
prestart = string[start - 1]
689
poststart = string[match.end()]
690
except IndexError: # start-string at end of text
691
return True # not "quoted" but no markup start-string either
692
return punctuation_chars.match_chars(prestart, poststart)
694
def inline_obj(self, match, lineno, end_pattern, nodeclass,
695
restore_backslashes=False):
696
string = match.string
697
matchstart = match.start('start')
698
matchend = match.end('start')
699
if self.quoted_start(match):
700
return (string[:matchend], [], string[matchend:], [], '')
701
endmatch = end_pattern.search(string[matchend:])
702
if endmatch and endmatch.start(1): # 1 or more chars
703
text = unescape(endmatch.string[:endmatch.start(1)],
705
textend = matchend + endmatch.end(1)
706
rawsource = unescape(string[matchstart:textend], 1)
707
return (string[:matchstart], [nodeclass(rawsource, text)],
708
string[textend:], [], endmatch.group(1))
709
msg = self.reporter.warning(
710
'Inline %s start-string without end-string.'
711
% nodeclass.__name__, line=lineno)
712
text = unescape(string[matchstart:matchend], 1)
713
rawsource = unescape(string[matchstart:matchend], 1)
714
prb = self.problematic(text, rawsource, msg)
715
return string[:matchstart], [prb], string[matchend:], [msg], ''
717
def problematic(self, text, rawsource, message):
718
msgid = self.document.set_id(message, self.parent)
719
problematic = nodes.problematic(rawsource, text, refid=msgid)
720
prbid = self.document.set_id(problematic)
721
message.add_backref(prbid)
724
def emphasis(self, match, lineno):
725
before, inlines, remaining, sysmessages, endstring = self.inline_obj(
726
match, lineno, self.patterns.emphasis, nodes.emphasis)
727
return before, inlines, remaining, sysmessages
729
def strong(self, match, lineno):
730
before, inlines, remaining, sysmessages, endstring = self.inline_obj(
731
match, lineno, self.patterns.strong, nodes.strong)
732
return before, inlines, remaining, sysmessages
734
def interpreted_or_phrase_ref(self, match, lineno):
735
end_pattern = self.patterns.interpreted_or_phrase_ref
736
string = match.string
737
matchstart = match.start('backquote')
738
matchend = match.end('backquote')
739
rolestart = match.start('role')
740
role = match.group('role')
745
elif self.quoted_start(match):
746
return (string[:matchend], [], string[matchend:], [])
747
endmatch = end_pattern.search(string[matchend:])
748
if endmatch and endmatch.start(1): # 1 or more chars
749
textend = matchend + endmatch.end()
750
if endmatch.group('role'):
752
msg = self.reporter.warning(
753
'Multiple roles in interpreted text (both '
754
'prefix and suffix present; only one allowed).',
756
text = unescape(string[rolestart:textend], 1)
757
prb = self.problematic(text, text, msg)
758
return string[:rolestart], [prb], string[textend:], [msg]
759
role = endmatch.group('suffix')[1:-1]
761
escaped = endmatch.string[:endmatch.start(1)]
762
rawsource = unescape(string[matchstart:textend], 1)
763
if rawsource[-1:] == '_':
765
msg = self.reporter.warning(
766
'Mismatch: both interpreted text role %s and '
767
'reference suffix.' % position, line=lineno)
768
text = unescape(string[rolestart:textend], 1)
769
prb = self.problematic(text, text, msg)
770
return string[:rolestart], [prb], string[textend:], [msg]
771
return self.phrase_ref(string[:matchstart], string[textend:],
772
rawsource, escaped, unescape(escaped))
774
rawsource = unescape(string[rolestart:textend], 1)
775
nodelist, messages = self.interpreted(rawsource, escaped, role,
777
return (string[:rolestart], nodelist,
778
string[textend:], messages)
779
msg = self.reporter.warning(
780
'Inline interpreted text or phrase reference start-string '
781
'without end-string.', line=lineno)
782
text = unescape(string[matchstart:matchend], 1)
783
prb = self.problematic(text, text, msg)
784
return string[:matchstart], [prb], string[matchend:], [msg]
786
def phrase_ref(self, before, after, rawsource, escaped, text):
787
match = self.patterns.embedded_link.search(escaped)
788
if match: # embedded <URI> or <alias_>
789
text = unescape(escaped[:match.start(0)])
790
aliastext = unescape(match.group(2), restore_backslashes=True)
791
if aliastext.endswith('_') and not (aliastext.endswith(r'\_')
792
or self.patterns.uri.match(aliastext)):
794
alias = normalize_name(aliastext[:-1])
795
target = nodes.target(match.group(1), refname=alias)
796
target.indirect_reference_name = aliastext[:-1]
799
alias = ''.join(aliastext.split())
800
alias = self.adjust_uri(alias)
801
if alias.endswith(r'\_'):
802
alias = alias[:-2] + '_'
803
target = nodes.target(match.group(1), refuri=alias)
804
target.referenced = 1
806
raise ApplicationError('problem with embedded link: %r'
813
refname = normalize_name(text)
814
reference = nodes.reference(rawsource, text,
815
name=whitespace_normalize_name(text))
816
node_list = [reference]
818
if rawsource[-2:] == '__':
819
if target and (aliastype == 'name'):
820
reference['refname'] = alias
821
self.document.note_refname(reference)
822
# self.document.note_indirect_target(target) # required?
823
elif target and (aliastype == 'uri'):
824
reference['refuri'] = alias
826
reference['anonymous'] = 1
829
target['names'].append(refname)
830
if aliastype == 'name':
831
reference['refname'] = alias
832
self.document.note_indirect_target(target)
833
self.document.note_refname(reference)
835
reference['refuri'] = alias
836
self.document.note_explicit_target(target, self.parent)
837
# target.note_referenced_by(name=refname)
838
node_list.append(target)
840
reference['refname'] = refname
841
self.document.note_refname(reference)
842
return before, node_list, after, []
845
def adjust_uri(self, uri):
846
match = self.patterns.email.match(uri)
848
return 'mailto:' + uri
852
def interpreted(self, rawsource, text, role, lineno):
853
role_fn, messages = roles.role(role, self.language, lineno,
856
nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
857
return nodes, messages + messages2
859
msg = self.reporter.error(
860
'Unknown interpreted text role "%s".' % role,
862
return ([self.problematic(rawsource, rawsource, msg)],
865
def literal(self, match, lineno):
866
before, inlines, remaining, sysmessages, endstring = self.inline_obj(
867
match, lineno, self.patterns.literal, nodes.literal,
868
restore_backslashes=True)
869
return before, inlines, remaining, sysmessages
871
def inline_internal_target(self, match, lineno):
872
before, inlines, remaining, sysmessages, endstring = self.inline_obj(
873
match, lineno, self.patterns.target, nodes.target)
874
if inlines and isinstance(inlines[0], nodes.target):
875
assert len(inlines) == 1
877
name = normalize_name(target.astext())
878
target['names'].append(name)
879
self.document.note_explicit_target(target, self.parent)
880
return before, inlines, remaining, sysmessages
882
def substitution_reference(self, match, lineno):
883
before, inlines, remaining, sysmessages, endstring = self.inline_obj(
884
match, lineno, self.patterns.substitution_ref,
885
nodes.substitution_reference)
886
if len(inlines) == 1:
887
subref_node = inlines[0]
888
if isinstance(subref_node, nodes.substitution_reference):
889
subref_text = subref_node.astext()
890
self.document.note_substitution_ref(subref_node, subref_text)
891
if endstring[-1:] == '_':
892
reference_node = nodes.reference(
893
'|%s%s' % (subref_text, endstring), '')
894
if endstring[-2:] == '__':
895
reference_node['anonymous'] = 1
897
reference_node['refname'] = normalize_name(subref_text)
898
self.document.note_refname(reference_node)
899
reference_node += subref_node
900
inlines = [reference_node]
901
return before, inlines, remaining, sysmessages
903
def footnote_reference(self, match, lineno):
905
Handles `nodes.footnote_reference` and `nodes.citation_reference`
908
label = match.group('footnotelabel')
909
refname = normalize_name(label)
910
string = match.string
911
before = string[:match.start('whole')]
912
remaining = string[match.end('whole'):]
913
if match.group('citationlabel'):
914
refnode = nodes.citation_reference('[%s]_' % label,
916
refnode += nodes.Text(label)
917
self.document.note_citation_ref(refnode)
919
refnode = nodes.footnote_reference('[%s]_' % label)
920
if refname[0] == '#':
921
refname = refname[1:]
923
self.document.note_autofootnote_ref(refnode)
926
refnode['auto'] = '*'
927
self.document.note_symbol_footnote_ref(
930
refnode += nodes.Text(label)
932
refnode['refname'] = refname
933
self.document.note_footnote_ref(refnode)
934
if utils.get_trim_footnote_ref_space(self.document.settings):
935
before = before.rstrip()
936
return (before, [refnode], remaining, [])
938
def reference(self, match, lineno, anonymous=False):
939
referencename = match.group('refname')
940
refname = normalize_name(referencename)
941
referencenode = nodes.reference(
942
referencename + match.group('refend'), referencename,
943
name=whitespace_normalize_name(referencename))
945
referencenode['anonymous'] = 1
947
referencenode['refname'] = refname
948
self.document.note_refname(referencenode)
949
string = match.string
950
matchstart = match.start('whole')
951
matchend = match.end('whole')
952
return (string[:matchstart], [referencenode], string[matchend:], [])
954
def anonymous_reference(self, match, lineno):
955
return self.reference(match, lineno, anonymous=1)
957
def standalone_uri(self, match, lineno):
958
if (not match.group('scheme')
959
or match.group('scheme').lower() in urischemes.schemes):
960
if match.group('email'):
961
addscheme = 'mailto:'
964
text = match.group('whole')
965
unescaped = unescape(text, 0)
966
return [nodes.reference(unescape(text, 1), unescaped,
967
refuri=addscheme + unescaped)]
968
else: # not a valid scheme
971
def pep_reference(self, match, lineno):
972
text = match.group(0)
973
if text.startswith('pep-'):
974
pepnum = int(match.group('pepnum1'))
975
elif text.startswith('PEP'):
976
pepnum = int(match.group('pepnum2'))
979
ref = (self.document.settings.pep_base_url
980
+ self.document.settings.pep_file_url_template % pepnum)
981
unescaped = unescape(text, 0)
982
return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
984
rfc_url = 'rfc%d.html'
986
def rfc_reference(self, match, lineno):
987
text = match.group(0)
988
if text.startswith('RFC'):
989
rfcnum = int(match.group('rfcnum'))
990
ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
993
unescaped = unescape(text, 0)
994
return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
996
def implicit_inline(self, text, lineno):
998
Check each of the patterns in `self.implicit_dispatch` for a match,
999
and dispatch to the stored method for the pattern. Recursively check
1000
the text before and after the match. Return a list of `nodes.Text`
1001
and inline element nodes.
1005
for pattern, method in self.implicit_dispatch:
1006
match = pattern.search(text)
1009
# Must recurse on strings before *and* after the match;
1010
# there may be multiple patterns.
1011
return (self.implicit_inline(text[:match.start()], lineno)
1012
+ method(match, lineno) +
1013
self.implicit_inline(text[match.end():], lineno))
1014
except MarkupMismatch:
1016
return [nodes.Text(unescape(text), rawsource=unescape(text, 1))]
1018
dispatch = {'*': emphasis,
1020
'`': interpreted_or_phrase_ref,
1022
'_`': inline_internal_target,
1023
']_': footnote_reference,
1024
'|': substitution_reference,
1026
'__': anonymous_reference}
1029
def _loweralpha_to_int(s, _zero=(ord('a')-1)):
1030
return ord(s) - _zero
1032
def _upperalpha_to_int(s, _zero=(ord('A')-1)):
1033
return ord(s) - _zero
1035
def _lowerroman_to_int(s):
1036
return roman.fromRoman(s.upper())
1039
class Body(RSTState):

    """
    Generic classifier of the first line of a block.
    """

    double_width_pad_char = tableparser.TableParser.double_width_pad_char
    """Padding character for East Asian double-width text."""

    enum = Struct()
    """Enumerated list parsing information."""

    enum.formatinfo = {
          'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
          'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
          'period': Struct(prefix='', suffix='.', start=0, end=-1)}
    enum.formats = enum.formatinfo.keys()
    enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
                      'lowerroman', 'upperroman'] # ORDERED!
    enum.sequencepats = {'arabic': '[0-9]+',
                         'loweralpha': '[a-z]',
                         'upperalpha': '[A-Z]',
                         'lowerroman': '[ivxlcdm]+',
                         'upperroman': '[IVXLCDM]+',}
    enum.converters = {'arabic': int,
                       'loweralpha': _loweralpha_to_int,
                       'upperalpha': _upperalpha_to_int,
                       'lowerroman': _lowerroman_to_int,
                       'upperroman': roman.fromRoman}

    enum.sequenceregexps = {}
    for sequence in enum.sequences:
        enum.sequenceregexps[sequence] = re.compile(
              enum.sequencepats[sequence] + '$', re.UNICODE)

    grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
    """Matches the top (& bottom) of a full table)."""

    simple_table_top_pat = re.compile('=+( +=+)+ *$')
    """Matches the top of a simple table."""

    simple_table_border_pat = re.compile('=+[ =]*$')
    """Matches the bottom & header bottom of a simple table."""

    pats = {}
    """Fragments of patterns used by transitions."""

    pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
    pats['alpha'] = '[a-zA-Z]'
    pats['alphanum'] = '[a-zA-Z0-9]'
    pats['alphanumplus'] = '[a-zA-Z0-9_-]'
    pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
                    '|%(upperroman)s|#)' % enum.sequencepats)
    pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
    # @@@ Loosen up the pattern?  Allow Unicode?
    pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
    pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
    pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
    pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats

    for format in enum.formats:
        pats[format] = '(?P<%s>%s%s%s)' % (
              format, re.escape(enum.formatinfo[format].prefix),
              pats['enum'], re.escape(enum.formatinfo[format].suffix))

    patterns = {
          'bullet': u'[-+*\u2022\u2023\u2043]( +|$)',
          'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
          'field_marker': r':(?![: ])([^:\\]|\\.)*(?<! ):( +|$)',
          'option_marker': r'%(option)s(, %(option)s)*(  +| ?$)' % pats,
          'doctest': r'>>>( +|$)',
          'line_block': r'\|( +|$)',
          'grid_table_top': grid_table_top_pat,
          'simple_table_top': simple_table_top_pat,
          'explicit_markup': r'\.\.( +|$)',
          'anonymous': r'__( +|$)',
          'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
          'text': r''}
    initial_transitions = (
          'bullet',
          'enumerator',
          'field_marker',
          'option_marker',
          'doctest',
          'line_block',
          'grid_table_top',
          'simple_table_top',
          'explicit_markup',
          'anonymous',
          'line',
          'text')
def indent(self, match, context, next_state):
1133
indented, indent, line_offset, blank_finish = \
1134
self.state_machine.get_indented()
1135
elements = self.block_quote(indented, line_offset)
1136
self.parent += elements
1137
if not blank_finish:
1138
self.parent += self.unindent_warning('Block quote')
1139
return context, next_state, []
1141
def block_quote(self, indented, line_offset):
1148
new_line_offset) = self.split_attribution(indented, line_offset)
1149
blockquote = nodes.block_quote()
1150
self.nested_parse(blockquote_lines, line_offset, blockquote)
1151
elements.append(blockquote)
1152
if attribution_lines:
1153
attribution, messages = self.parse_attribution(
1154
attribution_lines, attribution_offset)
1155
blockquote += attribution
1156
elements += messages
1157
line_offset = new_line_offset
1158
while indented and not indented[0]:
1159
indented = indented[1:]
1163
# U+2014 is an em-dash:
1164
attribution_pattern = re.compile(u'(---?(?!-)|\u2014) *(?=[^ \\n])',
1167
def split_attribution(self, indented, line_offset):
1169
Check for a block quote attribution and split it off:
1171
* First line after a blank line must begin with a dash ("--", "---",
1172
em-dash; matches `self.attribution_pattern`).
1173
* Every line after that must have consistent indentation.
1174
* Attributions must be preceded by block quote content.
1176
Return a tuple of: (block quote content lines, content offset,
1177
attribution lines, attribution offset, remaining indented lines).
1180
nonblank_seen = False
1181
for i in range(len(indented)):
1182
line = indented[i].rstrip()
1184
if nonblank_seen and blank == i - 1: # last line blank
1185
match = self.attribution_pattern.match(line)
1187
attribution_end, indent = self.check_attribution(
1190
a_lines = indented[i:attribution_end]
1191
a_lines.trim_left(match.end(), end=1)
1192
a_lines.trim_left(indent, start=1)
1193
return (indented[:i], a_lines,
1194
i, indented[attribution_end:],
1195
line_offset + attribution_end)
1196
nonblank_seen = True
1200
return (indented, None, None, None, None)
1202
def check_attribution(self, indented, attribution_start):
1204
Check attribution shape.
1205
Return the index past the end of the attribution, and the indent.
1208
i = attribution_start + 1
1209
for i in range(attribution_start + 1, len(indented)):
1210
line = indented[i].rstrip()
1214
indent = len(line) - len(line.lstrip())
1215
elif len(line) - len(line.lstrip()) != indent:
1216
return None, None # bad shape; not an attribution
1218
# return index of line after last attribution line:
1220
return i, (indent or 0)
1222
def parse_attribution(self, indented, line_offset):
1223
text = '\n'.join(indented).rstrip()
1224
lineno = self.state_machine.abs_line_number() + line_offset
1225
textnodes, messages = self.inline_text(text, lineno)
1226
node = nodes.attribution(text, '', *textnodes)
1227
node.source, node.line = self.state_machine.get_source_and_line(lineno)
1228
return node, messages
1230
def bullet(self, match, context, next_state):
1231
"""Bullet list item."""
1232
bulletlist = nodes.bullet_list()
1233
self.parent += bulletlist
1234
bulletlist['bullet'] = match.string[0]
1235
i, blank_finish = self.list_item(match.end())
1237
offset = self.state_machine.line_offset + 1 # next line
1238
new_line_offset, blank_finish = self.nested_list_parse(
1239
self.state_machine.input_lines[offset:],
1240
input_offset=self.state_machine.abs_line_offset() + 1,
1241
node=bulletlist, initial_state='BulletList',
1242
blank_finish=blank_finish)
1243
self.goto_line(new_line_offset)
1244
if not blank_finish:
1245
self.parent += self.unindent_warning('Bullet list')
1246
return [], next_state, []
1248
def list_item(self, indent):
1249
if self.state_machine.line[indent:]:
1250
indented, line_offset, blank_finish = (
1251
self.state_machine.get_known_indented(indent))
1253
indented, indent, line_offset, blank_finish = (
1254
self.state_machine.get_first_known_indented(indent))
1255
listitem = nodes.list_item('\n'.join(indented))
1257
self.nested_parse(indented, input_offset=line_offset,
1259
return listitem, blank_finish
1261
def enumerator(self, match, context, next_state):
1262
"""Enumerated List Item"""
1263
format, sequence, text, ordinal = self.parse_enumerator(match)
1264
if not self.is_enumerated_list_item(ordinal, sequence, format):
1265
raise statemachine.TransitionCorrection('text')
1266
enumlist = nodes.enumerated_list()
1267
self.parent += enumlist
1269
enumlist['enumtype'] = 'arabic'
1271
enumlist['enumtype'] = sequence
1272
enumlist['prefix'] = self.enum.formatinfo[format].prefix
1273
enumlist['suffix'] = self.enum.formatinfo[format].suffix
1275
enumlist['start'] = ordinal
1276
msg = self.reporter.info(
1277
'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1280
listitem, blank_finish = self.list_item(match.end())
1281
enumlist += listitem
1282
offset = self.state_machine.line_offset + 1 # next line
1283
newline_offset, blank_finish = self.nested_list_parse(
1284
self.state_machine.input_lines[offset:],
1285
input_offset=self.state_machine.abs_line_offset() + 1,
1286
node=enumlist, initial_state='EnumeratedList',
1287
blank_finish=blank_finish,
1288
extra_settings={'lastordinal': ordinal,
1290
'auto': sequence == '#'})
1291
self.goto_line(newline_offset)
1292
if not blank_finish:
1293
self.parent += self.unindent_warning('Enumerated list')
1294
return [], next_state, []
1296
def parse_enumerator(self, match, expected_sequence=None):
1298
Analyze an enumerator and return the results.
1301
- the enumerator format ('period', 'parens', or 'rparen'),
1302
- the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1303
- the text of the enumerator, stripped of formatting, and
1304
- the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1305
``None`` is returned for invalid enumerator text).
1307
The enumerator format has already been determined by the regular
1308
expression match. If `expected_sequence` is given, that sequence is
1309
tried first. If not, we check for Roman numeral 1. This way,
1310
single-character Roman numerals (which are also alphabetical) can be
1311
matched. If no sequence has been matched, all sequences are checked in
1314
groupdict = match.groupdict()
1316
for format in self.enum.formats:
1317
if groupdict[format]: # was this the format matched?
1318
break # yes; keep `format`
1319
else: # shouldn't happen
1320
raise ParserError('enumerator format not matched')
1321
text = groupdict[format][self.enum.formatinfo[format].start
1322
:self.enum.formatinfo[format].end]
1325
elif expected_sequence:
1327
if self.enum.sequenceregexps[expected_sequence].match(text):
1328
sequence = expected_sequence
1329
except KeyError: # shouldn't happen
1330
raise ParserError('unknown enumerator sequence: %s'
1333
sequence = 'lowerroman'
1335
sequence = 'upperroman'
1337
for sequence in self.enum.sequences:
1338
if self.enum.sequenceregexps[sequence].match(text):
1340
else: # shouldn't happen
1341
raise ParserError('enumerator sequence not matched')
1346
ordinal = self.enum.converters[sequence](text)
1347
except roman.InvalidRomanNumeralError:
1349
return format, sequence, text, ordinal
1351
def is_enumerated_list_item(self, ordinal, sequence, format):
1353
Check validity based on the ordinal value and the second line.
1355
Return true if the ordinal is valid and the second line is blank,
1356
indented, or starts with the next enumerator or an auto-enumerator.
1361
next_line = self.state_machine.next_line()
1362
except EOFError: # end of input lines
1363
self.state_machine.previous_line()
1366
self.state_machine.previous_line()
1367
if not next_line[:1].strip(): # blank or indented
1369
result = self.make_enumerator(ordinal + 1, sequence, format)
1371
next_enumerator, auto_enumerator = result
1373
if ( next_line.startswith(next_enumerator) or
1374
next_line.startswith(auto_enumerator) ):
1380
def make_enumerator(self, ordinal, sequence, format):
1382
Construct and return the next enumerated list item marker, and an
1383
auto-enumerator ("#" instead of the regular enumerator).
1385
Return ``None`` for invalid (out of range) ordinals.
1389
elif sequence == 'arabic':
1390
enumerator = str(ordinal)
1392
if sequence.endswith('alpha'):
1395
enumerator = chr(ordinal + ord('a') - 1)
1396
elif sequence.endswith('roman'):
1398
enumerator = roman.toRoman(ordinal)
1399
except roman.RomanError:
1401
else: # shouldn't happen
1402
raise ParserError('unknown enumerator sequence: "%s"'
1404
if sequence.startswith('lower'):
1405
enumerator = enumerator.lower()
1406
elif sequence.startswith('upper'):
1407
enumerator = enumerator.upper()
1408
else: # shouldn't happen
1409
raise ParserError('unknown enumerator sequence: "%s"'
1411
formatinfo = self.enum.formatinfo[format]
1412
next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
1414
auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
1415
return next_enumerator, auto_enumerator
1417
def field_marker(self, match, context, next_state):
1418
"""Field list item."""
1419
field_list = nodes.field_list()
1420
self.parent += field_list
1421
field, blank_finish = self.field(match)
1423
offset = self.state_machine.line_offset + 1 # next line
1424
newline_offset, blank_finish = self.nested_list_parse(
1425
self.state_machine.input_lines[offset:],
1426
input_offset=self.state_machine.abs_line_offset() + 1,
1427
node=field_list, initial_state='FieldList',
1428
blank_finish=blank_finish)
1429
self.goto_line(newline_offset)
1430
if not blank_finish:
1431
self.parent += self.unindent_warning('Field list')
1432
return [], next_state, []
1434
def field(self, match):
1435
name = self.parse_field_marker(match)
1436
src, srcline = self.state_machine.get_source_and_line()
1437
lineno = self.state_machine.abs_line_number()
1438
indented, indent, line_offset, blank_finish = \
1439
self.state_machine.get_first_known_indented(match.end())
1440
field_node = nodes.field()
1441
field_node.source = src
1442
field_node.line = srcline
1443
name_nodes, name_messages = self.inline_text(name, lineno)
1444
field_node += nodes.field_name(name, '', *name_nodes)
1445
field_body = nodes.field_body('\n'.join(indented), *name_messages)
1446
field_node += field_body
1448
self.parse_field_body(indented, line_offset, field_body)
1449
return field_node, blank_finish
1451
def parse_field_marker(self, match):
1452
"""Extract & return field name from a field marker match."""
1453
field = match.group()[1:] # strip off leading ':'
1454
field = field[:field.rfind(':')] # strip off trailing ':' etc.
1457
def parse_field_body(self, indented, offset, node):
1458
self.nested_parse(indented, input_offset=offset, node=node)
1460
def option_marker(self, match, context, next_state):
1461
"""Option list item."""
1462
optionlist = nodes.option_list()
1464
listitem, blank_finish = self.option_list_item(match)
1465
except MarkupError, error:
1466
# This shouldn't happen; pattern won't match.
1467
msg = self.reporter.error(u'Invalid option list marker: %s' %
1470
indented, indent, line_offset, blank_finish = \
1471
self.state_machine.get_first_known_indented(match.end())
1472
elements = self.block_quote(indented, line_offset)
1473
self.parent += elements
1474
if not blank_finish:
1475
self.parent += self.unindent_warning('Option list')
1476
return [], next_state, []
1477
self.parent += optionlist
1478
optionlist += listitem
1479
offset = self.state_machine.line_offset + 1 # next line
1480
newline_offset, blank_finish = self.nested_list_parse(
1481
self.state_machine.input_lines[offset:],
1482
input_offset=self.state_machine.abs_line_offset() + 1,
1483
node=optionlist, initial_state='OptionList',
1484
blank_finish=blank_finish)
1485
self.goto_line(newline_offset)
1486
if not blank_finish:
1487
self.parent += self.unindent_warning('Option list')
1488
return [], next_state, []
1490
def option_list_item(self, match):
1491
offset = self.state_machine.abs_line_offset()
1492
options = self.parse_option_marker(match)
1493
indented, indent, line_offset, blank_finish = \
1494
self.state_machine.get_first_known_indented(match.end())
1495
if not indented: # not an option list item
1496
self.goto_line(offset)
1497
raise statemachine.TransitionCorrection('text')
1498
option_group = nodes.option_group('', *options)
1499
description = nodes.description('\n'.join(indented))
1500
option_list_item = nodes.option_list_item('', option_group,
1503
self.nested_parse(indented, input_offset=line_offset,
1505
return option_list_item, blank_finish
1507
def parse_option_marker(self, match):
1509
Return a list of `node.option` and `node.option_argument` objects,
1510
parsed from an option marker match.
1512
:Exception: `MarkupError` for invalid option markers.
1515
optionstrings = match.group().rstrip().split(', ')
1516
for optionstring in optionstrings:
1517
tokens = optionstring.split()
1519
firstopt = tokens[0].split('=', 1)
1520
if len(firstopt) > 1:
1521
# "--opt=value" form
1522
tokens[:1] = firstopt
1524
elif (len(tokens[0]) > 2
1525
and ((tokens[0].startswith('-')
1526
and not tokens[0].startswith('--'))
1527
or tokens[0].startswith('+'))):
1529
tokens[:1] = [tokens[0][:2], tokens[0][2:]]
1531
if len(tokens) > 1 and (tokens[1].startswith('<')
1532
and tokens[-1].endswith('>')):
1533
# "-o <value1 value2>" form; join all values into one token
1534
tokens[1:] = [' '.join(tokens[1:])]
1535
if 0 < len(tokens) <= 2:
1536
option = nodes.option(optionstring)
1537
option += nodes.option_string(tokens[0], tokens[0])
1539
option += nodes.option_argument(tokens[1], tokens[1],
1540
delimiter=delimiter)
1541
optlist.append(option)
1544
'wrong number of option tokens (=%s), should be 1 or 2: '
1545
'"%s"' % (len(tokens), optionstring))
1548
def doctest(self, match, context, next_state):
1549
data = '\n'.join(self.state_machine.get_text_block())
1550
self.parent += nodes.doctest_block(data, data)
1551
return [], next_state, []
1553
def line_block(self, match, context, next_state):
1554
"""First line of a line block."""
1555
block = nodes.line_block()
1556
self.parent += block
1557
lineno = self.state_machine.abs_line_number()
1558
line, messages, blank_finish = self.line_block_line(match, lineno)
1560
self.parent += messages
1561
if not blank_finish:
1562
offset = self.state_machine.line_offset + 1 # next line
1563
new_line_offset, blank_finish = self.nested_list_parse(
1564
self.state_machine.input_lines[offset:],
1565
input_offset=self.state_machine.abs_line_offset() + 1,
1566
node=block, initial_state='LineBlock',
1568
self.goto_line(new_line_offset)
1569
if not blank_finish:
1570
self.parent += self.reporter.warning(
1571
'Line block ends without a blank line.',
1574
if block[0].indent is None:
1576
self.nest_line_block_lines(block)
1577
return [], next_state, []
1579
def line_block_line(self, match, lineno):
1580
"""Return one line element of a line_block."""
1581
indented, indent, line_offset, blank_finish = \
1582
self.state_machine.get_first_known_indented(match.end(),
1584
text = u'\n'.join(indented)
1585
text_nodes, messages = self.inline_text(text, lineno)
1586
line = nodes.line(text, '', *text_nodes)
1587
if match.string.rstrip() != '|': # not empty
1588
line.indent = len(match.group(1)) - 1
1589
return line, messages, blank_finish
1591
def nest_line_block_lines(self, block):
1592
for index in range(1, len(block)):
1593
if getattr(block[index], 'indent', None) is None:
1594
block[index].indent = block[index - 1].indent
1595
self.nest_line_block_segment(block)
1597
def nest_line_block_segment(self, block):
1598
indents = [item.indent for item in block]
1599
least = min(indents)
1601
new_block = nodes.line_block()
1603
if item.indent > least:
1604
new_block.append(item)
1607
self.nest_line_block_segment(new_block)
1608
new_items.append(new_block)
1609
new_block = nodes.line_block()
1610
new_items.append(item)
1612
self.nest_line_block_segment(new_block)
1613
new_items.append(new_block)
1614
block[:] = new_items
1616
def grid_table_top(self, match, context, next_state):
1617
"""Top border of a full table."""
1618
return self.table_top(match, context, next_state,
1619
self.isolate_grid_table,
1620
tableparser.GridTableParser)
1622
def simple_table_top(self, match, context, next_state):
1623
"""Top border of a simple table."""
1624
return self.table_top(match, context, next_state,
1625
self.isolate_simple_table,
1626
tableparser.SimpleTableParser)
1628
def table_top(self, match, context, next_state,
1629
isolate_function, parser_class):
1630
"""Top border of a generic table."""
1631
nodelist, blank_finish = self.table(isolate_function, parser_class)
1632
self.parent += nodelist
1633
if not blank_finish:
1634
msg = self.reporter.warning(
1635
'Blank line required after table.',
1636
line=self.state_machine.abs_line_number()+1)
1638
return [], next_state, []
1640
def table(self, isolate_function, parser_class):
1641
"""Parse a table."""
1642
block, messages, blank_finish = isolate_function()
1645
parser = parser_class()
1646
tabledata = parser.parse(block)
1647
tableline = (self.state_machine.abs_line_number() - len(block)
1649
table = self.build_table(tabledata, tableline)
1650
nodelist = [table] + messages
1651
except tableparser.TableMarkupError, err:
1652
nodelist = self.malformed_table(block, ' '.join(err.args),
1653
offset=err.offset) + messages
1656
return nodelist, blank_finish
1658
def isolate_grid_table(self):
1662
block = self.state_machine.get_text_block(flush_left=True)
1663
except statemachine.UnexpectedIndentationError, err:
1664
block, src, srcline = err.args
1665
messages.append(self.reporter.error('Unexpected indentation.',
1666
source=src, line=srcline))
1669
# for East Asian chars:
1670
block.pad_double_width(self.double_width_pad_char)
1671
width = len(block[0].strip())
1672
for i in range(len(block)):
1673
block[i] = block[i].strip()
1674
if block[i][0] not in '+|': # check left edge
1676
self.state_machine.previous_line(len(block) - i)
1679
if not self.grid_table_top_pat.match(block[-1]): # find bottom
1681
# from second-last to third line of table:
1682
for i in range(len(block) - 2, 1, -1):
1683
if self.grid_table_top_pat.match(block[i]):
1684
self.state_machine.previous_line(len(block) - i + 1)
1688
messages.extend(self.malformed_table(block))
1689
return [], messages, blank_finish
1690
for i in range(len(block)): # check right edge
1691
if len(block[i]) != width or block[i][-1] not in '+|':
1692
messages.extend(self.malformed_table(block))
1693
return [], messages, blank_finish
1694
return block, messages, blank_finish
1696
def isolate_simple_table(self):
1697
start = self.state_machine.line_offset
1698
lines = self.state_machine.input_lines
1699
limit = len(lines) - 1
1700
toplen = len(lines[start].strip())
1701
pattern_match = self.simple_table_border_pat.match
1707
match = pattern_match(line)
1709
if len(line.strip()) != toplen:
1710
self.state_machine.next_line(i - start)
1711
messages = self.malformed_table(
1712
lines[start:i+1], 'Bottom/header table border does '
1713
'not match top border.')
1714
return [], messages, i == limit or not lines[i+1].strip()
1717
if found == 2 or i == limit or not lines[i+1].strip():
1721
else: # reached end of input_lines
1723
extra = ' or no blank line after table bottom'
1724
self.state_machine.next_line(found_at - start)
1725
block = lines[start:found_at+1]
1728
self.state_machine.next_line(i - start - 1)
1729
block = lines[start:]
1730
messages = self.malformed_table(
1731
block, 'No bottom table border found%s.' % extra)
1732
return [], messages, not extra
1733
self.state_machine.next_line(end - start)
1734
block = lines[start:end+1]
1735
# for East Asian chars:
1736
block.pad_double_width(self.double_width_pad_char)
1737
return block, [], end == limit or not lines[end+1].strip()
1739
def malformed_table(self, block, detail='', offset=0):
1740
block.replace(self.double_width_pad_char, '')
1741
data = '\n'.join(block)
1742
message = 'Malformed table.'
1743
startline = self.state_machine.abs_line_number() - len(block) + 1
1745
message += '\n' + detail
1746
error = self.reporter.error(message, nodes.literal_block(data, data),
1747
line=startline+offset)
1750
def build_table(self, tabledata, tableline, stub_columns=0):
1751
colwidths, headrows, bodyrows = tabledata
1752
table = nodes.table()
1753
tgroup = nodes.tgroup(cols=len(colwidths))
1755
for colwidth in colwidths:
1756
colspec = nodes.colspec(colwidth=colwidth)
1758
colspec.attributes['stub'] = 1
1762
thead = nodes.thead()
1764
for row in headrows:
1765
thead += self.build_table_row(row, tableline)
1766
tbody = nodes.tbody()
1768
for row in bodyrows:
1769
tbody += self.build_table_row(row, tableline)
1772
def build_table_row(self, rowdata, tableline):
1774
for cell in rowdata:
1777
morerows, morecols, offset, cellblock = cell
1780
attributes['morerows'] = morerows
1782
attributes['morecols'] = morecols
1783
entry = nodes.entry(**attributes)
1785
if ''.join(cellblock):
1786
self.nested_parse(cellblock, input_offset=tableline+offset,
1792
"""Patterns and constants used for explicit markup recognition."""
1794
explicit.patterns = Struct(
1795
target=re.compile(r"""
1797
_ # anonymous target
1799
(?!_) # no underscore at the beginning
1800
(?P<quote>`?) # optional open quote
1801
(?![ `]) # first char. not space or
1803
(?P<name> # reference name
1806
%(non_whitespace_escape_before)s
1807
(?P=quote) # close quote if open quote used
1809
(?<!(?<!\x00):) # no unescaped colon at end
1810
%(non_whitespace_escape_before)s
1811
[ ]? # optional space
1812
: # end of reference name
1813
([ ]+|$) # followed by whitespace
1814
""" % vars(Inliner), re.VERBOSE | re.UNICODE),
1815
reference=re.compile(r"""
1817
(?P<simple>%(simplename)s)_
1821
(?P<phrase>.+?) # hyperlink phrase
1822
%(non_whitespace_escape_before)s
1823
`_ # close backquote,
1827
""" % vars(Inliner), re.VERBOSE | re.UNICODE),
1828
substitution=re.compile(r"""
1830
(?![ ]) # first char. not space
1831
(?P<name>.+?) # substitution text
1832
%(non_whitespace_escape_before)s
1833
\| # close delimiter
1835
([ ]+|$) # followed by whitespace
1836
""" % vars(Inliner),
1837
re.VERBOSE | re.UNICODE),)
1839
def footnote(self, match):
1840
src, srcline = self.state_machine.get_source_and_line()
1841
indented, indent, offset, blank_finish = \
1842
self.state_machine.get_first_known_indented(match.end())
1843
label = match.group(1)
1844
name = normalize_name(label)
1845
footnote = nodes.footnote('\n'.join(indented))
1846
footnote.source = src
1847
footnote.line = srcline
1848
if name[0] == '#': # auto-numbered
1849
name = name[1:] # autonumber label
1850
footnote['auto'] = 1
1852
footnote['names'].append(name)
1853
self.document.note_autofootnote(footnote)
1854
elif name == '*': # auto-symbol
1856
footnote['auto'] = '*'
1857
self.document.note_symbol_footnote(footnote)
1858
else: # manually numbered
1859
footnote += nodes.label('', label)
1860
footnote['names'].append(name)
1861
self.document.note_footnote(footnote)
1863
self.document.note_explicit_target(footnote, footnote)
1865
self.document.set_id(footnote, footnote)
1867
self.nested_parse(indented, input_offset=offset, node=footnote)
1868
return [footnote], blank_finish
1870
def citation(self, match):
1871
src, srcline = self.state_machine.get_source_and_line()
1872
indented, indent, offset, blank_finish = \
1873
self.state_machine.get_first_known_indented(match.end())
1874
label = match.group(1)
1875
name = normalize_name(label)
1876
citation = nodes.citation('\n'.join(indented))
1877
citation.source = src
1878
citation.line = srcline
1879
citation += nodes.label('', label)
1880
citation['names'].append(name)
1881
self.document.note_citation(citation)
1882
self.document.note_explicit_target(citation, citation)
1884
self.nested_parse(indented, input_offset=offset, node=citation)
1885
return [citation], blank_finish
1887
def hyperlink_target(self, match):
1888
pattern = self.explicit.patterns.target
1889
lineno = self.state_machine.abs_line_number()
1890
block, indent, offset, blank_finish = \
1891
self.state_machine.get_first_known_indented(
1892
match.end(), until_blank=True, strip_indent=False)
1893
blocktext = match.string[:match.end()] + '\n'.join(block)
1894
block = [escape2null(line) for line in block]
1898
targetmatch = pattern.match(escaped)
1903
escaped += block[blockindex]
1905
raise MarkupError('malformed hyperlink target.')
1906
del block[:blockindex]
1907
block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
1908
target = self.make_target(block, blocktext, lineno,
1909
targetmatch.group('name'))
1910
return [target], blank_finish
1912
def make_target(self, block, block_text, lineno, target_name):
1913
target_type, data = self.parse_target(block, block_text, lineno)
1914
if target_type == 'refname':
1915
target = nodes.target(block_text, '', refname=normalize_name(data))
1916
target.indirect_reference_name = data
1917
self.add_target(target_name, '', target, lineno)
1918
self.document.note_indirect_target(target)
1920
elif target_type == 'refuri':
1921
target = nodes.target(block_text, '')
1922
self.add_target(target_name, data, target, lineno)
1927
def parse_target(self, block, block_text, lineno):
1929
Determine the type of reference of a target.
1931
:Return: A 2-tuple, one of:
1933
- 'refname' and the indirect reference name
1934
- 'refuri' and the URI
1935
- 'malformed' and a system_message node
1937
if block and block[-1].strip()[-1:] == '_': # possible indirect target
1938
reference = ' '.join([line.strip() for line in block])
1939
refname = self.is_reference(reference)
1941
return 'refname', refname
1942
reference = ''.join([''.join(line.split()) for line in block])
1943
return 'refuri', unescape(reference)
1945
def is_reference(self, reference):
1946
match = self.explicit.patterns.reference.match(
1947
whitespace_normalize_name(reference))
1950
return unescape(match.group('simple') or match.group('phrase'))
1952
def add_target(self, targetname, refuri, target, lineno):
1953
target.line = lineno
1955
name = normalize_name(unescape(targetname))
1956
target['names'].append(name)
1958
uri = self.inliner.adjust_uri(refuri)
1960
target['refuri'] = uri
1962
raise ApplicationError('problem with URI: %r' % refuri)
1963
self.document.note_explicit_target(target, self.parent)
1964
else: # anonymous target
1966
target['refuri'] = refuri
1967
target['anonymous'] = 1
1968
self.document.note_anonymous_target(target)
1970
def substitution_def(self, match):
1971
pattern = self.explicit.patterns.substitution
1972
src, srcline = self.state_machine.get_source_and_line()
1973
block, indent, offset, blank_finish = \
1974
self.state_machine.get_first_known_indented(match.end(),
1976
blocktext = (match.string[:match.end()] + '\n'.join(block))
1978
escaped = escape2null(block[0].rstrip())
1981
subdefmatch = pattern.match(escaped)
1986
escaped = escaped + ' ' + escape2null(block[blockindex].strip())
1988
raise MarkupError('malformed substitution definition.')
1989
del block[:blockindex] # strip out the substitution marker
1990
block[0] = (block[0].strip() + ' ')[subdefmatch.end()-len(escaped)-1:-1]
1994
while block and not block[-1].strip():
1996
subname = subdefmatch.group('name')
1997
substitution_node = nodes.substitution_definition(blocktext)
1998
substitution_node.source = src
1999
substitution_node.line = srcline
2001
msg = self.reporter.warning(
2002
'Substitution definition "%s" missing contents.' % subname,
2003
nodes.literal_block(blocktext, blocktext),
2004
source=src, line=srcline)
2005
return [msg], blank_finish
2006
block[0] = block[0].strip()
2007
substitution_node['names'].append(
2008
nodes.whitespace_normalize_name(subname))
2009
new_abs_offset, blank_finish = self.nested_list_parse(
2010
block, input_offset=offset, node=substitution_node,
2011
initial_state='SubstitutionDef', blank_finish=blank_finish)
2013
for node in substitution_node[:]:
2014
if not (isinstance(node, nodes.Inline) or
2015
isinstance(node, nodes.Text)):
2016
self.parent += substitution_node[i]
2017
del substitution_node[i]
2020
for node in substitution_node.traverse(nodes.Element):
2021
if self.disallowed_inside_substitution_definitions(node):
2022
pformat = nodes.literal_block('', node.pformat().rstrip())
2023
msg = self.reporter.error(
2024
'Substitution definition contains illegal element:',
2025
pformat, nodes.literal_block(blocktext, blocktext),
2026
source=src, line=srcline)
2027
return [msg], blank_finish
2028
if len(substitution_node) == 0:
2029
msg = self.reporter.warning(
2030
'Substitution definition "%s" empty or invalid.' % subname,
2031
nodes.literal_block(blocktext, blocktext),
2032
source=src, line=srcline)
2033
return [msg], blank_finish
2034
self.document.note_substitution_def(
2035
substitution_node, subname, self.parent)
2036
return [substitution_node], blank_finish
2038
def disallowed_inside_substitution_definitions(self, node):
2040
isinstance(node, nodes.reference) and node.get('anonymous') or
2041
isinstance(node, nodes.footnote_reference) and node.get('auto')):
2046
def directive(self, match, **option_presets):
2047
"""Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
2048
type_name = match.group(1)
2049
directive_class, messages = directives.directive(
2050
type_name, self.memo.language, self.document)
2051
self.parent += messages
2053
return self.run_directive(
2054
directive_class, match, type_name, option_presets)
2056
return self.unknown_directive(type_name)
2058
def run_directive(self, directive, match, type_name, option_presets):
2060
Parse a directive then run its directive function.
2064
- `directive`: The class implementing the directive. Must be
2065
a subclass of `rst.Directive`.
2067
- `match`: A regular expression match object which matched the first
2068
line of the directive.
2070
- `type_name`: The directive name, as used in the source text.
2072
- `option_presets`: A dictionary of preset options, defaults for the
2073
directive options. Currently, only an "alt" option is passed by
2074
substitution definitions (value: the substitution name), which may
2075
be used by an embedded image directive.
2077
Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
2079
if isinstance(directive, (FunctionType, MethodType)):
2080
from docutils.parsers.rst import convert_directive_function
2081
directive = convert_directive_function(directive)
2082
lineno = self.state_machine.abs_line_number()
2083
initial_line_offset = self.state_machine.line_offset
2084
indented, indent, line_offset, blank_finish \
2085
= self.state_machine.get_first_known_indented(match.end(),
2087
block_text = '\n'.join(self.state_machine.input_lines[
2088
initial_line_offset : self.state_machine.line_offset + 1])
2090
arguments, options, content, content_offset = (
2091
self.parse_directive_block(indented, line_offset,
2092
directive, option_presets))
2093
except MarkupError, detail:
2094
error = self.reporter.error(
2095
'Error in "%s" directive:\n%s.' % (type_name,
2096
' '.join(detail.args)),
2097
nodes.literal_block(block_text, block_text), line=lineno)
2098
return [error], blank_finish
2099
directive_instance = directive(
2100
type_name, arguments, options, content, lineno,
2101
content_offset, block_text, self, self.state_machine)
2103
result = directive_instance.run()
2104
except docutils.parsers.rst.DirectiveError, error:
2105
msg_node = self.reporter.system_message(error.level, error.msg,
2107
msg_node += nodes.literal_block(block_text, block_text)
2109
assert isinstance(result, list), \
2110
'Directive "%s" must return a list of nodes.' % type_name
2111
for i in range(len(result)):
2112
assert isinstance(result[i], nodes.Node), \
2113
('Directive "%s" returned non-Node object (index %s): %r'
2114
% (type_name, i, result[i]))
2116
blank_finish or self.state_machine.is_next_line_blank())
2118
def parse_directive_block(self, indented, line_offset, directive,
2120
option_spec = directive.option_spec
2121
has_content = directive.has_content
2122
if indented and not indented[0].strip():
2123
indented.trim_start()
2125
while indented and not indented[-1].strip():
2127
if indented and (directive.required_arguments
2128
or directive.optional_arguments
2130
for i, line in enumerate(indented):
2131
if not line.strip():
2135
arg_block = indented[:i]
2136
content = indented[i+1:]
2137
content_offset = line_offset + i + 1
2140
content_offset = line_offset
2143
options, arg_block = self.parse_directive_options(
2144
option_presets, option_spec, arg_block)
2147
if arg_block and not (directive.required_arguments
2148
or directive.optional_arguments):
2149
content = arg_block + indented[i:]
2150
content_offset = line_offset
2152
while content and not content[0].strip():
2153
content.trim_start()
2155
if directive.required_arguments or directive.optional_arguments:
2156
arguments = self.parse_directive_arguments(
2157
directive, arg_block)
2160
if content and not has_content:
2161
raise MarkupError('no content permitted')
2162
return (arguments, options, content, content_offset)
2164
def parse_directive_options(self, option_presets, option_spec, arg_block):
2165
options = option_presets.copy()
2166
for i, line in enumerate(arg_block):
2167
if re.match(Body.patterns['field_marker'], line):
2168
opt_block = arg_block[i:]
2169
arg_block = arg_block[:i]
2174
success, data = self.parse_extension_options(option_spec,
2176
if success: # data is a dict of options
2177
options.update(data)
2178
else: # data is an error string
2179
raise MarkupError(data)
2180
return options, arg_block
2182
def parse_directive_arguments(self, directive, arg_block):
2183
required = directive.required_arguments
2184
optional = directive.optional_arguments
2185
arg_text = '\n'.join(arg_block)
2186
arguments = arg_text.split()
2187
if len(arguments) < required:
2188
raise MarkupError('%s argument(s) required, %s supplied'
2189
% (required, len(arguments)))
2190
elif len(arguments) > required + optional:
2191
if directive.final_argument_whitespace:
2192
arguments = arg_text.split(None, required + optional - 1)
2195
'maximum %s argument(s) allowed, %s supplied'
2196
% (required + optional, len(arguments)))
2199
def parse_extension_options(self, option_spec, datalines):
2201
Parse `datalines` for a field list containing extension options
2202
matching `option_spec`.
2205
- `option_spec`: a mapping of option name to conversion
2206
function, which should raise an exception on bad input.
2207
- `datalines`: a list of input strings.
2210
- Success value, 1 or 0.
2211
- An option dictionary on success, an error string on failure.
2213
node = nodes.field_list()
2214
newline_offset, blank_finish = self.nested_list_parse(
2215
datalines, 0, node, initial_state='ExtensionOptions',
2217
if newline_offset != len(datalines): # incomplete parse of block
2218
return 0, 'invalid option block'
2220
options = utils.extract_extension_options(node, option_spec)
2221
except KeyError, detail:
2222
return 0, ('unknown option: "%s"' % detail.args[0])
2223
except (ValueError, TypeError), detail:
2224
return 0, ('invalid option value: %s' % ' '.join(detail.args))
2225
except utils.ExtensionOptionError, detail:
2226
return 0, ('invalid option data: %s' % ' '.join(detail.args))
2230
return 0, 'option data incompletely parsed'
2232
def unknown_directive(self, type_name):
2233
lineno = self.state_machine.abs_line_number()
2234
indented, indent, offset, blank_finish = \
2235
self.state_machine.get_first_known_indented(0, strip_indent=False)
2236
text = '\n'.join(indented)
2237
error = self.reporter.error(
2238
'Unknown directive type "%s".' % type_name,
2239
nodes.literal_block(text, text), line=lineno)
2240
return [error], blank_finish
2242
def comment(self, match):
2243
if not match.string[match.end():].strip() \
2244
and self.state_machine.is_next_line_blank(): # an empty comment?
2245
return [nodes.comment()], 1 # "A tiny but practical wart."
2246
indented, indent, offset, blank_finish = \
2247
self.state_machine.get_first_known_indented(match.end())
2248
while indented and not indented[-1].strip():
2250
text = '\n'.join(indented)
2251
return [nodes.comment(text, text)], blank_finish
2253
explicit.constructs = [
2256
\.\.[ ]+ # explicit markup start
2259
[0-9]+ # manually numbered footnote
2261
\# # anonymous auto-numbered footnote
2263
\#%s # auto-number ed?) footnote label
2265
\* # auto-symbol footnote
2268
([ ]+|$) # whitespace or end of line
2269
""" % Inliner.simplename, re.VERBOSE | re.UNICODE)),
2272
\.\.[ ]+ # explicit markup start
2273
\[(%s)\] # citation label
2274
([ ]+|$) # whitespace or end of line
2275
""" % Inliner.simplename, re.VERBOSE | re.UNICODE)),
2278
\.\.[ ]+ # explicit markup start
2279
_ # target indicator
2280
(?![ ]|$) # first char. not space or EOL
2281
""", re.VERBOSE | re.UNICODE)),
2284
\.\.[ ]+ # explicit markup start
2285
\| # substitution indicator
2286
(?![ ]|$) # first char. not space or EOL
2287
""", re.VERBOSE | re.UNICODE)),
2290
\.\.[ ]+ # explicit markup start
2291
(%s) # directive name
2292
[ ]? # optional space
2293
:: # directive delimiter
2294
([ ]+|$) # whitespace or end of line
2295
""" % Inliner.simplename, re.VERBOSE | re.UNICODE))]
2297
def explicit_markup(self, match, context, next_state):
2298
"""Footnotes, hyperlink targets, directives, comments."""
2299
nodelist, blank_finish = self.explicit_construct(match)
2300
self.parent += nodelist
2301
self.explicit_list(blank_finish)
2302
return [], next_state, []
2304
def explicit_construct(self, match):
2305
"""Determine which explicit construct this is, parse & return it."""
2307
for method, pattern in self.explicit.constructs:
2308
expmatch = pattern.match(match.string)
2311
return method(self, expmatch)
2312
except MarkupError, error:
2313
lineno = self.state_machine.abs_line_number()
2314
message = ' '.join(error.args)
2315
errors.append(self.reporter.warning(message, line=lineno))
2317
nodelist, blank_finish = self.comment(match)
2318
return nodelist + errors, blank_finish
2320
def explicit_list(self, blank_finish):
2322
Create a nested state machine for a series of explicit markup
2323
constructs (including anonymous hyperlink targets).
2325
offset = self.state_machine.line_offset + 1 # next line
2326
newline_offset, blank_finish = self.nested_list_parse(
2327
self.state_machine.input_lines[offset:],
2328
input_offset=self.state_machine.abs_line_offset() + 1,
2329
node=self.parent, initial_state='Explicit',
2330
blank_finish=blank_finish,
2331
match_titles=self.state_machine.match_titles)
2332
self.goto_line(newline_offset)
2333
if not blank_finish:
2334
self.parent += self.unindent_warning('Explicit markup')
2336
def anonymous(self, match, context, next_state):
2337
"""Anonymous hyperlink targets."""
2338
nodelist, blank_finish = self.anonymous_target(match)
2339
self.parent += nodelist
2340
self.explicit_list(blank_finish)
2341
return [], next_state, []
2343
def anonymous_target(self, match):
2344
lineno = self.state_machine.abs_line_number()
2345
block, indent, offset, blank_finish \
2346
= self.state_machine.get_first_known_indented(match.end(),
2348
blocktext = match.string[:match.end()] + '\n'.join(block)
2349
block = [escape2null(line) for line in block]
2350
target = self.make_target(block, blocktext, lineno, '')
2351
return [target], blank_finish
2353
def line(self, match, context, next_state):
2354
"""Section title overline or transition marker."""
2355
if self.state_machine.match_titles:
2356
return [match.string], 'Line', []
2357
elif match.string.strip() == '::':
2358
raise statemachine.TransitionCorrection('text')
2359
elif len(match.string.strip()) < 4:
2360
msg = self.reporter.info(
2361
'Unexpected possible title overline or transition.\n'
2362
"Treating it as ordinary text because it's so short.",
2363
line=self.state_machine.abs_line_number())
2365
raise statemachine.TransitionCorrection('text')
2367
blocktext = self.state_machine.line
2368
msg = self.reporter.severe(
2369
'Unexpected section title or transition.',
2370
nodes.literal_block(blocktext, blocktext),
2371
line=self.state_machine.abs_line_number())
2373
return [], next_state, []
2375
def text(self, match, context, next_state):
2376
"""Titles, definition lists, paragraphs."""
2377
return [match.string], 'Text', []
2380
class RFC2822Body(Body):

    """
    RFC2822 headers are only valid as the first constructs in documents.  As
    soon as anything else appears, the `Body` state should take over.
    """

    patterns = Body.patterns.copy() # can't modify the original
    patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
    initial_transitions = [(name, 'Body')
                           for name in Body.initial_transitions]
    initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item."""
        fieldlist = nodes.field_list(classes=['rfc2822'])
        self.parent += fieldlist
        field, blank_finish = self.rfc2822_field(match)
        fieldlist += field
        offset = self.state_machine.line_offset + 1   # next line
        newline_offset, blank_finish = self.nested_list_parse(
              self.state_machine.input_lines[offset:],
              input_offset=self.state_machine.abs_line_offset() + 1,
              node=fieldlist, initial_state='RFC2822List',
              blank_finish=blank_finish)
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning(
                  'RFC2822-style field list')
        return [], next_state, []

    def rfc2822_field(self, match):
        """Parse one RFC2822-style ``Name: value`` field."""
        name = match.string[:match.string.find(':')]
        indented, indent, line_offset, blank_finish = \
              self.state_machine.get_first_known_indented(match.end(),
                                                          until_blank=True)
        fieldnode = nodes.field()
        fieldnode += nodes.field_name(name, name)
        fieldbody = nodes.field_body('\n'.join(indented))
        fieldnode += fieldbody
        if indented:
            self.nested_parse(indented, input_offset=line_offset,
                              node=fieldbody)
        return fieldnode, blank_finish
class SpecializedBody(Body):

    """
    Superclass for second and subsequent compound element members.  Compound
    elements are lists and list-like constructs.

    All transition methods are disabled (redefined as `invalid_input`).
    Override individual methods in subclasses to re-enable.

    For example, once an initial bullet list item, say, is recognized, the
    `BulletList` subclass takes over, with a "bullet_list" node as its
    container.  Upon encountering the initial bullet list item, `Body.bullet`
    calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
    starts up a nested parsing session with `BulletList` as the initial state.
    Only the ``bullet`` transition method is enabled in `BulletList`; as long
    as only bullet list items are encountered, they are parsed and inserted
    into the container.  The first construct which is *not* a bullet list item
    triggers the `invalid_input` method, which ends the nested parse and
    closes the container.  `BulletList` needs to recognize input that is
    invalid in the context of a bullet list, which means everything *other
    than* bullet list items, so it inherits the transition list created in
    `Body`.
    """

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        self.state_machine.previous_line() # back up so parent SM can reassess
        raise EOFError

    indent = invalid_input
    bullet = invalid_input
    enumerator = invalid_input
    field_marker = invalid_input
    option_marker = invalid_input
    doctest = invalid_input
    line_block = invalid_input
    grid_table_top = invalid_input
    simple_table_top = invalid_input
    explicit_markup = invalid_input
    anonymous = invalid_input
    line = invalid_input
    text = invalid_input
class BulletList(SpecializedBody):

    """Second and subsequent bullet_list list_items."""

    def bullet(self, match, context, next_state):
        """Bullet list item."""
        if match.string[0] != self.parent['bullet']:
            # different bullet: new list
            self.invalid_input()
        listitem, blank_finish = self.list_item(match.end())
        self.parent += listitem
        self.blank_finish = blank_finish
        return [], next_state, []
class DefinitionList(SpecializedBody):

    """Second and subsequent definition_list_items."""

    def text(self, match, context, next_state):
        """Definition lists."""
        return [match.string], 'Definition', []
class EnumeratedList(SpecializedBody):

    """Second and subsequent enumerated_list list_items."""

    def enumerator(self, match, context, next_state):
        """Enumerated list item."""
        format, sequence, text, ordinal = self.parse_enumerator(
              match, self.parent['enumtype'])
        if ( format != self.format
             or (sequence != '#' and (sequence != self.parent['enumtype']
                                      or self.auto
                                      or ordinal != (self.lastordinal + 1)))
             or not self.is_enumerated_list_item(ordinal, sequence, format)):
            # different enumeration: new list
            self.invalid_input()
        if sequence == '#':
            self.auto = 1
        listitem, blank_finish = self.list_item(match.end())
        self.parent += listitem
        self.blank_finish = blank_finish
        self.lastordinal = ordinal
        return [], next_state, []
class FieldList(SpecializedBody):

    """Second and subsequent field_list fields."""

    def field_marker(self, match, context, next_state):
        """Field list field."""
        field, blank_finish = self.field(match)
        self.parent += field
        self.blank_finish = blank_finish
        return [], next_state, []
class OptionList(SpecializedBody):

    """Second and subsequent option_list option_list_items."""

    def option_marker(self, match, context, next_state):
        """Option list item."""
        try:
            option_list_item, blank_finish = self.option_list_item(match)
        except MarkupError:
            # not an option list item: end this list
            self.invalid_input()
        self.parent += option_list_item
        self.blank_finish = blank_finish
        return [], next_state, []
class RFC2822List(SpecializedBody, RFC2822Body):

    """Second and subsequent RFC2822-style field_list fields."""

    patterns = RFC2822Body.patterns
    initial_transitions = RFC2822Body.initial_transitions

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item."""
        field, blank_finish = self.rfc2822_field(match)
        self.parent += field
        self.blank_finish = blank_finish
        return [], 'RFC2822List', []

    blank = SpecializedBody.invalid_input
class ExtensionOptions(FieldList):

    """
    Parse field_list fields for extension options.

    No nested parsing is done (including inline markup parsing).
    """

    def parse_field_body(self, indented, offset, node):
        """Override `Body.parse_field_body` for simpler parsing."""
        lines = []
        # the trailing '' flushes the final paragraph
        for line in list(indented) + ['']:
            if line.strip():
                lines.append(line)
            elif lines:
                text = '\n'.join(lines)
                node += nodes.paragraph(text, text)
                lines = []
class LineBlock(SpecializedBody):

    """Second and subsequent lines of a line_block."""

    blank = SpecializedBody.invalid_input

    def line_block(self, match, context, next_state):
        """New line of line block."""
        lineno = self.state_machine.abs_line_number()
        line, messages, blank_finish = self.line_block_line(match, lineno)
        self.parent += line
        self.parent.parent += messages
        self.blank_finish = blank_finish
        return [], next_state, []
class Explicit(SpecializedBody):

    """Second and subsequent explicit markup construct."""

    def explicit_markup(self, match, context, next_state):
        """Footnotes, hyperlink targets, directives, comments."""
        nodelist, blank_finish = self.explicit_construct(match)
        self.parent += nodelist
        self.blank_finish = blank_finish
        return [], next_state, []

    def anonymous(self, match, context, next_state):
        """Anonymous hyperlink targets."""
        nodelist, blank_finish = self.anonymous_target(match)
        self.parent += nodelist
        self.blank_finish = blank_finish
        return [], next_state, []

    blank = SpecializedBody.invalid_input
class SubstitutionDef(Body):

    """
    Parser for the contents of a substitution_definition element.
    """

    patterns = {
          'embedded_directive': re.compile(r'(%s)::( +|$)'
                                           % Inliner.simplename, re.UNICODE),
          'text': r''}
    initial_transitions = ['embedded_directive', 'text']

    def embedded_directive(self, match, context, next_state):
        """Directive embedded in a substitution definition."""
        nodelist, blank_finish = self.directive(match,
                                                alt=self.parent['names'][0])
        self.parent += nodelist
        if not self.state_machine.at_eof():
            self.blank_finish = blank_finish
        raise EOFError

    def text(self, match, context, next_state):
        """Plain text inside a substitution definition: end the nested parse."""
        if not self.state_machine.at_eof():
            self.blank_finish = self.state_machine.is_next_line_blank()
        raise EOFError
class Text(RSTState):
2648
Classifier of second line of a text block.
2650
Could be a paragraph, a definition list item, or a title.
2653
patterns = {'underline': Body.patterns['line'],
2655
initial_transitions = [('underline', 'Body'), ('text', 'Body')]
2657
def blank(self, match, context, next_state):
2658
"""End of paragraph."""
2659
# NOTE: self.paragraph returns [ node, system_message(s) ], literalnext
2660
paragraph, literalnext = self.paragraph(
2661
context, self.state_machine.abs_line_number() - 1)
2662
self.parent += paragraph
2664
self.parent += self.literal_block()
2665
return [], 'Body', []
2667
def eof(self, context):
2669
self.blank(None, context, None)
2672
def indent(self, match, context, next_state):
2673
"""Definition list item."""
2674
definitionlist = nodes.definition_list()
2675
definitionlistitem, blank_finish = self.definition_list_item(context)
2676
definitionlist += definitionlistitem
2677
self.parent += definitionlist
2678
offset = self.state_machine.line_offset + 1 # next line
2679
newline_offset, blank_finish = self.nested_list_parse(
2680
self.state_machine.input_lines[offset:],
2681
input_offset=self.state_machine.abs_line_offset() + 1,
2682
node=definitionlist, initial_state='DefinitionList',
2683
blank_finish=blank_finish, blank_finish_state='Definition')
2684
self.goto_line(newline_offset)
2685
if not blank_finish:
2686
self.parent += self.unindent_warning('Definition list')
2687
return [], 'Body', []
2689
def underline(self, match, context, next_state):
2690
"""Section title."""
2691
lineno = self.state_machine.abs_line_number()
2692
title = context[0].rstrip()
2693
underline = match.string.rstrip()
2694
source = title + '\n' + underline
2696
if column_width(title) > len(underline):
2697
if len(underline) < 4:
2698
if self.state_machine.match_titles:
2699
msg = self.reporter.info(
2700
'Possible title underline, too short for the title.\n'
2701
"Treating it as ordinary text because it's so short.",
2704
raise statemachine.TransitionCorrection('text')
2706
blocktext = context[0] + '\n' + self.state_machine.line
2707
msg = self.reporter.warning('Title underline too short.',
2708
nodes.literal_block(blocktext, blocktext), line=lineno)
2709
messages.append(msg)
2710
if not self.state_machine.match_titles:
2711
blocktext = context[0] + '\n' + self.state_machine.line
2712
# We need get_source_and_line() here to report correctly
2713
src, srcline = self.state_machine.get_source_and_line()
2714
# TODO: why is abs_line_number() == srcline+1
2715
# if the error is in a table (try with test_tables.py)?
2716
# print "get_source_and_line", srcline
2717
# print "abs_line_number", self.state_machine.abs_line_number()
2718
msg = self.reporter.severe('Unexpected section title.',
2719
nodes.literal_block(blocktext, blocktext),
2720
source=src, line=srcline)
2721
self.parent += messages
2723
return [], next_state, []
2724
style = underline[0]
2726
self.section(title, source, style, lineno - 1, messages)
2727
return [], next_state, []
2729
def text(self, match, context, next_state):
2731
startline = self.state_machine.abs_line_number() - 1
2734
block = self.state_machine.get_text_block(flush_left=True)
2735
except statemachine.UnexpectedIndentationError, err:
2736
block, src, srcline = err.args
2737
msg = self.reporter.error('Unexpected indentation.',
2738
source=src, line=srcline)
2739
lines = context + list(block)
2740
paragraph, literalnext = self.paragraph(lines, startline)
2741
self.parent += paragraph
2745
self.state_machine.next_line()
2748
self.parent += self.literal_block()
2749
return [], next_state, []
2751
def literal_block(self):
2752
"""Return a list of nodes."""
2753
indented, indent, offset, blank_finish = \
2754
self.state_machine.get_indented()
2755
while indented and not indented[-1].strip():
2758
return self.quoted_literal_block()
2759
data = '\n'.join(indented)
2760
literal_block = nodes.literal_block(data, data)
2761
literal_block.line = offset + 1
2762
nodelist = [literal_block]
2763
if not blank_finish:
2764
nodelist.append(self.unindent_warning('Literal block'))
2767
def quoted_literal_block(self):
2768
abs_line_offset = self.state_machine.abs_line_offset()
2769
offset = self.state_machine.line_offset
2770
parent_node = nodes.Element()
2771
new_abs_offset = self.nested_parse(
2772
self.state_machine.input_lines[offset:],
2773
input_offset=abs_line_offset, node=parent_node, match_titles=False,
2774
state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
2775
'initial_state': 'QuotedLiteralBlock'})
2776
self.goto_line(new_abs_offset)
2777
return parent_node.children
2779
def definition_list_item(self, termline):
2780
indented, indent, line_offset, blank_finish = \
2781
self.state_machine.get_indented()
2782
itemnode = nodes.definition_list_item(
2783
'\n'.join(termline + list(indented)))
2784
lineno = self.state_machine.abs_line_number() - 1
2786
itemnode.line) = self.state_machine.get_source_and_line(lineno)
2787
termlist, messages = self.term(termline, lineno)
2788
itemnode += termlist
2789
definition = nodes.definition('', *messages)
2790
itemnode += definition
2791
if termline[0][-2:] == '::':
2792
definition += self.reporter.info(
2793
'Blank line missing before literal block (after the "::")? '
2794
'Interpreted as a definition list item.',
2796
self.nested_parse(indented, input_offset=line_offset, node=definition)
2797
return itemnode, blank_finish
2799
classifier_delimiter = re.compile(' +: +')
2801
def term(self, lines, lineno):
2802
"""Return a definition_list's term and optional classifiers."""
2803
assert len(lines) == 1
2804
text_nodes, messages = self.inline_text(lines[0], lineno)
2805
term_node = nodes.term()
2807
term_node.line) = self.state_machine.get_source_and_line(lineno)
2808
term_node.rawsource = unescape(lines[0])
2809
node_list = [term_node]
2810
for i in range(len(text_nodes)):
2811
node = text_nodes[i]
2812
if isinstance(node, nodes.Text):
2813
parts = self.classifier_delimiter.split(node.rawsource)
2815
node_list[-1] += node
2818
node_list[-1] += nodes.Text(parts[0].rstrip())
2819
for part in parts[1:]:
2820
classifier_node = nodes.classifier('', part)
2821
node_list.append(classifier_node)
2823
node_list[-1] += node
2824
return node_list, messages
2827
class SpecializedText(Text):

    """
    Superclass for second and subsequent lines of Text-variants.

    All transition methods are disabled. Override individual methods in
    subclasses to re-enable.
    """

    def eof(self, context):
        """Incomplete construct."""
        return []

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        raise EOFError

    blank = invalid_input
    indent = invalid_input
    underline = invalid_input
    text = invalid_input
class Definition(SpecializedText):

    """Second line of potential definition_list_item."""

    def eof(self, context):
        """Not a definition."""
        self.state_machine.previous_line(2) # so parent SM can reassess
        return []

    def indent(self, match, context, next_state):
        """Definition list item."""
        itemnode, blank_finish = self.definition_list_item(context)
        self.parent += itemnode
        self.blank_finish = blank_finish
        return [], 'DefinitionList', []
class Line(SpecializedText):

    """
    Second line of over- & underlined section title or transition marker.
    """

    eofcheck = 1                        # @@@ ???
    """Set to 0 while parsing sections, so that we don't catch the EOF."""

    def eof(self, context):
        """Transition marker at end of section or document."""
        marker = context[0].strip()
        if self.memo.section_bubble_up_kludge:
            self.memo.section_bubble_up_kludge = False
        elif len(marker) < 4:
            self.state_correction(context)
        if self.eofcheck:               # ignore EOFError with sections
            lineno = self.state_machine.abs_line_number() - 1
            transition = nodes.transition(rawsource=context[0])
            transition.line = lineno
            self.parent += transition
        self.eofcheck = 1
        return []

    def blank(self, match, context, next_state):
        """Transition marker."""
        src, srcline = self.state_machine.get_source_and_line()
        marker = context[0].strip()
        if len(marker) < 4:
            self.state_correction(context)
        transition = nodes.transition(rawsource=marker)
        transition.source = src
        transition.line = srcline - 1
        self.parent += transition
        return [], 'Body', []

    def text(self, match, context, next_state):
        """Potential over- & underlined title."""
        lineno = self.state_machine.abs_line_number() - 1
        overline = context[0]
        title = match.string
        underline = ''
        try:
            underline = self.state_machine.next_line()
        except EOFError:
            blocktext = overline + '\n' + title
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Incomplete section title.',
                    nodes.literal_block(blocktext, blocktext),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        source = '%s\n%s\n%s' % (overline, title, underline)
        overline = overline.rstrip()
        underline = underline.rstrip()
        if not self.transitions['underline'][0].match(underline):
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Missing matching underline for section title overline.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        elif overline != underline:
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Title overline & underline mismatch.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        title = title.rstrip()
        messages = []
        if column_width(title) > len(overline):
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.warning(
                    'Title overline too short.',
                    nodes.literal_block(source, source),
                    line=lineno)
                messages.append(msg)
        style = (overline[0], underline[0])
        self.eofcheck = 0               # @@@ not sure this is correct
        self.section(title.lstrip(), source, style, lineno + 1, messages)
        self.eofcheck = 1
        return [], 'Body', []

    indent = text                       # indented title

    def underline(self, match, context, next_state):
        """Overline followed directly by another line-marker: invalid."""
        overline = context[0]
        blocktext = overline + '\n' + self.state_machine.line
        lineno = self.state_machine.abs_line_number() - 1
        if len(overline.rstrip()) < 4:
            self.short_overline(context, blocktext, lineno, 1)
        msg = self.reporter.error(
            'Invalid section title or transition marker.',
            nodes.literal_block(blocktext, blocktext),
            line=lineno)
        self.parent += msg
        return [], 'Body', []

    def short_overline(self, context, blocktext, lineno, lines=1):
        """Report a too-short overline and re-parse it as ordinary text."""
        msg = self.reporter.info(
            'Possible incomplete section title.\nTreating the overline as '
            "ordinary text because it's so short.",
            line=lineno)
        self.parent += msg
        self.state_correction(context, lines)

    def state_correction(self, context, lines=1):
        """Back up and retry the backed-up lines in the 'Body' state."""
        self.state_machine.previous_line(lines)
        context[:] = []
        raise statemachine.StateCorrection('Body', 'text')
class QuotedLiteralBlock(RSTState):

    """
    Nested parse handler for quoted (unindented) literal blocks.

    Special-purpose.  Not for inclusion in `state_classes`.
    """

    patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
                'text': r''}
    initial_transitions = ('initial_quoted', 'text')

    def __init__(self, state_machine, debug=False):
        RSTState.__init__(self, state_machine, debug)
        # Accumulated system messages, flushed to the parent on EOF.
        self.messages = []
        # Absolute line number of the block's first line (set in
        # `initial_quoted`), used for source/line attribution.
        self.initial_lineno = None

    def blank(self, match, context, next_state):
        """A blank line ends the quoted block (if one has started)."""
        if context:
            raise EOFError
        else:
            return context, next_state, []

    def eof(self, context):
        """Emit the collected literal block (or a warning if empty)."""
        if context:
            src, srcline = self.state_machine.get_source_and_line(
                self.initial_lineno)
            text = '\n'.join(context)
            literal_block = nodes.literal_block(text, text)
            literal_block.source = src
            literal_block.line = srcline
            self.parent += literal_block
        else:
            self.parent += self.reporter.warning(
                'Literal block expected; none found.',
                line=self.state_machine.abs_line_number())
            # src not available, because statemachine.input_lines is empty
            self.state_machine.previous_line()
        self.parent += self.messages
        return []

    def indent(self, match, context, next_state):
        """Indented text inside a quoted block is an error; end the block."""
        assert context, ('QuotedLiteralBlock.indent: context should not '
                         'be empty!')
        self.messages.append(
            self.reporter.error('Unexpected indentation.',
                                line=self.state_machine.abs_line_number()))
        # Let the enclosing state machine re-read this line.
        self.state_machine.previous_line()
        raise EOFError

    def initial_quoted(self, match, context, next_state):
        """Match arbitrary quote character on the first line only."""
        self.remove_transition('initial_quoted')
        quote = match.string[0]
        pattern = re.compile(re.escape(quote), re.UNICODE)
        # New transition matches consistent quotes only:
        self.add_transition('quoted',
                            (pattern, self.quoted, self.__class__.__name__))
        self.initial_lineno = self.state_machine.abs_line_number()
        return [match.string], next_state, []

    def quoted(self, match, context, next_state):
        """Match consistent quotes on subsequent lines."""
        context.append(match.string)
        return context, next_state, []

    def text(self, match, context, next_state):
        """Unquoted text inside a quoted block is an error; end the block."""
        if context:
            self.messages.append(
                self.reporter.error('Inconsistent literal block quoting.',
                                    line=self.state_machine.abs_line_number()))
            self.state_machine.previous_line()
        raise EOFError
state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
                 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
                 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
"""Standard set of State classes used to start `RSTStateMachine`."""