144
145
def run(self, input_lines, document, input_offset=0, match_titles=1,
147
Parse `input_lines` and return a `docutils.nodes.document` instance.
148
Parse `input_lines` and modify the `document` node in place.
149
Extend `StateMachineWS.run()`: set up parse-global data, run the
150
StateMachine, and return the resulting
150
Extend `StateMachineWS.run()`: set up parse-global data and
151
run the StateMachine.
153
153
self.language = languages.get_language(
154
154
document.settings.language_code)
170
170
results = StateMachineWS.run(self, input_lines, input_offset,
171
171
input_source=document['source'])
172
172
assert results == [], 'RSTStateMachine.run() results should be empty!'
173
self.check_document()
174
173
self.node = self.memo = None # remove unneeded references
176
def check_document(self):
177
"""Check for illegal structure: empty document."""
178
if len(self.document) == 0:
179
error = self.reporter.error(
180
'Document empty; must have contents.', line=0)
181
self.document += error
184
176
class NestedStateMachine(StateMachineWS):
383
376
self.state_machine.input_lines[offset:], input_offset=absoffset,
384
377
node=section_node, match_titles=1)
385
378
self.goto_line(newabsoffset)
386
self.check_section(section_node)
387
379
if memo.section_level <= mylevel: # can't handle next section?
388
380
raise EOFError # bubble up to supersection
389
381
# reset section_level; next pass will detect it properly
390
382
memo.section_level = mylevel
392
def check_section(self, section):
394
Check for illegal structure: empty section, misplaced transitions.
396
lineno = section.line
397
if len(section) <= 1:
398
error = self.reporter.error(
399
'Section empty; must have contents.', line=lineno)
402
if not isinstance(section[0], nodes.title): # shouldn't ever happen
403
error = self.reporter.error(
404
'First element of section must be a title.', line=lineno)
405
section.insert(0, error)
406
if isinstance(section[1], nodes.transition):
407
error = self.reporter.error(
408
'Section may not begin with a transition.',
409
line=section[1].line)
410
section.insert(1, error)
411
if len(section) > 2 and isinstance(section[-1], nodes.transition):
412
error = self.reporter.error(
413
'Section may not end with a transition.',
414
line=section[-1].line)
417
384
def paragraph(self, lines, lineno):
419
386
Return a list (paragraph & messages) & a boolean: literal_block next?
421
388
data = '\n'.join(lines).rstrip()
422
if data[-2:] == '::':
389
if re.search(r'(?<!\\)(\\\\)*::$', data):
423
390
if len(data) == 2:
425
392
elif data[-3] in ' \n':
556
518
# Valid URI characters (see RFC 2396 & RFC 2732);
557
519
# final \x00 allows backslash escapes in URIs:
558
520
uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
521
# Delimiter indicating the end of a URI (not part of the URI):
522
uri_end_delim = r"""[>]"""
559
523
# Last URI character; same as uric but no punctuation:
560
urilast = r"""[_~/a-zA-Z0-9]"""
524
urilast = r"""[_~*/=+a-zA-Z0-9]"""
525
# End of a URI (either 'urilast' or 'uric followed by a
527
uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
561
528
emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
562
529
email_pattern = r"""
563
530
%(emailc)s+(?:\.%(emailc)s+)* # name
565
532
%(emailc)s+(?:\.%(emailc)s*)* # host
566
%(urilast)s # final URI char
533
%(uri_end)s # final URI char
568
535
parts = ('initial_inline', start_string_prefix, '',
569
536
[('start', '', non_whitespace_after, # simple start-strings
774
740
prb = self.problematic(text, text, msg)
775
741
return string[:rolestart], [prb], string[textend:], [msg]
776
742
return self.phrase_ref(string[:matchstart], string[textend:],
777
rawsource, escaped, text)
743
rawsource, escaped, unescape(escaped))
779
745
rawsource = unescape(string[rolestart:textend], 1)
780
nodelist, messages = self.interpreted(rawsource, text, role,
746
nodelist, messages = self.interpreted(rawsource, escaped, role,
782
748
return (string[:rolestart], nodelist,
783
749
string[textend:], messages)
966
925
pepnum = int(match.group('pepnum2'))
968
927
raise MarkupMismatch
969
ref = self.pep_url % pepnum
928
ref = self.document.settings.pep_base_url + self.pep_url % pepnum
970
929
unescaped = unescape(text, 0)
971
930
return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
973
rfc_url = 'http://www.faqs.org/rfcs/rfc%d.html'
932
rfc_url = 'rfc%d.html'
975
934
def rfc_reference(self, match, lineno):
976
935
text = match.group(0)
977
936
if text.startswith('RFC'):
978
937
rfcnum = int(match.group('rfcnum'))
979
ref = self.rfc_url % rfcnum
938
ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
981
940
raise MarkupMismatch
982
941
unescaped = unescape(text, 0)
1037
1009
'lowerroman': '[ivxlcdm]+',
1038
1010
'upperroman': '[IVXLCDM]+',}
1039
1011
enum.converters = {'arabic': int,
1041
lambda s, zero=(ord('a')-1): ord(s) - zero,
1043
lambda s, zero=(ord('A')-1): ord(s) - zero,
1045
lambda s: roman.fromRoman(s.upper()),
1012
'loweralpha': _loweralpha_to_int,
1013
'upperalpha': _upperalpha_to_int,
1014
'lowerroman': _lowerroman_to_int,
1046
1015
'upperroman': roman.fromRoman}
1048
1017
enum.sequenceregexps = {}
1067
1036
pats['alphanum'] = '[a-zA-Z0-9]'
1068
1037
pats['alphanumplus'] = '[a-zA-Z0-9_-]'
1069
1038
pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1070
'|%(upperroman)s)' % enum.sequencepats)
1039
'|%(upperroman)s|#)' % enum.sequencepats)
1071
1040
pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1072
1041
# @@@ Loosen up the pattern? Allow Unicode?
1073
pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<%(alphanum)s[^ <>]+>)' % pats
1042
pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1074
1043
pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1075
1044
pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1076
1045
pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats
1084
1053
'bullet': r'[-+*]( +|$)',
1085
1054
'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
1086
'field_marker': r':[^: ]([^:]*[^: ])?:( +|$)',
1055
'field_marker': r':(?![: ])([^:\\]|\\.)*(?<! ):( +|$)',
1087
1056
'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
1088
1057
'doctest': r'>>>( +|$)',
1058
'line_block': r'\|( +|$)',
1089
1059
'grid_table_top': grid_table_top_pat,
1090
1060
'simple_table_top': simple_table_top_pat,
1091
1061
'explicit_markup': r'\.\.( +|$)',
1185
1159
i, blank_finish = self.list_item(match.end())
1186
1160
bulletlist += i
1187
1161
offset = self.state_machine.line_offset + 1 # next line
1188
newline_offset, blank_finish = self.nested_list_parse(
1162
new_line_offset, blank_finish = self.nested_list_parse(
1189
1163
self.state_machine.input_lines[offset:],
1190
1164
input_offset=self.state_machine.abs_line_offset() + 1,
1191
1165
node=bulletlist, initial_state='BulletList',
1192
1166
blank_finish=blank_finish)
1193
self.goto_line(newline_offset)
1167
self.goto_line(new_line_offset)
1194
1168
if not blank_finish:
1195
1169
self.parent += self.unindent_warning('Bullet list')
1196
1170
return [], next_state, []
1198
1172
def list_item(self, indent):
1199
indented, line_offset, blank_finish = \
1200
self.state_machine.get_known_indented(indent)
1173
if self.state_machine.line[indent:]:
1174
indented, line_offset, blank_finish = (
1175
self.state_machine.get_known_indented(indent))
1177
indented, indent, line_offset, blank_finish = (
1178
self.state_machine.get_first_known_indented(indent))
1201
1179
listitem = nodes.list_item('\n'.join(indented))
1203
1181
self.nested_parse(indented, input_offset=line_offset,
1209
1187
format, sequence, text, ordinal = self.parse_enumerator(match)
1210
1188
if not self.is_enumerated_list_item(ordinal, sequence, format):
1211
1189
raise statemachine.TransitionCorrection('text')
1213
msg = self.reporter.info(
1214
'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1215
% (text, ordinal), line=self.state_machine.abs_line_number())
1217
1190
enumlist = nodes.enumerated_list()
1218
1191
self.parent += enumlist
1219
enumlist['enumtype'] = sequence
1221
enumlist['start'] = ordinal
1193
enumlist['enumtype'] = 'arabic'
1195
enumlist['enumtype'] = sequence
1222
1196
enumlist['prefix'] = self.enum.formatinfo[format].prefix
1223
1197
enumlist['suffix'] = self.enum.formatinfo[format].suffix
1199
enumlist['start'] = ordinal
1200
msg = self.reporter.info(
1201
'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1202
% (text, ordinal), line=self.state_machine.abs_line_number())
1224
1204
listitem, blank_finish = self.list_item(match.end())
1225
1205
enumlist += listitem
1226
1206
offset = self.state_machine.line_offset + 1 # next line
1303
1290
self.state_machine.previous_line()
1304
1291
if not next_line[:1].strip(): # blank or indented
1306
next_enumerator = self.make_enumerator(ordinal + 1, sequence, format)
1308
if next_line.startswith(next_enumerator):
1293
result = self.make_enumerator(ordinal + 1, sequence, format)
1295
next_enumerator, auto_enumerator = result
1297
if ( next_line.startswith(next_enumerator) or
1298
next_line.startswith(auto_enumerator) ):
1314
1304
def make_enumerator(self, ordinal, sequence, format):
1316
Construct and return an enumerated list item marker.
1306
Construct and return the next enumerated list item marker, and an
1307
auto-enumerator ("#" instead of the regular enumerator).
1318
1309
Return ``None`` for invalid (out of range) ordinals.
1320
if sequence == 'arabic':
1313
elif sequence == 'arabic':
1321
1314
enumerator = str(ordinal)
1323
1316
if sequence.endswith('alpha'):
1445
1441
delimiter = ' '
1446
1442
firstopt = tokens[0].split('=')
1447
1443
if len(firstopt) > 1:
1444
# "--opt=value" form
1448
1445
tokens[:1] = firstopt
1449
1446
delimiter = '='
1450
1447
elif (len(tokens[0]) > 2
1451
1448
and ((tokens[0].startswith('-')
1452
1449
and not tokens[0].startswith('--'))
1453
1450
or tokens[0].startswith('+'))):
1454
1452
tokens[:1] = [tokens[0][:2], tokens[0][2:]]
1454
if len(tokens) > 1 and (tokens[1].startswith('<')
1455
and tokens[-1].endswith('>')):
1456
# "-o <value1 value2>" form; join all values into one token
1457
tokens[1:] = [' '.join(tokens[1:])]
1456
1458
if 0 < len(tokens) <= 2:
1457
1459
option = nodes.option(optionstring)
1458
1460
option += nodes.option_string(tokens[0], tokens[0])
1472
1474
self.parent += nodes.doctest_block(data, data)
1473
1475
return [], next_state, []
1477
def line_block(self, match, context, next_state):
1478
"""First line of a line block."""
1479
block = nodes.line_block()
1480
self.parent += block
1481
lineno = self.state_machine.abs_line_number()
1482
line, messages, blank_finish = self.line_block_line(match, lineno)
1484
self.parent += messages
1485
if not blank_finish:
1486
offset = self.state_machine.line_offset + 1 # next line
1487
new_line_offset, blank_finish = self.nested_list_parse(
1488
self.state_machine.input_lines[offset:],
1489
input_offset=self.state_machine.abs_line_offset() + 1,
1490
node=block, initial_state='LineBlock',
1492
self.goto_line(new_line_offset)
1493
if not blank_finish:
1494
self.parent += self.reporter.warning(
1495
'Line block ends without a blank line.',
1496
line=(self.state_machine.abs_line_number() + 1))
1498
if block[0].indent is None:
1500
self.nest_line_block_lines(block)
1501
return [], next_state, []
1503
def line_block_line(self, match, lineno):
1504
"""Return one line element of a line_block."""
1505
indented, indent, line_offset, blank_finish = \
1506
self.state_machine.get_first_known_indented(match.end(),
1508
text = u'\n'.join(indented)
1509
text_nodes, messages = self.inline_text(text, lineno)
1510
line = nodes.line(text, '', *text_nodes)
1511
if match.string.rstrip() != '|': # not empty
1512
line.indent = len(match.group(1)) - 1
1513
return line, messages, blank_finish
1515
def nest_line_block_lines(self, block):
1516
for index in range(1, len(block)):
1517
if block[index].indent is None:
1518
block[index].indent = block[index - 1].indent
1519
self.nest_line_block_segment(block)
1521
def nest_line_block_segment(self, block):
1522
indents = [item.indent for item in block]
1523
least = min(indents)
1525
new_block = nodes.line_block()
1527
if item.indent > least:
1528
new_block.append(item)
1531
self.nest_line_block_segment(new_block)
1532
new_items.append(new_block)
1533
new_block = nodes.line_block()
1534
new_items.append(item)
1536
self.nest_line_block_segment(new_block)
1537
new_items.append(new_block)
1538
block[:] = new_items
1475
1540
def grid_table_top(self, match, context, next_state):
1476
1541
"""Top border of a full table."""
1477
1542
return self.table_top(match, context, next_state,
1588
1656
return [], messages, not extra
1589
1657
self.state_machine.next_line(end - start)
1590
1658
block = lines[start:end+1]
1659
# for East Asian chars:
1660
block.pad_double_width(self.double_width_pad_char)
1591
1661
return block, [], end == limit or not lines[end+1].strip()
1593
1663
def malformed_table(self, block, detail=''):
1664
block.replace(self.double_width_pad_char, '')
1594
1665
data = '\n'.join(block)
1595
1666
message = 'Malformed table.'
1596
1667
lineno = self.state_machine.abs_line_number() - len(block) + 1
1603
def build_table(self, tabledata, tableline):
1604
colspecs, headrows, bodyrows = tabledata
1674
def build_table(self, tabledata, tableline, stub_columns=0):
1675
colwidths, headrows, bodyrows = tabledata
1605
1676
table = nodes.table()
1606
tgroup = nodes.tgroup(cols=len(colspecs))
1677
tgroup = nodes.tgroup(cols=len(colwidths))
1607
1678
table += tgroup
1608
for colspec in colspecs:
1609
tgroup += nodes.colspec(colwidth=colspec)
1679
for colwidth in colwidths:
1680
colspec = nodes.colspec(colwidth=colwidth)
1682
colspec.attributes['stub'] = 1
1611
1686
thead = nodes.thead()
1612
1687
tgroup += thead
1783
1859
refname = self.is_reference(reference)
1785
1861
return 'refname', refname
1786
reference = ''.join([line.strip() for line in block])
1787
if reference.find(' ') == -1:
1788
return 'refuri', unescape(reference)
1790
warning = self.reporter.warning(
1791
'Hyperlink target contains whitespace. Perhaps a footnote '
1793
nodes.literal_block(block_text, block_text), line=lineno)
1794
return 'malformed', warning
1862
reference = ''.join([''.join(line.split()) for line in block])
1863
return 'refuri', unescape(reference)
1796
1865
def is_reference(self, reference):
1797
1866
match = self.explicit.patterns.reference.match(
1851
1917
subname = subdefmatch.group('name')
1852
1918
substitution_node = nodes.substitution_definition(blocktext)
1853
1919
substitution_node.line = lineno
1854
self.document.note_substitution_def(
1855
substitution_node,subname, self.parent)
1857
block[0] = block[0].strip()
1858
new_abs_offset, blank_finish = self.nested_list_parse(
1859
block, input_offset=offset, node=substitution_node,
1860
initial_state='SubstitutionDef', blank_finish=blank_finish)
1862
for node in substitution_node[:]:
1863
if not (isinstance(node, nodes.Inline) or
1864
isinstance(node, nodes.Text)):
1865
self.parent += substitution_node[i]
1866
del substitution_node[i]
1869
if len(substitution_node) == 0:
1870
msg = self.reporter.warning(
1871
'Substitution definition "%s" empty or invalid.'
1873
nodes.literal_block(blocktext, blocktext), line=lineno)
1921
msg = self.reporter.warning(
1922
'Substitution definition "%s" missing contents.' % subname,
1923
nodes.literal_block(blocktext, blocktext), line=lineno)
1924
return [msg], blank_finish
1925
block[0] = block[0].strip()
1926
substitution_node['names'].append(
1927
nodes.whitespace_normalize_name(subname))
1928
new_abs_offset, blank_finish = self.nested_list_parse(
1929
block, input_offset=offset, node=substitution_node,
1930
initial_state='SubstitutionDef', blank_finish=blank_finish)
1932
for node in substitution_node[:]:
1933
if not (isinstance(node, nodes.Inline) or
1934
isinstance(node, nodes.Text)):
1935
self.parent += substitution_node[i]
1936
del substitution_node[i]
1939
for node in substitution_node.traverse(nodes.Element):
1940
if self.disallowed_inside_substitution_definitions(node):
1941
pformat = nodes.literal_block('', node.pformat().rstrip())
1942
msg = self.reporter.error(
1943
'Substitution definition contains illegal element:',
1944
pformat, nodes.literal_block(blocktext, blocktext),
1874
1946
return [msg], blank_finish
1876
return [substitution_node], blank_finish
1947
if len(substitution_node) == 0:
1878
1948
msg = self.reporter.warning(
1879
'Substitution definition "%s" missing contents.' % subname,
1949
'Substitution definition "%s" empty or invalid.'
1880
1951
nodes.literal_block(blocktext, blocktext), line=lineno)
1881
1952
return [msg], blank_finish
1953
self.document.note_substitution_def(
1954
substitution_node, subname, self.parent)
1955
return [substitution_node], blank_finish
1957
def disallowed_inside_substitution_definitions(self, node):
1959
isinstance(node, nodes.reference) and node.get('anonymous') or
1960
isinstance(node, nodes.footnote_reference) and node.get('auto')):
1883
1965
def directive(self, match, **option_presets):
1884
1966
"""Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
1927
2009
directive_fn, option_presets))
1928
2010
except MarkupError, detail:
1929
2011
error = self.reporter.error(
1930
'Error in "%s" directive:\n%s.' % (type_name, detail),
2012
'Error in "%s" directive:\n%s.' % (type_name,
2013
' '.join(detail.args)),
1931
2014
nodes.literal_block(block_text, block_text), line=lineno)
1932
2015
return [error], blank_finish
1933
2016
result = directive_fn(type_name, arguments, options, content, lineno,
2310
2397
"""Enumerated list item."""
2311
2398
format, sequence, text, ordinal = self.parse_enumerator(
2312
2399
match, self.parent['enumtype'])
2313
if (sequence != self.parent['enumtype'] or
2314
format != self.format or
2315
ordinal != (self.lastordinal + 1) or
2316
not self.is_enumerated_list_item(ordinal, sequence, format)):
2400
if ( format != self.format
2401
or (sequence != '#' and (sequence != self.parent['enumtype']
2403
or ordinal != (self.lastordinal + 1)))
2404
or not self.is_enumerated_list_item(ordinal, sequence, format)):
2317
2405
# different enumeration: new list
2318
2406
self.invalid_input()
2319
2409
listitem, blank_finish = self.list_item(match.end())
2320
2410
self.parent += listitem
2321
2411
self.blank_finish = blank_finish
2480
class LineBlock(SpecializedBody):
2482
"""Second and subsequent lines of a line_block."""
2484
blank = SpecializedBody.invalid_input
2486
def line_block(self, match, context, next_state):
2487
"""New line of line block."""
2488
lineno = self.state_machine.abs_line_number()
2489
line, messages, blank_finish = self.line_block_line(match, lineno)
2491
self.parent.parent += messages
2492
self.blank_finish = blank_finish
2493
return [], next_state, []
2390
2496
class Explicit(SpecializedBody):
2392
2498
"""Second and subsequent explicit markup construct."""
2588
2697
for i in range(len(text_nodes)):
2589
2698
node = text_nodes[i]
2590
2699
if isinstance(node, nodes.Text):
2591
parts = node.rawsource.split(' : ', 1)
2700
parts = self.classifier_delimiter.split(node.rawsource)
2592
2701
if len(parts) == 1:
2702
node_list[-1] += node
2595
term_node += nodes.Text(parts[0].rstrip())
2596
classifier_node = nodes.classifier('', parts[1])
2597
classifier_node += text_nodes[i+1:]
2598
node_list.append(classifier_node)
2705
node_list[-1] += nodes.Text(parts[0].rstrip())
2706
for part in parts[1:]:
2707
classifier_node = nodes.classifier('', part)
2708
node_list.append(classifier_node)
2710
node_list[-1] += node
2602
2711
return node_list, messages
2676
2781
marker = context[0].strip()
2677
2782
if len(marker) < 4:
2678
2783
self.state_correction(context)
2679
transition = nodes.transition(marker)
2784
transition = nodes.transition(rawsource=marker)
2680
2785
transition.line = lineno
2681
if len(self.parent) == 0:
2682
msg = self.reporter.error(
2683
'Document or section may not begin with a transition.',
2686
elif isinstance(self.parent[-1], nodes.transition):
2687
msg = self.reporter.error(
2688
'At least one body element must separate transitions; '
2689
'adjacent transitions not allowed.',
2692
2786
self.parent += transition
2693
2787
return [], 'Body', []
2851
2945
state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
2852
OptionList, ExtensionOptions, Explicit, Text, Definition,
2853
Line, SubstitutionDef, RFC2822Body, RFC2822List)
2946
OptionList, LineBlock, ExtensionOptions, Explicit, Text,
2947
Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
2854
2948
"""Standard set of State classes used to start `RSTStateMachine`."""
2857
def escape2null(text):
2858
"""Return a string with escape-backslashes converted to nulls."""
2862
found = text.find('\\', start)
2864
parts.append(text[start:])
2865
return ''.join(parts)
2866
parts.append(text[start:found])
2867
parts.append('\x00' + text[found+1:found+2])
2868
start = found + 2 # skip character after escape
2870
def unescape(text, restore_backslashes=0):
2872
Return a string with nulls removed or restored to backslashes.
2873
Backslash-escaped spaces are also removed.
2875
if restore_backslashes:
2876
return text.replace('\x00', '\\')
2878
for sep in ['\x00 ', '\x00\n', '\x00']:
2879
text = ''.join(text.split(sep))