~ubuntu-branches/ubuntu/hardy/python-docutils/hardy

« back to all changes in this revision

Viewing changes to docutils/parsers/rst/states.py

  • Committer: Bazaar Package Importer
  • Author(s): martin f. krafft
  • Date: 2006-07-10 11:45:05 UTC
  • mfrom: (2.1.4 edgy)
  • Revision ID: james.westby@ubuntu.com-20060710114505-otkhqcslevewxmz5
Tags: 0.4-3
Added build dependency on python-central (closes: #377580).

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
# Author: David Goodger
2
2
# Contact: goodger@users.sourceforge.net
3
 
# Revision: $Revision: 1.72 $
4
 
# Date: $Date: 2004/04/27 19:47:28 $
 
3
# Revision: $Revision: 4258 $
 
4
# Date: $Date: 2006-01-09 04:29:23 +0100 (Mon, 09 Jan 2006) $
5
5
# Copyright: This module has been placed in the public domain.
6
6
 
7
7
"""
114
114
from docutils.statemachine import StateMachineWS, StateWS
115
115
from docutils.nodes import fully_normalize_name as normalize_name
116
116
from docutils.nodes import whitespace_normalize_name
 
117
from docutils.utils import escape2null, unescape, column_width
117
118
from docutils.parsers.rst import directives, languages, tableparser, roles
118
119
from docutils.parsers.rst.languages import en as _fallback_language_module
119
120
 
144
145
    def run(self, input_lines, document, input_offset=0, match_titles=1,
145
146
            inliner=None):
146
147
        """
147
 
        Parse `input_lines` and return a `docutils.nodes.document` instance.
 
148
        Parse `input_lines` and modify the `document` node in place.
148
149
 
149
 
        Extend `StateMachineWS.run()`: set up parse-global data, run the
150
 
        StateMachine, and return the resulting
151
 
        document.
 
150
        Extend `StateMachineWS.run()`: set up parse-global data and
 
151
        run the StateMachine.
152
152
        """
153
153
        self.language = languages.get_language(
154
154
            document.settings.language_code)
170
170
        results = StateMachineWS.run(self, input_lines, input_offset,
171
171
                                     input_source=document['source'])
172
172
        assert results == [], 'RSTStateMachine.run() results should be empty!'
173
 
        self.check_document()
174
173
        self.node = self.memo = None    # remove unneeded references
175
174
 
176
 
    def check_document(self):
177
 
        """Check for illegal structure: empty document."""
178
 
        if len(self.document) == 0:
179
 
            error = self.reporter.error(
180
 
                'Document empty; must have contents.', line=0)
181
 
            self.document += error
182
 
 
183
175
 
184
176
class NestedStateMachine(StateMachineWS):
185
177
 
199
191
        self.document = memo.document
200
192
        self.attach_observer(self.document.note_source)
201
193
        self.reporter = memo.reporter
 
194
        self.language = memo.language
202
195
        self.node = node
203
196
        results = StateMachineWS.run(self, input_lines, input_offset)
204
197
        assert results == [], ('NestedStateMachine.run() results should be '
289
282
                          state_machine_kwargs=None):
290
283
        """
291
284
        Create a new StateMachine rooted at `node` and run it over the input
292
 
        `block`. Also keep track of optional intermdediate blank lines and the
 
285
        `block`. Also keep track of optional intermediate blank lines and the
293
286
        required final one.
294
287
        """
295
288
        if state_machine_class is None:
372
365
        textnodes, title_messages = self.inline_text(title, lineno)
373
366
        titlenode = nodes.title(title, '', *textnodes)
374
367
        name = normalize_name(titlenode.astext())
375
 
        section_node['name'] = name
 
368
        section_node['names'].append(name)
376
369
        section_node += titlenode
377
370
        section_node += messages
378
371
        section_node += title_messages
383
376
              self.state_machine.input_lines[offset:], input_offset=absoffset,
384
377
              node=section_node, match_titles=1)
385
378
        self.goto_line(newabsoffset)
386
 
        self.check_section(section_node)
387
379
        if memo.section_level <= mylevel: # can't handle next section?
388
380
            raise EOFError              # bubble up to supersection
389
381
        # reset section_level; next pass will detect it properly
390
382
        memo.section_level = mylevel
391
383
 
392
 
    def check_section(self, section):
393
 
        """
394
 
        Check for illegal structure: empty section, misplaced transitions.
395
 
        """
396
 
        lineno = section.line
397
 
        if len(section) <= 1:
398
 
            error = self.reporter.error(
399
 
                'Section empty; must have contents.', line=lineno)
400
 
            section += error
401
 
            return
402
 
        if not isinstance(section[0], nodes.title): # shouldn't ever happen
403
 
            error = self.reporter.error(
404
 
                'First element of section must be a title.', line=lineno)
405
 
            section.insert(0, error)
406
 
        if isinstance(section[1], nodes.transition):
407
 
            error = self.reporter.error(
408
 
                'Section may not begin with a transition.',
409
 
                line=section[1].line)
410
 
            section.insert(1, error)
411
 
        if len(section) > 2 and isinstance(section[-1], nodes.transition):
412
 
            error = self.reporter.error(
413
 
                'Section may not end with a transition.',
414
 
                line=section[-1].line)
415
 
            section += error
416
 
 
417
384
    def paragraph(self, lines, lineno):
418
385
        """
419
386
        Return a list (paragraph & messages) & a boolean: literal_block next?
420
387
        """
421
388
        data = '\n'.join(lines).rstrip()
422
 
        if data[-2:] == '::':
 
389
        if re.search(r'(?<!\\)(\\\\)*::$', data):
423
390
            if len(data) == 2:
424
391
                return [], 1
425
392
            elif data[-3] in ' \n':
476
443
    Parse inline markup; call the `parse()` method.
477
444
    """
478
445
 
479
 
    def __init__(self, roles=None):
480
 
        """
481
 
        `roles` is a mapping of canonical role name to role function or bound
482
 
        method, which enables additional interpreted text roles.
483
 
        """
484
 
 
 
446
    def __init__(self):
485
447
        self.implicit_dispatch = [(self.patterns.uri, self.standalone_uri),]
486
448
        """List of (pattern, bound method) tuples, used by
487
449
        `self.implicit_inline`."""
556
518
    # Valid URI characters (see RFC 2396 & RFC 2732);
557
519
    # final \x00 allows backslash escapes in URIs:
558
520
    uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
 
521
    # Delimiter indicating the end of a URI (not part of the URI):
 
522
    uri_end_delim = r"""[>]"""
559
523
    # Last URI character; same as uric but no punctuation:
560
 
    urilast = r"""[_~/a-zA-Z0-9]"""
 
524
    urilast = r"""[_~*/=+a-zA-Z0-9]"""
 
525
    # End of a URI (either 'urilast' or 'uric followed by a
 
526
    # uri_end_delim'):
 
527
    uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
561
528
    emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
562
529
    email_pattern = r"""
563
530
          %(emailc)s+(?:\.%(emailc)s+)*   # name
564
 
          @                               # at
 
531
          (?<!\x00)@                      # at
565
532
          %(emailc)s+(?:\.%(emailc)s*)*   # host
566
 
          %(urilast)s                     # final URI char
 
533
          %(uri_end)s                     # final URI char
567
534
          """
568
535
    parts = ('initial_inline', start_string_prefix, '',
569
536
             [('start', '', non_whitespace_after,  # simple start-strings
642
609
                      (                       # either:
643
610
                        (//?)?                  # hierarchical URI
644
611
                        %(uric)s*               # URI characters
645
 
                        %(urilast)s             # final URI char
 
612
                        %(uri_end)s             # final URI char
646
613
                      )
647
614
                      (                       # optional query
648
615
                        \?%(uric)s*
649
 
                        %(urilast)s
 
616
                        %(uri_end)s
650
617
                      )?
651
618
                      (                       # optional fragment
652
619
                        \#%(uric)s*
653
 
                        %(urilast)s
 
620
                        %(uri_end)s
654
621
                      )?
655
622
                    )
656
623
                  )
763
730
                role = endmatch.group('suffix')[1:-1]
764
731
                position = 'suffix'
765
732
            escaped = endmatch.string[:endmatch.start(1)]
766
 
            text = unescape(escaped, 0)
767
733
            rawsource = unescape(string[matchstart:textend], 1)
768
734
            if rawsource[-1:] == '_':
769
735
                if role:
774
740
                    prb = self.problematic(text, text, msg)
775
741
                    return string[:rolestart], [prb], string[textend:], [msg]
776
742
                return self.phrase_ref(string[:matchstart], string[textend:],
777
 
                                       rawsource, escaped, text)
 
743
                                       rawsource, escaped, unescape(escaped))
778
744
            else:
779
745
                rawsource = unescape(string[rolestart:textend], 1)
780
 
                nodelist, messages = self.interpreted(rawsource, text, role,
 
746
                nodelist, messages = self.interpreted(rawsource, escaped, role,
781
747
                                                      lineno)
782
748
                return (string[:rolestart], nodelist,
783
749
                        string[textend:], messages)
812
778
                reference['refuri'] = uri
813
779
            else:
814
780
                reference['anonymous'] = 1
815
 
                self.document.note_anonymous_ref(reference)
816
781
        else:
817
782
            if target:
818
783
                reference['refuri'] = uri
819
 
                target['name'] = refname
820
 
                self.document.note_external_target(target)
 
784
                target['names'].append(refname)
821
785
                self.document.note_explicit_target(target, self.parent)
822
786
                node_list.append(target)
823
787
            else:
858
822
            assert len(inlines) == 1
859
823
            target = inlines[0]
860
824
            name = normalize_name(target.astext())
861
 
            target['name'] = name
 
825
            target['names'].append(name)
862
826
            self.document.note_explicit_target(target, self.parent)
863
827
        return before, inlines, remaining, sysmessages
864
828
 
876
840
                        '|%s%s' % (subref_text, endstring), '')
877
841
                    if endstring[-2:] == '__':
878
842
                        reference_node['anonymous'] = 1
879
 
                        self.document.note_anonymous_ref(
880
 
                              reference_node)
881
843
                    else:
882
844
                        reference_node['refname'] = normalize_name(subref_text)
883
845
                        self.document.note_refname(reference_node)
916
878
            if refname:
917
879
                refnode['refname'] = refname
918
880
                self.document.note_footnote_ref(refnode)
919
 
            if self.document.settings.trim_footnote_reference_space:
 
881
            if utils.get_trim_footnote_ref_space(self.document.settings):
920
882
                before = before.rstrip()
921
883
        return (before, [refnode], remaining, [])
922
884
 
928
890
            name=whitespace_normalize_name(referencename))
929
891
        if anonymous:
930
892
            referencenode['anonymous'] = 1
931
 
            self.document.note_anonymous_ref(referencenode)
932
893
        else:
933
894
            referencenode['refname'] = refname
934
895
            self.document.note_refname(referencenode)
954
915
        else:                   # not a valid scheme
955
916
            raise MarkupMismatch
956
917
 
957
 
    pep_url_local = 'pep-%04d.html'
958
 
    pep_url_absolute = 'http://www.python.org/peps/pep-%04d.html'
959
 
    pep_url = pep_url_absolute
 
918
    pep_url = 'pep-%04d.html'
960
919
 
961
920
    def pep_reference(self, match, lineno):
962
921
        text = match.group(0)
966
925
            pepnum = int(match.group('pepnum2'))
967
926
        else:
968
927
            raise MarkupMismatch
969
 
        ref = self.pep_url % pepnum
 
928
        ref = self.document.settings.pep_base_url + self.pep_url % pepnum
970
929
        unescaped = unescape(text, 0)
971
930
        return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
972
931
 
973
 
    rfc_url = 'http://www.faqs.org/rfcs/rfc%d.html'
 
932
    rfc_url = 'rfc%d.html'
974
933
 
975
934
    def rfc_reference(self, match, lineno):
976
935
        text = match.group(0)
977
936
        if text.startswith('RFC'):
978
937
            rfcnum = int(match.group('rfcnum'))
979
 
            ref = self.rfc_url % rfcnum
 
938
            ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
980
939
        else:
981
940
            raise MarkupMismatch
982
941
        unescaped = unescape(text, 0)
1015
974
                '__': anonymous_reference}
1016
975
 
1017
976
 
 
977
def _loweralpha_to_int(s, _zero=(ord('a')-1)):
 
978
    return ord(s) - _zero
 
979
 
 
980
def _upperalpha_to_int(s, _zero=(ord('A')-1)):
 
981
    return ord(s) - _zero
 
982
 
 
983
def _lowerroman_to_int(s):
 
984
    return roman.fromRoman(s.upper())
 
985
 
 
986
 
1018
987
class Body(RSTState):
1019
988
 
1020
989
    """
1021
990
    Generic classifier of the first line of a block.
1022
991
    """
1023
992
 
 
993
    double_width_pad_char = tableparser.TableParser.double_width_pad_char
 
994
    """Padding character for East Asian double-width text."""
 
995
 
1024
996
    enum = Struct()
1025
997
    """Enumerated list parsing information."""
1026
998
 
1037
1009
                         'lowerroman': '[ivxlcdm]+',
1038
1010
                         'upperroman': '[IVXLCDM]+',}
1039
1011
    enum.converters = {'arabic': int,
1040
 
                       'loweralpha':
1041
 
                       lambda s, zero=(ord('a')-1): ord(s) - zero,
1042
 
                       'upperalpha':
1043
 
                       lambda s, zero=(ord('A')-1): ord(s) - zero,
1044
 
                       'lowerroman':
1045
 
                       lambda s: roman.fromRoman(s.upper()),
 
1012
                       'loweralpha': _loweralpha_to_int,
 
1013
                       'upperalpha': _upperalpha_to_int,
 
1014
                       'lowerroman': _lowerroman_to_int,
1046
1015
                       'upperroman': roman.fromRoman}
1047
1016
 
1048
1017
    enum.sequenceregexps = {}
1067
1036
    pats['alphanum'] = '[a-zA-Z0-9]'
1068
1037
    pats['alphanumplus'] = '[a-zA-Z0-9_-]'
1069
1038
    pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1070
 
                    '|%(upperroman)s)' % enum.sequencepats)
 
1039
                    '|%(upperroman)s|#)' % enum.sequencepats)
1071
1040
    pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1072
1041
    # @@@ Loosen up the pattern?  Allow Unicode?
1073
 
    pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<%(alphanum)s[^ <>]+>)' % pats
 
1042
    pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1074
1043
    pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1075
1044
    pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1076
1045
    pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats
1083
1052
    patterns = {
1084
1053
          'bullet': r'[-+*]( +|$)',
1085
1054
          'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
1086
 
          'field_marker': r':[^: ]([^:]*[^: ])?:( +|$)',
 
1055
          'field_marker': r':(?![: ])([^:\\]|\\.)*(?<! ):( +|$)',
1087
1056
          'option_marker': r'%(option)s(, %(option)s)*(  +| ?$)' % pats,
1088
1057
          'doctest': r'>>>( +|$)',
 
1058
          'line_block': r'\|( +|$)',
1089
1059
          'grid_table_top': grid_table_top_pat,
1090
1060
          'simple_table_top': simple_table_top_pat,
1091
1061
          'explicit_markup': r'\.\.( +|$)',
1098
1068
          'field_marker',
1099
1069
          'option_marker',
1100
1070
          'doctest',
 
1071
          'line_block',
1101
1072
          'grid_table_top',
1102
1073
          'simple_table_top',
1103
1074
          'explicit_markup',
1141
1112
        Return a 3-tuple: (block quote lines, attribution lines,
1142
1113
        attribution offset).
1143
1114
        """
 
1115
        #import pdb ; pdb.set_trace()
1144
1116
        blank = None
1145
1117
        nonblank_seen = None
1146
1118
        indent = 0
1157
1129
            indent = (len(indented[blank + 2])
1158
1130
                      - len(indented[blank + 2].lstrip()))
1159
1131
            for j in range(blank + 3, len(indented)):
1160
 
                if indent != (len(indented[j])
1161
 
                              - len(indented[j].lstrip())): # bad shape
 
1132
                if ( indented[j]        # may be blank last line
 
1133
                     and indent != (len(indented[j])
 
1134
                                    - len(indented[j].lstrip()))):
 
1135
                    # bad shape
1162
1136
                    blank = None
1163
1137
                    break
1164
1138
        if blank:
1185
1159
        i, blank_finish = self.list_item(match.end())
1186
1160
        bulletlist += i
1187
1161
        offset = self.state_machine.line_offset + 1   # next line
1188
 
        newline_offset, blank_finish = self.nested_list_parse(
 
1162
        new_line_offset, blank_finish = self.nested_list_parse(
1189
1163
              self.state_machine.input_lines[offset:],
1190
1164
              input_offset=self.state_machine.abs_line_offset() + 1,
1191
1165
              node=bulletlist, initial_state='BulletList',
1192
1166
              blank_finish=blank_finish)
1193
 
        self.goto_line(newline_offset)
 
1167
        self.goto_line(new_line_offset)
1194
1168
        if not blank_finish:
1195
1169
            self.parent += self.unindent_warning('Bullet list')
1196
1170
        return [], next_state, []
1197
1171
 
1198
1172
    def list_item(self, indent):
1199
 
        indented, line_offset, blank_finish = \
1200
 
              self.state_machine.get_known_indented(indent)
 
1173
        if self.state_machine.line[indent:]:
 
1174
            indented, line_offset, blank_finish = (
 
1175
                self.state_machine.get_known_indented(indent))
 
1176
        else:
 
1177
            indented, indent, line_offset, blank_finish = (
 
1178
                self.state_machine.get_first_known_indented(indent))
1201
1179
        listitem = nodes.list_item('\n'.join(indented))
1202
1180
        if indented:
1203
1181
            self.nested_parse(indented, input_offset=line_offset,
1209
1187
        format, sequence, text, ordinal = self.parse_enumerator(match)
1210
1188
        if not self.is_enumerated_list_item(ordinal, sequence, format):
1211
1189
            raise statemachine.TransitionCorrection('text')
1212
 
        if ordinal != 1:
1213
 
            msg = self.reporter.info(
1214
 
                'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1215
 
                % (text, ordinal), line=self.state_machine.abs_line_number())
1216
 
            self.parent += msg
1217
1190
        enumlist = nodes.enumerated_list()
1218
1191
        self.parent += enumlist
1219
 
        enumlist['enumtype'] = sequence
1220
 
        if ordinal != 1:
1221
 
            enumlist['start'] = ordinal
 
1192
        if sequence == '#':
 
1193
            enumlist['enumtype'] = 'arabic'
 
1194
        else:
 
1195
            enumlist['enumtype'] = sequence
1222
1196
        enumlist['prefix'] = self.enum.formatinfo[format].prefix
1223
1197
        enumlist['suffix'] = self.enum.formatinfo[format].suffix
 
1198
        if ordinal != 1:
 
1199
            enumlist['start'] = ordinal
 
1200
            msg = self.reporter.info(
 
1201
                'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
 
1202
                % (text, ordinal), line=self.state_machine.abs_line_number())
 
1203
            self.parent += msg
1224
1204
        listitem, blank_finish = self.list_item(match.end())
1225
1205
        enumlist += listitem
1226
1206
        offset = self.state_machine.line_offset + 1   # next line
1229
1209
              input_offset=self.state_machine.abs_line_offset() + 1,
1230
1210
              node=enumlist, initial_state='EnumeratedList',
1231
1211
              blank_finish=blank_finish,
1232
 
              extra_settings={'lastordinal': ordinal, 'format': format})
 
1212
              extra_settings={'lastordinal': ordinal,
 
1213
                              'format': format,
 
1214
                              'auto': sequence == '#'})
1233
1215
        self.goto_line(newline_offset)
1234
1216
        if not blank_finish:
1235
1217
            self.parent += self.unindent_warning('Enumerated list')
1262
1244
            raise ParserError('enumerator format not matched')
1263
1245
        text = groupdict[format][self.enum.formatinfo[format].start
1264
1246
                                 :self.enum.formatinfo[format].end]
1265
 
        if expected_sequence:
 
1247
        if text == '#':
 
1248
            sequence = '#'
 
1249
        elif expected_sequence:
1266
1250
            try:
1267
1251
                if self.enum.sequenceregexps[expected_sequence].match(text):
1268
1252
                    sequence = expected_sequence
1279
1263
                    break
1280
1264
            else:                       # shouldn't happen
1281
1265
                raise ParserError('enumerator sequence not matched')
1282
 
        try:
1283
 
            ordinal = self.enum.converters[sequence](text)
1284
 
        except roman.InvalidRomanNumeralError:
1285
 
            ordinal = None
 
1266
        if sequence == '#':
 
1267
            ordinal = 1
 
1268
        else:
 
1269
            try:
 
1270
                ordinal = self.enum.converters[sequence](text)
 
1271
            except roman.InvalidRomanNumeralError:
 
1272
                ordinal = None
1286
1273
        return format, sequence, text, ordinal
1287
1274
 
1288
1275
    def is_enumerated_list_item(self, ordinal, sequence, format):
1290
1277
        Check validity based on the ordinal value and the second line.
1291
1278
 
1292
1279
        Return true iff the ordinal is valid and the second line is blank,
1293
 
        indented, or starts with the next enumerator.
 
1280
        indented, or starts with the next enumerator or an auto-enumerator.
1294
1281
        """
1295
1282
        if ordinal is None:
1296
1283
            return None
1303
1290
            self.state_machine.previous_line()
1304
1291
        if not next_line[:1].strip():   # blank or indented
1305
1292
            return 1
1306
 
        next_enumerator = self.make_enumerator(ordinal + 1, sequence, format)
1307
 
        try:
1308
 
            if next_line.startswith(next_enumerator):
1309
 
                return 1
1310
 
        except TypeError:
1311
 
            pass
 
1293
        result = self.make_enumerator(ordinal + 1, sequence, format)
 
1294
        if result:
 
1295
            next_enumerator, auto_enumerator = result
 
1296
            try:
 
1297
                if ( next_line.startswith(next_enumerator) or
 
1298
                     next_line.startswith(auto_enumerator) ):
 
1299
                    return 1
 
1300
            except TypeError:
 
1301
                pass
1312
1302
        return None
1313
1303
 
1314
1304
    def make_enumerator(self, ordinal, sequence, format):
1315
1305
        """
1316
 
        Construct and return an enumerated list item marker.
 
1306
        Construct and return the next enumerated list item marker, and an
 
1307
        auto-enumerator ("#" instead of the regular enumerator).
1317
1308
 
1318
1309
        Return ``None`` for invalid (out of range) ordinals.
1319
 
        """
1320
 
        if sequence == 'arabic':
 
1310
        """ #"
 
1311
        if sequence == '#':
 
1312
            enumerator = '#'
 
1313
        elif sequence == 'arabic':
1321
1314
            enumerator = str(ordinal)
1322
1315
        else:
1323
1316
            if sequence.endswith('alpha'):
1340
1333
                raise ParserError('unknown enumerator sequence: "%s"'
1341
1334
                                  % sequence)
1342
1335
        formatinfo = self.enum.formatinfo[format]
1343
 
        return formatinfo.prefix + enumerator + formatinfo.suffix + ' '
 
1336
        next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
 
1337
                           + ' ')
 
1338
        auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
 
1339
        return next_enumerator, auto_enumerator
1344
1340
 
1345
1341
    def field_marker(self, match, context, next_state):
1346
1342
        """Field list item."""
1376
1372
 
1377
1373
    def parse_field_marker(self, match):
1378
1374
        """Extract & return field name from a field marker match."""
1379
 
        field = match.string[1:]        # strip off leading ':'
1380
 
        field = field[:field.find(':')] # strip off trailing ':' etc.
 
1375
        field = match.group()[1:]        # strip off leading ':'
 
1376
        field = field[:field.rfind(':')] # strip off trailing ':' etc.
1381
1377
        return field
1382
1378
 
1383
1379
    def parse_field_body(self, indented, offset, node):
1445
1441
            delimiter = ' '
1446
1442
            firstopt = tokens[0].split('=')
1447
1443
            if len(firstopt) > 1:
 
1444
                # "--opt=value" form
1448
1445
                tokens[:1] = firstopt
1449
1446
                delimiter = '='
1450
1447
            elif (len(tokens[0]) > 2
1451
1448
                  and ((tokens[0].startswith('-')
1452
1449
                        and not tokens[0].startswith('--'))
1453
1450
                       or tokens[0].startswith('+'))):
 
1451
                # "-ovalue" form
1454
1452
                tokens[:1] = [tokens[0][:2], tokens[0][2:]]
1455
1453
                delimiter = ''
 
1454
            if len(tokens) > 1 and (tokens[1].startswith('<')
 
1455
                                    and tokens[-1].endswith('>')):
 
1456
                # "-o <value1 value2>" form; join all values into one token
 
1457
                tokens[1:] = [' '.join(tokens[1:])]
1456
1458
            if 0 < len(tokens) <= 2:
1457
1459
                option = nodes.option(optionstring)
1458
1460
                option += nodes.option_string(tokens[0], tokens[0])
1462
1464
                optlist.append(option)
1463
1465
            else:
1464
1466
                raise MarkupError(
1465
 
                    'wrong numer of option tokens (=%s), should be 1 or 2: '
 
1467
                    'wrong number of option tokens (=%s), should be 1 or 2: '
1466
1468
                    '"%s"' % (len(tokens), optionstring),
1467
1469
                    self.state_machine.abs_line_number() + 1)
1468
1470
        return optlist
1472
1474
        self.parent += nodes.doctest_block(data, data)
1473
1475
        return [], next_state, []
1474
1476
 
 
1477
    def line_block(self, match, context, next_state):
 
1478
        """First line of a line block."""
 
1479
        block = nodes.line_block()
 
1480
        self.parent += block
 
1481
        lineno = self.state_machine.abs_line_number()
 
1482
        line, messages, blank_finish = self.line_block_line(match, lineno)
 
1483
        block += line
 
1484
        self.parent += messages
 
1485
        if not blank_finish:
 
1486
            offset = self.state_machine.line_offset + 1   # next line
 
1487
            new_line_offset, blank_finish = self.nested_list_parse(
 
1488
                  self.state_machine.input_lines[offset:],
 
1489
                  input_offset=self.state_machine.abs_line_offset() + 1,
 
1490
                  node=block, initial_state='LineBlock',
 
1491
                  blank_finish=0)
 
1492
            self.goto_line(new_line_offset)
 
1493
        if not blank_finish:
 
1494
            self.parent += self.reporter.warning(
 
1495
                'Line block ends without a blank line.',
 
1496
                line=(self.state_machine.abs_line_number() + 1))
 
1497
        if len(block):
 
1498
            if block[0].indent is None:
 
1499
                block[0].indent = 0
 
1500
            self.nest_line_block_lines(block)
 
1501
        return [], next_state, []
 
1502
 
 
1503
    def line_block_line(self, match, lineno):
 
1504
        """Return one line element of a line_block."""
 
1505
        indented, indent, line_offset, blank_finish = \
 
1506
              self.state_machine.get_first_known_indented(match.end(),
 
1507
                                                          until_blank=1)
 
1508
        text = u'\n'.join(indented)
 
1509
        text_nodes, messages = self.inline_text(text, lineno)
 
1510
        line = nodes.line(text, '', *text_nodes)
 
1511
        if match.string.rstrip() != '|': # not empty
 
1512
            line.indent = len(match.group(1)) - 1
 
1513
        return line, messages, blank_finish
 
1514
 
 
1515
    def nest_line_block_lines(self, block):
 
1516
        for index in range(1, len(block)):
 
1517
            if block[index].indent is None:
 
1518
                block[index].indent = block[index - 1].indent
 
1519
        self.nest_line_block_segment(block)
 
1520
 
 
1521
    def nest_line_block_segment(self, block):
 
1522
        indents = [item.indent for item in block]
 
1523
        least = min(indents)
 
1524
        new_items = []
 
1525
        new_block = nodes.line_block()
 
1526
        for item in block:
 
1527
            if item.indent > least:
 
1528
                new_block.append(item)
 
1529
            else:
 
1530
                if len(new_block):
 
1531
                    self.nest_line_block_segment(new_block)
 
1532
                    new_items.append(new_block)
 
1533
                    new_block = nodes.line_block()
 
1534
                new_items.append(item)
 
1535
        if len(new_block):
 
1536
            self.nest_line_block_segment(new_block)
 
1537
            new_items.append(new_block)
 
1538
        block[:] = new_items
 
1539
 
1475
1540
    def grid_table_top(self, match, context, next_state):
1476
1541
        """Top border of a full table."""
1477
1542
        return self.table_top(match, context, next_state,
1508
1573
                table = self.build_table(tabledata, tableline)
1509
1574
                nodelist = [table] + messages
1510
1575
            except tableparser.TableMarkupError, detail:
1511
 
                nodelist = self.malformed_table(block, str(detail)) + messages
 
1576
                nodelist = self.malformed_table(
 
1577
                    block, ' '.join(detail.args)) + messages
1512
1578
        else:
1513
1579
            nodelist = messages
1514
1580
        return nodelist, blank_finish
1524
1590
                                                source=source, line=lineno))
1525
1591
            blank_finish = 0
1526
1592
        block.disconnect()
 
1593
        # for East Asian chars:
 
1594
        block.pad_double_width(self.double_width_pad_char)
1527
1595
        width = len(block[0].strip())
1528
1596
        for i in range(len(block)):
1529
1597
            block[i] = block[i].strip()
1588
1656
            return [], messages, not extra
1589
1657
        self.state_machine.next_line(end - start)
1590
1658
        block = lines[start:end+1]
 
1659
        # for East Asian chars:
 
1660
        block.pad_double_width(self.double_width_pad_char)
1591
1661
        return block, [], end == limit or not lines[end+1].strip()
1592
1662
 
1593
1663
    def malformed_table(self, block, detail=''):
 
1664
        block.replace(self.double_width_pad_char, '')
1594
1665
        data = '\n'.join(block)
1595
1666
        message = 'Malformed table.'
1596
1667
        lineno = self.state_machine.abs_line_number() - len(block) + 1
1600
1671
                                    line=lineno)
1601
1672
        return [error]
1602
1673
 
1603
 
    def build_table(self, tabledata, tableline):
1604
 
        colspecs, headrows, bodyrows = tabledata
 
1674
    def build_table(self, tabledata, tableline, stub_columns=0):
 
1675
        colwidths, headrows, bodyrows = tabledata
1605
1676
        table = nodes.table()
1606
 
        tgroup = nodes.tgroup(cols=len(colspecs))
 
1677
        tgroup = nodes.tgroup(cols=len(colwidths))
1607
1678
        table += tgroup
1608
 
        for colspec in colspecs:
1609
 
            tgroup += nodes.colspec(colwidth=colspec)
 
1679
        for colwidth in colwidths:
 
1680
            colspec = nodes.colspec(colwidth=colwidth)
 
1681
            if stub_columns:
 
1682
                colspec.attributes['stub'] = 1
 
1683
                stub_columns -= 1
 
1684
            tgroup += colspec
1610
1685
        if headrows:
1611
1686
            thead = nodes.thead()
1612
1687
            tgroup += thead
1654
1729
                              %(non_whitespace_escape_before)s
1655
1730
                              (?P=quote)      # close quote if open quote used
1656
1731
                            )
 
1732
                            (?<!(?<!\x00):) # no unescaped colon at end
1657
1733
                            %(non_whitespace_escape_before)s
1658
1734
                            [ ]?            # optional space
1659
1735
                            :               # end of reference name
1694
1770
            name = name[1:]             # autonumber label
1695
1771
            footnote['auto'] = 1
1696
1772
            if name:
1697
 
                footnote['name'] = name
 
1773
                footnote['names'].append(name)
1698
1774
            self.document.note_autofootnote(footnote)
1699
1775
        elif name == '*':               # auto-symbol
1700
1776
            name = ''
1702
1778
            self.document.note_symbol_footnote(footnote)
1703
1779
        else:                           # manually numbered
1704
1780
            footnote += nodes.label('', label)
1705
 
            footnote['name'] = name
 
1781
            footnote['names'].append(name)
1706
1782
            self.document.note_footnote(footnote)
1707
1783
        if name:
1708
1784
            self.document.note_explicit_target(footnote, footnote)
1721
1797
        citation = nodes.citation('\n'.join(indented))
1722
1798
        citation.line = lineno
1723
1799
        citation += nodes.label('', label)
1724
 
        citation['name'] = name
 
1800
        citation['names'].append(name)
1725
1801
        self.document.note_citation(citation)
1726
1802
        self.document.note_explicit_target(citation, citation)
1727
1803
        if indented:
1783
1859
            refname = self.is_reference(reference)
1784
1860
            if refname:
1785
1861
                return 'refname', refname
1786
 
        reference = ''.join([line.strip() for line in block])
1787
 
        if reference.find(' ') == -1:
1788
 
            return 'refuri', unescape(reference)
1789
 
        else:
1790
 
            warning = self.reporter.warning(
1791
 
                  'Hyperlink target contains whitespace. Perhaps a footnote '
1792
 
                  'was intended?',
1793
 
                  nodes.literal_block(block_text, block_text), line=lineno)
1794
 
            return 'malformed', warning
 
1862
        reference = ''.join([''.join(line.split()) for line in block])
 
1863
        return 'refuri', unescape(reference)
1795
1864
 
1796
1865
    def is_reference(self, reference):
1797
1866
        match = self.explicit.patterns.reference.match(
1804
1873
        target.line = lineno
1805
1874
        if targetname:
1806
1875
            name = normalize_name(unescape(targetname))
1807
 
            target['name'] = name
 
1876
            target['names'].append(name)
1808
1877
            if refuri:
1809
1878
                uri = self.inliner.adjust_uri(refuri)
1810
1879
                if uri:
1811
1880
                    target['refuri'] = uri
1812
 
                    self.document.note_external_target(target)
1813
1881
                else:
1814
1882
                    raise ApplicationError('problem with URI: %r' % refuri)
1815
 
            else:
1816
 
                self.document.note_internal_target(target)
1817
1883
            self.document.note_explicit_target(target, self.parent)
1818
1884
        else:                       # anonymous target
1819
1885
            if refuri:
1851
1917
        subname = subdefmatch.group('name')
1852
1918
        substitution_node = nodes.substitution_definition(blocktext)
1853
1919
        substitution_node.line = lineno
1854
 
        self.document.note_substitution_def(
1855
 
            substitution_node,subname, self.parent)
1856
 
        if block:
1857
 
            block[0] = block[0].strip()
1858
 
            new_abs_offset, blank_finish = self.nested_list_parse(
1859
 
                  block, input_offset=offset, node=substitution_node,
1860
 
                  initial_state='SubstitutionDef', blank_finish=blank_finish)
1861
 
            i = 0
1862
 
            for node in substitution_node[:]:
1863
 
                if not (isinstance(node, nodes.Inline) or
1864
 
                        isinstance(node, nodes.Text)):
1865
 
                    self.parent += substitution_node[i]
1866
 
                    del substitution_node[i]
1867
 
                else:
1868
 
                    i += 1
1869
 
            if len(substitution_node) == 0:
1870
 
                msg = self.reporter.warning(
1871
 
                      'Substitution definition "%s" empty or invalid.'
1872
 
                      % subname,
1873
 
                      nodes.literal_block(blocktext, blocktext), line=lineno)
 
1920
        if not block:
 
1921
            msg = self.reporter.warning(
 
1922
                'Substitution definition "%s" missing contents.' % subname,
 
1923
                nodes.literal_block(blocktext, blocktext), line=lineno)
 
1924
            return [msg], blank_finish
 
1925
        block[0] = block[0].strip()
 
1926
        substitution_node['names'].append(
 
1927
            nodes.whitespace_normalize_name(subname))
 
1928
        new_abs_offset, blank_finish = self.nested_list_parse(
 
1929
              block, input_offset=offset, node=substitution_node,
 
1930
              initial_state='SubstitutionDef', blank_finish=blank_finish)
 
1931
        i = 0
 
1932
        for node in substitution_node[:]:
 
1933
            if not (isinstance(node, nodes.Inline) or
 
1934
                    isinstance(node, nodes.Text)):
 
1935
                self.parent += substitution_node[i]
 
1936
                del substitution_node[i]
 
1937
            else:
 
1938
                i += 1
 
1939
        for node in substitution_node.traverse(nodes.Element):
 
1940
            if self.disallowed_inside_substitution_definitions(node):
 
1941
                pformat = nodes.literal_block('', node.pformat().rstrip())
 
1942
                msg = self.reporter.error(
 
1943
                    'Substitution definition contains illegal element:',
 
1944
                    pformat, nodes.literal_block(blocktext, blocktext),
 
1945
                    line=lineno)
1874
1946
                return [msg], blank_finish
1875
 
            else:
1876
 
                return [substitution_node], blank_finish
1877
 
        else:
 
1947
        if len(substitution_node) == 0:
1878
1948
            msg = self.reporter.warning(
1879
 
                  'Substitution definition "%s" missing contents.' % subname,
 
1949
                  'Substitution definition "%s" empty or invalid.'
 
1950
                  % subname,
1880
1951
                  nodes.literal_block(blocktext, blocktext), line=lineno)
1881
1952
            return [msg], blank_finish
 
1953
        self.document.note_substitution_def(
 
1954
            substitution_node, subname, self.parent)
 
1955
        return [substitution_node], blank_finish
 
1956
 
 
1957
    def disallowed_inside_substitution_definitions(self, node):
 
1958
        if (node['ids'] or
 
1959
            isinstance(node, nodes.reference) and node.get('anonymous') or
 
1960
            isinstance(node, nodes.footnote_reference) and node.get('auto')):
 
1961
            return 1
 
1962
        else:
 
1963
            return 0
1882
1964
 
1883
1965
    def directive(self, match, **option_presets):
1884
1966
        """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
1927
2009
                                           directive_fn, option_presets))
1928
2010
        except MarkupError, detail:
1929
2011
            error = self.reporter.error(
1930
 
                'Error in "%s" directive:\n%s.' % (type_name, detail),
 
2012
                'Error in "%s" directive:\n%s.' % (type_name,
 
2013
                                                   ' '.join(detail.args)),
1931
2014
                nodes.literal_block(block_text, block_text), line=lineno)
1932
2015
            return [error], blank_finish
1933
2016
        result = directive_fn(type_name, arguments, options, content, lineno,
1969
2052
        if option_spec:
1970
2053
            options, arg_block = self.parse_directive_options(
1971
2054
                option_presets, option_spec, arg_block)
 
2055
            if arg_block and not argument_spec:
 
2056
                raise MarkupError('no arguments permitted; blank line '
 
2057
                                  'required before content block')
1972
2058
        if argument_spec:
1973
2059
            arguments = self.parse_directive_arguments(
1974
2060
                argument_spec, arg_block)
2035
2121
        except KeyError, detail:
2036
2122
            return 0, ('unknown option: "%s"' % detail.args[0])
2037
2123
        except (ValueError, TypeError), detail:
2038
 
            return 0, ('invalid option value: %s' % detail)
 
2124
            return 0, ('invalid option value: %s' % ' '.join(detail.args))
2039
2125
        except utils.ExtensionOptionError, detail:
2040
 
            return 0, ('invalid option data: %s' % detail)
 
2126
            return 0, ('invalid option data: %s' % ' '.join(detail.args))
2041
2127
        if blank_finish:
2042
2128
            return 1, options
2043
2129
        else:
2091
2177
           re.compile(r"""
2092
2178
                      \.\.[ ]+          # explicit markup start
2093
2179
                      _                 # target indicator
2094
 
                      (?![ ])           # first char. not space
 
2180
                      (?![ ]|$)         # first char. not space or EOL
2095
2181
                      """, re.VERBOSE)),
2096
2182
          (substitution_def,
2097
2183
           re.compile(r"""
2098
2184
                      \.\.[ ]+          # explicit markup start
2099
2185
                      \|                # substitution indicator
2100
 
                      (?![ ])           # first char. not space
 
2186
                      (?![ ]|$)         # first char. not space or EOL
2101
2187
                      """, re.VERBOSE)),
2102
2188
          (directive,
2103
2189
           re.compile(r"""
2204
2290
 
2205
2291
    def rfc2822(self, match, context, next_state):
2206
2292
        """RFC2822-style field list item."""
2207
 
        fieldlist = nodes.field_list(CLASS='rfc2822')
 
2293
        fieldlist = nodes.field_list(classes=['rfc2822'])
2208
2294
        self.parent += fieldlist
2209
2295
        field, blank_finish = self.rfc2822_field(match)
2210
2296
        fieldlist += field
2270
2356
    field_marker = invalid_input
2271
2357
    option_marker = invalid_input
2272
2358
    doctest = invalid_input
 
2359
    line_block = invalid_input
2273
2360
    grid_table_top = invalid_input
2274
2361
    simple_table_top = invalid_input
2275
2362
    explicit_markup = invalid_input
2310
2397
        """Enumerated list item."""
2311
2398
        format, sequence, text, ordinal = self.parse_enumerator(
2312
2399
              match, self.parent['enumtype'])
2313
 
        if (sequence != self.parent['enumtype'] or
2314
 
            format != self.format or
2315
 
            ordinal != (self.lastordinal + 1) or
2316
 
            not self.is_enumerated_list_item(ordinal, sequence, format)):
 
2400
        if ( format != self.format
 
2401
             or (sequence != '#' and (sequence != self.parent['enumtype']
 
2402
                                      or self.auto
 
2403
                                      or ordinal != (self.lastordinal + 1)))
 
2404
             or not self.is_enumerated_list_item(ordinal, sequence, format)):
2317
2405
            # different enumeration: new list
2318
2406
            self.invalid_input()
 
2407
        if sequence == '#':
 
2408
            self.auto = 1
2319
2409
        listitem, blank_finish = self.list_item(match.end())
2320
2410
        self.parent += listitem
2321
2411
        self.blank_finish = blank_finish
2387
2477
                lines = []
2388
2478
 
2389
2479
 
 
2480
class LineBlock(SpecializedBody):

    """Second and subsequent lines of a line_block."""

    # Blank lines are routed to the inherited `invalid_input` handler
    # (presumably ending the line block -- confirm in SpecializedBody).
    blank = SpecializedBody.invalid_input

    def line_block(self, match, context, next_state):
        """
        Handle one more line of the current line block.

        The parsed line node is appended to the enclosing line block
        (`self.parent`); any system messages go to that block's own parent.
        Returns an empty context, the unchanged `next_state`, and no result
        nodes.
        """
        abs_lineno = self.state_machine.abs_line_number()
        parsed = self.line_block_line(match, abs_lineno)
        line_node, system_messages, finished_blank = parsed
        self.parent += line_node
        self.parent.parent += system_messages
        self.blank_finish = finished_blank
        return [], next_state, []
 
2494
 
 
2495
 
2390
2496
class Explicit(SpecializedBody):
2391
2497
 
2392
2498
    """Second and subsequent explicit markup construct."""
2422
2528
 
2423
2529
    def embedded_directive(self, match, context, next_state):
2424
2530
        nodelist, blank_finish = self.directive(match,
2425
 
                                                alt=self.parent['name'])
 
2531
                                                alt=self.parent['names'][0])
2426
2532
        self.parent += nodelist
2427
2533
        if not self.state_machine.at_eof():
2428
2534
            self.blank_finish = blank_finish
2484
2590
        underline = match.string.rstrip()
2485
2591
        source = title + '\n' + underline
2486
2592
        messages = []
2487
 
        if len(title) > len(underline):
 
2593
        if column_width(title) > len(underline):
2488
2594
            if len(underline) < 4:
2489
2595
                if self.state_machine.match_titles:
2490
2596
                    msg = self.reporter.info(
2542
2648
            indented.trim_end()
2543
2649
        if not indented:
2544
2650
            return self.quoted_literal_block()
2545
 
        nodelist = []
2546
2651
        data = '\n'.join(indented)
2547
 
        nodelist.append(nodes.literal_block(data, data))
 
2652
        literal_block = nodes.literal_block(data, data)
 
2653
        literal_block.line = offset + 1
 
2654
        nodelist = [literal_block]
2548
2655
        if not blank_finish:
2549
2656
            nodelist.append(self.unindent_warning('Literal block'))
2550
2657
        return nodelist
2579
2686
        self.nested_parse(indented, input_offset=line_offset, node=definition)
2580
2687
        return definitionlistitem, blank_finish
2581
2688
 
 
2689
    classifier_delimiter = re.compile(' +: +')
 
2690
 
2582
2691
    def term(self, lines, lineno):
2583
 
        """Return a definition_list's term and optional classifier."""
 
2692
        """Return a definition_list's term and optional classifiers."""
2584
2693
        assert len(lines) == 1
2585
2694
        text_nodes, messages = self.inline_text(lines[0], lineno)
2586
2695
        term_node = nodes.term()
2588
2697
        for i in range(len(text_nodes)):
2589
2698
            node = text_nodes[i]
2590
2699
            if isinstance(node, nodes.Text):
2591
 
                parts = node.rawsource.split(' : ', 1)
 
2700
                parts = self.classifier_delimiter.split(node.rawsource)
2592
2701
                if len(parts) == 1:
2593
 
                    term_node += node
 
2702
                    node_list[-1] += node
2594
2703
                else:
2595
 
                    term_node += nodes.Text(parts[0].rstrip())
2596
 
                    classifier_node = nodes.classifier('', parts[1])
2597
 
                    classifier_node += text_nodes[i+1:]
2598
 
                    node_list.append(classifier_node)
2599
 
                    break
 
2704
                    
 
2705
                    node_list[-1] += nodes.Text(parts[0].rstrip())
 
2706
                    for part in parts[1:]:
 
2707
                        classifier_node = nodes.classifier('', part)
 
2708
                        node_list.append(classifier_node)
2600
2709
            else:
2601
 
                term_node += node
 
2710
                node_list[-1] += node
2602
2711
        return node_list, messages
2603
2712
 
2604
2713
 
2660
2769
            self.state_correction(context)
2661
2770
        if self.eofcheck:               # ignore EOFError with sections
2662
2771
            lineno = self.state_machine.abs_line_number() - 1
2663
 
            transition = nodes.transition(context[0])
 
2772
            transition = nodes.transition(rawsource=context[0])
2664
2773
            transition.line = lineno
2665
2774
            self.parent += transition
2666
 
            msg = self.reporter.error(
2667
 
                  'Document or section may not end with a transition.',
2668
 
                  line=lineno)
2669
 
            self.parent += msg
2670
2775
        self.eofcheck = 1
2671
2776
        return []
2672
2777
 
2676
2781
        marker = context[0].strip()
2677
2782
        if len(marker) < 4:
2678
2783
            self.state_correction(context)
2679
 
        transition = nodes.transition(marker)
 
2784
        transition = nodes.transition(rawsource=marker)
2680
2785
        transition.line = lineno
2681
 
        if len(self.parent) == 0:
2682
 
            msg = self.reporter.error(
2683
 
                  'Document or section may not begin with a transition.',
2684
 
                  line=lineno)
2685
 
            self.parent += msg
2686
 
        elif isinstance(self.parent[-1], nodes.transition):
2687
 
            msg = self.reporter.error(
2688
 
                  'At least one body element must separate transitions; '
2689
 
                  'adjacent transitions not allowed.',
2690
 
                  line=lineno)
2691
 
            self.parent += msg
2692
2786
        self.parent += transition
2693
2787
        return [], 'Body', []
2694
2788
 
2735
2829
                return [], 'Body', []
2736
2830
        title = title.rstrip()
2737
2831
        messages = []
2738
 
        if len(title) > len(overline):
 
2832
        if column_width(title) > len(overline):
2739
2833
            blocktext = overline + '\n' + title + '\n' + underline
2740
2834
            if len(overline.rstrip()) < 4:
2741
2835
                self.short_overline(context, blocktext, lineno, 2)
2849
2943
 
2850
2944
 
2851
2945
state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
2852
 
                 OptionList, ExtensionOptions, Explicit, Text, Definition,
2853
 
                 Line, SubstitutionDef, RFC2822Body, RFC2822List)
 
2946
                 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
 
2947
                 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
2854
2948
"""Standard set of State classes used to start `RSTStateMachine`."""
2855
 
 
2856
 
 
2857
 
def escape2null(text):
    """
    Return `text` with every escaping backslash replaced by a null byte.

    The character directly after an escaping backslash is copied verbatim
    and is never itself treated as the start of another escape, so a double
    backslash becomes a null followed by one literal backslash.
    """
    pieces = []
    pos = 0
    while True:
        slash = text.find('\\', pos)
        if slash == -1:
            # No further backslashes: keep the remainder as is.
            pieces.append(text[pos:])
            break
        pieces.append(text[pos:slash])
        # Null marker plus the escaped character (empty at end of text).
        pieces.append('\x00' + text[slash + 1:slash + 2])
        pos = slash + 2                 # resume after the escaped character
    return ''.join(pieces)
2869
 
 
2870
 
def unescape(text, restore_backslashes=0):
    """
    Undo null-byte escape markers in `text`.

    With a true `restore_backslashes`, each null becomes a backslash again.
    Otherwise the nulls are dropped; a null followed by a space or a newline
    is dropped together with that whitespace character.
    """
    if restore_backslashes:
        return text.replace('\x00', '\\')
    # Two-character markers go first so the whitespace is removed as well.
    for marker in ('\x00 ', '\x00\n', '\x00'):
        text = text.replace(marker, '')
    return text