~ubuntu-branches/ubuntu/jaunty/python-docutils/jaunty

« back to all changes in this revision

Viewing changes to docutils/parsers/rst/states.py

  • Committer: Bazaar Package Importer
  • Author(s): Simon McVittie
  • Date: 2008-07-24 10:39:53 UTC
  • mfrom: (1.1.4 upstream) (3.1.7 intrepid)
  • Revision ID: james.westby@ubuntu.com-20080724103953-8gh4uezg17g9ysgy
Tags: 0.5-2
* Upload docutils 0.5 to unstable
* Update rst.el to upstream Subversion r5596, which apparently fixes
  all its performance problems (17_speed_up_rst_el.dpatch, closes: #474941)

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Author: David Goodger
2
 
# Contact: goodger@users.sourceforge.net
3
 
# Revision: $Revision: 4258 $
4
 
# Date: $Date: 2006-01-09 04:29:23 +0100 (Mon, 09 Jan 2006) $
 
1
# $Id: states.py 5510 2008-02-15 09:23:07Z grubert $
 
2
# Author: David Goodger <goodger@python.org>
5
3
# Copyright: This module has been placed in the public domain.
6
4
 
7
5
"""
108
106
import sys
109
107
import re
110
108
import roman
111
 
from types import TupleType
 
109
from types import TupleType, FunctionType, MethodType
112
110
from docutils import nodes, statemachine, utils, urischemes
113
111
from docutils import ApplicationError, DataError
114
112
from docutils.statemachine import StateMachineWS, StateWS
115
113
from docutils.nodes import fully_normalize_name as normalize_name
116
114
from docutils.nodes import whitespace_normalize_name
117
115
from docutils.utils import escape2null, unescape, column_width
 
116
import docutils.parsers.rst
118
117
from docutils.parsers.rst import directives, languages, tableparser, roles
119
118
from docutils.parsers.rst.languages import en as _fallback_language_module
120
119
 
513
512
    non_whitespace_before = r'(?<![ \n])'
514
513
    non_whitespace_escape_before = r'(?<![ \n\x00])'
515
514
    non_whitespace_after = r'(?![ \n])'
516
 
    # Alphanumerics with isolated internal [-._] chars (i.e. not 2 together):
517
 
    simplename = r'(?:(?!_)\w)+(?:[-._](?:(?!_)\w)+)*'
 
515
    # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
 
516
    simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
518
517
    # Valid URI characters (see RFC 2396 & RFC 2732);
519
518
    # final \x00 allows backslash escapes in URIs:
520
519
    uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
915
914
        else:                   # not a valid scheme
916
915
            raise MarkupMismatch
917
916
 
918
 
    pep_url = 'pep-%04d.html'
919
 
 
920
917
    def pep_reference(self, match, lineno):
921
918
        text = match.group(0)
922
919
        if text.startswith('pep-'):
925
922
            pepnum = int(match.group('pepnum2'))
926
923
        else:
927
924
            raise MarkupMismatch
928
 
        ref = self.document.settings.pep_base_url + self.pep_url % pepnum
 
925
        ref = (self.document.settings.pep_base_url
 
926
               + self.document.settings.pep_file_url_template % pepnum)
929
927
        unescaped = unescape(text, 0)
930
928
        return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
931
929
 
1050
1048
              pats['enum'], re.escape(enum.formatinfo[format].suffix))
1051
1049
 
1052
1050
    patterns = {
1053
 
          'bullet': r'[-+*]( +|$)',
 
1051
          'bullet': ur'[-+*\u2022\u2023\u2043]( +|$)',
1054
1052
          'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
1055
1053
          'field_marker': r':(?![: ])([^:\\]|\\.)*(?<! ):( +|$)',
1056
1054
          'option_marker': r'%(option)s(, %(option)s)*(  +| ?$)' % pats,
1080
1078
        """Block quote."""
1081
1079
        indented, indent, line_offset, blank_finish = \
1082
1080
              self.state_machine.get_indented()
1083
 
        blockquote, messages = self.block_quote(indented, line_offset)
1084
 
        self.parent += blockquote
1085
 
        self.parent += messages
 
1081
        elements = self.block_quote(indented, line_offset)
 
1082
        self.parent += elements
1086
1083
        if not blank_finish:
1087
1084
            self.parent += self.unindent_warning('Block quote')
1088
1085
        return context, next_state, []
1089
1086
 
1090
1087
    def block_quote(self, indented, line_offset):
1091
 
        blockquote_lines, attribution_lines, attribution_offset = \
1092
 
              self.check_attribution(indented, line_offset)
1093
 
        blockquote = nodes.block_quote()
1094
 
        self.nested_parse(blockquote_lines, line_offset, blockquote)
1095
 
        messages = []
1096
 
        if attribution_lines:
1097
 
            attribution, messages = self.parse_attribution(attribution_lines,
1098
 
                                                           attribution_offset)
1099
 
            blockquote += attribution
1100
 
        return blockquote, messages
 
1088
        elements = []
 
1089
        while indented:
 
1090
            (blockquote_lines,
 
1091
             attribution_lines,
 
1092
             attribution_offset,
 
1093
             indented,
 
1094
             new_line_offset) = self.split_attribution(indented, line_offset)
 
1095
            blockquote = nodes.block_quote()
 
1096
            self.nested_parse(blockquote_lines, line_offset, blockquote)
 
1097
            elements.append(blockquote)
 
1098
            if attribution_lines:
 
1099
                attribution, messages = self.parse_attribution(
 
1100
                    attribution_lines, attribution_offset)
 
1101
                blockquote += attribution
 
1102
                elements += messages
 
1103
            line_offset = new_line_offset
 
1104
            while indented and not indented[0]:
 
1105
                indented = indented[1:]
 
1106
                line_offset += 1
 
1107
        return elements
1101
1108
 
1102
 
    # u'\u2014' is an em-dash:
 
1109
    # U+2014 is an em-dash:
1103
1110
    attribution_pattern = re.compile(ur'(---?(?!-)|\u2014) *(?=[^ \n])')
1104
1111
 
1105
 
    def check_attribution(self, indented, line_offset):
 
1112
    def split_attribution(self, indented, line_offset):
1106
1113
        """
1107
 
        Check for an attribution in the last contiguous block of `indented`.
 
1114
        Check for a block quote attribution and split it off:
1108
1115
 
1109
 
        * First line after last blank line must begin with "--" (etc.).
 
1116
        * First line after a blank line must begin with a dash ("--", "---",
 
1117
          em-dash; matches `self.attribution_pattern`).
1110
1118
        * Every line after that must have consistent indentation.
 
1119
        * Attributions must be preceded by block quote content.
1111
1120
 
1112
 
        Return a 3-tuple: (block quote lines, attribution lines,
1113
 
        attribution offset).
 
1121
        Return a tuple of: (block quote content lines, attribution lines,

1122
        attribution offset, remaining indented lines, remaining lines offset).
1114
1123
        """
1115
 
        #import pdb ; pdb.set_trace()
1116
1124
        blank = None
1117
 
        nonblank_seen = None
1118
 
        indent = 0
1119
 
        for i in range(len(indented) - 1, 0, -1): # don't check first line
1120
 
            this_line_blank = not indented[i].strip()
1121
 
            if nonblank_seen and this_line_blank:
1122
 
                match = self.attribution_pattern.match(indented[i + 1])
1123
 
                if match:
1124
 
                    blank = i
 
1125
        nonblank_seen = False
 
1126
        for i in range(len(indented)):
 
1127
            line = indented[i].rstrip()
 
1128
            if line:
 
1129
                if nonblank_seen and blank == i - 1: # last line blank
 
1130
                    match = self.attribution_pattern.match(line)
 
1131
                    if match:
 
1132
                        attribution_end, indent = self.check_attribution(
 
1133
                            indented, i)
 
1134
                        if attribution_end:
 
1135
                            a_lines = indented[i:attribution_end]
 
1136
                            a_lines.trim_left(match.end(), end=1)
 
1137
                            a_lines.trim_left(indent, start=1)
 
1138
                            return (indented[:i], a_lines,
 
1139
                                    i, indented[attribution_end:],
 
1140
                                    line_offset + attribution_end)
 
1141
                nonblank_seen = True
 
1142
            else:
 
1143
                blank = i
 
1144
        else:
 
1145
            return (indented, None, None, None, None)
 
1146
 
 
1147
    def check_attribution(self, indented, attribution_start):
 
1148
        """
 
1149
        Check attribution shape.
 
1150
        Return the index past the end of the attribution, and the indent.
 
1151
        """
 
1152
        indent = None
 
1153
        i = attribution_start + 1
 
1154
        for i in range(attribution_start + 1, len(indented)):
 
1155
            line = indented[i].rstrip()
 
1156
            if not line:
1125
1157
                break
1126
 
            elif not this_line_blank:
1127
 
                nonblank_seen = 1
1128
 
        if blank and len(indented) - blank > 2: # multi-line attribution
1129
 
            indent = (len(indented[blank + 2])
1130
 
                      - len(indented[blank + 2].lstrip()))
1131
 
            for j in range(blank + 3, len(indented)):
1132
 
                if ( indented[j]        # may be blank last line
1133
 
                     and indent != (len(indented[j])
1134
 
                                    - len(indented[j].lstrip()))):
1135
 
                    # bad shape
1136
 
                    blank = None
1137
 
                    break
1138
 
        if blank:
1139
 
            a_lines = indented[blank + 1:]
1140
 
            a_lines.trim_left(match.end(), end=1)
1141
 
            a_lines.trim_left(indent, start=1)
1142
 
            return (indented[:blank], a_lines, line_offset + blank + 1)
 
1158
            if indent is None:
 
1159
                indent = len(line) - len(line.lstrip())
 
1160
            elif len(line) - len(line.lstrip()) != indent:
 
1161
                return None, None       # bad shape; not an attribution
1143
1162
        else:
1144
 
            return (indented, None, None)
 
1163
            # return index of line after last attribution line:
 
1164
            i += 1
 
1165
        return i, (indent or 0)
1145
1166
 
1146
1167
    def parse_attribution(self, indented, line_offset):
1147
1168
        text = '\n'.join(indented).rstrip()
1391
1412
            self.parent += msg
1392
1413
            indented, indent, line_offset, blank_finish = \
1393
1414
                  self.state_machine.get_first_known_indented(match.end())
1394
 
            blockquote, messages = self.block_quote(indented, line_offset)
1395
 
            self.parent += blockquote
1396
 
            self.parent += messages
 
1415
            elements = self.block_quote(indented, line_offset)
 
1416
            self.parent += elements
1397
1417
            if not blank_finish:
1398
1418
                self.parent += self.unindent_warning('Option list')
1399
1419
            return [], next_state, []
1720
1740
                            (
1721
1741
                              _               # anonymous target
1722
1742
                            |               # *OR*
 
1743
                              (?!_)           # no underscore at the beginning
1723
1744
                              (?P<quote>`?)   # optional open quote
1724
1745
                              (?![ `])        # first char. not space or
1725
1746
                                              # backquote
1965
1986
    def directive(self, match, **option_presets):
1966
1987
        """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
1967
1988
        type_name = match.group(1)
1968
 
        directive_function, messages = directives.directive(
 
1989
        directive_class, messages = directives.directive(
1969
1990
            type_name, self.memo.language, self.document)
1970
1991
        self.parent += messages
1971
 
        if directive_function:
 
1992
        if directive_class:
1972
1993
            return self.run_directive(
1973
 
                directive_function, match, type_name, option_presets)
 
1994
                directive_class, match, type_name, option_presets)
1974
1995
        else:
1975
1996
            return self.unknown_directive(type_name)
1976
1997
 
1977
 
    def run_directive(self, directive_fn, match, type_name, option_presets):
 
1998
    def run_directive(self, directive, match, type_name, option_presets):
1978
1999
        """
1979
2000
        Parse a directive, then instantiate and run its directive class.
1980
2001
 
1981
2002
        Parameters:
1982
2003
 
1983
 
        - `directive_fn`: The function implementing the directive.  Uses
1984
 
          function attributes ``arguments``, ``options``, and/or ``content``
1985
 
          if present.
 
2004
        - `directive`: The class implementing the directive.  Must be
 
2005
          a subclass of `rst.Directive`.
1986
2006
 
1987
2007
        - `match`: A regular expression match object which matched the first
1988
2008
          line of the directive.
1996
2016
 
1997
2017
        Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
1998
2018
        """
 
2019
        if isinstance(directive, (FunctionType, MethodType)):
 
2020
            from docutils.parsers.rst import convert_directive_function
 
2021
            directive = convert_directive_function(directive)
1999
2022
        lineno = self.state_machine.abs_line_number()
2000
2023
        initial_line_offset = self.state_machine.line_offset
2001
2024
        indented, indent, line_offset, blank_finish \
2006
2029
        try:
2007
2030
            arguments, options, content, content_offset = (
2008
2031
                self.parse_directive_block(indented, line_offset,
2009
 
                                           directive_fn, option_presets))
 
2032
                                           directive, option_presets))
2010
2033
        except MarkupError, detail:
2011
2034
            error = self.reporter.error(
2012
2035
                'Error in "%s" directive:\n%s.' % (type_name,
2013
2036
                                                   ' '.join(detail.args)),
2014
2037
                nodes.literal_block(block_text, block_text), line=lineno)
2015
2038
            return [error], blank_finish
2016
 
        result = directive_fn(type_name, arguments, options, content, lineno,
2017
 
                              content_offset, block_text, self,
2018
 
                              self.state_machine)
 
2039
        directive_instance = directive(
 
2040
            type_name, arguments, options, content, lineno,
 
2041
            content_offset, block_text, self, self.state_machine)
 
2042
        try:
 
2043
            result = directive_instance.run()
 
2044
        except docutils.parsers.rst.DirectiveError, directive_error:
 
2045
            msg_node = self.reporter.system_message(directive_error.level,
 
2046
                                                    directive_error.message)
 
2047
            msg_node += nodes.literal_block(block_text, block_text)
 
2048
            msg_node['line'] = lineno
 
2049
            result = [msg_node]
 
2050
        assert isinstance(result, list), \
 
2051
               'Directive "%s" must return a list of nodes.' % type_name
 
2052
        for i in range(len(result)):
 
2053
            assert isinstance(result[i], nodes.Node), \
 
2054
                   ('Directive "%s" returned non-Node object (index %s): %r'
 
2055
                    % (type_name, i, result[i]))
2019
2056
        return (result,
2020
2057
                blank_finish or self.state_machine.is_next_line_blank())
2021
2058
 
2022
 
    def parse_directive_block(self, indented, line_offset, directive_fn,
 
2059
    def parse_directive_block(self, indented, line_offset, directive,
2023
2060
                              option_presets):
2024
 
        arguments = []
2025
 
        options = {}
2026
 
        argument_spec = getattr(directive_fn, 'arguments', None)
2027
 
        if argument_spec and argument_spec[:2] == (0, 0):
2028
 
            argument_spec = None
2029
 
        option_spec = getattr(directive_fn, 'options', None)
2030
 
        content_spec = getattr(directive_fn, 'content', None)
 
2061
        option_spec = directive.option_spec
 
2062
        has_content = directive.has_content
2031
2063
        if indented and not indented[0].strip():
2032
2064
            indented.trim_start()
2033
2065
            line_offset += 1
2034
2066
        while indented and not indented[-1].strip():
2035
2067
            indented.trim_end()
2036
 
        if indented and (argument_spec or option_spec):
 
2068
        if indented and (directive.required_arguments
 
2069
                         or directive.optional_arguments
 
2070
                         or option_spec):
2037
2071
            for i in range(len(indented)):
2038
2072
                if not indented[i].strip():
2039
2073
                    break
2052
2086
        if option_spec:
2053
2087
            options, arg_block = self.parse_directive_options(
2054
2088
                option_presets, option_spec, arg_block)
2055
 
            if arg_block and not argument_spec:
 
2089
            if arg_block and not (directive.required_arguments
 
2090
                                  or directive.optional_arguments):
2056
2091
                raise MarkupError('no arguments permitted; blank line '
2057
2092
                                  'required before content block')
2058
 
        if argument_spec:
 
2093
        else:
 
2094
            options = {}
 
2095
        if directive.required_arguments or directive.optional_arguments:
2059
2096
            arguments = self.parse_directive_arguments(
2060
 
                argument_spec, arg_block)
2061
 
        if content and not content_spec:
 
2097
                directive, arg_block)
 
2098
        else:
 
2099
            arguments = []
 
2100
        if content and not has_content:
2062
2101
            raise MarkupError('no content permitted')
2063
2102
        return (arguments, options, content, content_offset)
2064
2103
 
2080
2119
                raise MarkupError(data)
2081
2120
        return options, arg_block
2082
2121
 
2083
 
    def parse_directive_arguments(self, argument_spec, arg_block):
2084
 
        required, optional, last_whitespace = argument_spec
 
2122
    def parse_directive_arguments(self, directive, arg_block):
 
2123
        required = directive.required_arguments
 
2124
        optional = directive.optional_arguments
2085
2125
        arg_text = '\n'.join(arg_block)
2086
2126
        arguments = arg_text.split()
2087
2127
        if len(arguments) < required:
2088
2128
            raise MarkupError('%s argument(s) required, %s supplied'
2089
2129
                              % (required, len(arguments)))
2090
2130
        elif len(arguments) > required + optional:
2091
 
            if last_whitespace:
 
2131
            if directive.final_argument_whitespace:
2092
2132
                arguments = arg_text.split(None, required + optional - 1)
2093
2133
            else:
2094
2134
                raise MarkupError(