~ubuntu-branches/ubuntu/natty/moin/natty-updates

« back to all changes in this revision

Viewing changes to MoinMoin/support/pygments/lexers/dotnet.py

  • Committer: Bazaar Package Importer
  • Author(s): Jonas Smedegaard
  • Date: 2008-06-22 21:17:13 UTC
  • mto: This revision was merged to the branch mainline in revision 18.
  • Revision ID: james.westby@ubuntu.com-20080622211713-inlv5k4eifxckelr
Import upstream version 1.7.0

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# -*- coding: utf-8 -*-
2
 
"""
3
 
    pygments.lexers.dotnet
4
 
    ~~~~~~~~~~~~~~~~~~~~~~
5
 
 
6
 
    Lexers for .net languages.
7
 
 
8
 
    :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS.
9
 
    :license: BSD, see LICENSE for details.
10
 
"""
11
 
import re
12
 
 
13
 
from pygments.lexer import RegexLexer, DelegatingLexer, bygroups, using, this
14
 
from pygments.token import Punctuation, \
15
 
     Text, Comment, Operator, Keyword, Name, String, Number, Literal, Other
16
 
from pygments.util import get_choice_opt
17
 
from pygments import unistring as uni
18
 
 
19
 
from pygments.lexers.web import XmlLexer
20
 
 
21
 
__all__ = ['CSharpLexer', 'BooLexer', 'VbNetLexer', 'CSharpAspxLexer',
22
 
           'VbNetAspxLexer']
23
 
 
24
 
 
25
 
def _escape(st):
26
 
    return st.replace(u'\\', ur'\\').replace(u'-', ur'\-').\
27
 
           replace(u'[', ur'\[').replace(u']', ur'\]')
28
 
 
29
 
class CSharpLexer(RegexLexer):
    """
    For `C# <http://msdn2.microsoft.com/en-us/vcsharp/default.aspx>`_
    source code.

    Additional options accepted:

    `unicodelevel`
      Determines which Unicode characters this lexer allows for identifiers.
      The possible values are:

      * ``none`` -- only the ASCII letters and numbers are allowed. This
        is the fastest selection.
      * ``basic`` -- all Unicode characters from the specification except
        category ``Lo`` are allowed.
      * ``full`` -- all Unicode characters as specified in the C# specs
        are allowed.  Note that this means a considerable slowdown since the
        ``Lo`` category has more than 40,000 characters in it!

      The default value is ``basic``.

      *New in Pygments 0.8.*
    """

    name = 'C#'
    aliases = ['csharp', 'c#']
    filenames = ['*.cs']
    mimetypes = ['text/x-csharp'] # inferred

    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    # for the range of allowed unicode characters in identifiers,
    # see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf

    # Maps each `unicodelevel` option value to the identifier regex used
    # for that level.
    levels = {
        'none': '@?[_a-zA-Z][a-zA-Z0-9_]*',
        'basic': ('@?[_' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + ']' +
                  '[' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl +
                  uni.Nd + uni.Pc + uni.Cf + uni.Mn + uni.Mc + ']*'),
        'full': ('@?(?:_|[^' +
                 _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl')) + '])'
                 + '[^' + _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo',
                                                'Nl', 'Nd', 'Pc', 'Cf', 'Mn',
                                                'Mc')) + ']*'),
    }

    # One complete token definition is built per unicode level; the
    # instance picks one of them in __init__.
    tokens = {}
    token_variants = True

    for levelname, cs_ident in levels.items():
        tokens[levelname] = {
            'root': [
                # method names
                (r'^([ \t]*(?:' + cs_ident + r'(?:\[\])?\s+)+?)' # return type
                 r'(' + cs_ident + ')'                           # method name
                 r'(\s*)(\()',                               # signature start
                 bygroups(using(this), Name.Function, Text, Punctuation)),
                (r'^\s*\[.*?\]', Name.Attribute),
                (r'[^\S\n]+', Text),
                (r'\\\n', Text), # line continuation
                (r'//.*?\n', Comment.Single),
                (r'/[*](.|\n)*?[*]/', Comment.Multiline),
                (r'\n', Text),
                (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
                (r'[{}]', Punctuation),
                (r'@"(\\\\|\\"|[^"])*"', String),
                (r'"(\\\\|\\"|[^"\n])*["\n]', String),
                (r"'\\.'|'[^\\]'", String.Char),
                # The hexadecimal alternative must come first: alternation
                # is ordered, so otherwise the leading "0" of "0x1F" is
                # taken as a complete decimal literal.  The decimal part
                # uses [0-9]+ so that a multi-digit literal is one token,
                # and the exponent sign is optional ("1e10").
                (r"0[xX][0-9a-fA-F]+[Ll]?|"
                 r"[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?[flFLdD]?", Number),
                (r'#[ \t]*(if|endif|else|elif|define|undef|'
                 r'line|error|warning|region|endregion|pragma)\b.*?\n',
                 Comment.Preproc),
                (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text,
                 Keyword)),
                (r'(abstract|as|base|break|case|catch|'
                 r'checked|const|continue|default|delegate|'
                 r'do|else|enum|event|explicit|extern|false|finally|'
                 r'fixed|for|foreach|goto|if|implicit|in|interface|'
                 r'internal|is|lock|new|null|operator|'
                 r'out|override|params|private|protected|public|readonly|'
                 r'ref|return|sealed|sizeof|stackalloc|static|'
                 r'switch|this|throw|true|try|typeof|'
                 r'unchecked|unsafe|virtual|void|while|'
                 r'get|set|partial|yield|add|remove|value)\b', Keyword),
                (r'(global)(::)', bygroups(Keyword, Punctuation)),
                (r'(bool|byte|char|decimal|double|float|int|long|object|sbyte|'
                 r'short|string|uint|ulong|ushort)\b\??', Keyword.Type),
                (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'class'),
                (r'(namespace|using)(\s+)', bygroups(Keyword, Text), 'namespace'),
                (cs_ident, Name),
            ],
            'class': [
                (cs_ident, Name.Class, '#pop')
            ],
            'namespace': [
                (r'(?=\()', Text, '#pop'), # using (resource)
                ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop')
            ]
        }

    def __init__(self, **options):
        """
        Select the token definition for the requested `unicodelevel`
        option (default ``basic``) and initialise the lexer with the
        remaining `options`.
        """
        level = get_choice_opt(options, 'unicodelevel', self.tokens.keys(), 'basic')
        if level not in self._all_tokens:
            # compile the regexes now
            self._tokens = self.__class__.process_tokendef(level)
        else:
            self._tokens = self._all_tokens[level]

        RegexLexer.__init__(self, **options)
139
 
 
140
 
 
141
 
class BooLexer(RegexLexer):
    """
    For `Boo <http://boo.codehaus.org/>`_ source code.
    """

    name = 'Boo'
    aliases = ['boo']
    filenames = ['*.boo']
    mimetypes = ['text/x-boo']

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'(#|//).*$', Comment.Single),
            # block comments nest, so they get their own state
            (r'/[*]', Comment.Multiline, 'comment'),
            (r'[]{}:(),.;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            (r'/(\\\\|\\/|[^/\s])/', String.Regex),
            (r'@/(\\\\|\\/|[^/])*/', String.Regex),
            (r'=~|!=|==|<<|>>|[-+/*%=<>&^|]', Operator),
            (r'(as|abstract|callable|constructor|destructor|do|import|'
             r'enum|event|final|get|interface|internal|of|override|'
             r'partial|private|protected|public|return|set|static|'
             r'struct|transient|virtual|yield|super|and|break|cast|'
             r'continue|elif|else|ensure|except|for|given|goto|if|in|'
             r'is|isa|not|or|otherwise|pass|raise|ref|try|unless|when|'
             r'while|from)\b', Keyword),
            # "def" directly followed by a parenthesised list is a plain
            # keyword; otherwise the next identifier is the function name
            (r'def(?=\s+\(.*?\))', Keyword),
            (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            (r'(namespace)(\s+)', bygroups(Keyword, Text), 'namespace'),
            (r'(?<!\.)(true|false|null|self|__eval__|__switch__|array|'
             r'assert|checked|enumerate|filter|getter|len|lock|map|'
             r'matrix|max|min|normalArrayIndexing|print|property|range|'
             r'rawArrayIndexing|required|typeof|unchecked|using|'
             r'yieldAll|zip)\b', Name.Builtin),
            ('"""(\\\\|\\"|.*?)"""', String.Double),
            ('"(\\\\|\\"|[^"]*?)"', String.Double),
            ("'(\\\\|\\'|[^']*?)'", String.Single),
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
            (r'(\d+\.\d*|\d*\.\d+)([fF][+-]?[0-9]+)?', Number.Float),
            # "ms?" rather than "m|ms": alternation is ordered, so with
            # "m" listed first the "ms" suffix could never be matched
            (r'[0-9][0-9\.]*(ms?|d|h|s)', Number),
            (r'0\d+', Number.Oct),
            (r'0x[a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+', Number.Integer),
        ],
        'comment': [
            # '#push' / '#pop' keep track of the nesting depth
            ('/[*]', Comment.Multiline, '#push'),
            ('[*]/', Comment.Multiline, '#pop'),
            ('[^/*]', Comment.Multiline),
            ('[*/]', Comment.Multiline)
        ],
        'funcname': [
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop')
        ],
        'classname': [
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')
        ],
        'namespace': [
            ('[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace, '#pop')
        ]
    }
206
 
 
207
 
 
208
 
class VbNetLexer(RegexLexer):
    """
    For
    `Visual Basic.NET <http://msdn2.microsoft.com/en-us/vbasic/default.aspx>`_
    source code.
    """

    name = 'VB.net'
    aliases = ['vb.net', 'vbnet']
    filenames = ['*.vb', '*.bas']
    mimetypes = ['text/x-vbnet', 'text/x-vba'] # (?)

    # IGNORECASE applies to every pattern below, including the lower-case
    # identifier patterns of the 'funcname', 'classname' and 'namespace'
    # states.
    flags = re.MULTILINE | re.IGNORECASE
    tokens = {
        'root': [
            (r'^\s*<.*?>', Name.Attribute),
            (r'\s+', Text),
            (r'\n', Text),
            (r'rem\b.*?\n', Comment),
            (r"'.*?\n", Comment),
            (r'#If\s.*?\sThen|#ElseIf\s.*?\sThen|#End\s+If|#Const|'
             r'#ExternalSource.*?\n|#End\s+ExternalSource|'
             r'#Region.*?\n|#End\s+Region|#ExternalChecksum',
             Comment.Preproc),
            (r'[\(\){}!#,.:]', Punctuation),
            (r'Option\s+(Strict|Explicit|Compare)\s+'
             r'(On|Off|Binary|Text)', Keyword.Declaration),
            (r'(?<!\.)(AddHandler|Alias|'
             r'ByRef|ByVal|Call|Case|Catch|CBool|CByte|CChar|CDate|'
             r'CDec|CDbl|CInt|CLng|CObj|Const|Continue|CSByte|CShort|'
             r'CSng|CStr|CType|CUInt|CULng|CUShort|Declare|'
             r'Default|Delegate|Dim|DirectCast|Do|Each|Else|ElseIf|'
             r'End|EndIf|Enum|Erase|Error|Event|Exit|False|Finally|For|'
             r'Friend|Function|Get|Global|GoSub|GoTo|Handles|If|'
             r'Implements|Imports|Inherits|Interface|'
             r'Let|Lib|Loop|Me|Module|MustInherit|'
             r'MustOverride|MyBase|MyClass|Namespace|Narrowing|New|Next|'
             r'Not|Nothing|NotInheritable|NotOverridable|Of|On|'
             r'Operator|Option|Optional|Overloads|Overridable|'
             r'Overrides|ParamArray|Partial|Private|Property|Protected|'
             r'Public|RaiseEvent|ReadOnly|ReDim|RemoveHandler|Resume|'
             r'Return|Select|Set|Shadows|Shared|Single|'
             r'Static|Step|Stop|Structure|Sub|SyncLock|Then|'
             r'Throw|To|True|Try|TryCast|Wend|'
             r'Using|When|While|Widening|With|WithEvents|'
             r'WriteOnly)\b', Keyword),
            # NOTE(review): Function, Sub, Property, Structure, Enum,
            # Namespace and Imports are also part of the keyword rule
            # above, which is tried first, so of the three rules below
            # only the "Class" alternative appears to be reachable.
            # Left unchanged here; simply reordering would mis-handle
            # "End Function" and similar -- confirm before changing.
            (r'(?<!\.)(Function|Sub|Property)(\s+)',
             bygroups(Keyword, Text), 'funcname'),
            (r'(?<!\.)(Class|Structure|Enum)(\s+)',
             bygroups(Keyword, Text), 'classname'),
            (r'(?<!\.)(Namespace|Imports)(\s+)',
             bygroups(Keyword, Text), 'namespace'),
            (r'(?<!\.)(Boolean|Byte|Char|Date|Decimal|Double|Integer|Long|'
             r'Object|SByte|Short|Single|String|Variant|UInteger|ULong|'
             r'UShort)\b', Keyword.Type),
            (r'(?<!\.)(AddressOf|And|AndAlso|As|GetType|In|Is|IsNot|Like|Mod|'
             r'Or|OrElse|TypeOf|Xor)\b', Operator.Word),
            (r'&=|[*]=|/=|\\=|\^=|\+=|-=|<<=|>>=|<<|>>|:=|'
             r'<=|>=|<>|[-&*/\\^+=<>]',
             Operator),
            ('"', String, 'string'),
            # Line continuation.  This has to be tried before the Name
            # rule, which would otherwise match the underscore on its own.
            (r'_\n', Text),
            ('[a-zA-Z_][a-zA-Z0-9_]*[%&@!#$]?', Name),
            ('#.*?#', Literal.Date),
            (r'(\d+\.\d*|\d*\.\d+)([fF][+-]?[0-9]+)?', Number.Float),
            (r'\d+([SILDFR]|US|UI|UL)?', Number.Integer),
            (r'&H[0-9a-f]+([SILDFR]|US|UI|UL)?', Number.Integer),
            (r'&O[0-7]+([SILDFR]|US|UI|UL)?', Number.Integer),
        ],
        'string': [
            # a doubled quote stays inside the string
            (r'""', String),
            (r'"C?', String, '#pop'),
            (r'[^"]+', String),
        ],
        'funcname': [
            (r'[a-z_][a-z0-9_]*', Name.Function, '#pop')
        ],
        'classname': [
            (r'[a-z_][a-z0-9_]*', Name.Class, '#pop')
        ],
        'namespace': [
            (r'[a-z_][a-z0-9_.]*', Name.Namespace, '#pop')
        ],
    }
292
 
 
293
 
class GenericAspxLexer(RegexLexer):
    """
    Lexer for ASP.NET pages.

    The markup is handed to `XmlLexer`; the contents of ``<% ... %>``
    blocks and of ``<script>`` elements are emitted as `Other` tokens.
    """

    name = 'aspx-gen'
    filenames = []
    mimetypes = []

    # DOTALL lets ".*?" span line breaks inside code blocks
    flags = re.DOTALL

    tokens = {
        'root': [
            # <% ... %>, <%@ ... %>, <%= ... %> and <%# ... %> blocks
            (r'(<%[@=#]?)(.*?)(%>)',
             bygroups(Name.Tag, Other, Name.Tag)),
            # <script> element: tags as XML, body as Other
            (r'(<script.*?>)(.*?)(</script>)',
             bygroups(using(XmlLexer), Other, using(XmlLexer))),
            # anything up to the next "<"
            (r'(.+?)(?=<)', using(XmlLexer)),
            # whatever is left
            (r'.+', using(XmlLexer)),
        ],
    }
314
 
 
315
 
#TODO support multiple languages within the same source file
316
 
class CSharpAspxLexer(DelegatingLexer):
    """
    Lexer for highlighting C# within ASP.NET pages.
    """

    name = 'aspx-cs'
    aliases = ['aspx-cs']
    filenames = ['*.aspx', '*.asax', '*.ascx', '*.ashx', '*.asmx', '*.axd']
    mimetypes = []

    def __init__(self, **options):
        """Set up delegation between `CSharpLexer` and `GenericAspxLexer`."""
        super(CSharpAspxLexer, self).__init__(CSharpLexer, GenericAspxLexer,
                                              **options)

    def analyse_text(text):
        """
        Score how likely `text` is an ASP.NET page written in C#:
        0.2 for a matching ``Page Language`` declaration, 0.15 for a
        ``<script>`` tag with a C# ``language`` attribute, else 0.001.
        """
        page_directive = re.search(r'Page\s*Language="C#"', text, re.I)
        if page_directive is not None:
            return 0.2
        script_tag = re.search(r'script[^>]+language=["\']C#', text, re.I)
        if script_tag is not None:
            return 0.15
        return 0.001 # TODO really only for when filename matched...
336
 
 
337
 
class VbNetAspxLexer(DelegatingLexer):
    """
    Lexer for highlighting Visual Basic.net within ASP.NET pages.
    """

    name = 'aspx-vb'
    aliases = ['aspx-vb']
    filenames = ['*.aspx', '*.asax', '*.ascx', '*.ashx', '*.asmx', '*.axd']
    mimetypes = []

    def __init__(self, **options):
        """Set up delegation between `VbNetLexer` and `GenericAspxLexer`."""
        super(VbNetAspxLexer, self).__init__(VbNetLexer, GenericAspxLexer,
                                             **options)

    def analyse_text(text):
        """
        Score how likely `text` is an ASP.NET page written in VB:
        0.2 for a matching ``Page Language`` declaration, 0.15 for a
        ``<script>`` tag with a VB ``language`` attribute.  Nothing is
        returned explicitly when neither pattern is found.
        """
        page_directive = re.search(r'Page\s*Language="Vb"', text, re.I)
        if page_directive is not None:
            return 0.2
        script_tag = re.search(r'script[^>]+language=["\']vb', text, re.I)
        if script_tag is not None:
            return 0.15