~ubuntu-branches/ubuntu/natty/moin/natty-updates

Viewing changes to MoinMoin/support/pygments/lexers/agile.py

  • Committer: Bazaar Package Importer
  • Author(s): Jonas Smedegaard
  • Date: 2008-06-22 21:17:13 UTC
  • mto: This revision was merged to the branch mainline in revision 18.
  • Revision ID: james.westby@ubuntu.com-20080622211713-inlv5k4eifxckelr
Import upstream version 1.7.0

# -*- coding: utf-8 -*-
"""
    pygments.lexers.agile
    ~~~~~~~~~~~~~~~~~~~~~

    Lexers for agile languages.

    :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, \
     LexerContext, include, combined, do_insertions, bygroups, using
from pygments.token import Error, Text, Other, \
     Comment, Operator, Keyword, Name, String, Number, Generic, Punctuation
from pygments.util import get_bool_opt, get_list_opt, shebang_matches
from pygments import unistring as uni


__all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer',
           'RubyLexer', 'RubyConsoleLexer', 'PerlLexer', 'LuaLexer',
           'MiniDLexer', 'IoLexer', 'TclLexer', 'ClojureLexer',
           'Python3Lexer', 'Python3TracebackLexer']

# b/w compatibility
from pygments.lexers.functional import SchemeLexer

line_re  = re.compile('.*?\n')


class PythonLexer(RegexLexer):
    """
    For `Python <http://www.python.org>`_ source code.
    """

    name = 'Python'
    aliases = ['python', 'py']
    filenames = ['*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript', '*.tac']
    mimetypes = ['text/x-python', 'application/x-python']

    tokens = {
        'root': [
            (r'\n', Text),
            (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Text, String.Doc)),
            (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Text, String.Doc)),
            (r'[^\S\n]+', Text),
            (r'#.*$', Comment),
            (r'[]{}:(),;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator),
            include('keywords'),
            (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'),
            (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), 'fromimport'),
            (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), 'import'),
            include('builtins'),
            include('backtick'),
            ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'),
            ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'),
            ('[uU]?"""', String, combined('stringescape', 'tdqs')),
            ("[uU]?'''", String, combined('stringescape', 'tsqs')),
            ('[uU]?"', String, combined('stringescape', 'dqs')),
            ("[uU]?'", String, combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            (r'(assert|break|continue|del|elif|else|except|exec|'
             r'finally|for|global|if|lambda|pass|print|raise|'
             r'return|try|while|yield|as|with)\b', Keyword),
        ],
        'builtins': [
            (r'(?<!\.)(__import__|abs|all|any|apply|basestring|bin|bool|buffer|'
             r'bytearray|bytes|callable|chr|classmethod|cmp|coerce|compile|'
             r'complex|delattr|dict|dir|divmod|enumerate|eval|execfile|exit|'
             r'file|filter|float|frozenset|getattr|globals|hasattr|hash|hex|id|'
             r'input|int|intern|isinstance|issubclass|iter|len|list|locals|'
             r'long|map|max|min|next|object|oct|open|ord|pow|property|range|'
             r'raw_input|reduce|reload|repr|reversed|round|set|setattr|slice|'
             r'sorted|staticmethod|str|sum|super|tuple|type|unichr|unicode|'
             r'vars|xrange|zip)\b', Name.Builtin),
            (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True'
             r')\b', Name.Builtin.Pseudo),
            (r'(?<!\.)(ArithmeticError|AssertionError|AttributeError|'
             r'BaseException|DeprecationWarning|EOFError|EnvironmentError|'
             r'Exception|FloatingPointError|FutureWarning|GeneratorExit|IOError|'
             r'ImportError|ImportWarning|IndentationError|IndexError|KeyError|'
             r'KeyboardInterrupt|LookupError|MemoryError|NameError|'
             r'NotImplemented|NotImplementedError|OSError|OverflowError|'
             r'OverflowWarning|PendingDeprecationWarning|ReferenceError|'
             r'RuntimeError|RuntimeWarning|StandardError|StopIteration|'
             r'SyntaxError|SyntaxWarning|SystemError|SystemExit|TabError|'
             r'TypeError|UnboundLocalError|UnicodeDecodeError|'
             r'UnicodeEncodeError|UnicodeError|UnicodeTranslateError|'
             r'UnicodeWarning|UserWarning|ValueError|VMSError|Warning|'
             r'WindowsError|ZeroDivisionError)\b', Name.Exception),
        ],
        'numbers': [
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+', Number.Float),
            (r'0\d+', Number.Oct),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+', Number.Integer)
        ],
        'backtick': [
            ('`.*?`', String.Backtick),
        ],
        'name': [
            (r'@[a-zA-Z0-9_.]+', Name.Decorator),
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
        ],
        'funcname': [
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop')
        ],
        'classname': [
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')
        ],
        'import': [
            (r'((?:\s|\\\s)+)(as)((?:\s|\\\s)+)',
             bygroups(Text, Keyword.Namespace, Text)),
            (r'[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace),
            (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
            (r'', Text, '#pop') # all else: go back
        ],
        'fromimport': [
            (r'((?:\s|\\\s)+)(import)\b', bygroups(Text, Keyword.Namespace), '#pop'),
            (r'[a-zA-Z_.][a-zA-Z0-9_.]*', Name.Namespace),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'strings': [
            (r'%(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
            (r'[^\\\'"%\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
            # unhandled string formatting sign
            (r'%', String)
            # newlines are an error (use "nl" state)
        ],
        'nl': [
            (r'\n', String)
        ],
        'dqs': [
            (r'"', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape), # included here again for raw strings
            include('strings')
        ],
        'sqs': [
            (r"'", String, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape), # included here again for raw strings
            include('strings')
        ],
        'tdqs': [
            (r'"""', String, '#pop'),
            include('strings'),
            include('nl')
        ],
        'tsqs': [
            (r"'''", String, '#pop'),
            include('strings'),
            include('nl')
        ],
    }

    def analyse_text(text):
        return shebang_matches(text, r'pythonw?(2\.\d)?')
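
# --- Illustrative usage sketch, not part of the upstream agile.py ---------
# A minimal example of driving the PythonLexer defined above through the
# standard pygments entry points (highlight() plus a formatter).  The helper
# name below is hypothetical and exists only as documentation.
def _example_highlight_python():
    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    code = "def greet(name):\n    return 'hello ' + name\n"
    # Returns an HTML fragment; the lexer is also reachable through the
    # 'python'/'py' aliases registered above.
    return highlight(code, PythonLexer(), HtmlFormatter())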


class Python3Lexer(RegexLexer):
    """
    For `Python <http://www.python.org>`_ source code (version 3.0).

    *New in Pygments 0.10.*
    """

    name = 'Python 3'
    aliases = ['python3', 'py3']
    filenames = []  # Nothing until Python 3 gets widespread
    mimetypes = ['text/x-python3', 'application/x-python3']

    flags = re.MULTILINE | re.UNICODE

    uni_name = "[%s][%s]*" % (uni.xid_start, uni.xid_continue)

    tokens = PythonLexer.tokens.copy()
    tokens['keywords'] = [
        (r'(assert|break|continue|del|elif|else|except|'
         r'finally|for|global|if|lambda|pass|raise|'
         r'return|try|while|yield|as|with|True|False|None)\b', Keyword),
    ]
    tokens['builtins'] = [
        (r'(?<!\.)(__import__|abs|all|any|bin|bool|bytearray|bytes|'
         r'chr|classmethod|cmp|compile|complex|delattr|dict|dir|'
         r'divmod|enumerate|eval|filter|float|format|frozenset|getattr|'
         r'globals|hasattr|hash|hex|id|input|int|isinstance|issubclass|'
         r'iter|len|list|locals|map|max|memoryview|min|next|object|oct|'
         r'open|ord|pow|print|property|range|repr|reversed|round|'
         r'set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|'
         r'vars|zip)\b', Name.Builtin),
        (r'(?<!\.)(self|Ellipsis|NotImplemented)\b', Name.Builtin.Pseudo),
        (r'(?<!\.)(ArithmeticError|AssertionError|AttributeError|'
         r'BaseException|BufferError|BytesWarning|DeprecationWarning|'
         r'EOFError|EnvironmentError|Exception|FloatingPointError|'
         r'FutureWarning|GeneratorExit|IOError|ImportError|'
         r'ImportWarning|IndentationError|IndexError|KeyError|'
         r'KeyboardInterrupt|LookupError|MemoryError|NameError|'
         r'NotImplementedError|OSError|OverflowError|'
         r'PendingDeprecationWarning|ReferenceError|'
         r'RuntimeError|RuntimeWarning|StopIteration|'
         r'SyntaxError|SyntaxWarning|SystemError|SystemExit|TabError|'
         r'TypeError|UnboundLocalError|UnicodeDecodeError|'
         r'UnicodeEncodeError|UnicodeError|UnicodeTranslateError|'
         r'UnicodeWarning|UserWarning|ValueError|VMSError|Warning|'
         r'WindowsError|ZeroDivisionError)\b', Name.Exception),
    ]
    tokens['numbers'] = [
        (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
        (r'0[oO][0-7]+', Number.Oct),
        (r'0[bB][01]+', Number.Bin),
        (r'0[xX][a-fA-F0-9]+', Number.Hex),
        (r'\d+', Number.Integer)
    ]
    tokens['backtick'] = []
    tokens['name'] = [
        (r'@[a-zA-Z0-9_]+', Name.Decorator),
        (uni_name, Name),
    ]
    tokens['funcname'] = [
        (uni_name, Name.Function, '#pop')
    ]
    tokens['classname'] = [
        (uni_name, Name.Class, '#pop')
    ]
    tokens['import'] = [
        (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)),
        (r'\.', Name.Namespace),
        (uni_name, Name.Namespace),
        (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
        (r'', Text, '#pop') # all else: go back
    ]
    tokens['fromimport'] = [
        (r'(\s+)(import)\b', bygroups(Text, Keyword), '#pop'),
        (r'\.', Name.Namespace),
        (uni_name, Name.Namespace),
    ]
    # don't highlight "%s" substitutions
    tokens['strings'] = [
        (r'[^\\\'"%\n]+', String),
        # quotes, percents and backslashes must be parsed one at a time
        (r'[\'"\\]', String),
        # unhandled string formatting sign
        (r'%', String)
        # newlines are an error (use "nl" state)
    ]

    def analyse_text(text):
        return shebang_matches(text, r'pythonw?3(\.\d)?')


class PythonConsoleLexer(Lexer):
    """
    For Python console output or doctests, such as:

    .. sourcecode:: pycon

        >>> a = 'foo'
        >>> print a
        foo
        >>> 1 / 0
        Traceback (most recent call last):
          File "<stdin>", line 1, in <module>
        ZeroDivisionError: integer division or modulo by zero

    Additional options:

    `python3`
        Use Python 3 lexer for code.  Default is ``False``.
        *New in Pygments 1.0.*
    """
    name = 'Python console session'
    aliases = ['pycon']
    mimetypes = ['text/x-python-doctest']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        if self.python3:
            pylexer = Python3Lexer(**self.options)
            tblexer = Python3TracebackLexer(**self.options)
        else:
            pylexer = PythonLexer(**self.options)
            tblexer = PythonTracebackLexer(**self.options)

        curcode = ''
        insertions = []
        curtb = ''
        tbindex = 0
        tb = 0
        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('>>> ') or line.startswith('... '):
                tb = 0
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:4])]))
                curcode += line[4:]
            elif line.rstrip() == '...' and not tb:
                # only a new >>> prompt can end an exception block
                # otherwise an ellipsis in place of the traceback frames
                # will be mishandled
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, '...')]))
                curcode += line[3:]
            else:
                if curcode:
                    for item in do_insertions(insertions,
                                    pylexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                if (line.startswith('Traceback (most recent call last):') or
                    re.match(r'  File "[^"]+", line \d+\n$', line)):
                    tb = 1
                    curtb = line
                    tbindex = match.start()
                elif line == 'KeyboardInterrupt\n':
                    yield match.start(), Name.Class, line
                elif tb:
                    curtb += line
                    if not (line.startswith(' ') or line.strip() == '...'):
                        tb = 0
                        for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                            yield tbindex+i, t, v
                else:
                    yield match.start(), Generic.Output, line
        if curcode:
            for item in do_insertions(insertions,
                                      pylexer.get_tokens_unprocessed(curcode)):
                yield item
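
# --- Illustrative usage sketch, not part of the upstream agile.py ---------
# Tokenizing a doctest transcript with the console lexer above.  The
# `python3` keyword argument is the option handled in __init__; get_tokens()
# is the generic Lexer API.  The helper name is hypothetical.
def _example_console_tokens():
    session = (">>> a = 'foo'\n"
               ">>> print a\n"
               "foo\n")
    return list(PythonConsoleLexer(python3=False).get_tokens(session))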


class PythonTracebackLexer(RegexLexer):
    """
    For Python tracebacks.

    *New in Pygments 0.7.*
    """

    name = 'Python Traceback'
    aliases = ['pytb']
    filenames = ['*.pytb']
    mimetypes = ['text/x-python-traceback']

    tokens = {
        'root': [
            (r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
            # SyntaxError starts with this.
            (r'^(?=  File "[^"]+", line \d+)', Generic.Traceback, 'intb'),
            (r'^.*\n', Other),
        ],
        'intb': [
            (r'^(  File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text, Name.Identifier, Text)),
            (r'^(  File )("[^"]+")(, line )(\d+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text)),
            (r'^(    )(.+)(\n)',
             bygroups(Text, using(PythonLexer), Text)),
            (r'^([ \t]*)(...)(\n)',
             bygroups(Text, Comment, Text)), # for doctests...
            (r'^(.+)(: )(.+)(\n)',
             bygroups(Name.Class, Text, Name.Identifier, Text), '#pop'),
            (r'^([a-zA-Z_][a-zA-Z0-9_]*)(:?\n)',
             bygroups(Name.Class, Text), '#pop')
        ],
    }


class Python3TracebackLexer(RegexLexer):
    """
    For Python 3.0 tracebacks, with support for chained exceptions.

    *New in Pygments 1.0.*
    """

    name = 'Python 3.0 Traceback'
    aliases = ['py3tb']
    filenames = ['*.py3tb']
    mimetypes = ['text/x-python3-traceback']

    tokens = {
        'root': [
            (r'\n', Text),
            (r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
            (r'^During handling of the above exception, another '
             r'exception occurred:\n\n', Generic.Traceback),
            (r'^The above exception was the direct cause of the '
             r'following exception:\n\n', Generic.Traceback),
        ],
        'intb': [
            (r'^(  File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text, Name.Identifier, Text)),
            (r'^(    )(.+)(\n)',
             bygroups(Text, using(Python3Lexer), Text)),
            (r'^([ \t]*)(...)(\n)',
             bygroups(Text, Comment, Text)), # for doctests...
            (r'^(.+)(: )(.+)(\n)',
             bygroups(Name.Class, Text, Name.Identifier, Text), '#pop'),
            (r'^([a-zA-Z_][a-zA-Z0-9_]*)(:?\n)',
             bygroups(Name.Class, Text), '#pop')
        ],
    }


class RubyLexer(ExtendedRegexLexer):
    """
    For `Ruby <http://www.ruby-lang.org>`_ source code.
    """

    name = 'Ruby'
    aliases = ['rb', 'ruby']
    filenames = ['*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec', '*.rbx']
    mimetypes = ['text/x-ruby', 'application/x-ruby']

    flags = re.DOTALL | re.MULTILINE

    def heredoc_callback(self, match, ctx):
        # okay, this is the hardest part of parsing Ruby...
        # match: 1 = <<-?, 2 = quote? 3 = name 4 = quote? 5 = rest of line

        start = match.start(1)
        yield start, Operator, match.group(1)        # <<-?
        yield match.start(2), String.Heredoc, match.group(2)  # quote ", ', `
        yield match.start(3), Name.Constant, match.group(3)   # heredoc name
        yield match.start(4), String.Heredoc, match.group(4)  # quote again

        heredocstack = ctx.__dict__.setdefault('heredocstack', [])
        outermost = not bool(heredocstack)
        heredocstack.append((match.group(1) == '<<-', match.group(3)))

        ctx.pos = match.start(5)
        ctx.end = match.end(5)
        # this may find other heredocs
        for i, t, v in self.get_tokens_unprocessed(context=ctx):
            yield i, t, v
        ctx.pos = match.end()

        if outermost:
            # this is the outer heredoc again, now we can process them all
            for tolerant, hdname in heredocstack:
                lines = []
                for match in line_re.finditer(ctx.text, ctx.pos):
                    if tolerant:
                        check = match.group().strip()
                    else:
                        check = match.group().rstrip()
                    if check == hdname:
                        for amatch in lines:
                            yield amatch.start(), String.Heredoc, amatch.group()
                        yield match.start(), Name.Constant, match.group()
                        ctx.pos = match.end()
                        break
                    else:
                        lines.append(match)
                else:
                    # end of heredoc not found -- error!
                    for amatch in lines:
                        yield amatch.start(), Error, amatch.group()
            ctx.end = len(ctx.text)
            del heredocstack[:]


    def gen_rubystrings_rules():
        def intp_regex_callback(self, match, ctx):
            yield match.start(1), String.Regex, match.group(1)    # begin
            nctx = LexerContext(match.group(3), 0, ['interpolated-regex'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3)+i, t, v
            yield match.start(4), String.Regex, match.group(4)    # end[mixounse]*
            ctx.pos = match.end()

        def intp_string_callback(self, match, ctx):
            yield match.start(1), String.Other, match.group(1)
            nctx = LexerContext(match.group(3), 0, ['interpolated-string'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3)+i, t, v
            yield match.start(4), String.Other, match.group(4)    # end
            ctx.pos = match.end()

        states = {}
        states['strings'] = [
            # easy ones
            (r'\:([a-zA-Z_][\w_]*[\!\?]?|\*\*?|[-+]@?|'
             r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)', String.Symbol),
            (r":'(\\\\|\\'|[^'])*'", String.Symbol),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r':"', String.Symbol, 'simple-sym'),
            (r'"', String.Double, 'simple-string'),
            (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
        ]

        # double-quoted string and symbol
        for name, ttype, end in ('string', String.Double, '"'), \
                                ('sym', String.Symbol, '"'), \
                                ('backtick', String.Backtick, '`'):
            states['simple-'+name] = [
                include('string-intp-escaped'),
                (r'[^\\%s#]+' % end, ttype),
                (r'[\\#]', ttype),
                (end, ttype, '#pop'),
            ]

        # braced quoted strings
        for lbrace, rbrace, name in ('\\{', '\\}', 'cb'), \
                                    ('\\[', '\\]', 'sb'), \
                                    ('\\(', '\\)', 'pa'), \
                                    ('<', '>', 'ab'):
            states[name+'-intp-string'] = [
                (r'\\[\\' + lbrace + rbrace + ']', String.Other),
                (r'(?<!\\)' + lbrace, String.Other, '#push'),
                (r'(?<!\\)' + rbrace, String.Other, '#pop'),
                include('string-intp-escaped'),
                (r'[\\#' + lbrace + rbrace + ']', String.Other),
                (r'[^\\#' + lbrace + rbrace + ']+', String.Other),
            ]
            states['strings'].append((r'%[QWx]?' + lbrace, String.Other,
                                      name+'-intp-string'))
            states[name+'-string'] = [
                (r'\\[\\' + lbrace + rbrace + ']', String.Other),
                (r'(?<!\\)' + lbrace, String.Other, '#push'),
                (r'(?<!\\)' + rbrace, String.Other, '#pop'),
                (r'[\\#' + lbrace + rbrace + ']', String.Other),
                (r'[^\\#' + lbrace + rbrace + ']+', String.Other),
            ]
            states['strings'].append((r'%[qsw]' + lbrace, String.Other,
                                      name+'-string'))
            states[name+'-regex'] = [
                (r'\\[\\' + lbrace + rbrace + ']', String.Regex),
                (r'(?<!\\)' + lbrace, String.Regex, '#push'),
                (r'(?<!\\)' + rbrace + '[mixounse]*', String.Regex, '#pop'),
                include('string-intp'),
                (r'[\\#' + lbrace + rbrace + ']', String.Regex),
                (r'[^\\#' + lbrace + rbrace + ']+', String.Regex),
            ]
            states['strings'].append((r'%r' + lbrace, String.Regex,
                                      name+'-regex'))

        # these must come after %<brace>!
        states['strings'] += [
            # %r regex
            (r'(%r([^a-zA-Z0-9]))((?:\\\2|(?!\2).)*)(\2[mixounse]*)',
             intp_regex_callback),
            # regular fancy strings with qsw
            (r'%[qsw]([^a-zA-Z0-9])((?:\\\1|(?!\1).)*)\1', String.Other),
            (r'(%[QWx]([^a-zA-Z0-9]))((?:\\\2|(?!\2).)*)(\2)',
             intp_string_callback),
            # special forms of fancy strings after operators or
            # in method calls with braces
            (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # and because of fixed width lookbehinds the whole thing a
            # second time for line startings...
            (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # all regular fancy strings without qsw
            (r'(%([^a-zA-Z0-9\s]))((?:\\\2|(?!\2).)*)(\2)',
             intp_string_callback),
        ]

        return states

    tokens = {
        'root': [
            (r'#.*?$', Comment.Single),
            (r'=begin\s.*?\n=end', Comment.Multiline),
            # keywords
            (r'(BEGIN|END|alias|begin|break|case|defined\?|'
             r'do|else|elsif|end|ensure|for|if|in|next|redo|'
             r'rescue|raise|retry|return|super|then|undef|unless|until|when|'
             r'while|yield)\b', Keyword),
            # start of function, class and module names
            (r'(module)(\s+)([a-zA-Z_][a-zA-Z0-9_]*(::[a-zA-Z_][a-zA-Z0-9_]*)*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'),
            (r'def(?=[*%&^`~+-/\[<>=])', Keyword, 'funcname'),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            # special methods
            (r'(initialize|new|loop|include|extend|raise|attr_reader|'
             r'attr_writer|attr_accessor|attr|catch|throw|private|'
             r'module_function|public|protected|true|false|nil)\b', Keyword.Pseudo),
            (r'(not|and|or)\b', Operator.Word),
            (r'(autoload|block_given|const_defined|eql|equal|frozen|include|'
             r'instance_of|is_a|iterator|kind_of|method_defined|nil|'
             r'private_method_defined|protected_method_defined|'
             r'public_method_defined|respond_to|tainted)\?', Name.Builtin),
            (r'(chomp|chop|exit|gsub|sub)!', Name.Builtin),
            (r'(?<!\.)(Array|Float|Integer|String|__id__|__send__|abort|ancestors|'
             r'at_exit|autoload|binding|callcc|caller|'
             r'catch|chomp|chop|class_eval|class_variables|'
             r'clone|const_defined\?|const_get|const_missing|const_set|constants|'
             r'display|dup|eval|exec|exit|extend|fail|fork|'
             r'format|freeze|getc|gets|global_variables|gsub|'
             r'hash|id|included_modules|inspect|instance_eval|'
             r'instance_method|instance_methods|'
             r'instance_variable_get|instance_variable_set|instance_variables|'
             r'lambda|load|local_variables|loop|'
             r'method|method_missing|methods|module_eval|name|'
             r'object_id|open|p|print|printf|private_class_method|'
             r'private_instance_methods|'
             r'private_methods|proc|protected_instance_methods|'
             r'protected_methods|public_class_method|'
             r'public_instance_methods|public_methods|'
             r'putc|puts|raise|rand|readline|readlines|require|'
             r'scan|select|self|send|set_trace_func|singleton_methods|sleep|'
             r'split|sprintf|srand|sub|syscall|system|taint|'
             r'test|throw|to_a|to_s|trace_var|trap|type|untaint|untrace_var|'
             r'warn)\b', Name.Builtin),
            (r'__(FILE|LINE)__\b', Name.Builtin.Pseudo),
            # normal heredocs
            (r'(?<!\w)(<<-?)(["`\']?)([a-zA-Z_]\w*)(\2)(.*?\n)', heredoc_callback),
            # empty string heredocs
            (r'(<<-?)("|\')()(\2)(.*?\n)', heredoc_callback),
            (r'__END__', Comment.Preproc, 'end-part'),
            # multiline regex (after keywords or assignments)
            (r'(?:^|(?<=[=<>~!])|'
                 r'(?<=(?:\s|;)when\s)|'
                 r'(?<=(?:\s|;)or\s)|'
                 r'(?<=(?:\s|;)and\s)|'
                 r'(?<=(?:\s|;|\.)index\s)|'
                 r'(?<=(?:\s|;|\.)scan\s)|'
                 r'(?<=(?:\s|;|\.)sub\s)|'
                 r'(?<=(?:\s|;|\.)sub!\s)|'
                 r'(?<=(?:\s|;|\.)gsub\s)|'
                 r'(?<=(?:\s|;|\.)gsub!\s)|'
                 r'(?<=(?:\s|;|\.)match\s)|'
                 r'(?<=(?:\s|;)if\s)|'
                 r'(?<=(?:\s|;)elsif\s)|'
                 r'(?<=^when\s)|'
                 r'(?<=^index\s)|'
                 r'(?<=^scan\s)|'
                 r'(?<=^sub\s)|'
                 r'(?<=^gsub\s)|'
                 r'(?<=^sub!\s)|'
                 r'(?<=^gsub!\s)|'
                 r'(?<=^match\s)|'
                 r'(?<=^if\s)|'
                 r'(?<=^elsif\s)'
             r')(\s*)(/)', bygroups(Text, String.Regex), 'multiline-regex'),
            # multiline regex (in method calls)
            (r'(?<=\(|,)/', String.Regex, 'multiline-regex'),
            # multiline regex (this time the funny no whitespace rule)
            (r'(\s+)(/[^\s=])', String.Regex, 'multiline-regex'),
            # lex numbers and ignore following regular expressions which
            # are division operators in fact (grrrr. i hate that. any
            # better ideas?)
            # since pygments 0.7 we also eat a "?" operator after numbers
            # so that the char operator does not work. Chars are not allowed
            # there so that you can use the ternary operator.
            # stupid example:
            #   x>=0?n[x]:""
            (r'(0_?[0-7]+(?:_[0-7]+)*)(\s*)([/?])?',
             bygroups(Number.Oct, Text, Operator)),
            (r'(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*)(\s*)([/?])?',
             bygroups(Number.Hex, Text, Operator)),
            (r'(0b[01]+(?:_[01]+)*)(\s*)([/?])?',
             bygroups(Number.Bin, Text, Operator)),
            (r'([\d]+(?:_\d+)*)(\s*)([/?])?',
             bygroups(Number.Integer, Text, Operator)),
            # Names
            (r'@@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Class),
            (r'@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Instance),
            (r'\$[a-zA-Z0-9_]+', Name.Variable.Global),
            (r'\$[!@&`\'+~=/\\,;.<>_*$?:"]', Name.Variable.Global),
            (r'\$-[0adFiIlpvw]', Name.Variable.Global),
            (r'::', Operator),
            include('strings'),
            # chars
            (r'\?(\\[MC]-)*' # modifiers
             r'(\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)'
             r'(?!\w)',
             String.Char),
            (r'[A-Z][a-zA-Z0-9_]+', Name.Constant),
            # this is needed because ruby attributes can look
            # like keywords (class) or like this: ` ?!?
            (r'(\.|::)([a-zA-Z_]\w*[\!\?]?|[*%&^`~+-/\[<>=])',
             bygroups(Operator, Name)),
            (r'[a-zA-Z_][\w_]*[\!\?]?', Name),
            (r'(\[|\]|\*\*|<<?|>>?|>=|<=|<=>|=~|={3}|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&!^|~]=?', Operator),
            (r'[(){};,/?:\\]', Punctuation),
            (r'\s+', Text)
        ],
        'funcname': [
            (r'\(', Punctuation, 'defexpr'),
            (r'(?:([a-zA-Z_][a-zA-Z0-9_]*)(\.))?'
             r'([a-zA-Z_][\w_]*[\!\?]?|\*\*?|[-+]@?|'
             r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)',
             bygroups(Name.Class, Operator, Name.Function), '#pop'),
            (r'', Text, '#pop')
        ],
        'classname': [
            (r'\(', Punctuation, 'defexpr'),
            (r'<<', Operator, '#pop'),
            (r'[A-Z_][\w_]*', Name.Class, '#pop'),
            (r'', Text, '#pop')
        ],
        'defexpr': [
            (r'(\))(\.|::)?', bygroups(Punctuation, Operator), '#pop'),
            (r'\(', Operator, '#push'),
            include('root')
        ],
        'in-intp': [
            ('}', String.Interpol, '#pop'),
            include('root'),
        ],
        'string-intp': [
            (r'#{', String.Interpol, 'in-intp'),
            (r'#@@?[a-zA-Z_][a-zA-Z0-9_]*', String.Interpol),
            (r'#\$[a-zA-Z_][a-zA-Z0-9_]*', String.Interpol)
        ],
        'string-intp-escaped': [
            include('string-intp'),
            (r'\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})', String.Escape)
        ],
        'interpolated-regex': [
            include('string-intp'),
            (r'[\\#]', String.Regex),
            (r'[^\\#]+', String.Regex),
        ],
        'interpolated-string': [
            include('string-intp'),
            (r'[\\#]', String.Other),
            (r'[^\\#]+', String.Other),
        ],
        'multiline-regex': [
            include('string-intp'),
            (r'\\\\', String.Regex),
            (r'\\/', String.Regex),
            (r'[\\#]', String.Regex),
            (r'[^\\/#]+', String.Regex),
            (r'/[mixounse]*', String.Regex, '#pop'),
        ],
        'end-part': [
            (r'.+', Comment.Preproc, '#pop')
        ]
    }
    tokens.update(gen_rubystrings_rules())

    def analyse_text(text):
        return shebang_matches(text, r'ruby(1\.\d)?')
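
# --- Illustrative usage sketch, not part of the upstream agile.py ---------
# RubyLexer is an ExtendedRegexLexer, but it is driven like any other lexer;
# get_tokens() exercises heredoc_callback and the generated string states
# above.  The helper name is hypothetical.
def _example_ruby_tokens():
    source = 'greeting = <<-EOS\n  hello from a heredoc\nEOS\nputs greeting\n'
    return list(RubyLexer().get_tokens(source))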


class RubyConsoleLexer(Lexer):
    """
    For Ruby interactive console (**irb**) output like:

    .. sourcecode:: rbcon

        irb(main):001:0> a = 1
        => 1
        irb(main):002:0> puts a
        1
        => nil
    """
    name = 'Ruby irb session'
    aliases = ['rbcon', 'irb']
    mimetypes = ['text/x-ruby-shellsession']

    _prompt_re = re.compile('irb\([a-zA-Z_][a-zA-Z0-9_]*\):\d{3}:\d+[>*"\'] '
                            '|>> |\?> ')

    def get_tokens_unprocessed(self, text):
        rblexer = RubyLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            m = self._prompt_re.match(line)
            if m is not None:
                end = m.end()
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:end])]))
                curcode += line[end:]
            else:
                if curcode:
                    for item in do_insertions(insertions,
                                    rblexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                yield match.start(), Generic.Output, line
        if curcode:
            for item in do_insertions(insertions,
                                      rblexer.get_tokens_unprocessed(curcode)):
                yield item


class PerlLexer(RegexLexer):
    """
    For `Perl <http://www.perl.org>`_ source code.
    """

    name = 'Perl'
    aliases = ['perl', 'pl']
    filenames = ['*.pl', '*.pm']
    mimetypes = ['text/x-perl', 'application/x-perl']

    flags = re.DOTALL | re.MULTILINE
    # TODO: give this a perl guy who knows how to parse perl...
    tokens = {
        'balanced-regex': [
            (r'/(\\\\|\\/|[^/])*/[egimosx]*', String.Regex, '#pop'),
            (r'!(\\\\|\\!|[^!])*![egimosx]*', String.Regex, '#pop'),
            (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'),
            (r'{(\\\\|\\}|[^}])*}[egimosx]*', String.Regex, '#pop'),
            (r'<(\\\\|\\>|[^>])*>[egimosx]*', String.Regex, '#pop'),
            (r'\[(\\\\|\\\]|[^\]])*\][egimosx]*', String.Regex, '#pop'),
            (r'\((\\\\|\\\)|[^\)])*\)[egimosx]*', String.Regex, '#pop'),
            (r'@(\\\\|\\\@|[^\@])*@[egimosx]*', String.Regex, '#pop'),
            (r'%(\\\\|\\\%|[^\%])*%[egimosx]*', String.Regex, '#pop'),
            (r'\$(\\\\|\\\$|[^\$])*\$[egimosx]*', String.Regex, '#pop'),
        ],
        'root': [
            (r'\#.*?$', Comment.Single),
            (r'^=[a-zA-Z0-9]+\s+.*?\n=cut', Comment.Multiline),
            (r'(case|continue|do|else|elsif|for|foreach|if|last|my|'
             r'next|our|redo|reset|then|unless|until|while|use|'
             r'print|new|BEGIN|END|return)\b', Keyword),
            (r'(format)(\s+)([a-zA-Z0-9_]+)(\s*)(=)(\s*\n)',
             bygroups(Keyword, Text, Name, Text, Punctuation, Text), 'format'),
            (r'(eq|lt|gt|le|ge|ne|not|and|or|cmp)\b', Operator.Word),
            # common delimiters
            (r's/(\\\\|\\/|[^/])*/(\\\\|\\/|[^/])*/[egimosx]*', String.Regex),
            (r's!(\\\\|\\!|[^!])*!(\\\\|\\!|[^!])*![egimosx]*', String.Regex),
            (r's\\(\\\\|[^\\])*\\(\\\\|[^\\])*\\[egimosx]*', String.Regex),
            (r's@(\\\\|\\@|[^@])*@(\\\\|\\@|[^@])*@[egimosx]*', String.Regex),
            (r's%(\\\\|\\%|[^%])*%(\\\\|\\%|[^%])*%[egimosx]*', String.Regex),
            # balanced delimiters
            (r's{(\\\\|\\}|[^}])*}\s*', String.Regex, 'balanced-regex'),
            (r's<(\\\\|\\>|[^>])*>\s*', String.Regex, 'balanced-regex'),
            (r's\[(\\\\|\\\]|[^\]])*\]\s*', String.Regex, 'balanced-regex'),
            (r's\((\\\\|\\\)|[^\)])*\)\s*', String.Regex, 'balanced-regex'),

            (r'm?/(\\\\|\\/|[^/\n])*/[gcimosx]*', String.Regex),
            (r'm(?=[/!\\{<\[\(@%\$])', String.Regex, 'balanced-regex'),
            (r'((?<==~)|(?<=\())\s*/(\\\\|\\/|[^/])*/[gcimosx]*', String.Regex),
            (r'\s+', Text),
            (r'(abs|accept|alarm|atan2|bind|binmode|bless|caller|chdir|'
             r'chmod|chomp|chop|chown|chr|chroot|close|closedir|connect|'
             r'continue|cos|crypt|dbmclose|dbmopen|defined|delete|die|'
             r'dump|each|endgrent|endhostent|endnetent|endprotoent|'
             r'endpwent|endservent|eof|eval|exec|exists|exit|exp|fcntl|'
             r'fileno|flock|fork|format|formline|getc|getgrent|getgrgid|'
             r'getgrnam|gethostbyaddr|gethostbyname|gethostent|getlogin|'
             r'getnetbyaddr|getnetbyname|getnetent|getpeername|getpgrp|'
             r'getppid|getpriority|getprotobyname|getprotobynumber|'
             r'getprotoent|getpwent|getpwnam|getpwuid|getservbyname|'
             r'getservbyport|getservent|getsockname|getsockopt|glob|gmtime|'
             r'goto|grep|hex|import|index|int|ioctl|join|keys|kill|last|'
             r'lc|lcfirst|length|link|listen|local|localtime|log|lstat|'
             r'map|mkdir|msgctl|msgget|msgrcv|msgsnd|my|next|no|oct|open|'
             r'opendir|ord|our|pack|package|pipe|pop|pos|printf|'
             r'prototype|push|quotemeta|rand|read|readdir|'
             r'readline|readlink|readpipe|recv|redo|ref|rename|require|'
             r'reverse|rewinddir|rindex|rmdir|scalar|seek|seekdir|'
             r'select|semctl|semget|semop|send|setgrent|sethostent|setnetent|'
             r'setpgrp|setpriority|setprotoent|setpwent|setservent|'
             r'setsockopt|shift|shmctl|shmget|shmread|shmwrite|shutdown|'
             r'sin|sleep|socket|socketpair|sort|splice|split|sprintf|sqrt|'
             r'srand|stat|study|substr|symlink|syscall|sysopen|sysread|'
             r'sysseek|system|syswrite|tell|telldir|tie|tied|time|times|tr|'
             r'truncate|uc|ucfirst|umask|undef|unlink|unpack|unshift|untie|'
             r'utime|values|vec|wait|waitpid|wantarray|warn|write'
             r')\b', Name.Builtin),
            (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo),
            (r'<<([\'"]?)([a-zA-Z_][a-zA-Z0-9_]*)\1;?\n.*?\n\2\n', String),
            (r'__END__', Comment.Preproc, 'end-part'),
            (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global),
            (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global),
            (r'[$@%#]+', Name.Variable, 'varname'),
            (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
            (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
            (r'0b[01]+(_[01]+)*', Number.Bin),
            (r'\d+', Number.Integer),
            (r"'(\\\\|\\'|[^'])*'", String),
            (r'"(\\\\|\\"|[^"])*"', String),
            (r'`(\\\\|\\`|[^`])*`', String.Backtick),
            (r'<([^\s>]+)>', String.Regexp),
            (r'(q|qq|qw|qr|qx)\{', String.Other, 'cb-string'),
            (r'(q|qq|qw|qr|qx)\(', String.Other, 'rb-string'),
            (r'(q|qq|qw|qr|qx)\[', String.Other, 'sb-string'),
            (r'(q|qq|qw|qr|qx)\<', String.Other, 'lt-string'),
            (r'(q|qq|qw|qr|qx)([^a-zA-Z0-9])(.|\n)*?\2', String.Other),
            (r'package\s+', Keyword, 'modulename'),
            (r'sub\s+', Keyword, 'funcname'),
            (r'(\[\]|\*\*|::|<<|>>|>=|<=|<=>|={3}|!=|=~|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&^|!\\~]=?', Operator),
            (r'[\(\)\[\]:;,<>/\?\{\}]', Punctuation), # yes, there's no shortage
                                                      # of punctuation in Perl!
            (r'(?=\w)', Name, 'name'),
        ],
        'format': [
            (r'\.\n', String.Interpol, '#pop'),
            (r'[^\n]*\n', String.Interpol),
        ],
        'varname': [
            (r'\s+', Text),
            (r'\{', Punctuation, '#pop'), # hash syntax?
            (r'\)|,', Punctuation, '#pop'), # argument specifier
            (r'[a-zA-Z0-9_]+::', Name.Namespace),
            (r'[a-zA-Z0-9_:]+', Name.Variable, '#pop'),
        ],
        'name': [
            (r'[a-zA-Z0-9_]+::', Name.Namespace),
            (r'[a-zA-Z0-9_:]+', Name, '#pop'),
            (r'[A-Z_]+(?=[^a-zA-Z0-9_])', Name.Constant, '#pop'),
            (r'(?=[^a-zA-Z0-9_])', Text, '#pop'),
        ],
        'modulename': [
            (r'[a-zA-Z_][\w_]*', Name.Namespace, '#pop')
        ],
        'funcname': [
            (r'[a-zA-Z_][\w_]*[\!\?]?', Name.Function),
            (r'\s+', Text),
            # argument declaration
            (r'(\([$@%]*\))(\s*)', bygroups(Punctuation, Text)),
            (r'.*?{', Punctuation, '#pop'),
            (r';', Punctuation, '#pop'),
        ],
        'cb-string': [
            (r'\\[\{\}\\]', String.Other),
            (r'\\', String.Other),
            (r'\{', String.Other, 'cb-string'),
            (r'\}', String.Other, '#pop'),
            (r'[^\{\}\\]+', String.Other)
        ],
        'rb-string': [
            (r'\\[\(\)\\]', String.Other),
            (r'\\', String.Other),
            (r'\(', String.Other, 'rb-string'),
            (r'\)', String.Other, '#pop'),
            (r'[^\(\)]+', String.Other)
        ],
        'sb-string': [
            (r'\\[\[\]\\]', String.Other),
            (r'\\', String.Other),
            (r'\[', String.Other, 'sb-string'),
            (r'\]', String.Other, '#pop'),
            (r'[^\[\]]+', String.Other)
        ],
        'lt-string': [
            (r'\\[\<\>\\]', String.Other),
            (r'\\', String.Other),
            (r'\<', String.Other, 'lt-string'),
            (r'\>', String.Other, '#pop'),
            (r'[^\<\>]+', String.Other)
        ],
        'end-part': [
            (r'.+', Comment.Preproc, '#pop')
        ]
    }

    def analyse_text(text):
        if shebang_matches(text, r'perl(\d\.\d\.\d)?'):
            return True
        if 'my $' in text:
            return 0.9
        return 0.1 # who knows, might still be perl!
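
# --- Illustrative usage sketch, not part of the upstream agile.py ---------
# analyse_text() above is what guess_lexer() consults when no filename is
# available: a perl shebang scores 1.0, a plain 'my $' scores 0.9.  The
# helper name is hypothetical.
def _example_guess_perl():
    from pygments.lexers import guess_lexer
    return guess_lexer("#!/usr/bin/perl\nmy $x = 42;\nprint $x;\n")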


class LuaLexer(RegexLexer):
    """
    For `Lua <http://www.lua.org>`_ source code.

    Additional options accepted:

    `func_name_highlighting`
        If given and ``True``, highlight builtin function names
        (default: ``True``).
    `disabled_modules`
        If given, must be a list of module names whose function names
        should not be highlighted. By default all modules are highlighted.

        To get a list of allowed modules have a look into the
        `_luabuiltins` module:

        .. sourcecode:: pycon

            >>> from pygments.lexers._luabuiltins import MODULES
            >>> MODULES.keys()
            ['string', 'coroutine', 'modules', 'io', 'basic', ...]
    """

    name = 'Lua'
    aliases = ['lua']
    filenames = ['*.lua']
    mimetypes = ['text/x-lua', 'application/x-lua']

    tokens = {
        'root': [
            # lua allows a file to start with a shebang
            (r'#!(.*?)$', Comment.Preproc),
            (r'', Text, 'base'),
        ],
        'base': [
            (r'(?s)--\[(=*)\[.*?\]\1\]', Comment.Multiline),
            ('--.*$', Comment.Single),

            (r'(?i)(\d*\.\d+|\d+\.\d*)(e[+-]?\d+)?', Number.Float),
            (r'(?i)\d+e[+-]?\d+', Number.Float),
            ('(?i)0x[0-9a-f]*', Number.Hex),
            (r'\d+', Number.Integer),

            (r'\n', Text),
            (r'[^\S\n]', Text),
            (r'(?s)\[(=*)\[.*?\]\1\]', String.Multiline),
            (r'[\[\]\{\}\(\)\.,:;]', Punctuation),

            (r'(==|~=|<=|>=|\.\.|\.\.\.|[=+\-*/%^<>#])', Operator),
            (r'(and|or|not)\b', Operator.Word),

            ('(break|do|else|elseif|end|for|if|in|repeat|return|then|until|'
             r'while)\b', Keyword),
            (r'(local)\b', Keyword.Declaration),
            (r'(true|false|nil)\b', Keyword.Constant),

            (r'(function)(\s+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),

            (r'[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?', Name),

            # multiline strings
            (r'(?s)\[(=*)\[(.*?)\]\1\]', String),
            ("'", String.Single, combined('stringescape', 'sqs')),
            ('"', String.Double, combined('stringescape', 'dqs'))
        ],

        'funcname': [
            ('[A-Za-z_][A-Za-z0-9_]*', Name.Function, '#pop'),
            # inline function
            ('\(', Punctuation, '#pop'),
        ],

        'classname': [
            ('[A-Za-z_][A-Za-z0-9_]*', Name.Class, '#pop')
        ],

        # if I understand correctly, every character is valid in a lua string,
        # so this state is only for later corrections
        'string': [
            ('.', String)
        ],

        'stringescape': [
            (r'''\\([abfnrtv\\"']|\d{1,3})''', String.Escape)
        ],

        'sqs': [
            ("'", String, '#pop'),
            include('string')
        ],

        'dqs': [
            ('"', String, '#pop'),
            include('string')
        ]
    }

    def __init__(self, **options):
        self.func_name_highlighting = get_bool_opt(
            options, 'func_name_highlighting', True)
        self.disabled_modules = get_list_opt(options, 'disabled_modules', [])

        self._functions = set()
        if self.func_name_highlighting:
            from pygments.lexers._luabuiltins import MODULES
            for mod, func in MODULES.iteritems():
                if mod not in self.disabled_modules:
                    self._functions.update(func)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        for index, token, value in \
            RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name:
                if value in self._functions:
                    yield index, Name.Builtin, value
                    continue
                elif '.' in value:
                    a, b = value.split('.')
                    yield index, Name, a
                    yield index + len(a), Punctuation, u'.'
                    yield index + len(a) + 1, Name, b
                    continue
            yield index, token, value
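
# --- Illustrative usage sketch, not part of the upstream agile.py ---------
# The two options handled in LuaLexer.__init__ above: builtin highlighting
# can be switched off per module via `disabled_modules`.  The helper name is
# hypothetical.
def _example_lua_options():
    lexer = LuaLexer(func_name_highlighting=True, disabled_modules=['io'])
    # 'io.write' is no longer tagged Name.Builtin, while 'print' still is.
    return list(lexer.get_tokens('io.write("hi")\nprint("hi")\n'))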
1109
 
 
1110
 
 
1111
 
class MiniDLexer(RegexLexer):
1112
 
    """
1113
 
    For `MiniD <http://www.dsource.org/projects/minid>`_ (a D-like scripting
1114
 
    language) source.
1115
 
    """
1116
 
    name = 'MiniD'
1117
 
    filenames = ['*.md']
1118
 
    aliases = ['minid']
1119
 
    mimetypes = ['text/x-minidsrc']
1120
 
 
1121
 
    tokens = {
1122
 
        'root': [
1123
 
            (r'\n', Text),
1124
 
            (r'\s+', Text),
1125
 
            # Comments
1126
 
            (r'//(.*?)\n', Comment.Single),
1127
 
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
1128
 
            (r'/\+', Comment.Multiline, 'nestedcomment'),
1129
 
            # Keywords
1130
 
            (r'(as|assert|break|case|catch|class|continue|coroutine|default'
1131
 
             r'|do|else|finally|for|foreach|function|global|namespace'
1132
 
             r'|if|import|in|is|local|module|return|super|switch'
1133
 
             r'|this|throw|try|vararg|while|with|yield)\b', Keyword),
1134
 
            (r'(false|true|null)\b', Keyword.Constant),
1135
 
            # FloatLiteral
1136
 
            (r'([0-9][0-9_]*)?\.[0-9_]+([eE][+\-]?[0-9_]+)?', Number.Float),
1137
 
            # IntegerLiteral
1138
 
            # -- Binary
1139
 
            (r'0[Bb][01_]+', Number),
1140
 
            # -- Octal
1141
 
            (r'0[Cc][0-7_]+', Number.Oct),
1142
 
            # -- Hexadecimal
1143
 
            (r'0[xX][0-9a-fA-F_]+', Number.Hex),
1144
 
            # -- Decimal
1145
 
            (r'(0|[1-9][0-9_]*)', Number.Integer),
1146
 
            # CharacterLiteral
1147
 
            (r"""'(\\['"?\\abfnrtv]|\\x[0-9a-fA-F]{2}|\\[0-9]{1,3}"""
1148
 
             r"""|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|.)'""",
1149
 
             String.Char
1150
 
            ),
1151
 
            # StringLiteral
1152
 
            # -- WysiwygString
1153
 
            (r'@"(""|.)*"', String),
1154
 
            # -- AlternateWysiwygString
1155
 
            (r'`(``|.)*`', String),
1156
 
            # -- DoubleQuotedString
1157
 
            (r'"(\\\\|\\"|[^"])*"', String),
1158
 
            # Tokens
1159
 
            (
1160
 
             r'(~=|\^=|%=|\*=|==|!=|>>>=|>>>|>>=|>>|>=|<=>|\?=|-\>'
1161
 
             r'|<<=|<<|<=|\+\+|\+=|--|-=|\|\||\|=|&&|&=|\.\.|/=)'
1162
 
             r'|[-/.&$@|\+<>!()\[\]{}?,;:=*%^~#\\]', Punctuation
1163
 
            ),
1164
 
            # Identifier
1165
 
            (r'[a-zA-Z_]\w*', Name),
1166
 
        ],
1167
 
        'nestedcomment': [
1168
 
            (r'[^+/]+', Comment.Multiline),
1169
 
            (r'/\+', Comment.Multiline, '#push'),
1170
 
            (r'\+/', Comment.Multiline, '#pop'),
1171
 
            (r'[+/]', Comment.Multiline),
1172
 
        ],
1173
 
    }


class IoLexer(RegexLexer):
    """
    For `Io <http://iolanguage.com/>`_ (a small, prototype-based
    programming language) source.

    *New in Pygments 0.10.*
    """
    name = 'Io'
    filenames = ['*.io']
    aliases = ['io']
    mimetypes = ['text/x-iosrc']
    tokens = {
        'root': [
            (r'\n', Text),
            (r'\s+', Text),
            # Comments
            (r'//(.*?)\n', Comment.Single),
            (r'#(.*?)\n', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
            (r'/\+', Comment.Multiline, 'nestedcomment'),
            # DoubleQuotedString
            (r'"(\\\\|\\"|[^"])*"', String),
            # Operators
            (r'::=|:=|=|\(|\)|;|,|\*|-|\+|>|<|@|!|/|\||\^|\.|%|&|\[|\]|\{|\}',
             Operator),
            # keywords
            (r'(clone|do|doFile|doString|method|for|if|else|elseif|then)\b',
             Keyword),
            # constants
            (r'(nil|false|true)\b', Name.Constant),
            # names
            (r'(Object|list|List|Map|args|Sequence|Coroutine|File)\b',
             Name.Builtin),
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
            # numbers
            (r'(\d+\.?\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'\d+', Number.Integer)
        ],
        'nestedcomment': [
            (r'[^+/]+', Comment.Multiline),
            (r'/\+', Comment.Multiline, '#push'),
            (r'\+/', Comment.Multiline, '#pop'),
            (r'[+/]', Comment.Multiline),
        ]
    }
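
# Another illustrative, hypothetical helper (not part of the upstream module):
# the ``aliases``/``filenames`` attributes above are what the Pygments lookup
# functions consult, so an IoLexer can be obtained without naming the class.
def _demo_lookup_io():
    from pygments.lexers import get_lexer_by_name, get_lexer_for_filename
    # Both calls should resolve to IoLexer, via its 'io' alias and its
    # '*.io' filename pattern respectively.
    return get_lexer_by_name('io'), get_lexer_for_filename('example.io')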
class TclLexer(RegexLexer):
    """
    For Tcl source code.

    *New in Pygments 0.10.*
    """

    keyword_cmds_re = (
        r'\b(after|apply|array|break|catch|continue|elseif|else|error|'
        r'eval|expr|for|foreach|global|if|namespace|proc|rename|return|'
        r'set|switch|then|trace|unset|update|uplevel|upvar|variable|'
        r'vwait|while)\b'
        )

    builtin_cmds_re = (
        r'\b(append|bgerror|binary|cd|chan|clock|close|concat|dde|dict|'
        r'encoding|eof|exec|exit|fblocked|fconfigure|fcopy|file|'
        r'fileevent|flush|format|gets|glob|history|http|incr|info|interp|'
        r'join|lappend|lassign|lindex|linsert|list|llength|load|loadTk|'
        r'lrange|lrepeat|lreplace|lreverse|lsearch|lset|lsort|mathfunc|'
        r'mathop|memory|msgcat|open|package|pid|pkg::create|pkg_mkIndex|'
        r'platform|platform::shell|puts|pwd|re_syntax|read|refchan|'
        r'regexp|registry|regsub|scan|seek|socket|source|split|string|'
        r'subst|tell|time|tm|unknown|unload)\b'
        )

    name = 'Tcl'
    aliases = ['tcl']
    filenames = ['*.tcl']
    mimetypes = ['text/x-tcl', 'text/x-script.tcl', 'application/x-tcl']

    def _gen_command_rules(keyword_cmds_re, builtin_cmds_re, context=""):
        return [
            (keyword_cmds_re, Keyword, 'params' + context),
            (builtin_cmds_re, Name.Builtin, 'params' + context),
            (r'([\w\.\-]+)', Name.Variable, 'params' + context),
            (r'#', Comment, 'comment'),
        ]
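    # With context="-in-brace", for example, the rules above become
    # (keyword_cmds_re, Keyword, 'params-in-brace') and so on, so the closing
    # '}' matched in the 'params-in-brace' state below can pop both the
    # params state and the enclosing 'brace' state via ('#pop', '#pop').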

    tokens = {
        'root': [
            include('command'),
            include('basic'),
            include('data'),
            (r'}', Keyword),  # HACK: somehow we miscounted our braces
        ],
        'command': _gen_command_rules(keyword_cmds_re, builtin_cmds_re),
        'command-in-brace': _gen_command_rules(keyword_cmds_re,
                                               builtin_cmds_re,
                                               "-in-brace"),
        'command-in-bracket': _gen_command_rules(keyword_cmds_re,
                                                 builtin_cmds_re,
                                                 "-in-bracket"),
        'command-in-paren': _gen_command_rules(keyword_cmds_re,
                                               builtin_cmds_re,
                                               "-in-paren"),
        'basic': [
            (r'\(', Keyword, 'paren'),
            (r'\[', Keyword, 'bracket'),
            (r'\{', Keyword, 'brace'),
            (r'"', String.Double, 'string'),
            (r'(eq|ne|in|ni)\b', Operator.Word),
            (r'!=|==|<<|>>|<=|>=|&&|\|\||\*\*|[-+~!*/%<>&^|?:]', Operator),
        ],
        'data': [
            (r'\s+', Text),
            (r'0x[a-fA-F0-9]+', Number.Hex),
            (r'0[0-7]+', Number.Oct),
            (r'\d+\.\d+', Number.Float),
            (r'\d+', Number.Integer),
            (r'\$([\w\.\-\:]+)', Name.Variable),
            (r'([\w\.\-\:]+)', Text),
        ],
        'params': [
            (r';', Keyword, '#pop'),
            (r'\n', Text, '#pop'),
            (r'(else|elseif|then)', Keyword),
            include('basic'),
            include('data'),
        ],
        'params-in-brace': [
            (r'}', Keyword, ('#pop', '#pop')),
            include('params')
        ],
        'params-in-paren': [
            (r'\)', Keyword, ('#pop', '#pop')),
            include('params')
        ],
        'params-in-bracket': [
            (r'\]', Keyword, ('#pop', '#pop')),
            include('params')
        ],
        'string': [
            (r'\[', String.Double, 'string-square'),
            (r'(?s)(\\\\|\\[0-7]+|\\.|[^"\\])', String.Double),
            (r'"', String.Double, '#pop')
        ],
        'string-square': [
            (r'\[', String.Double, 'string-square'),
            (r'(?s)(\\\\|\\[0-7]+|\\.|\\\n|[^\]\\])', String.Double),
            (r'\]', String.Double, '#pop')
        ],
        'brace': [
            (r'}', Keyword, '#pop'),
            include('command-in-brace'),
            include('basic'),
            include('data'),
        ],
        'paren': [
            (r'\)', Keyword, '#pop'),
            include('command-in-paren'),
            include('basic'),
            include('data'),
        ],
        'bracket': [
            (r'\]', Keyword, '#pop'),
            include('command-in-bracket'),
            include('basic'),
            include('data'),
        ],
        'comment': [
            (r'.*[^\\]\n', Comment, '#pop'),
            (r'.*\\\n', Comment),
        ],
    }

    def analyse_text(text):
        return shebang_matches(text, r'(tcl)')
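
# Illustrative, hypothetical helper (not part of the module proper): stream the
# (tokentype, value) pairs that TclLexer produces for a small Tcl snippet.
def _demo_tcl_tokens(code='set greeting "hello"\nputs $greeting\n'):
    # Lexer.get_tokens() yields (tokentype, value) tuples for the whole input.
    return list(TclLexer().get_tokens(code))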
class ClojureLexer(RegexLexer):
    """
    Lexer for `Clojure <http://clojure.org/>`_ source code.

    *New in Pygments 0.11.*
    """
    name = 'Clojure'
    aliases = ['clojure', 'clj']
    filenames = ['*.clj']
    mimetypes = ['text/x-clojure', 'application/x-clojure']

    keywords = [
        'fn', 'def', 'defn', 'defmacro', 'defmethod', 'defmulti', 'defn-',
        'defstruct',
        'if', 'cond',
        'let', 'for'
    ]
    builtins = [
        '.', '..',
        '*', '+', '-', '->', '..', '/', '<', '<=', '=', '==', '>', '>=',
        'accessor', 'agent', 'agent-errors', 'aget', 'alength', 'all-ns',
        'alter', 'and', 'append-child', 'apply', 'array-map', 'aset',
        'aset-boolean', 'aset-byte', 'aset-char', 'aset-double', 'aset-float',
        'aset-int', 'aset-long', 'aset-short', 'assert', 'assoc', 'await',
        'await-for', 'bean', 'binding', 'bit-and', 'bit-not', 'bit-or',
        'bit-shift-left', 'bit-shift-right', 'bit-xor', 'boolean', 'branch?',
        'butlast', 'byte', 'cast', 'char', 'children', 'class',
        'clear-agent-errors', 'comment', 'commute', 'comp', 'comparator',
        'complement', 'concat', 'conj', 'cons', 'constantly',
        'construct-proxy', 'contains?', 'count', 'create-ns', 'create-struct',
        'cycle', 'dec',  'deref', 'difference', 'disj', 'dissoc', 'distinct',
        'doall', 'doc', 'dorun', 'doseq', 'dosync', 'dotimes', 'doto',
        'double', 'down', 'drop', 'drop-while', 'edit', 'end?', 'ensure',
        'eval', 'every?', 'false?', 'ffirst', 'file-seq', 'filter', 'find',
        'find-doc', 'find-ns', 'find-var', 'first', 'float', 'flush',
        'fnseq', 'frest', 'gensym', 'get', 'get-proxy-class',
        'hash-map', 'hash-set', 'identical?', 'identity', 'if-let', 'import',
        'in-ns', 'inc', 'index', 'insert-child', 'insert-left', 'insert-right',
        'inspect-table', 'inspect-tree', 'instance?', 'int', 'interleave',
        'intersection', 'into', 'into-array', 'iterate', 'join', 'key', 'keys',
        'keyword', 'keyword?', 'last', 'lazy-cat', 'lazy-cons', 'left',
        'lefts', 'line-seq', 'list', 'list*', 'load', 'load-file',
        'locking', 'long', 'loop', 'macroexpand', 'macroexpand-1',
        'make-array', 'make-node', 'map', 'map-invert', 'map?', 'mapcat',
        'max', 'max-key', 'memfn', 'merge', 'merge-with', 'meta', 'min',
        'min-key', 'name', 'namespace', 'neg?', 'new', 'newline', 'next',
        'nil?', 'node', 'not', 'not-any?', 'not-every?', 'not=', 'ns-imports',
        'ns-interns', 'ns-map', 'ns-name', 'ns-publics', 'ns-refers',
        'ns-resolve', 'ns-unmap', 'nth', 'nthrest', 'or', 'parse', 'partial',
        'path', 'peek', 'pop', 'pos?', 'pr', 'pr-str', 'print', 'print-str',
        'println', 'println-str', 'prn', 'prn-str', 'project', 'proxy',
        'proxy-mappings', 'quot', 'rand', 'rand-int', 'range', 're-find',
        're-groups', 're-matcher', 're-matches', 're-pattern', 're-seq',
        'read', 'read-line', 'reduce', 'ref', 'ref-set', 'refer', 'rem',
        'remove', 'remove-method', 'remove-ns', 'rename', 'rename-keys',
        'repeat', 'replace', 'replicate', 'resolve', 'rest', 'resultset-seq',
        'reverse', 'rfirst', 'right', 'rights', 'root', 'rrest', 'rseq',
        'second', 'select', 'select-keys', 'send', 'send-off', 'seq',
        'seq-zip', 'seq?', 'set', 'short', 'slurp', 'some', 'sort',
        'sort-by', 'sorted-map', 'sorted-map-by', 'sorted-set',
        'special-symbol?', 'split-at', 'split-with', 'str', 'string?',
        'struct', 'struct-map', 'subs', 'subvec', 'symbol', 'symbol?',
        'sync', 'take', 'take-nth', 'take-while', 'test', 'time', 'to-array',
        'to-array-2d', 'tree-seq', 'true?', 'union', 'up', 'update-proxy',
        'val', 'vals', 'var-get', 'var-set', 'var?', 'vector', 'vector-zip',
        'vector?', 'when', 'when-first', 'when-let', 'when-not',
        'with-local-vars', 'with-meta', 'with-open', 'with-out-str',
        'xml-seq', 'xml-zip', 'zero?', 'zipmap', 'zipper']

    # valid names for identifiers
    # strictly, a name just cannot consist entirely of digits,
    # but this approximation should be good enough for now
    valid_name = r'[a-zA-Z0-9!$%&*+,/:<=>?@^_~-]+'
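    # For illustration: this character class accepts typical Lisp-style
    # symbols such as with-meta, nil?, -> and +, since it allows letters,
    # digits and most of the punctuation used in Clojure identifiers.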

    tokens = {
        'root' : [
            # the comments - always starting with semicolon
            # and going to the end of the line
            (r';.*$', Comment.Single),

            # whitespaces - usually not relevant
            (r'\s+', Text),

            # numbers
            (r'-?\d+\.\d+', Number.Float),
            (r'-?\d+', Number.Integer),
            # support for uncommon kinds of numbers -
            # have to figure out what the characters mean
            #(r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number),

            # strings, symbols and characters
            (r'"(\\\\|\\"|[^"])*"', String),
            (r"'" + valid_name, String.Symbol),
            (r"\\([()/'\".'_!§$%& ?;=#+-]{1}|[a-zA-Z0-9]+)", String.Char),

            # constants
            (r'(#t|#f)', Name.Constant),

            # special operators
            (r"('|#|`|,@|,|\.)", Operator),

            # highlight the keywords
            ('(%s)' % '|'.join([
                re.escape(entry) + ' ' for entry in keywords]),
                Keyword
            ),

            # first variable in a quoted string like
            # '(this is syntactic sugar)
            (r"(?<='\()" + valid_name, Name.Variable),
            (r"(?<=#\()" + valid_name, Name.Variable),

            # highlight the builtins
            ("(?<=\()(%s)" % '|'.join([
                re.escape(entry) + ' ' for entry in builtins]),
                Name.Builtin
            ),

            # the remaining functions
            (r'(?<=\()' + valid_name, Name.Function),
            # find the remaining variables
            (valid_name, Name.Variable),

            # Clojure accepts vector notation
            (r'(\[|\])', Punctuation),

            # Clojure accepts map notation
            (r'(\{|\})', Punctuation),

            # the famous parentheses!
            (r'(\(|\))', Punctuation),
        ],
    }
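
# Final illustrative, hypothetical helper (not upstream code): render a small
# Clojure form as HTML with the lexer above, assuming pygments' HtmlFormatter
# is available.
def _demo_clojure_html(code='(defn add [a b]\n  (+ a b))\n'):
    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    # Returns an HTML fragment with each token wrapped in a styled <span>.
    return highlight(code, ClojureLexer(), HtmlFormatter())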