1
# -*- coding: utf-8 -*-
3
pygments.lexers.parsers
4
~~~~~~~~~~~~~~~~~~~~~~~
6
Lexers for parser generators.
8
:copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS.
9
:license: BSD, see LICENSE for details.
14
from pygments.lexer import RegexLexer, DelegatingLexer, \
15
include, bygroups, using
16
from pygments.token import Punctuation, Other, Text, Comment, Operator, \
17
Keyword, Name, String, Number, Whitespace
18
from pygments.lexers.compiled import JavaLexer, CLexer, CppLexer, \
19
ObjectiveCLexer, DLexer
20
from pygments.lexers.dotnet import CSharpLexer
21
from pygments.lexers.agile import RubyLexer, PythonLexer, PerlLexer
22
from pygments.lexers.web import ActionScriptLexer
25
__all__ = ['RagelLexer', 'RagelEmbeddedLexer', 'RagelCLexer', 'RagelDLexer',
26
'RagelCppLexer', 'RagelObjectiveCLexer', 'RagelRubyLexer',
27
'RagelJavaLexer', 'AntlrLexer', 'AntlrPythonLexer',
28
'AntlrPerlLexer', 'AntlrRubyLexer', 'AntlrCppLexer',
30
'AntlrCSharpLexer', 'AntlrObjectiveCLexer',
31
'AntlrJavaLexer', "AntlrActionScriptLexer"]
34
class RagelLexer(RegexLexer):
36
A pure `Ragel <http://www.complang.org/ragel/>`_ lexer. Use this for
37
fragments of Ragel. For ``.rl`` files, use RagelEmbeddedLexer instead
38
(or one of the language-specific subclasses).
40
*New in Pygments 1.1.*
55
(r'(access|action|alphtype)\b', Keyword),
56
(r'(getkey|write|machine|include)\b', Keyword),
57
(r'(any|ascii|extend|alpha|digit|alnum|lower|upper)\b', Keyword),
58
(r'(xdigit|cntrl|graph|print|punct|space|zlen|empty)\b', Keyword)
61
(r'0x[0-9A-Fa-f]+', Number.Hex),
62
(r'[+-]?[0-9]+', Number.Integer),
65
(r'"(\\\\|\\"|[^"])*"', String), # double quote string
66
(r"'(\\\\|\\'|[^'])*'", String), # single quote string
67
(r'\[(\\\\|\\\]|[^\]])*\]', String), # square bracket literals
68
(r'/(?!\*)(\\\\|\\/|[^/])*/', String.Regex), # regular expressions
71
(r'[a-zA-Z_][a-zA-Z_0-9]*', Name.Variable),
74
(r',', Operator), # Join
75
(r'\||&|-|--', Operator), # Union, Intersection and Subtraction
76
(r'\.|<:|:>|:>>', Operator), # Concatention
77
(r':', Operator), # Label
78
(r'->', Operator), # Epsilon Transition
79
(r'(>|\$|%|<|@|<>)(/|eof\b)', Operator), # EOF Actions
80
(r'(>|\$|%|<|@|<>)(!|err\b)', Operator), # Global Error Actions
81
(r'(>|\$|%|<|@|<>)(\^|lerr\b)', Operator), # Local Error Actions
82
(r'(>|\$|%|<|@|<>)(~|to\b)', Operator), # To-State Actions
83
(r'(>|\$|%|<|@|<>)(\*|from\b)', Operator), # From-State Actions
84
(r'>|@|\$|%', Operator), # Transition Actions and Priorities
85
(r'\*|\?|\+|{[0-9]*,[0-9]*}', Operator), # Repetition
86
(r'!|\^', Operator), # Negation
87
(r'\(|\)', Operator), # Grouping
91
include('whitespace'),
95
include('identifiers'),
97
(r'{', Punctuation, 'host'),
102
(r'(' + r'|'.join(( # keep host code in largest possible chunks
103
r'[^{}\'"/#]+', # exclude unsafe characters
104
r'[^\\][\\][{}]', # allow escaped { or }
106
# strings and comments may safely contain unsafe characters
107
r'"(\\\\|\\"|[^"])*"', # double quote string
108
r"'(\\\\|\\'|[^'])*'", # single quote string
109
r'//.*$\n?', # single line comment
110
r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
111
r'\#.*$\n?', # ruby comment
113
# regular expression: There's no reason for it to start
114
# with a * and this stops confusion with comments.
115
r'/(?!\*)(\\\\|\\/|[^/])*/',
117
# / is safe now that we've handled regex and javadoc comments
121
(r'{', Punctuation, '#push'),
122
(r'}', Punctuation, '#pop'),
127
class RagelEmbeddedLexer(RegexLexer):
129
A lexer for `Ragel`_ embedded in a host language file.
131
This will only highlight Ragel statements. If you want host language
132
highlighting then call the language-specific Ragel lexer.
134
*New in Pygments 1.1.*
137
name = 'Embedded Ragel'
138
aliases = ['ragel-em']
143
(r'(' + r'|'.join(( # keep host code in largest possible chunks
144
r'[^%\'"/#]+', # exclude unsafe characters
145
r'%(?=[^%]|$)', # a single % sign is okay, just not 2 of them
147
# strings and comments may safely contain unsafe characters
148
r'"(\\\\|\\"|[^"])*"', # double quote string
149
r"'(\\\\|\\'|[^'])*'", # single quote string
150
r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
151
r'//.*$\n?', # single line comment
152
r'\#.*$\n?', # ruby/ragel comment
153
r'/(?!\*)(\\\\|\\/|[^/])*/', # regular expression
155
# / is safe now that we've handled regex and javadoc comments
160
# Please don't put a quoted newline in a single line FSM.
161
# That's just mean. It will break this.
162
(r'(%%)(?![{%])(.*)($|;)(\n?)', bygroups(Punctuation,
167
(r'(%%%%|%%){', Punctuation, 'multi-line-fsm'),
170
(r'(' + r'|'.join(( # keep ragel code in largest possible chunks.
172
r'[^}\'"\[/#]', # exclude unsafe characters
173
r'}(?=[^%]|$)', # } is okay as long as it's not followed by %
174
r'}%(?=[^%]|$)', # ...well, one %'s okay, just not two...
175
r'[^\\][\\][{}]', # ...and } is okay if it's escaped
177
# allow / if it's preceded with one of these symbols
178
# (ragel EOF actions)
181
# specifically allow regex followed immediately by *
182
# so it doesn't get mistaken for a comment
183
r'/(?!\*)(\\\\|\\/|[^/])*/\*',
185
# allow / as long as it's not followed by another / or by a *
188
# We want to match as many of these as we can in one block.
189
# Not sure if we need the + sign here,
190
# does it help performance?
193
# strings and comments may safely contain unsafe characters
194
r'"(\\\\|\\"|[^"])*"', # double quote string
195
r"'(\\\\|\\'|[^'])*'", # single quote string
196
r"\[(\\\\|\\\]|[^\]])*\]", # square bracket literal
197
r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
198
r'//.*$\n?', # single line comment
199
r'\#.*$\n?', # ruby/ragel comment
200
)) + r')+', using(RagelLexer)),
202
(r'}%%', Punctuation, '#pop'),
206
def analyse_text(text):
    """Score how likely *text* is language-independent embedded Ragel.

    Returns ``True`` when the ``@LANG: indep`` marker occurs anywhere in
    *text*; otherwise returns the low fallback score ``0.1``.
    """
    if '@LANG: indep' in text:
        return True
    # No marker found: fall back to the same small score as before.
    return 0.1
210
class RagelRubyLexer(DelegatingLexer):
212
A lexer for `Ragel`_ in a Ruby host file.
214
*New in Pygments 1.1.*
217
name = 'Ragel in Ruby Host'
218
aliases = ['ragel-ruby', 'ragel-rb']
221
def __init__(self, **options):
222
super(RagelRubyLexer, self).__init__(RubyLexer, RagelEmbeddedLexer,
225
def analyse_text(text):
    """Return ``True`` if the ``@LANG: ruby`` marker occurs in *text*."""
    marker = '@LANG: ruby'
    return marker in text
229
class RagelCLexer(DelegatingLexer):
231
A lexer for `Ragel`_ in a C host file.
233
*New in Pygments 1.1.*
236
name = 'Ragel in C Host'
237
aliases = ['ragel-c']
240
def __init__(self, **options):
241
super(RagelCLexer, self).__init__(CLexer, RagelEmbeddedLexer,
244
def analyse_text(text):
    """Return ``True`` if *text* carries a ``@LANG: c`` host-language marker.

    A plain substring test for ``@LANG: c`` would also succeed on
    ``@LANG: c++`` (the marker the C++ variant of this lexer looks for) and
    on any longer language name starting with ``c``.  The marker is
    therefore only accepted when the ``c`` is not followed by a word
    character, ``+`` or ``#``.
    """
    # Function-scope import so this block does not depend on a
    # module-level ``import re`` being present.
    import re

    return re.search(r'@LANG: c(?![\w+#])', text) is not None
248
class RagelDLexer(DelegatingLexer):
250
A lexer for `Ragel`_ in a D host file.
252
*New in Pygments 1.1.*
255
name = 'Ragel in D Host'
256
aliases = ['ragel-d']
259
def __init__(self, **options):
    """Initialise the delegating lexer for Ragel in a D host file.

    Passes ``DLexer`` and ``RagelEmbeddedLexer`` (in that order) plus all
    keyword ``options`` unchanged to the parent class constructor.
    """
    parent = super(RagelDLexer, self)
    parent.__init__(DLexer, RagelEmbeddedLexer, **options)
262
def analyse_text(text):
    """Return ``True`` if the ``@LANG: d`` marker occurs in *text*."""
    marker = '@LANG: d'
    return marker in text
266
class RagelCppLexer(DelegatingLexer):
268
A lexer for `Ragel`_ in a CPP host file.
270
*New in Pygments 1.1.*
273
name = 'Ragel in CPP Host'
274
aliases = ['ragel-cpp']
277
def __init__(self, **options):
    """Initialise the delegating lexer for Ragel in a C++ host file.

    Passes ``CppLexer`` and ``RagelEmbeddedLexer`` (in that order) plus all
    keyword ``options`` unchanged to the parent class constructor.
    """
    parent = super(RagelCppLexer, self)
    parent.__init__(CppLexer, RagelEmbeddedLexer, **options)
280
def analyse_text(text):
    """Return ``True`` if the ``@LANG: c++`` marker occurs in *text*."""
    marker = '@LANG: c++'
    return marker in text
284
class RagelObjectiveCLexer(DelegatingLexer):
286
A lexer for `Ragel`_ in an Objective C host file.
288
*New in Pygments 1.1.*
291
name = 'Ragel in Objective C Host'
292
aliases = ['ragel-objc']
295
def __init__(self, **options):
296
super(RagelObjectiveCLexer, self).__init__(ObjectiveCLexer,
300
def analyse_text(text):
    """Return ``True`` if the ``@LANG: objc`` marker occurs in *text*."""
    marker = '@LANG: objc'
    return marker in text
304
class RagelJavaLexer(DelegatingLexer):
306
A lexer for `Ragel`_ in a Java host file.
308
*New in Pygments 1.1.*
311
name = 'Ragel in Java Host'
312
aliases = ['ragel-java']
315
def __init__(self, **options):
316
super(RagelJavaLexer, self).__init__(JavaLexer, RagelEmbeddedLexer,
319
def analyse_text(text):
    """Return ``True`` if the ``@LANG: java`` marker occurs in *text*."""
    marker = '@LANG: java'
    return marker in text
323
class AntlrLexer(RegexLexer):
325
Generic `ANTLR`_ Lexer.
326
Should not be called directly, instead
327
use DelegatingLexer for your target language.
329
*New in Pygments 1.1.*
331
.. _ANTLR: http://www.antlr.org/
338
_id = r'[A-Za-z][A-Za-z_0-9]*'
339
_TOKEN_REF = r'[A-Z][A-Za-z_0-9]*'
340
_RULE_REF = r'[a-z][A-Za-z_0-9]*'
341
_STRING_LITERAL = r'\'(?:\\\\|\\\'|[^\']*)\''
346
(r'\s+', Whitespace),
350
(r'/\*(.|\n)*?\*/', Comment),
353
include('whitespace'),
356
(r'(lexer|parser|tree)?(\s*)(grammar\b)(\s*)(' + _id + ')(;)',
357
bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Class,
360
(r'options\b', Keyword, 'options'),
362
(r'tokens\b', Keyword, 'tokens'),
364
(r'(scope)(\s*)(' + _id + ')(\s*)({)',
365
bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
366
Punctuation), 'action'),
368
(r'(catch|finally)\b', Keyword, 'exception'),
370
(r'(@' + _id + ')(\s*)(::)?(\s*)(' + _id + ')(\s*)({)',
371
bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
372
Name.Label, Whitespace, Punctuation), 'action'),
374
(r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?', \
375
bygroups(Keyword, Whitespace, Name.Label, Punctuation),
376
('rule-alts', 'rule-prelims')),
379
(r'\n', Whitespace, '#pop'),
383
(r'\[', Punctuation, 'nested-arg-action'),
384
(r'\{', Punctuation, 'action'),
387
include('whitespace'),
390
(r'returns\b', Keyword),
391
(r'\[', Punctuation, 'nested-arg-action'),
392
(r'\{', Punctuation, 'action'),
394
(r'(throws)(\s+)(' + _id + ')',
395
bygroups(Keyword, Whitespace, Name.Label)),
396
(r'(?:(,)(\s*)(' + _id + '))+',
397
bygroups(Punctuation, Whitespace, Name.Label)), # Additional throws
399
(r'options\b', Keyword, 'options'),
400
# ruleScopeSpec - scope followed by target language code or name of action
401
# TODO finish implementing other possibilities for scope
402
# L173 ANTLRv3.g from ANTLR book
403
(r'(scope)(\s+)({)', bygroups(Keyword, Whitespace, Punctuation),
405
(r'(scope)(\s+)(' + _id + ')(\s*)(;)',
406
bygroups(Keyword, Whitespace, Name.Label, Whitespace, Punctuation)),
408
(r'(@' + _id + ')(\s*)({)',
409
bygroups(Name.Label, Whitespace, Punctuation), 'action'),
410
# finished prelims, go to rule alts!
411
(r':', Punctuation, '#pop')
414
include('whitespace'),
417
# These might need to go in a separate 'block' state triggered by (
418
(r'options\b', Keyword, 'options'),
422
(r"'(\\\\|\\'|[^'])*'", String),
423
(r'"(\\\\|\\"|[^"])*"', String),
424
(r'<<([^>]|>[^>])>>', String),
426
# Tokens start with capital letter.
427
(r'\$?[A-Z_][A-Za-z_0-9]*', Name.Constant),
428
# Rules start with small letter.
429
(r'\$?[a-z_][A-Za-z_0-9]*', Name.Variable),
431
(r'(\+|\||->|=>|=|\(|\)|\.\.|\.|\?|\*|\^|!|\#|~)', Operator),
433
(r'\[', Punctuation, 'nested-arg-action'),
434
(r'\{', Punctuation, 'action'),
435
(r';', Punctuation, '#pop')
438
include('whitespace'),
441
(r'(' + _TOKEN_REF + r')(\s*)(=)?(\s*)(' + _STRING_LITERAL
443
bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
444
String, Whitespace, Punctuation)),
445
(r'}', Punctuation, '#pop'),
448
include('whitespace'),
451
(r'(' + _id + r')(\s*)(=)(\s*)(' +
452
'|'.join((_id, _STRING_LITERAL, _INT, '\*'))+ ')(\s*)(;)',
453
bygroups(Name.Variable, Whitespace, Punctuation, Whitespace,
454
Text, Whitespace, Punctuation)),
455
(r'}', Punctuation, '#pop'),
458
(r'(' + r'|'.join(( # keep host code in largest possible chunks
459
r'[^\${}\'"/\\]+', # exclude unsafe characters
461
# strings and comments may safely contain unsafe characters
462
r'"(\\\\|\\"|[^"])*"', # double quote string
463
r"'(\\\\|\\'|[^'])*'", # single quote string
464
r'//.*$\n?', # single line comment
465
r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
467
# regular expression: There's no reason for it to start
468
# with a * and this stops confusion with comments.
469
r'/(?!\*)(\\\\|\\/|[^/])*/',
471
# backslashes are okay, as long as we are not backslashing a %
474
# Now that we've handled regex and javadoc comments
475
# it's safe to let / through.
478
(r'(\\)(%)', bygroups(Punctuation, Other)),
479
(r'(\$[a-zA-Z]+)(\.?)(text|value)?',
480
bygroups(Name.Variable, Punctuation, Name.Property)),
481
(r'{', Punctuation, '#push'),
482
(r'}', Punctuation, '#pop'),
484
'nested-arg-action': [
485
(r'(' + r'|'.join(( # keep host code in largest possible chunks.
486
r'[^\$\[\]\'"/]+', # exclude unsafe characters
488
# strings and comments may safely contain unsafe characters
489
r'"(\\\\|\\"|[^"])*"', # double quote string
490
r"'(\\\\|\\'|[^'])*'", # single quote string
491
r'//.*$\n?', # single line comment
492
r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
494
# regular expression: There's no reason for it to start
495
# with a * and this stops confusion with comments.
496
r'/(?!\*)(\\\\|\\/|[^/])*/',
498
# Now that we've handled regex and javadoc comments
499
# it's safe to let / through.
504
(r'\[', Punctuation, '#push'),
505
(r'\]', Punctuation, '#pop'),
506
(r'(\$[a-zA-Z]+)(\.?)(text|value)?',
507
bygroups(Name.Variable, Punctuation, Name.Property)),
508
(r'(\\\\|\\\]|\\\[|[^\[\]])+', Other),
512
def analyse_text(text):
    """Look for an ANTLR grammar declaration at the start of any line.

    Returns the ``re`` match object (truthy) for the first line of the form
    ``grammar Name;``, optionally prefixed by ``lexer``, ``parser`` or
    ``tree``; returns ``None`` when no such line exists.

    The optional prefix and the underscore in the name mirror what this
    lexer's own grammar-header token rule accepts, so text the lexer can
    tokenize as a grammar header is also recognised here.
    """
    return re.search(
        r'^\s*(?:(?:lexer|parser|tree)\s+)?grammar\s+[a-zA-Z0-9_]+\s*;',
        text, re.M)
515
# http://www.antlr.org/wiki/display/ANTLR3/Code+Generation+Targets
517
# TH: I'm not aware of any language features of C++ that will cause
518
# incorrect lexing of C files. Antlr doesn't appear to make a distinction,
519
# so just assume they're C++. No idea how to make Objective C work in the
522
#class AntlrCLexer(DelegatingLexer):
524
# ANTLR with C Target
526
# *New in Pygments 1.1*
529
# name = 'ANTLR With C Target'
530
# aliases = ['antlr-c']
531
# filenames = ['*.G', '*.g']
533
# def __init__(self, **options):
534
# super(AntlrCLexer, self).__init__(CLexer, AntlrLexer, **options)
536
# def analyse_text(text):
537
# return re.match(r'^\s*language\s*=\s*C\s*;', text)
539
class AntlrCppLexer(DelegatingLexer):
541
`ANTLR`_ with CPP Target
543
*New in Pygments 1.1.*
546
name = 'ANTLR With CPP Target'
547
aliases = ['antlr-cpp']
548
filenames = ['*.G', '*.g']
550
def __init__(self, **options):
    """Initialise the delegating lexer for ANTLR with the CPP target.

    Passes ``CppLexer`` and ``AntlrLexer`` (in that order) plus all keyword
    ``options`` unchanged to the parent class constructor.
    """
    parent = super(AntlrCppLexer, self)
    parent.__init__(CppLexer, AntlrLexer, **options)
553
def analyse_text(text):
    """Detect an ANTLR grammar that contains a ``language = C;`` line.

    Returns the falsy result of ``AntlrLexer.analyse_text`` when *text* is
    not recognised as a grammar; otherwise returns the ``re`` match for the
    ``language = C;`` line, or ``None`` if there is no such line.
    """
    grammar_match = AntlrLexer.analyse_text(text)
    if not grammar_match:
        return grammar_match
    return re.search(r'^\s*language\s*=\s*C\s*;', text, re.M)
558
class AntlrObjectiveCLexer(DelegatingLexer):
560
`ANTLR`_ with Objective-C Target
562
*New in Pygments 1.1.*
565
name = 'ANTLR With ObjectiveC Target'
566
aliases = ['antlr-objc']
567
filenames = ['*.G', '*.g']
569
def __init__(self, **options):
    """Initialise the delegating lexer for ANTLR with the Objective-C target.

    Passes ``ObjectiveCLexer`` and ``AntlrLexer`` (in that order) plus all
    keyword ``options`` unchanged to the parent class constructor.
    """
    parent = super(AntlrObjectiveCLexer, self)
    parent.__init__(ObjectiveCLexer, AntlrLexer, **options)
573
def analyse_text(text):
    """Detect an ANTLR grammar that contains a ``language = ObjC;`` line.

    Returns the falsy result of ``AntlrLexer.analyse_text`` when *text* is
    not recognised as a grammar; otherwise returns the ``re`` match for the
    ``language = ObjC;`` line, or ``None`` if there is no such line.
    """
    grammar_match = AntlrLexer.analyse_text(text)
    if not grammar_match:
        return grammar_match
    # re.M is required: without it ``^`` anchors only at the very start of
    # the text, so an option line further down could never be found.
    return re.search(r'^\s*language\s*=\s*ObjC\s*;', text, re.M)
578
class AntlrCSharpLexer(DelegatingLexer):
580
`ANTLR`_ with C# Target
582
*New in Pygments 1.1.*
585
name = 'ANTLR With C# Target'
586
aliases = ['antlr-csharp', 'antlr-c#']
587
filenames = ['*.G', '*.g']
589
def __init__(self, **options):
590
super(AntlrCSharpLexer, self).__init__(CSharpLexer, AntlrLexer,
593
def analyse_text(text):
    """Detect an ANTLR grammar that contains a ``language = CSharp2;`` line.

    Returns the falsy result of ``AntlrLexer.analyse_text`` when *text* is
    not recognised as a grammar; otherwise returns the ``re`` match for the
    ``language = CSharp2;`` line, or ``None`` if there is no such line.
    """
    grammar_match = AntlrLexer.analyse_text(text)
    if not grammar_match:
        return grammar_match
    return re.search(r'^\s*language\s*=\s*CSharp2\s*;', text, re.M)
598
class AntlrPythonLexer(DelegatingLexer):
600
`ANTLR`_ with Python Target
602
*New in Pygments 1.1.*
605
name = 'ANTLR With Python Target'
606
aliases = ['antlr-python']
607
filenames = ['*.G', '*.g']
609
def __init__(self, **options):
610
super(AntlrPythonLexer, self).__init__(PythonLexer, AntlrLexer,
613
def analyse_text(text):
    """Detect an ANTLR grammar that contains a ``language = Python;`` line.

    Returns the falsy result of ``AntlrLexer.analyse_text`` when *text* is
    not recognised as a grammar; otherwise returns the ``re`` match for the
    ``language = Python;`` line, or ``None`` if there is no such line.
    """
    grammar_match = AntlrLexer.analyse_text(text)
    if not grammar_match:
        return grammar_match
    return re.search(r'^\s*language\s*=\s*Python\s*;', text, re.M)
618
class AntlrJavaLexer(DelegatingLexer):
620
`ANTLR`_ with Java Target
622
*New in Pygments 1.1*
625
name = 'ANTLR With Java Target'
626
aliases = ['antlr-java']
627
filenames = ['*.G', '*.g']
629
def __init__(self, **options):
630
super(AntlrJavaLexer, self).__init__(JavaLexer, AntlrLexer,
633
def analyse_text(text):
    """Score *text* as an ANTLR grammar with the Java target.

    Returns the falsy result of ``AntlrLexer.analyse_text`` when *text* is
    not recognised as a grammar; otherwise returns ``0.9``.
    """
    grammar_match = AntlrLexer.analyse_text(text)
    if not grammar_match:
        return grammar_match
    # Antlr language is Java by default, so no ``language`` option line is
    # required; the score stays just below a certain match.
    return 0.9
638
class AntlrRubyLexer(DelegatingLexer):
640
`ANTLR`_ with Ruby Target
642
*New in Pygments 1.1.*
645
name = 'ANTLR With Ruby Target'
646
aliases = ['antlr-ruby', 'antlr-rb']
647
filenames = ['*.G', '*.g']
649
def __init__(self, **options):
650
super(AntlrRubyLexer, self).__init__(RubyLexer, AntlrLexer,
653
def analyse_text(text):
    """Detect an ANTLR grammar that contains a ``language = Ruby;`` line.

    Returns the falsy result of ``AntlrLexer.analyse_text`` when *text* is
    not recognised as a grammar; otherwise returns the ``re`` match for the
    ``language = Ruby;`` line, or ``None`` if there is no such line.
    """
    grammar_match = AntlrLexer.analyse_text(text)
    if not grammar_match:
        return grammar_match
    return re.search(r'^\s*language\s*=\s*Ruby\s*;', text, re.M)
658
class AntlrPerlLexer(DelegatingLexer):
660
`ANTLR`_ with Perl Target
662
*New in Pygments 1.1.*
665
name = 'ANTLR With Perl Target'
666
aliases = ['antlr-perl']
667
filenames = ['*.G', '*.g']
669
def __init__(self, **options):
670
super(AntlrPerlLexer, self).__init__(PerlLexer, AntlrLexer,
673
def analyse_text(text):
    """Detect an ANTLR grammar that contains a ``language = Perl5;`` line.

    Returns the falsy result of ``AntlrLexer.analyse_text`` when *text* is
    not recognised as a grammar; otherwise returns the ``re`` match for the
    ``language = Perl5;`` line, or ``None`` if there is no such line.
    """
    grammar_match = AntlrLexer.analyse_text(text)
    if not grammar_match:
        return grammar_match
    return re.search(r'^\s*language\s*=\s*Perl5\s*;', text, re.M)
678
class AntlrActionScriptLexer(DelegatingLexer):
680
`ANTLR`_ with ActionScript Target
682
*New in Pygments 1.1.*
685
name = 'ANTLR With ActionScript Target'
686
aliases = ['antlr-as', 'antlr-actionscript']
687
filenames = ['*.G', '*.g']
689
def __init__(self, **options):
    """Initialise the delegating lexer for ANTLR with the ActionScript target.

    Passes ``ActionScriptLexer`` and ``AntlrLexer`` (in that order) plus all
    keyword ``options`` unchanged to the parent class constructor.
    """
    parent = super(AntlrActionScriptLexer, self)
    parent.__init__(ActionScriptLexer, AntlrLexer, **options)
693
def analyse_text(text):
    """Detect an ANTLR grammar that contains a ``language = ActionScript;`` line.

    Returns the falsy result of ``AntlrLexer.analyse_text`` when *text* is
    not recognised as a grammar; otherwise returns the ``re`` match for the
    ``language = ActionScript;`` line, or ``None`` if there is no such line.
    """
    grammar_match = AntlrLexer.analyse_text(text)
    if not grammar_match:
        return grammar_match
    return re.search(r'^\s*language\s*=\s*ActionScript\s*;', text, re.M)