1
# -*- coding: utf-8 -*-
6
Lexers for assembly languages.
8
:copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS.
9
:license: BSD, see LICENSE for details.
14
from pygments.lexer import RegexLexer, include, bygroups, using, DelegatingLexer
15
from pygments.lexers.compiled import DLexer, CppLexer, CLexer
16
from pygments.token import *
18
# Public lexer classes exported by this module.
__all__ = [
    'GasLexer', 'ObjdumpLexer', 'DObjdumpLexer', 'CppObjdumpLexer',
    'CObjdumpLexer', 'LlvmLexer', 'NasmLexer',
]
22
class GasLexer(RegexLexer):
24
For Gas (AT&T) assembly code.
28
filenames = ['*.s', '*.S']
29
mimetypes = ['text/x-gas']
31
#: optional Comment or Whitespace
32
string = r'"(\\"|[^"])*"'
33
char = r'[a-zA-Z$._0-9@]'
34
identifier = r'(?:[a-zA-Z$_]' + char + '*|\.' + char + '+)'
35
number = r'(?:0[xX][a-zA-Z0-9]+|\d+)'
39
include('whitespace'),
40
(identifier + ':', Name.Label),
41
(r'\.' + identifier, Name.Attribute, 'directive-args'),
42
(r'lock|rep(n?z)?|data\d+', Name.Attribute),
43
(identifier, Name.Function, 'instruction-args'),
47
(identifier, Name.Constant),
49
('@' + identifier, Name.Attribute),
50
(number, Number.Integer),
51
(r'[\r\n]+', Text, '#pop'),
53
(r'#.*?$', Comment, '#pop'),
55
include('punctuation'),
59
# For objdump-disassembled code, shouldn't occur in
60
# actual assembler input
61
('([a-z0-9]+)( )(<)('+identifier+')(>)',
62
bygroups(Number.Hex, Text, Punctuation, Name.Constant,
64
('([a-z0-9]+)( )(<)('+identifier+')([-+])('+number+')(>)',
65
bygroups(Number.Hex, Text, Punctuation, Name.Constant,
66
Punctuation, Number.Integer, Punctuation)),
69
(identifier, Name.Constant),
70
(number, Number.Integer),
72
('%' + identifier, Name.Variable),
74
('$'+number, Number.Integer),
75
(r'[\r\n]+', Text, '#pop'),
76
(r'#.*?$', Comment, '#pop'),
77
include('punctuation'),
86
(r'[-*,.():]+', Punctuation)
90
def analyse_text(text):
    """
    Score how likely *text* is GAS assembly.

    Returns ``0.1`` when any line starts with a dot-prefixed word (for
    example ``.text`` or ``.globl``) and ``0.0`` otherwise.  A float is
    returned because Pygments expects analysers to yield a score between
    0.0 and 1.0 rather than a match object.
    """
    # re.search, not re.match: even with re.M, match() only tries position
    # 0 of the whole text, so directives on later lines were never seen.
    if re.search(r'^\.\w+', text, re.M):
        return 0.1
    return 0.0
93
class ObjdumpLexer(RegexLexer):
# Lexer class for objdump disassembly listings.  The rules below are built
# by concatenating a shared `hex` fragment whose definition is not visible
# in this excerpt (presumably a single hex-digit character class, since
# `hex+hex+' '` is used for one byte -- confirm).
95
For the output of 'objdump -dr'
99
filenames = ['*.objdump']
100
mimetypes = ['text/x-objdump']
106
# File name & format:
107
('(.*?)(:)( +file format )(.*?)$',
108
bygroups(Name.Label, Punctuation, Text, String)),
110
# "Disassembly of section NAME:" header line
('(Disassembly of section )(.*?)(:)$',
111
bygroups(Text, Name.Label, Punctuation)),
114
# Address, then "<symbol+/-0xOFFSET>:" closing the line; the symbol text is
# emitted as Name.Function.
('('+hex+'+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$',
115
bygroups(Number.Hex, Text, Punctuation, Name.Function,
116
Punctuation, Number.Hex, Punctuation)),
118
# Same shape without an offset: "<symbol>:"
('('+hex+'+)( )(<)(.*?)(>:)$',
119
bygroups(Number.Hex, Text, Punctuation, Name.Function,
121
# Code line with disassembled instructions
122
('( *)('+hex+r'+:)(\t)((?:'+hex+hex+' )+)( *\t)([a-zA-Z].*?)$',
123
bygroups(Text, Name.Label, Text, Number.Hex, Text,
125
# Code line with ascii
126
('( *)('+hex+r'+:)(\t)((?:'+hex+hex+' )+)( *)(.*?)$',
127
bygroups(Text, Name.Label, Text, Number.Hex, Text, String)),
128
# Continued code line: only raw opcode bytes, no disassembled instruction
130
('( *)('+hex+r'+:)(\t)((?:'+hex+hex+' )+)$',
131
bygroups(Text, Name.Label, Text, Number.Hex)),
132
# Skipped a few bytes
136
# Three leading tabs, "ADDR:", a tab-free field (Name.Property), a tab, then
# a target ending in +/-0xOFFSET.
# NOTE(review): presumably the relocation records printed by `-r` -- confirm.
('(\t\t\t)('+hex+'+:)( )([^\t]+)(\t)(.*?)([-+])(0x' + hex + '+)$',
137
bygroups(Text, Name.Label, Text, Name.Property, Text,
138
Name.Constant, Punctuation, Number.Hex)),
140
# Same three-tab layout without a trailing offset
('(\t\t\t)('+hex+'+:)( )([^\t]+)(\t)(.*?)$',
141
bygroups(Text, Name.Label, Text, Name.Property, Text,
148
class DObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled D files.

    Combines `DLexer` and `ObjdumpLexer` through `DelegatingLexer`.
    """

    aliases = ['d-objdump']
    filenames = ['*.d-objdump']
    mimetypes = ['text/x-d-objdump']

    def __init__(self, **options):
        """Pass `DLexer`, `ObjdumpLexer` and *options* to the base class."""
        super(DObjdumpLexer, self).__init__(DLexer, ObjdumpLexer, **options)
161
class CppObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled C++ files.

    Combines `CppLexer` and `ObjdumpLexer` through `DelegatingLexer`.
    """

    # 'c++-objdump' is the intended spelling, matching the '*.c++-objdump'
    # filename pattern below; the misspelled 'c++-objdumb' is kept so
    # existing users of the old alias keep working.
    aliases = ['cpp-objdump', 'c++-objdump', 'c++-objdumb', 'cxx-objdump']
    filenames = ['*.cpp-objdump', '*.c++-objdump', '*.cxx-objdump']
    mimetypes = ['text/x-cpp-objdump']

    def __init__(self, **options):
        """Pass `CppLexer`, `ObjdumpLexer` and *options* to the base class."""
        super(CppObjdumpLexer, self).__init__(CppLexer, ObjdumpLexer, **options)
174
class CObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled C files.

    Combines `CLexer` and `ObjdumpLexer` through `DelegatingLexer`.
    """

    aliases = ['c-objdump']
    filenames = ['*.c-objdump']
    mimetypes = ['text/x-c-objdump']

    def __init__(self, **options):
        """Pass `CLexer`, `ObjdumpLexer` and *options* to the base class."""
        super(CObjdumpLexer, self).__init__(CLexer, ObjdumpLexer, **options)
187
class LlvmLexer(RegexLexer):
189
For LLVM assembly code.
194
mimetypes = ['text/x-llvm']
196
#: optional Comment or Whitespace
198
identifier = r'([-a-zA-Z$._][-a-zA-Z$._0-9]*|' + string + ')'
202
include('whitespace'),
204
# Before keywords, because keywords are valid label names :(...
205
(r'^\s*' + identifier + '\s*:', Name.Label),
209
(r'%' + identifier, Name.Variable),#Name.Identifier.Local),
210
(r'@' + identifier, Name.Variable.Global),#Name.Identifier.Global),
211
(r'%\d+', Name.Variable.Anonymous),#Name.Identifier.Anonymous),
212
(r'@\d+', Name.Variable.Global),#Name.Identifier.Anonymous),
213
(r'!' + identifier, Name.Variable),
214
(r'!\d+', Name.Variable.Anonymous),
215
(r'c?' + string, String),
217
(r'0[xX][a-fA-F0-9]+', Number),
218
(r'-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?', Number),
220
(r'[=<>{}\[\]()*.,!]|x\b', Punctuation)
233
r'|private|linker_private|internal|available_externally|linkonce'
234
r'|linkonce_odr|weak|weak_odr|appending|dllimport|dllexport'
235
r'|common|default|hidden|protected|extern_weak|external'
236
r'|thread_local|zeroinitializer|undef|null|to|tail|target|triple'
237
r'|deplibs|datalayout|volatile|nuw|nsw|exact|inbounds|align'
238
r'|addrspace|section|alias|module|asm|sideeffect|gc|dbg'
240
r'|ccc|fastcc|coldcc|x86_stdcallcc|x86_fastcallcc|arm_apcscc'
241
r'|arm_aapcscc|arm_aapcs_vfpcc'
245
r'|signext|zeroext|inreg|sret|nounwind|noreturn|noalias|nocapture'
246
r'|byval|nest|readnone|readonly'
248
r'|inlinehint|noinline|alwaysinline|optsize|ssp|sspreq|noredzone'
249
r'|noimplicitfloat|naked'
253
r'|eq|ne|slt|sgt|sle'
254
r'|sge|ult|ugt|ule|uge'
255
r'|oeq|one|olt|ogt|ole'
256
r'|oge|ord|uno|ueq|une'
260
r'|add|fadd|sub|fsub|mul|fmul|udiv|sdiv|fdiv|urem|srem|frem|shl'
261
r'|lshr|ashr|and|or|xor|icmp|fcmp'
263
r'|phi|call|trunc|zext|sext|fptrunc|fpext|uitofp|sitofp|fptoui'
264
r'fptosi|inttoptr|ptrtoint|bitcast|select|va_arg|ret|br|switch'
265
r'|invoke|unwind|unreachable'
267
r'|malloc|alloca|free|load|store|getelementptr'
269
r'|extractelement|insertelement|shufflevector|getresult'
270
r'|extractvalue|insertvalue'
275
(r'void|float|double|x86_fp80|fp128|ppc_fp128|label|metadata',
279
(r'i[1-9]\d*', Keyword)
284
class NasmLexer(RegexLexer):
286
For Nasm (Intel) assembly code.
290
filenames = ['*.asm', '*.ASM']
291
mimetypes = ['text/x-nasm']
293
identifier = r'[a-zA-Z$._?][a-zA-Z0-9$._?#@~]*'
294
hexn = r'(?:0[xX][0-9a-fA-F]+|$0[0-9a-fA-F]*|[0-9]+[0-9a-fA-F]*h)'
298
floatn = decn + r'\.e?' + decn
299
string = r'"(\\"|[^"])*"|' + r"'(\\'|[^'])*'"
300
declkw = r'(?:res|d)[bwdqt]|times'
301
register = (r'[a-d][lh]|e?[a-d]x|e?[sb]p|e?[sd]i|[c-gs]s|st[0-7]|'
302
r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]')
303
wordop = r'seg|wrt|strict'
304
type = r'byte|[dq]?word'
305
directives = (r'BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
306
r'ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|'
307
r'EXPORT|LIBRARY|MODULE')
309
flags = re.IGNORECASE | re.MULTILINE
312
include('whitespace'),
313
(r'^\s*%', Comment.Preproc, 'preproc'),
314
(identifier + ':', Name.Label),
315
(r'(%s)(\s+)(equ)' % identifier,
316
bygroups(Name.Constant, Keyword.Declaration, Keyword.Declaration),
318
(directives, Keyword, 'instruction-args'),
319
(declkw, Keyword.Declaration, 'instruction-args'),
320
(identifier, Name.Function, 'instruction-args'),
323
'instruction-args': [
328
(floatn, Number.Float),
329
(decn, Number.Integer),
330
include('punctuation'),
331
(register, Name.Builtin),
332
(identifier, Name.Variable),
333
(r'[\r\n]+', Text, '#pop'),
334
include('whitespace')
337
(r'[^;\n]+', Comment.Preproc),
338
(r';.*?\n', Comment.Single, '#pop'),
339
(r'\n', Comment.Preproc, '#pop'),
344
(r';.*', Comment.Single)
347
(r'[,():\[\]]+', Punctuation),
348
(r'[&|^<>+*/%~-]+', Operator),
349
(r'[$]+', Keyword.Constant),
350
(wordop, Operator.Word),