1
// Scintilla source code edit control
3
** Lexer for Assembler, just for the MASM syntax
4
** Written by The Black Horus
5
** Enhancements and NASM stuff by Kein-Hong Man, 2003-10
6
** SCE_ASM_COMMENTBLOCK and SCE_ASM_CHARACTER are for future GNU as colouring
7
** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
9
// Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
10
// The License.txt file describes the conditions under which this software may be distributed.
24
#include "Scintilla.h"
28
#include "LexAccessor.h"
29
#include "StyleContext.h"
30
#include "CharacterSet.h"
31
#include "LexerModule.h"
32
#include "OptionSet.h"
35
using namespace Scintilla;
38
// Reports whether ch may appear inside a word: an ASCII letter or digit,
// or one of '.', '_' and '?'.
// Values outside 0..0x7F are never word characters; rejecting them first also
// keeps isalnum() within the range for which it is defined.
static inline bool IsAWordChar(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '.' ||
		ch == '_' || ch == '?';
}
43
// Reports whether ch may begin a word: an ASCII letter or digit, or one of
// '_', '.', '%', '@', '$' and '?'.
// Values outside 0..0x7F never start a word; rejecting them first also keeps
// isalnum() within the range for which it is defined.
static inline bool IsAWordStart(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_' || ch == '.' ||
		ch == '%' || ch == '@' || ch == '$' || ch == '?';
}
48
// Reports whether ch is one of the operator characters recognised by the lexer.
// ASCII letters and digits are rejected up front; the range test in front of
// isalnum() keeps that call within the values for which it is defined.
static inline bool IsAsmOperator(const int ch) {
	if ((ch >= 0) && (ch < 0x80) && (isalnum(ch)))
		return false;
	// '.' left out as it is used to make up numbers
	if (ch == '*' || ch == '/' || ch == '-' || ch == '+' ||
		ch == '(' || ch == ')' || ch == '=' || ch == '^' ||
		ch == '[' || ch == ']' || ch == '<' || ch == '&' ||
		ch == '>' || ch == ',' || ch == '|' || ch == '~' ||
		ch == '%' || ch == ':')
		return true;
	return false;
}
61
static bool IsStreamCommentStyle(int style) {
62
return style == SCE_ASM_COMMENTDIRECTIVE || style == SCE_ASM_COMMENTBLOCK;
65
// Maps the ASCII capitals 'A'..'Z' to 'a'..'z'; every other value is
// returned unchanged.
static inline int LowerCase(int c) {
	if (c >= 'A' && c <= 'Z')
		return 'a' + c - 'A';
	return c;
}
71
// An individual named option for use in an OptionSet
73
// Options used for LexerAsm
// Each field is bound to a named lexer property by OptionSetAsm::DefineProperty.
75
// "lexer.asm.comment.delimiter": delimiter character for the COMMENT directive.
std::string delimiter;
78
// "fold.asm.comment.multiline": fold multi-line comments.
bool foldCommentMultiline;
79
// "fold.asm.comment.explicit": fold at explicit fold-point markers.
bool foldCommentExplicit;
80
// "fold.asm.explicit.start": replacement for the standard ;{ start marker.
std::string foldExplicitStart;
81
// "fold.asm.explicit.end": replacement for the standard ;} end marker.
std::string foldExplicitEnd;
82
// "fold.asm.explicit.anywhere": accept explicit fold points outside line comments.
bool foldExplicitAnywhere;
87
// Default values. NOTE(review): presumably the constructor body; the enclosing
// definition is not visible here - confirm.
foldSyntaxBased = true;
88
foldCommentMultiline = false;
89
foldCommentExplicit = false;
90
foldExplicitStart = "";
92
foldExplicitAnywhere = false;
97
// Human-readable descriptions of the keyword lists. The array is handed to
// DefineWordListSets() in OptionSetAsm and to the LexerModule definition.
// NOTE(review): the entry order appears to match the list indices handled by
// LexerAsm::WordListSet - confirm.
static const char * const asmWordListDesc[] = {
102
"Directive operands",
103
"Extended instructions",
104
"Directives4Foldstart",
105
"Directives4Foldend",
109
// Binds each lexer property name to the OptionsAsm member that stores it,
// with a description where one is given, and declares the keyword list sets.
struct OptionSetAsm : public OptionSet<OptionsAsm> {
111
DefineProperty("lexer.asm.comment.delimiter", &OptionsAsm::delimiter,
112
"Character used for COMMENT directive's delimiter, replacing the standard \"~\".");
114
DefineProperty("fold", &OptionsAsm::fold);
116
DefineProperty("fold.asm.syntax.based", &OptionsAsm::foldSyntaxBased,
117
"Set this property to 0 to disable syntax based folding.");
119
DefineProperty("fold.asm.comment.multiline", &OptionsAsm::foldCommentMultiline,
120
"Set this property to 1 to enable folding multi-line comments.");
122
DefineProperty("fold.asm.comment.explicit", &OptionsAsm::foldCommentExplicit,
123
"This option enables folding explicit fold points when using the Asm lexer. "
124
"Explicit fold points allows adding extra folding by placing a ;{ comment at the start and a ;} "
125
"at the end of a section that should fold.");
127
DefineProperty("fold.asm.explicit.start", &OptionsAsm::foldExplicitStart,
128
"The string to use for explicit fold start points, replacing the standard ;{.");
130
DefineProperty("fold.asm.explicit.end", &OptionsAsm::foldExplicitEnd,
131
"The string to use for explicit fold end points, replacing the standard ;}.");
133
DefineProperty("fold.asm.explicit.anywhere", &OptionsAsm::foldExplicitAnywhere,
134
"Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
136
DefineProperty("fold.compact", &OptionsAsm::foldCompact);
138
// Publish the keyword list descriptions alongside the properties.
DefineWordListSets(asmWordListDesc);
142
// Lexer object for assembler source. It derives from ILexer, keeps one WordList
// per keyword category, and answers property and word-list queries by
// delegating to the osAsm option set.
class LexerAsm : public ILexer {
143
// Keyword lists consulted by Lex() to classify identifiers.
WordList cpuInstruction;
144
WordList mathInstruction;
147
WordList directiveOperand;
148
WordList extInstruction;
149
// Directive names looked up by Fold() when syntax based folding is enabled.
WordList directives4foldstart;
150
WordList directives4foldend;
156
virtual ~LexerAsm() {
158
void SCI_METHOD Release() {
161
int SCI_METHOD Version() const {
164
// The property and word-list description queries below forward to osAsm.
const char * SCI_METHOD PropertyNames() {
165
return osAsm.PropertyNames();
167
int SCI_METHOD PropertyType(const char *name) {
168
return osAsm.PropertyType(name);
170
const char * SCI_METHOD DescribeProperty(const char *name) {
171
return osAsm.DescribeProperty(name);
173
int SCI_METHOD PropertySet(const char *key, const char *val);
174
const char * SCI_METHOD DescribeWordListSets() {
175
return osAsm.DescribeWordListSets();
177
int SCI_METHOD WordListSet(int n, const char *wl);
178
void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
179
void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
181
void * SCI_METHOD PrivateCall(int, void *) {
185
// Factory referenced by the lmAsm module definition; returns a newly
// allocated LexerAsm.
static ILexer *LexerFactoryAsm() {
186
return new LexerAsm();
190
// Sets the property named key to val by forwarding to the option set, which
// stores the value in `options`.
int SCI_METHOD LexerAsm::PropertySet(const char *key, const char *val) {
191
// The branch is taken when osAsm.PropertySet() returns true.
if (osAsm.PropertySet(&options, key, val)) {
197
// Replaces keyword list number n with the words in wl.
// n selects the list in the same order as asmWordListDesc; an index outside
// 0..7 selects nothing.
// Returns 0 when the selected list's contents changed, and -1 when n selects no
// list or the new contents equal the current ones.
int SCI_METHOD LexerAsm::WordListSet(int n, const char *wl) {
	WordList *wordListN = 0;
	switch (n) {
	case 0:
		wordListN = &cpuInstruction;
		break;
	case 1:
		wordListN = &mathInstruction;
		break;
	case 2:
		wordListN = &registers;
		break;
	case 3:
		wordListN = &directive;
		break;
	case 4:
		wordListN = &directiveOperand;
		break;
	case 5:
		wordListN = &extInstruction;
		break;
	case 6:
		wordListN = &directives4foldstart;
		break;
	case 7:
		wordListN = &directives4foldend;
		break;
	}
	int firstModification = -1;
	if (wordListN) {
		// Build the candidate list first so an unchanged list is left alone.
		WordList wlNew;
		wlNew.Set(wl);
		if (*wordListN != wlNew) {
			wordListN->Set(wl);
			firstModification = 0;
		}
	}
	return firstModification;
}
237
// Styles the text described by startPos/length, which is handed to a
// StyleContext together with initStyle. The loop first decides whether the
// current SCE_ASM_* state ends at the current character, then whether a new
// state starts there.
void SCI_METHOD LexerAsm::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
238
LexAccessor styler(pAccess);
240
// Do not leak onto next line
241
if (initStyle == SCE_ASM_STRINGEOL)
242
initStyle = SCE_ASM_DEFAULT;
244
StyleContext sc(startPos, length, initStyle, styler);
246
for (; sc.More(); sc.Forward())
249
// Prevent SCE_ASM_STRINGEOL from leaking back to previous line
250
if (sc.atLineStart && (sc.state == SCE_ASM_STRING)) {
251
sc.SetState(SCE_ASM_STRING);
252
} else if (sc.atLineStart && (sc.state == SCE_ASM_CHARACTER)) {
253
sc.SetState(SCE_ASM_CHARACTER);
256
// Handle line continuation generically.
258
if (sc.chNext == '\n' || sc.chNext == '\r') {
260
if (sc.ch == '\r' && sc.chNext == '\n') {
267
// Determine if the current state should terminate.
268
if (sc.state == SCE_ASM_OPERATOR) {
269
if (!IsAsmOperator(sc.ch)) {
270
sc.SetState(SCE_ASM_DEFAULT);
272
} else if (sc.state == SCE_ASM_NUMBER) {
273
if (!IsAWordChar(sc.ch)) {
274
sc.SetState(SCE_ASM_DEFAULT);
276
} else if (sc.state == SCE_ASM_IDENTIFIER) {
277
if (!IsAWordChar(sc.ch) ) {
279
// The identifier is complete: fetch it lower-cased and restyle it according
// to the first keyword list that contains it.
sc.GetCurrentLowered(s, sizeof(s));
280
bool IsDirective = false;
282
if (cpuInstruction.InList(s)) {
283
sc.ChangeState(SCE_ASM_CPUINSTRUCTION);
284
} else if (mathInstruction.InList(s)) {
285
sc.ChangeState(SCE_ASM_MATHINSTRUCTION);
286
} else if (registers.InList(s)) {
287
sc.ChangeState(SCE_ASM_REGISTER);
288
} else if (directive.InList(s)) {
289
sc.ChangeState(SCE_ASM_DIRECTIVE);
291
} else if (directiveOperand.InList(s)) {
292
sc.ChangeState(SCE_ASM_DIRECTIVEOPERAND);
293
} else if (extInstruction.InList(s)) {
294
sc.ChangeState(SCE_ASM_EXTINSTRUCTION);
296
sc.SetState(SCE_ASM_DEFAULT);
297
// A directive spelled "comment" opens a comment-directive region: skip
// blanks on the same line, then switch style at the delimiter character.
if (IsDirective && !strcmp(s, "comment")) {
298
// The delimiter is the first character of the configured option, or '~'
// when the option is empty.
char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0];
299
while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) {
300
sc.ForwardSetState(SCE_ASM_DEFAULT);
302
if (sc.ch == delimiter) {
303
sc.SetState(SCE_ASM_COMMENTDIRECTIVE);
307
} else if (sc.state == SCE_ASM_COMMENTDIRECTIVE) {
308
char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0];
309
if (sc.ch == delimiter) {
310
while (!sc.atLineEnd) {
313
sc.SetState(SCE_ASM_DEFAULT);
315
} else if (sc.state == SCE_ASM_COMMENT ) {
317
sc.SetState(SCE_ASM_DEFAULT);
319
} else if (sc.state == SCE_ASM_STRING) {
321
if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
324
} else if (sc.ch == '\"') {
325
sc.ForwardSetState(SCE_ASM_DEFAULT);
326
} else if (sc.atLineEnd) {
327
// String still open at end of line: restyle it as SCE_ASM_STRINGEOL.
sc.ChangeState(SCE_ASM_STRINGEOL);
328
sc.ForwardSetState(SCE_ASM_DEFAULT);
330
} else if (sc.state == SCE_ASM_CHARACTER) {
332
if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
335
} else if (sc.ch == '\'') {
336
sc.ForwardSetState(SCE_ASM_DEFAULT);
337
} else if (sc.atLineEnd) {
338
// Character literal still open at end of line: same treatment as strings.
sc.ChangeState(SCE_ASM_STRINGEOL);
339
sc.ForwardSetState(SCE_ASM_DEFAULT);
343
// Determine if a new state should be entered.
344
if (sc.state == SCE_ASM_DEFAULT) {
346
sc.SetState(SCE_ASM_COMMENT);
347
// A number starts with an ASCII digit, or with '.' followed by an ASCII digit.
} else if (IsASCII(sc.ch) && (isdigit(sc.ch) || (sc.ch == '.' && IsASCII(sc.chNext) && isdigit(sc.chNext)))) {
348
sc.SetState(SCE_ASM_NUMBER);
349
} else if (IsAWordStart(sc.ch)) {
350
sc.SetState(SCE_ASM_IDENTIFIER);
351
} else if (sc.ch == '\"') {
352
sc.SetState(SCE_ASM_STRING);
353
} else if (sc.ch == '\'') {
354
sc.SetState(SCE_ASM_CHARACTER);
355
} else if (IsAsmOperator(sc.ch)) {
356
sc.SetState(SCE_ASM_OPERATOR);
364
// Store both the current line's fold level and the next lines in the
365
// level store to make it easy to pick up with each increment
366
// and to make it possible to fiddle the current level for "else".
368
// Computes fold levels for the characters from startPos up to startPos + length.
// Each stored level packs the line's own level in the low 16 bits and the level
// for the following line in the high 16 bits (see `levelUse | levelNext << 16`).
void SCI_METHOD LexerAsm::Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
373
LexAccessor styler(pAccess);
375
unsigned int endPos = startPos + length;
376
int visibleChars = 0;
377
int lineCurrent = styler.GetLine(startPos);
378
int levelCurrent = SC_FOLDLEVELBASE;
380
// Resume from the "next line" half stored with the previous line.
levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
381
int levelNext = levelCurrent;
382
char chNext = styler[startPos];
383
int styleNext = styler.StyleAt(startPos);
384
int style = initStyle;
387
// Custom fold markers apply only when both the start and the end string are set.
const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
388
for (unsigned int i = startPos; i < endPos; i++) {
390
chNext = styler.SafeGetCharAt(i + 1);
391
int stylePrev = style;
393
styleNext = styler.StyleAt(i + 1);
394
// A line ends at '\n', or at a '\r' that is not followed by '\n'.
bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
395
if (options.foldCommentMultiline && IsStreamCommentStyle(style)) {
396
if (!IsStreamCommentStyle(stylePrev)) {
398
} else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
399
// Comments don't end at end of line and the next character may be unstyled.
403
// Explicit fold points are looked for in line comments, or in any style when
// foldExplicitAnywhere is set.
if (options.foldCommentExplicit && ((style == SCE_ASM_COMMENT) || options.foldExplicitAnywhere)) {
404
if (userDefinedFoldMarkers) {
405
if (styler.Match(i, options.foldExplicitStart.c_str())) {
407
} else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
414
} else if (chNext == '}') {
420
if (options.foldSyntaxBased && (style == SCE_ASM_DIRECTIVE)) {
421
// Collect the directive text lower-cased, one character per iteration.
word[wordlen++] = static_cast<char>(LowerCase(ch));
422
if (wordlen == 100) { // prevent overflow
426
if (styleNext != SCE_ASM_DIRECTIVE) { // reading directive ready
427
word[wordlen] = '\0';
429
if (directives4foldstart.InList(word)) {
431
} else if (directives4foldend.InList(word)){
438
// Commit the level at every line end and at the last character of the range.
if (atEOL || (i == endPos-1)) {
439
int levelUse = levelCurrent;
440
int lev = levelUse | levelNext << 16;
441
if (visibleChars == 0 && options.foldCompact)
442
lev |= SC_FOLDLEVELWHITEFLAG;
443
// A line whose following level is deeper is marked as a fold header.
if (levelUse < levelNext)
444
lev |= SC_FOLDLEVELHEADERFLAG;
445
if (lev != styler.LevelAt(lineCurrent)) {
446
styler.SetLevel(lineCurrent, lev);
449
levelCurrent = levelNext;
450
if (atEOL && (i == static_cast<unsigned int>(styler.Length()-1))) {
451
// There is an empty line at end of file so give it same level and empty
452
styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
459
// Module descriptor tying SCLEX_ASM and the name "asm" to the
// LexerAsm::LexerFactoryAsm factory and the keyword list descriptions.
LexerModule lmAsm(SCLEX_ASM, LexerAsm::LexerFactoryAsm, "asm", asmWordListDesc);