1
// Scintilla source code edit control
5
// Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
6
// Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
7
// The License.txt file describes the conditions under which this software may be distributed.
19
#include "StyleContext.h"
21
#include "Scintilla.h"
23
#include "CharacterSet.h"
26
using namespace Scintilla;
29
// Info for HERE document handling from perldata.pod (reformatted):
30
// ----------------------------------------------------------------
31
// A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
32
// Following a << you specify a string to terminate the quoted material, and
33
// all lines following the current line down to the terminating string are
34
// the value of the item.
35
// * The terminating string may be either an identifier (a word), or some quoted text.
37
// * If quoted, the type of quotes you use determines the treatment of the
38
// text, just as in regular quoting.
39
// * An unquoted identifier works like double quotes.
40
// * There must be no space between the << and the identifier.
41
// (If you put a space it will be treated as a null identifier,
42
// which is valid, and matches the first empty line.)
43
// (This is deprecated, -w warns of this syntax)
44
// * The terminating string must appear by itself (unquoted and
45
// with no surrounding whitespace) on the terminating line.
47
#define HERE_DELIM_MAX 256 // maximum length of HERE doc delimiter
49
// Sub-states used while lexing numeric literals (held in numState).
#define PERLNUM_BINARY 1 // order is significant: 1-4 cannot have a dot
51
#define PERLNUM_OCTAL 3
52
#define PERLNUM_FLOAT_EXP 4 // exponent part only
53
#define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings
54
#define PERLNUM_VECTOR 6
55
#define PERLNUM_V_VECTOR 7
58
// Values for backFlag: the kind of significant token found when looking
// backwards past whitespace and comments.
#define BACK_NONE 0 // lookback state for bareword disambiguation:
59
#define BACK_OPERATOR 1 // whitespace/comments are insignificant
60
#define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation
62
// Copies the document text that begins at 'start' (end - start characters,
// capped at 30) into the buffer s and returns whether that text is found
// in 'keywords' (WordList::InList).
static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler)
64
// old-style keyword matcher; needed because GetCurrent() needs
65
// current segment to be committed, but we may abandon early...
67
unsigned int i, len = end - start;
68
// longer spans are cut down to their first 30 characters
if (len > 30) { len = 30; }
69
for (i = 0; i < len; i++, start++) s[i] = styler[start];
71
return keywords.InList(s);
74
// Bareword disambiguation helper.
//   bk       position whose character is examined when looking backwards
//   fw       position from which the forward scan starts
//   backFlag one of BACK_NONE / BACK_OPERATOR / BACK_KEYWORD
//   backPos  lookback position; compared with the start of bk's line
//   endPos   upper bound for the forward scan
// Callers in this file test the result either for non-zero or with '& 2'.
static int disambiguateBareword(Accessor &styler, unsigned int bk, unsigned int fw,
75
int backFlag, unsigned int backPos, unsigned int endPos)
77
// identifiers are recognized by Perl as barewords under some
78
// conditions, the following attempts to do the disambiguation
79
// by looking backward and forward; result in 2 LSB
81
bool moreback = false; // true if passed newline/comments
82
bool brace = false; // true if opening brace found
83
// if BACK_NONE, neither operator nor keyword, so skip test
84
if (backFlag == BACK_NONE)
86
// first look backwards past whitespace/comments to set EOL flag
87
// (some disambiguation patterns must be on a single line)
88
if (backPos <= static_cast<unsigned int>(styler.LineStart(styler.GetLine(bk))))
90
// look backwards at last significant lexed item for disambiguation
92
int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
93
if (ch == '{' && !moreback) {
94
// {bareword: possible variable spec
96
} else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&')
97
// &bareword: subroutine call
98
|| styler.Match(bk - 1, "->")
99
// ->bareword: part of variable spec
100
|| styler.Match(bk - 2, "sub")) {
101
// sub bareword: subroutine declaration
102
// (implied BACK_KEYWORD, no keywords end in 'sub'!)
105
// next, scan forward after word past tab/spaces only;
106
// if ch isn't one of '[{(,' we can skip the test
107
if ((ch == '{' || ch == '(' || ch == '['|| ch == ',')
109
// the comma expression re-reads ch at fw on every iteration before
// the space/tab and bounds tests are applied
while (ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)),
110
IsASpaceOrTab(ch) && fw < endPos) {
113
if ((ch == '}' && brace)
114
// {bareword}: variable spec
115
|| styler.Match(fw, "=>")) {
116
// [{(, bareword=>: hash literal
123
// Adjusts p in place (it is passed by reference). The loop keeps running
// while p > 0 and the style at p is SCE_PL_DEFAULT or SCE_PL_COMMENTLINE.
static void skipWhitespaceComment(Accessor &styler, unsigned int &p)
125
// when backtracking, we need to skip whitespace and comments
127
// the comma expression stores the style at p before it is tested
while ((p > 0) && (style = styler.StyleAt(p),
128
style == SCE_PL_DEFAULT || style == SCE_PL_COMMENTLINE))
132
// Looks backwards from bk for the '{' that balances a '}' and returns the
// style found before it once further whitespace/comments are skipped;
// returns SCE_PL_DEFAULT when no such position is found.
static int styleBeforeBracePair(Accessor &styler, unsigned int bk)
134
// backtrack to find open '{' corresponding to a '}', balanced
135
// return significant style to be tested for '/' disambiguation
138
return SCE_PL_DEFAULT;
140
// only characters styled as operators take part in the brace counting
if (styler.StyleAt(bk) == SCE_PL_OPERATOR) {
141
int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
142
if (bkch == ';') { // early out
144
} else if (bkch == '}') {
146
} else if (bkch == '{') {
147
// the balancing '{' is reached when the count drops back to zero
if (--braceCount == 0) break;
151
if (bk > 0 && braceCount == 0) {
152
// balanced { found, bk > 0, skip more whitespace/comments
154
skipWhitespaceComment(styler, bk);
155
return styler.StyleAt(bk);
157
return SCE_PL_DEFAULT;
160
// Classifies the identifier that ends at bk and returns a small integer
// code. The code 3 means a bare identifier; the caller in ColourisePerlDoc
// treats 1 and 2 as "inputsymbol or var with -> or :: before identifier".
static int styleCheckIdentifier(Accessor &styler, unsigned int bk)
162
// backtrack to classify sub-styles of identifier under test
163
// return sub-style to be tested for '/' disambiguation
164
if (styler.SafeGetCharAt(bk) == '>') // inputsymbol, like <foo>
166
// backtrack to check for possible "->" or "::" before identifier
167
while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
171
int bkstyle = styler.StyleAt(bk);
172
if (bkstyle == SCE_PL_DEFAULT
173
|| bkstyle == SCE_PL_COMMENTLINE) {
174
// skip whitespace, comments
175
} else if (bkstyle == SCE_PL_OPERATOR) {
176
// test for "->" and "::"
177
// both operators are two characters long, hence the match at bk - 1
if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::"))
180
return 3; // bare identifier
186
// Scans forward from the character after pos, staying below endPos, and
// reacts to a line end ('\r' or '\n') or a '>' character; a '>' that is
// the tail of "<=>" is singled out.
static int inputsymbolScan(Accessor &styler, unsigned int pos, unsigned int endPos)
188
// looks forward for matching > on same line; a bit ugly
189
unsigned int fw = pos;
190
// pre-increment: the character at pos itself is never examined
while (++fw < endPos) {
191
int fwch = static_cast<unsigned char>(styler.SafeGetCharAt(fw));
192
if (fwch == '\r' || fwch == '\n') {
194
} else if (fwch == '>') {
195
if (styler.Match(fw - 2, "<=>")) // '<=>' case
203
// Classifies one line for POD styling. pos is passed by reference and is
// advanced by the scan; endPos bounds it. Callers in this file compare the
// result with SCE_PL_DEFAULT, SCE_PL_POD and SCE_PL_POD_VERB.
static int podLineScan(Accessor &styler, unsigned int &pos, unsigned int endPos)
205
// forward scan the current line to classify line for POD style
207
while (pos <= endPos) {
208
int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos));
209
if (ch == '\n' || ch == '\r' || pos >= endPos) {
210
// step over the LF of a CRLF pair so it counts as one line end
if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++;
213
if (IsASpaceOrTab(ch)) { // whitespace, take note
215
state = SCE_PL_DEFAULT;
216
} else if (state == SCE_PL_DEFAULT) { // verbatim POD line
217
state = SCE_PL_POD_VERB;
218
} else if (state != SCE_PL_POD_VERB) { // regular POD line
224
state = SCE_PL_DEFAULT;
228
// Called by ColourisePerlDoc with the position just before a '('; a true
// result makes the caller style the text as SCE_PL_SUB_PROTOTYPE instead
// of as an operator.
static bool styleCheckSubPrototype(Accessor &styler, unsigned int bk)
230
// backtrack to identify if we're starting a subroutine prototype
231
// we also need to ignore whitespace/comments:
232
// 'sub' [whitespace|comment] <identifier> [whitespace|comment]
234
skipWhitespaceComment(styler, bk);
235
if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER) // check identifier
237
// walk back over the run of identifier-styled characters
while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) {
240
skipWhitespaceComment(styler, bk);
241
// bk < 2 also protects the bk - 2 lookup on the next line
if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD // check "sub" keyword
242
|| !styler.Match(bk - 2, "sub")) // assume suffix is unique!
247
// Returns true when s compares equal to sref with strcmp. In this file it
// is used to compare the text of a here-doc line against HereDoc.Delimiter.
// s is a mutable buffer because, per the note below, a trailing CR is
// dealt with before the comparison.
static bool isMatch(const char *sref, char *s)
249
// match per-line delimiter - must kill trailing CR if CRLF
251
// i != 0 keeps the s[i - 1] access inside the buffer for an empty string
if (i != 0 && s[i - 1] == '\r')
253
return (strcmp(sref, s) == 0);
256
// Maps a PERLNUM_* number sub-state to a lexer style. ColourisePerlDoc
// applies the result with sc.ChangeState() when a number or vector token
// is completed.
static int actualNumStyle(int numberStyle) {
257
// vector literals are styled as strings
if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
258
return SCE_PL_STRING;
259
} else if (numberStyle == PERLNUM_BAD) {
262
return SCE_PL_NUMBER;
265
// Returns the closing delimiter that pairs with an opening '(', '[', '{'
// or '<'.
static int opposite(int ch) {
266
if (ch == '(') return ')';
267
if (ch == '[') return ']';
268
if (ch == '{') return '}';
269
if (ch == '<') return '>';
273
static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
274
WordList *keywordlists[], Accessor &styler) {
276
WordList &keywords = *keywordlists[0];
278
// keywords that force /PATTERN/ at all times; should track vim's behaviour
280
reWords.Set("elsif if split while");
283
CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true);
284
CharacterSet setWord(CharacterSet::setAlphaNum, "_", 0x80, true);
285
CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");
286
// lexing of "%*</" operators is non-trivial; these are missing in the set below
287
CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~");
288
CharacterSet setQDelim(CharacterSet::setNone, "qrwx");
289
CharacterSet setModifiers(CharacterSet::setAlpha);
290
CharacterSet setPreferRE(CharacterSet::setNone, "*/<%");
291
// setArray and setHash also accept chars for special vars like $_,
292
// which are then truncated when the next char does not match setVar
293
CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true);
294
CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true);
295
CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true);
296
CharacterSet &setPOD = setModifiers;
297
CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@");
298
CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_");
299
CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*];");
300
// for format identifiers
301
CharacterSet setFormatStart(CharacterSet::setAlpha, "_=");
302
CharacterSet &setFormat = setHereDocDelim;
304
// Lexer for perl often has to backtrack to start of current style to determine
305
// which characters are being used as quotes, how deeply nested is the
306
// start position and what the termination string is for HERE documents.
308
class HereDocCls { // Class to manage HERE doc sequence
310
int State; // 0: '<<' encountered
311
// 1: collect the delimiter
312
// 2: here doc text (lines after the delimiter)
313
int Quote; // the char after '<<'
314
bool Quoted; // true if Quote in ('\'','"','`')
315
int DelimiterLength; // strlen(Delimiter)
316
char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf
322
Delimiter = new char[HERE_DELIM_MAX];
325
void Append(int ch) {
326
Delimiter[DelimiterLength++] = static_cast<char>(ch);
327
Delimiter[DelimiterLength] = '\0';
333
HereDocCls HereDoc; // TODO: FIFO for stacked here-docs
335
class QuoteCls { // Class to manage quote pairs
343
void New(int r = 1) {
357
// additional state for number lexing
358
int numState = PERLNUM_DECIMAL;
361
unsigned int endPos = startPos + length;
363
// Backtrack to beginning of style if required...
364
// If in a long distance lexical state, backtrack to find quote characters.
365
// Includes strings (may be multi-line), numbers (additional state), format
366
// bodies, as well as POD sections.
367
if (initStyle == SCE_PL_HERE_Q
368
|| initStyle == SCE_PL_HERE_QQ
369
|| initStyle == SCE_PL_HERE_QX
370
|| initStyle == SCE_PL_FORMAT
372
int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;
373
while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {
376
startPos = styler.LineStart(styler.GetLine(startPos));
377
initStyle = styler.StyleAt(startPos - 1);
379
if (initStyle == SCE_PL_STRING_Q
380
|| initStyle == SCE_PL_STRING_QQ
381
|| initStyle == SCE_PL_STRING_QX
382
|| initStyle == SCE_PL_STRING_QR
383
|| initStyle == SCE_PL_STRING_QW
384
|| initStyle == SCE_PL_REGEX
385
|| initStyle == SCE_PL_REGSUBST
386
|| initStyle == SCE_PL_STRING
387
|| initStyle == SCE_PL_BACKTICKS
388
|| initStyle == SCE_PL_CHARACTER
389
|| initStyle == SCE_PL_NUMBER
390
|| initStyle == SCE_PL_IDENTIFIER
391
|| initStyle == SCE_PL_ERROR
392
|| initStyle == SCE_PL_SUB_PROTOTYPE
394
while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
397
initStyle = SCE_PL_DEFAULT;
398
} else if (initStyle == SCE_PL_POD
399
|| initStyle == SCE_PL_POD_VERB
401
// POD backtracking finds preceding blank lines and goes back past them
402
int ln = styler.GetLine(startPos);
404
initStyle = styler.StyleAt(styler.LineStart(--ln));
405
if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) {
406
while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT)
409
startPos = styler.LineStart(++ln);
410
initStyle = styler.StyleAt(startPos - 1);
413
initStyle = SCE_PL_DEFAULT;
417
// backFlag, backPos are additional state to aid identifier corner cases.
418
// Look backwards past whitespace and comments in order to detect either
419
// operator or keyword. Later updated as we go along.
420
int backFlag = BACK_NONE;
421
unsigned int backPos = startPos;
424
skipWhitespaceComment(styler, backPos);
425
if (styler.StyleAt(backPos) == SCE_PL_OPERATOR)
426
backFlag = BACK_OPERATOR;
427
else if (styler.StyleAt(backPos) == SCE_PL_WORD)
428
backFlag = BACK_KEYWORD;
432
StyleContext sc(startPos, endPos - startPos, initStyle, styler, static_cast<char>(STYLE_MAX));
434
for (; sc.More(); sc.Forward()) {
436
// Determine if the current state should terminate.
438
case SCE_PL_OPERATOR:
439
sc.SetState(SCE_PL_DEFAULT);
440
backFlag = BACK_OPERATOR;
441
backPos = sc.currentPos;
443
case SCE_PL_IDENTIFIER: // identifier, bareword, inputsymbol
444
if ((!setWord.Contains(sc.ch) && sc.ch != '\'')
445
|| sc.Match('.', '.')
446
|| sc.chPrev == '>') { // end of inputsymbol
447
sc.SetState(SCE_PL_DEFAULT);
450
case SCE_PL_WORD: // keyword, plus special cases
451
if (!setWord.Contains(sc.ch)) {
453
sc.GetCurrent(s, sizeof(s));
454
if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) {
455
sc.ChangeState(SCE_PL_DATASECTION);
457
if ((strcmp(s, "format") == 0)) {
458
sc.SetState(SCE_PL_FORMAT_IDENT);
461
sc.SetState(SCE_PL_DEFAULT);
463
backFlag = BACK_KEYWORD;
464
backPos = sc.currentPos;
471
case SCE_PL_SYMBOLTABLE:
472
if (sc.Match(':', ':')) { // skip ::
474
} else if (!setVar.Contains(sc.ch)) {
475
if (sc.LengthCurrent() == 1) {
476
// Special variable: $(, $_ etc.
479
sc.SetState(SCE_PL_DEFAULT);
483
// if no early break, number style is terminated at "(go through)"
485
if (sc.chNext == '.') {
486
// double dot is always an operator (go through)
487
} else if (numState <= PERLNUM_FLOAT_EXP) {
488
// non-decimal number or float exponent, consume next dot
489
sc.SetState(SCE_PL_OPERATOR);
491
} else { // decimal or vectors allows dots
493
if (numState == PERLNUM_DECIMAL) {
494
if (dotCount <= 1) // number with one dot in it
496
if (IsADigit(sc.chNext)) { // really a vector
497
numState = PERLNUM_VECTOR;
500
// number then dot (go through)
501
} else if (IsADigit(sc.chNext)) // vectors
503
// vector then dot (go through)
505
} else if (sc.ch == '_') {
506
// permissive underscoring for number and vector literals
508
} else if (numState == PERLNUM_DECIMAL) {
509
if (sc.ch == 'E' || sc.ch == 'e') { // exponent, sign
510
numState = PERLNUM_FLOAT_EXP;
511
if (sc.chNext == '+' || sc.chNext == '-') {
515
} else if (IsADigit(sc.ch))
517
// number then word (go through)
518
} else if (numState == PERLNUM_HEX) {
519
if (IsADigit(sc.ch, 16))
521
} else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
522
if (IsADigit(sc.ch)) // vector
524
if (setWord.Contains(sc.ch) && dotCount == 0) { // change to word
525
sc.ChangeState(SCE_PL_IDENTIFIER);
528
// vector then word (go through)
529
} else if (IsADigit(sc.ch)) {
530
if (numState == PERLNUM_FLOAT_EXP) {
532
} else if (numState == PERLNUM_OCTAL) {
533
if (sc.ch <= '7') break;
534
} else if (numState == PERLNUM_BINARY) {
535
if (sc.ch <= '1') break;
537
// mark invalid octal, binary numbers (go through)
538
numState = PERLNUM_BAD;
541
// complete current number or vector
542
sc.ChangeState(actualNumStyle(numState));
543
sc.SetState(SCE_PL_DEFAULT);
545
case SCE_PL_COMMENTLINE:
547
sc.SetState(SCE_PL_DEFAULT);
550
case SCE_PL_HERE_DELIM:
551
if (HereDoc.State == 0) { // '<<' encountered
552
int delim_ch = sc.chNext;
554
HereDoc.State = 1; // pre-init HERE doc class
555
HereDoc.Quote = sc.chNext;
556
HereDoc.Quoted = false;
557
HereDoc.DelimiterLength = 0;
558
HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
559
if (IsASpaceOrTab(delim_ch)) {
560
// skip whitespace; legal only for quoted delimiters
561
unsigned int i = sc.currentPos + 1;
562
while ((i < endPos) && IsASpaceOrTab(delim_ch)) {
564
delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
566
ws_skip = i - sc.currentPos - 1;
568
if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') {
569
// a quoted here-doc delimiter; skip any whitespace
570
sc.Forward(ws_skip + 1);
571
HereDoc.Quote = delim_ch;
572
HereDoc.Quoted = true;
573
} else if ((ws_skip == 0 && setNonHereDoc.Contains(sc.chNext))
575
// left shift << or <<= operator cases
576
// restore position if operator
577
sc.ChangeState(SCE_PL_OPERATOR);
578
sc.ForwardSetState(SCE_PL_DEFAULT);
579
backFlag = BACK_OPERATOR;
580
backPos = sc.currentPos;
583
// specially handle initial '\' for identifier
584
if (ws_skip == 0 && HereDoc.Quote == '\\')
586
// an unquoted here-doc delimiter, no special handling
587
// (cannot be prefixed by spaces/tabs), or
588
// symbols terminates; deprecated zero-length delimiter
590
} else if (HereDoc.State == 1) { // collect the delimiter
591
backFlag = BACK_NONE;
592
if (HereDoc.Quoted) { // a quoted here-doc delimiter
593
if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
594
sc.ForwardSetState(SCE_PL_DEFAULT);
595
} else if (!sc.atLineEnd) {
596
if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote
599
if (sc.ch != '\r') { // skip CR if CRLF
600
HereDoc.Append(sc.ch);
603
} else { // an unquoted here-doc delimiter
604
if (setHereDocDelim.Contains(sc.ch)) {
605
HereDoc.Append(sc.ch);
607
sc.SetState(SCE_PL_DEFAULT);
610
if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
611
sc.SetState(SCE_PL_ERROR);
618
case SCE_PL_HERE_QX: {
619
// also implies HereDoc.State == 2
621
while (!sc.atLineEnd)
623
char s[HERE_DELIM_MAX];
624
sc.GetCurrent(s, sizeof(s));
625
if (isMatch(HereDoc.Delimiter, s)) {
626
sc.SetState(SCE_PL_DEFAULT);
627
backFlag = BACK_NONE;
632
case SCE_PL_POD_VERB: {
633
unsigned int fw = sc.currentPos;
634
int ln = styler.GetLine(fw);
635
if (sc.atLineStart && sc.Match("=cut")) { // end of POD
636
sc.SetState(SCE_PL_POD);
638
sc.SetState(SCE_PL_DEFAULT);
639
styler.SetLineState(ln, SCE_PL_POD);
642
int pod = podLineScan(styler, fw, endPos); // classify POD line
643
styler.SetLineState(ln, pod);
644
if (pod == SCE_PL_DEFAULT) {
645
if (sc.state == SCE_PL_POD_VERB) {
646
unsigned int fw2 = fw;
647
while (fw2 <= endPos && pod == SCE_PL_DEFAULT) {
648
fw = fw2++; // penultimate line (last blank line)
649
pod = podLineScan(styler, fw2, endPos);
650
styler.SetLineState(styler.GetLine(fw2), pod);
652
if (pod == SCE_PL_POD) { // truncate verbatim POD early
653
sc.SetState(SCE_PL_POD);
659
if (pod == SCE_PL_POD_VERB // still part of current paragraph
660
&& (styler.GetLineState(ln - 1) == SCE_PL_POD)) {
662
styler.SetLineState(ln, pod);
663
} else if (pod == SCE_PL_POD
664
&& (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) {
665
pod = SCE_PL_POD_VERB;
666
styler.SetLineState(ln, pod);
670
sc.Forward(fw - sc.currentPos); // commit style
673
case SCE_PL_STRING_QR:
674
if (Quote.Rep <= 0) {
675
if (!setModifiers.Contains(sc.ch))
676
sc.SetState(SCE_PL_DEFAULT);
677
} else if (!Quote.Up && !IsASpace(sc.ch)) {
679
} else if (sc.ch == '\\' && Quote.Up != '\\') {
681
} else if (sc.ch == Quote.Down) {
683
if (Quote.Count == 0)
685
} else if (sc.ch == Quote.Up) {
689
case SCE_PL_REGSUBST:
690
if (Quote.Rep <= 0) {
691
if (!setModifiers.Contains(sc.ch))
692
sc.SetState(SCE_PL_DEFAULT);
693
} else if (!Quote.Up && !IsASpace(sc.ch)) {
695
} else if (sc.ch == '\\' && Quote.Up != '\\') {
697
} else if (Quote.Count == 0 && Quote.Rep == 1) {
698
// We matched something like s(...) or tr{...}, Perl 5.10
699
// appears to allow almost any character for use as the
700
// next delimiters. Whitespace and comments are accepted in
701
// between, but we'll limit to whitespace here.
702
// For '#', if no whitespace in between, it's a delimiter.
703
if (IsASpace(sc.ch)) {
705
} else if (sc.ch == '#' && IsASpaceOrTab(sc.chPrev)) {
706
sc.SetState(SCE_PL_DEFAULT);
710
} else if (sc.ch == Quote.Down) {
712
if (Quote.Count == 0)
714
if (Quote.Up == Quote.Down)
716
} else if (sc.ch == Quote.Up) {
720
case SCE_PL_STRING_Q:
721
case SCE_PL_STRING_QQ:
722
case SCE_PL_STRING_QX:
723
case SCE_PL_STRING_QW:
725
case SCE_PL_CHARACTER:
726
case SCE_PL_BACKTICKS:
727
if (!Quote.Down && !IsASpace(sc.ch)) {
729
} else if (sc.ch == '\\' && Quote.Up != '\\') {
731
} else if (sc.ch == Quote.Down) {
733
if (Quote.Count == 0)
734
sc.ForwardSetState(SCE_PL_DEFAULT);
735
} else if (sc.ch == Quote.Up) {
739
case SCE_PL_SUB_PROTOTYPE: {
741
// forward scan; must all be valid proto characters
742
while (setSubPrototype.Contains(sc.GetRelative(i)))
744
if (sc.GetRelative(i) == ')') { // valid sub prototype
746
sc.ForwardSetState(SCE_PL_DEFAULT);
748
// abandon prototype, restart from '('
749
sc.ChangeState(SCE_PL_OPERATOR);
750
sc.SetState(SCE_PL_DEFAULT);
753
case SCE_PL_FORMAT: {
755
while (!sc.atLineEnd)
758
sc.GetCurrent(s, sizeof(s));
760
sc.SetState(SCE_PL_DEFAULT);
765
// Needed for specific continuation styles (one follows the other)
767
// continued from SCE_PL_WORD
768
case SCE_PL_FORMAT_IDENT:
769
// occupies HereDoc state 3 to avoid clashing with HERE docs
770
if (IsASpaceOrTab(sc.ch)) { // skip whitespace
771
sc.ChangeState(SCE_PL_DEFAULT);
772
while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
774
sc.SetState(SCE_PL_FORMAT_IDENT);
776
if (setFormatStart.Contains(sc.ch)) { // identifier or '='
780
} while (setFormat.Contains(sc.ch));
782
while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
785
sc.ForwardSetState(SCE_PL_DEFAULT);
788
// invalid identifier; inexact fallback, but hey
789
sc.ChangeState(SCE_PL_IDENTIFIER);
790
sc.SetState(SCE_PL_DEFAULT);
793
sc.ChangeState(SCE_PL_DEFAULT); // invalid indentifier
795
backFlag = BACK_NONE;
799
// Must check end of HereDoc states here before default state is handled
800
if (HereDoc.State == 1 && sc.atLineEnd) {
801
// Begin of here-doc (the line after the here-doc delimiter):
802
// Lexically, the here-doc starts from the next line after the <<, but the
803
// first line of here-doc seems to follow the style of the last EOL sequence
804
int st_new = SCE_PL_HERE_QQ;
806
if (HereDoc.Quoted) {
807
if (sc.state == SCE_PL_HERE_DELIM) {
808
// Missing quote at end of string! We are stricter than perl.
809
// Colour here-doc anyway while marking this bit as an error.
810
sc.ChangeState(SCE_PL_ERROR);
812
switch (HereDoc.Quote) {
813
case '\'': st_new = SCE_PL_HERE_Q ; break;
814
case '"' : st_new = SCE_PL_HERE_QQ; break;
815
case '`' : st_new = SCE_PL_HERE_QX; break;
818
if (HereDoc.Quote == '\\')
819
st_new = SCE_PL_HERE_Q;
823
if (HereDoc.State == 3 && sc.atLineEnd) {
824
// Start of format body.
826
sc.SetState(SCE_PL_FORMAT);
829
// Determine if a new state should be entered.
830
if (sc.state == SCE_PL_DEFAULT) {
831
if (IsADigit(sc.ch) ||
832
(IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) {
833
sc.SetState(SCE_PL_NUMBER);
834
backFlag = BACK_NONE;
835
numState = PERLNUM_DECIMAL;
837
if (sc.ch == '0') { // hex,bin,octal
838
if (sc.chNext == 'x') {
839
numState = PERLNUM_HEX;
840
} else if (sc.chNext == 'b') {
841
numState = PERLNUM_BINARY;
842
} else if (IsADigit(sc.chNext)) {
843
numState = PERLNUM_OCTAL;
845
if (numState != PERLNUM_DECIMAL) {
848
} else if (sc.ch == 'v') { // vector
849
numState = PERLNUM_V_VECTOR;
851
} else if (setWord.Contains(sc.ch)) {
852
// if immediately prefixed by '::', always a bareword
853
sc.SetState(SCE_PL_WORD);
854
if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') {
855
sc.ChangeState(SCE_PL_IDENTIFIER);
857
unsigned int bk = sc.currentPos;
858
unsigned int fw = sc.currentPos + 1;
859
// first check for possible quote-like delimiter
860
if (sc.ch == 's' && !setWord.Contains(sc.chNext)) {
861
sc.ChangeState(SCE_PL_REGSUBST);
863
} else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) {
864
sc.ChangeState(SCE_PL_REGEX);
866
} else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) {
867
sc.ChangeState(SCE_PL_STRING_Q);
869
} else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) {
870
sc.ChangeState(SCE_PL_REGSUBST);
872
} else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) {
873
sc.ChangeState(SCE_PL_REGSUBST);
877
} else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext)
878
&& !setWord.Contains(sc.GetRelative(2))) {
879
if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ);
880
else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX);
881
else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR);
882
else sc.ChangeState(SCE_PL_STRING_QW); // sc.chNext == 'w'
886
} else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition
887
!setWord.Contains(sc.chNext) ||
888
(IsADigit(sc.chPrev) && IsADigit(sc.chNext)))) {
889
sc.ChangeState(SCE_PL_OPERATOR);
891
// if potentially a keyword, scan forward and grab word, then check
892
// if it's really one; if yes, disambiguation test is performed
893
// otherwise it is always a bareword and we skip a lot of scanning
894
if (sc.state == SCE_PL_WORD) {
895
while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw))))
897
if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) {
898
sc.ChangeState(SCE_PL_IDENTIFIER);
901
// if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
902
// for quote-like delimiters/keywords, attempt to disambiguate
903
// to select for bareword, change state -> SCE_PL_IDENTIFIER
904
if (sc.state != SCE_PL_IDENTIFIER && bk > 0) {
905
if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos))
906
sc.ChangeState(SCE_PL_IDENTIFIER);
908
backFlag = BACK_NONE;
909
} else if (sc.ch == '#') {
910
sc.SetState(SCE_PL_COMMENTLINE);
911
} else if (sc.ch == '\"') {
912
sc.SetState(SCE_PL_STRING);
915
backFlag = BACK_NONE;
916
} else if (sc.ch == '\'') {
917
if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) {
919
sc.SetState(SCE_PL_IDENTIFIER);
921
sc.SetState(SCE_PL_CHARACTER);
925
backFlag = BACK_NONE;
926
} else if (sc.ch == '`') {
927
sc.SetState(SCE_PL_BACKTICKS);
930
backFlag = BACK_NONE;
931
} else if (sc.ch == '$') {
932
sc.SetState(SCE_PL_SCALAR);
933
if (sc.chNext == '{') {
934
sc.ForwardSetState(SCE_PL_OPERATOR);
935
} else if (IsASpace(sc.chNext)) {
936
sc.ForwardSetState(SCE_PL_DEFAULT);
939
if (sc.Match('`', '`') || sc.Match(':', ':')) {
943
backFlag = BACK_NONE;
944
} else if (sc.ch == '@') {
945
sc.SetState(SCE_PL_ARRAY);
946
if (setArray.Contains(sc.chNext)) {
947
// no special treatment
948
} else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
950
} else if (sc.chNext == '{' || sc.chNext == '[') {
951
sc.ForwardSetState(SCE_PL_OPERATOR);
953
sc.ChangeState(SCE_PL_OPERATOR);
955
backFlag = BACK_NONE;
956
} else if (setPreferRE.Contains(sc.ch)) {
957
// Explicit backward peeking to set a consistent preferRE for
958
// any slash found, so no longer need to track preferRE state.
959
// Find first previous significant lexed element and interpret.
960
// A few symbols share this code for disambiguation.
961
bool preferRE = false;
962
bool isHereDoc = sc.Match('<', '<');
963
bool hereDocSpace = false; // for: SCALAR [whitespace] '<<'
964
unsigned int bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0;
968
if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
970
skipWhitespaceComment(styler, bk);
972
// avoid backward scanning breakage
975
int bkstyle = styler.StyleAt(bk);
976
int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
978
case SCE_PL_OPERATOR:
980
if (bkch == ')' || bkch == ']') {
982
} else if (bkch == '}') {
983
// backtrack by counting balanced brace pairs
984
// needed to test for variables like ${}, @{} etc.
985
bkstyle = styleBeforeBracePair(styler, bk);
986
if (bkstyle == SCE_PL_SCALAR
987
|| bkstyle == SCE_PL_ARRAY
988
|| bkstyle == SCE_PL_HASH
989
|| bkstyle == SCE_PL_SYMBOLTABLE
990
|| bkstyle == SCE_PL_OPERATOR) {
993
} else if (bkch == '+' || bkch == '-') {
994
if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1))
995
&& bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2)))
996
// exceptions for operators: unary suffixes ++, --
1000
case SCE_PL_IDENTIFIER:
1002
bkstyle = styleCheckIdentifier(styler, bk);
1003
if ((bkstyle == 1) || (bkstyle == 2)) {
1004
// inputsymbol or var with "->" or "::" before identifier
1006
} else if (bkstyle == 3) {
1007
// bare identifier, test cases follow:
1009
// if '/', /PATTERN/ unless digit/space immediately after '/'
1010
// if '//', always expect defined-or operator to follow identifier
1011
if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1013
} else if (sc.ch == '*' || sc.ch == '%') {
1014
if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1016
} else if (sc.ch == '<') {
1017
if (IsASpace(sc.chNext) || sc.chNext == '=')
1022
case SCE_PL_SCALAR: // for $var<< case:
1023
if (isHereDoc && hereDocSpace) // if SCALAR whitespace '<<', *always* a HERE doc
1028
// for HERE docs, always true
1030
// adopt heuristics similar to vim-style rules:
1031
// keywords always forced as /PATTERN/: split, if, elsif, while
1032
// everything else /PATTERN/ unless digit/space immediately after '/'
1033
// for '//', defined-or favoured unless special keywords
1035
while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) {
1038
if (isPerlKeyword(bk, bkend, reWords, styler))
1040
if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1042
} else if (sc.ch == '*' || sc.ch == '%') {
1043
if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1045
} else if (sc.ch == '<') {
1046
if (IsASpace(sc.chNext) || sc.chNext == '=')
1050
// other styles use the default, preferRE=false
1053
case SCE_PL_HERE_QQ:
1054
case SCE_PL_HERE_QX:
1059
backFlag = BACK_NONE;
1060
if (isHereDoc) { // handle '<<', HERE doc
1062
sc.SetState(SCE_PL_HERE_DELIM);
1064
} else { // << operator
1065
sc.SetState(SCE_PL_OPERATOR);
1068
} else if (sc.ch == '*') { // handle '*', typeglob
1070
sc.SetState(SCE_PL_SYMBOLTABLE);
1071
if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1073
} else if (sc.chNext == '{') {
1074
sc.ForwardSetState(SCE_PL_OPERATOR);
1079
sc.SetState(SCE_PL_OPERATOR);
1080
if (sc.chNext == '*') // exponentiation
1083
} else if (sc.ch == '%') { // handle '%', hash
1085
sc.SetState(SCE_PL_HASH);
1086
if (setHash.Contains(sc.chNext)) {
1088
} else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1090
} else if (sc.chNext == '{') {
1091
sc.ForwardSetState(SCE_PL_OPERATOR);
1093
sc.ChangeState(SCE_PL_OPERATOR);
1096
sc.SetState(SCE_PL_OPERATOR);
1098
} else if (sc.ch == '<') { // handle '<', inputsymbol
1101
int i = inputsymbolScan(styler, sc.currentPos, endPos);
1103
sc.SetState(SCE_PL_IDENTIFIER);
1106
sc.SetState(SCE_PL_OPERATOR);
1109
sc.SetState(SCE_PL_OPERATOR);
1111
} else { // handle '/', regexp
1113
sc.SetState(SCE_PL_REGEX);
1116
} else { // / and // operators
1117
sc.SetState(SCE_PL_OPERATOR);
1118
if (sc.chNext == '/') {
1123
} else if (sc.ch == '=' // POD
1124
&& setPOD.Contains(sc.chNext)
1125
&& sc.atLineStart) {
1126
sc.SetState(SCE_PL_POD);
1127
backFlag = BACK_NONE;
1128
} else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) { // extended '-' cases
1129
unsigned int bk = sc.currentPos;
1130
unsigned int fw = 2;
1131
if (setSingleCharOp.Contains(sc.chNext) && // file test operators
1132
!setWord.Contains(sc.GetRelative(2))) {
1133
sc.SetState(SCE_PL_WORD);
1135
// nominally a minus and bareword; find extent of bareword
1136
while (setWord.Contains(sc.GetRelative(fw)))
1138
sc.SetState(SCE_PL_OPERATOR);
1140
// force to bareword for hash key => or {variable literal} cases
1141
if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) {
1142
sc.ChangeState(SCE_PL_IDENTIFIER);
1144
backFlag = BACK_NONE;
1145
} else if (sc.ch == '(' && sc.currentPos > 0) { // '(' or subroutine prototype
1147
if (styleCheckSubPrototype(styler, sc.currentPos - 1)) {
1148
sc.SetState(SCE_PL_SUB_PROTOTYPE);
1149
backFlag = BACK_NONE;
1151
sc.SetState(SCE_PL_OPERATOR);
1153
} else if (setPerlOperator.Contains(sc.ch)) { // operators
1154
sc.SetState(SCE_PL_OPERATOR);
1155
if (sc.Match('.', '.')) { // .. and ...
1157
if (sc.chNext == '.') sc.Forward();
1159
} else if (sc.ch == 4 || sc.ch == 26) { // ^D and ^Z ends valid perl source
1160
sc.SetState(SCE_PL_DATASECTION);
1162
// keep colouring defaults
1170
static bool IsCommentLine(int line, Accessor &styler) {
1171
int pos = styler.LineStart(line);
1172
int eol_pos = styler.LineStart(line + 1) - 1;
1173
for (int i = pos; i < eol_pos; i++) {
1174
char ch = styler[i];
1175
int style = styler.StyleAt(i);
1176
if (ch == '#' && style == SCE_PL_COMMENTLINE)
1178
else if (!IsASpaceOrTab(ch))
1184
static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[],
1186
bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
1187
bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
1188
// Custom folding of POD and packages
1190
// property fold.perl.pod
1191
// Enable folding Pod blocks when using the Perl lexer.
1192
bool foldPOD = styler.GetPropertyInt("fold.perl.pod", 1) != 0;
1194
// property fold.perl.package
1195
// Enable folding packages when using the Perl lexer.
1196
bool foldPackage = styler.GetPropertyInt("fold.perl.package", 1) != 0;
1198
unsigned int endPos = startPos + length;
1199
int visibleChars = 0;
1200
int lineCurrent = styler.GetLine(startPos);
1201
int levelPrev = SC_FOLDLEVELBASE;
1202
if (lineCurrent > 0)
1203
levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
1204
int levelCurrent = levelPrev;
1205
char chNext = styler[startPos];
1206
char chPrev = styler.SafeGetCharAt(startPos - 1);
1207
int styleNext = styler.StyleAt(startPos);
1208
// Used at end of line to determine if the line was a package definition
1209
bool isPackageLine = false;
1210
bool isPodHeading = false;
1211
for (unsigned int i = startPos; i < endPos; i++) {
1213
chNext = styler.SafeGetCharAt(i + 1);
1214
int style = styleNext;
1215
styleNext = styler.StyleAt(i + 1);
1216
bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1217
bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0;
1219
if (foldComment && atEOL && IsCommentLine(lineCurrent, styler))
1221
if (!IsCommentLine(lineCurrent - 1, styler)
1222
&& IsCommentLine(lineCurrent + 1, styler))
1224
else if (IsCommentLine(lineCurrent - 1, styler)
1225
&& !IsCommentLine(lineCurrent+1, styler))
1228
if (style == SCE_PL_OPERATOR) {
1231
} else if (ch == '}') {
1235
// Custom POD folding
1236
if (foldPOD && atLineStart) {
1237
int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
1238
if (style == SCE_PL_POD) {
1239
if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
1241
else if (styler.Match(i, "=cut"))
1243
else if (styler.Match(i, "=head"))
1244
isPodHeading = true;
1245
} else if (style == SCE_PL_DATASECTION) {
1246
if (ch == '=' && isascii(chNext) && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
1248
else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
1250
else if (styler.Match(i, "=head"))
1251
isPodHeading = true;
1252
// if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1253
// reset needed as level test is vs. SC_FOLDLEVELBASE
1254
else if (styler.Match(i, "__END__"))
1255
levelCurrent = SC_FOLDLEVELBASE;
1258
// Custom package folding
1259
if (foldPackage && atLineStart) {
1260
if (style == SCE_PL_WORD && styler.Match(i, "package")) {
1261
isPackageLine = true;
1266
int lev = levelPrev;
1268
lev = levelPrev - 1;
1269
lev |= SC_FOLDLEVELHEADERFLAG;
1270
isPodHeading = false;
1272
// Check if line was a package declaration
1273
// because packages need "special" treatment
1274
if (isPackageLine) {
1275
lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
1276
levelCurrent = SC_FOLDLEVELBASE + 1;
1277
isPackageLine = false;
1279
lev |= levelCurrent << 16;
1280
if (visibleChars == 0 && foldCompact)
1281
lev |= SC_FOLDLEVELWHITEFLAG;
1282
if ((levelCurrent > levelPrev) && (visibleChars > 0))
1283
lev |= SC_FOLDLEVELHEADERFLAG;
1284
if (lev != styler.LevelAt(lineCurrent)) {
1285
styler.SetLevel(lineCurrent, lev);
1288
levelPrev = levelCurrent;
1291
if (!isspacechar(ch))
1295
// Fill in the real level of the next line, keeping the current flags as they will be filled in later
1296
int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1297
styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1300
// Word list descriptions for the Perl lexer, handed to the LexerModule
// registration (lmPerl).
// NOTE(review): the initializer entries are not visible here — confirm the
// list contents and its terminator against the full file.
static const char * const perlWordListDesc[] = {
1305
// Lexer module object for Perl: ties the lexer id SCLEX_PERL and the language
// name "perl" to ColourisePerlDoc, FoldPerlDoc and perlWordListDesc.
// NOTE(review): the trailing 8 is presumably the number of style bits —
// confirm against the LexerModule constructor.
LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc, "perl", FoldPerlDoc, perlWordListDesc, 8);