1
// Scintilla source code edit control
3
** Lexer for a subset of Perl.
5
// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
6
// Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
7
// The License.txt file describes the conditions under which this software may be distributed.
20
#include "Scintilla.h"
23
// Sub-states kept in numState while a numeric literal is being lexed.
// The numeric order is relied upon: the number state tests
// numState <= PERLNUM_FLOAT to decide that a following dot ends the number.
#define PERLNUM_BINARY 1 // order is significant: 1-4 cannot have a dot
25
#define PERLNUM_OCTAL 3
26
#define PERLNUM_FLOAT 4 // actually exponent part
27
#define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings
28
#define PERLNUM_VECTOR 6
29
#define PERLNUM_V_VECTOR 7
32
// Values of backflag: records whether the last significant element lexed
// before the current position was an operator, a keyword, or neither.
#define BACK_NONE 0 // lookback state for bareword disambiguation:
33
#define BACK_OPERATOR 1 // whitespace/comments are insignificant
34
#define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation
36
// Size of the buffer allocated to hold a here-doc delimiter.
#define HERE_DELIM_MAX 256
38
// Returns true when ch is a carriage return or a line feed.
static inline bool isEOLChar(char ch) {
39
return (ch == '\r') || (ch == '\n');
42
// Reports whether strCharSet is found inside the list of letters below.
// The lexer calls this on the character following '-' when recognising
// file test operators.
// NOTE(review): strCharSet is not declared in the lines visible here;
// presumably it is a one-character string built from ch - confirm.
static bool isSingleCharOp(char ch) {
46
return (NULL != strstr("rwxoRWXOezsfdlpSbctugkTBMAC", strCharSet));
49
// Tests ch against the punctuation characters that the lexer colours as
// single-character operators. '%', '*', '<' and '/' are intentionally left
// out of the condition because the caller handles them before calling here.
// NOTE(review): the statements selected by the condition are not visible
// here; presumably true is returned on a match - confirm.
static inline bool isPerlOperator(char ch) {
50
if (ch == '^' || ch == '&' || ch == '\\' ||
51
ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
52
ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
53
ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
54
ch == '>' || ch == ',' ||
55
ch == '?' || ch == '!' || ch == '.' || ch == '~')
57
// these chars are already tested before this call
58
// ch == '%' || ch == '*' || ch == '<' || ch == '/' ||
62
// Copies the document text in [start, end) from styler into s, capped at
// 30 characters, and returns whether keywords.InList(s) accepts it.
// NOTE(review): the declaration and termination of s are not visible here.
static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
64
unsigned int i, len = end - start;
65
// longer words are cut off after 30 characters before the lookup
if (len > 30) { len = 30; }
66
for (i = 0; i < len; i++, start++) s[i] = styler[start];
68
return keywords.InList(s);
71
// Returns true when ch is not alphanumeric and is none of '#', '$', '_'
// or '\''. The lexer uses it to detect the end of a variable name.
static inline bool isEndVar(char ch) {
72
return !isalnum(ch) && ch != '#' && ch != '$' &&
73
ch != '_' && ch != '\'';
77
// Returns true for alphanumerics and '_'. The lexer only treats s, m, q, y,
// tr, qq, qr, qw and qx as quote-like operators when the character that
// follows them fails this test.
static inline bool isNonQuote(char ch) {
78
return isalnum(ch) || ch == '_';
81
// Picks the style used to colour a completed number from the lexer's
// numState value; the vector forms and PERLNUM_BAD are handled separately
// from the remaining number states.
// NOTE(review): the returned styles are on lines not visible here.
static inline char actualNumStyle(int numberStyle) {
82
if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
84
} else if (numberStyle == PERLNUM_BAD) {
90
// Compares the characters of val with the document text starting at pos.
// The lexer uses the result as a yes/no answer when looking for "__DATA__",
// "__END__", "=cut" and here-doc delimiters.
// NOTE(review): the return statements are not visible here.
static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
91
// val would reach or run past lengthDoc
if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
95
// compare one character of val with the document and advance pos
if (*val != styler[pos++]) {
103
static char opposite(char ch) {
115
static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
116
WordList *keywordlists[], Accessor &styler) {
118
// Lexer for perl often has to backtrack to start of current style to determine
119
// which characters are being used as quotes, how deeply nested is the
120
// start position and what the termination string is for here documents
122
WordList &keywords = *keywordlists[0];
126
int State; // 0: '<<' encountered
127
// 1: collect the delimiter
128
// 2: here doc text (lines after the delimiter)
129
char Quote; // the char after '<<'
130
bool Quoted; // true if Quote in ('\'','"','`')
131
int DelimiterLength; // strlen(Delimiter)
132
char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf
138
Delimiter = new char[HERE_DELIM_MAX];
145
HereDocCls HereDoc; // TODO: FIFO for stacked here-docs
170
int state = initStyle;
171
char numState = PERLNUM_DECIMAL;
173
unsigned int lengthDoc = startPos + length;
174
//int sookedpos = 0; // these have no apparent use, see POD state
176
//sooked[sookedpos] = '\0';
178
// If in a long distance lexical state, seek to the beginning to find quote characters
179
// Perl strings can be multi-line with embedded newlines, so backtrack.
180
// Perl numbers have additional state during lexing, so backtrack too.
181
if (state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX) {
182
while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_HERE_DELIM)) {
185
startPos = styler.LineStart(styler.GetLine(startPos));
186
state = styler.StyleAt(startPos - 1);
188
if ( state == SCE_PL_STRING_Q
189
|| state == SCE_PL_STRING_QQ
190
|| state == SCE_PL_STRING_QX
191
|| state == SCE_PL_STRING_QR
192
|| state == SCE_PL_STRING_QW
193
|| state == SCE_PL_REGEX
194
|| state == SCE_PL_REGSUBST
195
|| state == SCE_PL_STRING
196
|| state == SCE_PL_BACKTICKS
197
|| state == SCE_PL_CHARACTER
198
|| state == SCE_PL_NUMBER
199
|| state == SCE_PL_IDENTIFIER
200
|| state == SCE_PL_ERROR
202
while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) {
205
state = SCE_PL_DEFAULT;
208
// lookback at start of lexing to set proper state for backflag
209
// after this, they are updated when elements are lexed
210
int backflag = BACK_NONE;
211
unsigned int backPos = startPos;
214
int sty = SCE_PL_DEFAULT;
215
while ((backPos > 0) && (sty = styler.StyleAt(backPos),
216
sty == SCE_PL_DEFAULT || sty == SCE_PL_COMMENTLINE))
218
if (sty == SCE_PL_OPERATOR)
219
backflag = BACK_OPERATOR;
220
else if (sty == SCE_PL_WORD)
221
backflag = BACK_KEYWORD;
224
styler.StartAt(startPos);
225
char chPrev = styler.SafeGetCharAt(startPos - 1);
228
char chNext = styler[startPos];
229
styler.StartSegment(startPos);
231
for (unsigned int i = startPos; i < lengthDoc; i++) {
233
// if the current character is not consumed due to the completion of an
234
// earlier style, lexing can be restarted via a simple goto
236
chNext = styler.SafeGetCharAt(i + 1);
237
char chNext2 = styler.SafeGetCharAt(i + 2);
239
if (styler.IsLeadByte(ch)) {
240
chNext = styler.SafeGetCharAt(i + 2);
245
if ((chPrev == '\r' && ch == '\n')) { // skip on DOS/Windows
246
styler.ColourTo(i, state);
251
if (HereDoc.State == 1 && isEOLChar(ch)) {
252
// Begin of here-doc (the line after the here-doc delimiter):
253
// Lexically, the here-doc starts from the next line after the <<, but the
254
// first line of here-doc seems to follow the style of the last EOL sequence
256
if (HereDoc.Quoted) {
257
if (state == SCE_PL_HERE_DELIM) {
258
// Missing quote at end of string! We are stricter than perl.
259
// Colour here-doc anyway while marking this bit as an error.
260
state = SCE_PL_ERROR;
262
styler.ColourTo(i - 1, state);
263
switch (HereDoc.Quote) {
265
state = SCE_PL_HERE_Q ;
268
state = SCE_PL_HERE_QQ;
271
state = SCE_PL_HERE_QX;
275
styler.ColourTo(i - 1, state);
276
switch (HereDoc.Quote) {
278
state = SCE_PL_HERE_Q ;
281
state = SCE_PL_HERE_QQ;
286
if (state == SCE_PL_DEFAULT) {
287
if (isdigit(ch) || (isdigit(chNext) &&
288
(ch == '.' || ch == 'v'))) {
289
state = SCE_PL_NUMBER;
290
backflag = BACK_NONE;
291
numState = PERLNUM_DECIMAL;
293
if (ch == '0') { // hex,bin,octal
295
numState = PERLNUM_HEX;
296
} else if (chNext == 'b') {
297
numState = PERLNUM_BINARY;
298
} else if (isdigit(chNext)) {
299
numState = PERLNUM_OCTAL;
301
if (numState != PERLNUM_DECIMAL) {
306
} else if (ch == 'v') { // vector
307
numState = PERLNUM_V_VECTOR;
309
} else if (iswordstart(ch)) {
310
// if immediately prefixed by '::', always a bareword
312
if (chPrev == ':' && styler.SafeGetCharAt(i - 2) == ':') {
313
state = SCE_PL_IDENTIFIER;
315
unsigned int kw = i + 1;
316
// first check for possible quote-like delimiter
317
if (ch == 's' && !isNonQuote(chNext)) {
318
state = SCE_PL_REGSUBST;
320
} else if (ch == 'm' && !isNonQuote(chNext)) {
321
state = SCE_PL_REGEX;
323
} else if (ch == 'q' && !isNonQuote(chNext)) {
324
state = SCE_PL_STRING_Q;
326
} else if (ch == 'y' && !isNonQuote(chNext)) {
327
state = SCE_PL_REGSUBST;
329
} else if (ch == 't' && chNext == 'r' && !isNonQuote(chNext2)) {
330
state = SCE_PL_REGSUBST;
333
} else if (ch == 'q' && (chNext == 'q' || chNext == 'r' || chNext == 'w' || chNext == 'x') && !isNonQuote(chNext2)) {
334
if (chNext == 'q') state = SCE_PL_STRING_QQ;
335
else if (chNext == 'x') state = SCE_PL_STRING_QX;
336
else if (chNext == 'r') state = SCE_PL_STRING_QR;
337
else if (chNext == 'w') state = SCE_PL_STRING_QW;
340
} else if (ch == 'x' && (chNext == '=' || // repetition
341
(chNext != '_' && !isalnum(chNext)) ||
342
(isdigit(chPrev) && isdigit(chNext)))) {
343
state = SCE_PL_OPERATOR;
345
// if potentially a keyword, scan forward and grab word, then check
346
// if it's really one; if yes, disambiguation test is performed
347
// otherwise it is always a bareword and we skip a lot of scanning
348
// note: keywords assumed to be limited to [_a-zA-Z] only
349
if (state == SCE_PL_WORD) {
350
while (iswordstart(styler.SafeGetCharAt(kw))) kw++;
351
if (!isPerlKeyword(styler.GetStartSegment(), kw, keywords, styler)) {
352
state = SCE_PL_IDENTIFIER;
355
// if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
356
// for quote-like delimiters/keywords, attempt to disambiguate
357
// to select for bareword, change state -> SCE_PL_IDENTIFIER
358
if (state != SCE_PL_IDENTIFIER && i > 0) {
360
bool moreback = false; // true if passed newline/comments
361
bool brace = false; // true if opening brace found
363
// first look backwards past whitespace/comments for EOLs
364
// if BACK_NONE, neither operator nor keyword, so skip test
365
if (backflag != BACK_NONE) {
366
while (--j > backPos) {
367
if (isEOLChar(styler.SafeGetCharAt(j)))
370
ch2 = styler.SafeGetCharAt(j);
371
if (ch2 == '{' && !moreback) {
372
// {bareword: possible variable spec
374
} else if ((ch2 == '&')
375
// &bareword: subroutine call
376
|| (ch2 == '>' && styler.SafeGetCharAt(j - 1) == '-')
377
// ->bareword: part of variable spec
378
|| (ch2 == 'b' && styler.Match(j - 2, "su"))) {
379
// sub bareword: subroutine declaration
380
// (implied BACK_KEYWORD, no keywords end in 'sub'!)
381
state = SCE_PL_IDENTIFIER;
383
// if status still ambiguous, look forward after word past
384
// tabs/spaces only; if ch2 isn't one of '[{(,' it can never
385
// match anything, so skip the whole thing
387
if (state != SCE_PL_IDENTIFIER
388
&& (ch2 == '{' || ch2 == '(' || ch2 == '['|| ch2 == ',')
390
while (ch2 = styler.SafeGetCharAt(j),
391
(ch2 == ' ' || ch2 == '\t') && j < lengthDoc) {
394
if ((ch2 == '}' && brace)
395
// {bareword}: variable spec
396
|| (ch2 == '=' && styler.SafeGetCharAt(j + 1) == '>')) {
397
// [{(, bareword=>: hash literal
398
state = SCE_PL_IDENTIFIER;
403
backflag = BACK_NONE;
404
// an identifier or bareword
405
if (state == SCE_PL_IDENTIFIER) {
406
if ((!iswordchar(chNext) && chNext != '\'')
407
|| (chNext == '.' && chNext2 == '.')) {
408
// We need that if length of word == 1!
409
// This test is copied from the SCE_PL_WORD handler.
410
styler.ColourTo(i, SCE_PL_IDENTIFIER);
411
state = SCE_PL_DEFAULT;
414
} else if (state == SCE_PL_WORD) {
416
if (ch == '_' && chNext == '_' &&
417
(isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__")
418
|| isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__"))) {
419
styler.ColourTo(i, SCE_PL_DATASECTION);
420
state = SCE_PL_DATASECTION;
422
styler.ColourTo(i, SCE_PL_WORD);
423
state = SCE_PL_DEFAULT;
424
backflag = BACK_KEYWORD;
427
ch = styler.SafeGetCharAt(i);
428
chNext = styler.SafeGetCharAt(i + 1);
429
// a repetition operator 'x'
430
} else if (state == SCE_PL_OPERATOR) {
431
styler.ColourTo(i, SCE_PL_OPERATOR);
432
state = SCE_PL_DEFAULT;
433
// quote-like delimiter, skip one char if double-char delimiter
436
chNext = styler.SafeGetCharAt(i + 1);
438
} else if (ch == '#') {
439
state = SCE_PL_COMMENTLINE;
440
} else if (ch == '\"') {
441
state = SCE_PL_STRING;
444
backflag = BACK_NONE;
445
} else if (ch == '\'') {
448
styler.ColourTo(i, state);
450
state = SCE_PL_CHARACTER;
454
backflag = BACK_NONE;
455
} else if (ch == '`') {
456
state = SCE_PL_BACKTICKS;
459
backflag = BACK_NONE;
460
} else if (ch == '$') {
461
if ((chNext == '{') || isspacechar(chNext)) {
462
styler.ColourTo(i, SCE_PL_SCALAR);
464
state = SCE_PL_SCALAR;
465
if (chNext == '`' && chNext2 == '`') {
467
ch = styler.SafeGetCharAt(i);
468
chNext = styler.SafeGetCharAt(i + 1);
475
backflag = BACK_NONE;
476
} else if (ch == '@') {
477
if (isalpha(chNext) || chNext == '#' || chNext == '$'
478
|| chNext == '_' || chNext == '+' || chNext == '-') {
479
state = SCE_PL_ARRAY;
480
} else if (chNext != '{' && chNext != '[') {
481
styler.ColourTo(i, SCE_PL_ARRAY);
483
styler.ColourTo(i, SCE_PL_ARRAY);
485
backflag = BACK_NONE;
486
} else if (ch == '%') {
487
if (isalpha(chNext) || chNext == '#' || chNext == '$'
488
|| chNext == '_' || chNext == '!' || chNext == '^') {
493
} else if (chNext == '{') {
494
styler.ColourTo(i, SCE_PL_HASH);
496
styler.ColourTo(i, SCE_PL_OPERATOR);
498
backflag = BACK_NONE;
499
} else if (ch == '*') {
503
if (isalpha(chNext) || chNext == '_' ||
504
NULL != strstr("^/|,\\\";#%^:?<>)[]", strch)) {
505
state = SCE_PL_SYMBOLTABLE;
509
} else if (chNext == '{') {
510
styler.ColourTo(i, SCE_PL_SYMBOLTABLE);
512
if (chNext == '*') { // exponentiation
517
styler.ColourTo(i, SCE_PL_OPERATOR);
519
backflag = BACK_NONE;
520
} else if (ch == '/' || (ch == '<' && chNext == '<')) {
521
// Explicit backward peeking to set a consistent preferRE for
522
// any slash found, so no longer need to track preferRE state.
523
// Find first previous significant lexed element and interpret.
524
// Test for HERE doc start '<<' shares this code, helps to
525
// determine if it should be an operator.
526
bool preferRE = false;
527
bool isHereDoc = (ch == '<');
528
bool hereDocSpace = false; // these are for corner case:
529
bool hereDocScalar = false; // SCALAR [whitespace] '<<'
530
unsigned int bk = (i > 0)? i - 1: 0;
533
if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
535
while ((bk > 0) && (styler.StyleAt(bk) == SCE_PL_DEFAULT ||
536
styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) {
540
// position 0 won't really be checked; rarely happens
541
// hard to fix due to an unsigned index i
544
int bkstyle = styler.StyleAt(bk);
545
bkch = styler.SafeGetCharAt(bk);
547
case SCE_PL_OPERATOR:
549
if (bkch == ')' || bkch == ']') {
551
} else if (bkch == '}') {
552
// backtrack further, count balanced brace pairs
553
// if a brace pair found, see if it's a variable
556
bkstyle = styler.StyleAt(bk);
557
if (bkstyle == SCE_PL_OPERATOR) {
558
bkch = styler.SafeGetCharAt(bk);
559
if (bkch == ';') { // early out
561
} else if (bkch == '}') {
563
} else if (bkch == '{') {
564
if (--braceCount == 0)
570
// at beginning, true
571
} else if (braceCount == 0) {
572
// balanced { found, bk>0, skip more whitespace
573
if (styler.StyleAt(--bk) == SCE_PL_DEFAULT) {
575
bkstyle = styler.StyleAt(--bk);
576
if (bkstyle != SCE_PL_DEFAULT)
580
bkstyle = styler.StyleAt(bk);
581
if (bkstyle == SCE_PL_SCALAR
582
|| bkstyle == SCE_PL_ARRAY
583
|| bkstyle == SCE_PL_HASH
584
|| bkstyle == SCE_PL_SYMBOLTABLE
585
|| bkstyle == SCE_PL_OPERATOR) {
591
case SCE_PL_IDENTIFIER:
593
if (bkch == '>') { // inputsymbol
597
// backtrack to find "->" or "::" before identifier
598
while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
602
bkstyle = styler.StyleAt(bk);
603
if (bkstyle == SCE_PL_DEFAULT ||
604
bkstyle == SCE_PL_COMMENTLINE) {
605
} else if (bkstyle == SCE_PL_OPERATOR) {
606
// gcc 3.2.3 bloats if more compact form used
607
bkch = styler.SafeGetCharAt(bk);
608
if (bkch == '>') { // "->"
609
if (styler.SafeGetCharAt(bk - 1) == '-') {
613
} else if (bkch == ':') { // "::"
614
if (styler.SafeGetCharAt(bk - 1) == ':') {
619
} else {// bare identifier, usually a function call but Perl
620
// optimizes them as pseudo-constants, then the next
621
// '/' will be a divide; favour divide over regex
622
// if there is a whitespace after the '/'
623
if (isspacechar(chNext)) {
631
case SCE_PL_SCALAR: // for $var<< case
632
hereDocScalar = true;
634
// other styles use the default, preferRE=false
637
case SCE_PL_POD_VERB:
645
if (isHereDoc) { // handle HERE doc
646
// if SCALAR whitespace '<<', *always* a HERE doc
647
if (preferRE || (hereDocSpace && hereDocScalar)) {
648
state = SCE_PL_HERE_DELIM;
650
} else { // << operator
654
styler.ColourTo(i, SCE_PL_OPERATOR);
656
} else { // handle regexp
658
state = SCE_PL_REGEX;
661
} else { // / operator
662
styler.ColourTo(i, SCE_PL_OPERATOR);
665
backflag = BACK_NONE;
666
} else if (ch == '<') {
667
// looks forward for matching > on same line
668
unsigned int fw = i + 1;
669
while (fw < lengthDoc) {
670
char fwch = styler.SafeGetCharAt(fw);
672
if (styler.SafeGetCharAt(fw-1) != '\\' ||
673
styler.SafeGetCharAt(fw-2) != '\\')
675
} else if (isEOLChar(fwch) || isspacechar(fwch)) {
677
} else if (fwch == '>') {
678
if ((fw - i) == 2 && // '<=>' case
679
styler.SafeGetCharAt(fw-1) == '=') {
680
styler.ColourTo(fw, SCE_PL_OPERATOR);
682
styler.ColourTo(fw, SCE_PL_IDENTIFIER);
686
chNext = styler.SafeGetCharAt(i+1);
690
styler.ColourTo(i, SCE_PL_OPERATOR);
691
backflag = BACK_NONE;
692
} else if (ch == '=' // POD
694
&& (isEOLChar(chPrev))) {
696
backflag = BACK_NONE;
698
//sooked[sookedpos] = '\0';
699
} else if (ch == '-' // file test operators
700
&& isSingleCharOp(chNext)
701
&& !isalnum((chNext2 = styler.SafeGetCharAt(i+2)))) {
702
styler.ColourTo(i + 1, SCE_PL_WORD);
703
state = SCE_PL_DEFAULT;
707
backflag = BACK_NONE;
708
} else if (isPerlOperator(ch)) {
709
if (ch == '.' && chNext == '.') { // .. and ...
711
if (chNext2 == '.') { i++; }
712
state = SCE_PL_DEFAULT;
713
ch = styler.SafeGetCharAt(i);
714
chNext = styler.SafeGetCharAt(i + 1);
716
styler.ColourTo(i, SCE_PL_OPERATOR);
717
backflag = BACK_OPERATOR;
720
// keep colouring defaults to make restart easier
721
styler.ColourTo(i, SCE_PL_DEFAULT);
723
} else if (state == SCE_PL_NUMBER) {
726
// double dot is always an operator
728
} else if (numState <= PERLNUM_FLOAT) {
729
// non-decimal number or float exponent, consume next dot
730
styler.ColourTo(i - 1, SCE_PL_NUMBER);
731
styler.ColourTo(i, SCE_PL_OPERATOR);
732
state = SCE_PL_DEFAULT;
733
} else { // decimal or vectors allows dots
735
if (numState == PERLNUM_DECIMAL) {
737
if (isdigit(chNext)) { // really a vector
738
numState = PERLNUM_VECTOR;
739
} else // number then dot
743
if (!isdigit(chNext)) // vector then dot
747
} else if (ch == '_' && numState == PERLNUM_DECIMAL) {
748
if (!isdigit(chNext)) {
751
} else if (isalnum(ch)) {
752
if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
754
if (dotCount == 0) { // change to word
755
state = SCE_PL_IDENTIFIER;
756
} else { // vector then word
760
} else if (numState == PERLNUM_DECIMAL) {
761
if (ch == 'E' || ch == 'e') { // exponent
762
numState = PERLNUM_FLOAT;
763
if (chNext == '+' || chNext == '-') {
768
} else if (!isdigit(ch)) { // number then word
771
} else if (numState == PERLNUM_FLOAT) {
772
if (!isdigit(ch)) { // float then word
775
} else if (numState == PERLNUM_OCTAL) {
779
numState = PERLNUM_BAD;
780
} else if (numState == PERLNUM_BINARY) {
784
numState = PERLNUM_BAD;
785
} else if (numState == PERLNUM_HEX) {
786
int ch2 = toupper(ch);
787
if (!isdigit(ch) && !(ch2 >= 'A' && ch2 <= 'F'))
789
} else {//(numState == PERLNUM_BAD) {
794
// complete current number or vector
796
styler.ColourTo(i - 1, actualNumStyle(numState));
797
state = SCE_PL_DEFAULT;
800
} else if (state == SCE_PL_IDENTIFIER) {
801
if (!iswordstart(chNext) && chNext != '\'') {
802
styler.ColourTo(i, SCE_PL_IDENTIFIER);
803
state = SCE_PL_DEFAULT;
807
if (state == SCE_PL_COMMENTLINE) {
809
styler.ColourTo(i - 1, state);
810
state = SCE_PL_DEFAULT;
812
} else if (isEOLChar(chNext)) {
813
styler.ColourTo(i, state);
814
state = SCE_PL_DEFAULT;
816
} else if (state == SCE_PL_HERE_DELIM) {
818
// From perldata.pod:
819
// ------------------
820
// A line-oriented form of quoting is based on the shell ``here-doc''
822
// Following a << you specify a string to terminate the quoted material,
823
// and all lines following the current line down to the terminating
824
// string are the value of the item.
825
// The terminating string may be either an identifier (a word),
826
// or some quoted text.
827
// If quoted, the type of quotes you use determines the treatment of
828
// the text, just as in regular quoting.
829
// An unquoted identifier works like double quotes.
830
// There must be no space between the << and the identifier.
831
// (If you put a space it will be treated as a null identifier,
832
// which is valid, and matches the first empty line.)
833
// (This is deprecated, -w warns of this syntax)
834
// The terminating string must appear by itself (unquoted and with no
835
// surrounding whitespace) on the terminating line.
839
// Specifier format is: <<[-]WORD
840
// Optional '-' is for removal of leading tabs from here-doc.
841
// Whitespace acceptable after <<[-] operator.
843
if (HereDoc.State == 0) { // '<<' encountered
844
bool gotspace = false;
845
unsigned int oldi = i;
846
if (chNext == ' ' || chNext == '\t') {
847
// skip whitespace; legal for quoted delimiters
851
chNext = styler.SafeGetCharAt(i + 1);
852
} while ((i + 1 < lengthDoc) && (chNext == ' ' || chNext == '\t'));
853
chNext2 = styler.SafeGetCharAt(i + 2);
856
HereDoc.Quote = chNext;
857
HereDoc.Quoted = false;
858
HereDoc.DelimiterLength = 0;
859
HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
860
if (chNext == '\'' || chNext == '"' || chNext == '`') {
861
// a quoted here-doc delimiter
865
HereDoc.Quoted = true;
866
} else if (isspacechar(chNext) || isdigit(chNext) || chNext == '\\'
867
|| chNext == '=' || chNext == '$' || chNext == '@'
868
|| ((isalpha(chNext) || chNext == '_') && gotspace)) {
869
// left shift << or <<= operator cases
870
// restore position if operator
872
styler.ColourTo(i, SCE_PL_OPERATOR);
873
state = SCE_PL_DEFAULT;
877
// an unquoted here-doc delimiter, no special handling
878
// (cannot be prefixed by spaces/tabs), or
879
// symbols terminates; deprecated zero-length delimiter
882
} else if (HereDoc.State == 1) { // collect the delimiter
883
backflag = BACK_NONE;
884
if (HereDoc.Quoted) { // a quoted here-doc delimiter
885
if (ch == HereDoc.Quote) { // closing quote => end of delimiter
886
styler.ColourTo(i, state);
887
state = SCE_PL_DEFAULT;
889
if (ch == '\\' && chNext == HereDoc.Quote) { // escaped quote
894
HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
895
HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
897
} else { // an unquoted here-doc delimiter
898
if (isalnum(ch) || ch == '_') {
899
HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
900
HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
902
styler.ColourTo(i - 1, state);
903
state = SCE_PL_DEFAULT;
907
if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
908
styler.ColourTo(i - 1, state);
909
state = SCE_PL_ERROR;
913
} else if (HereDoc.State == 2) {
914
// state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX
915
if (isEOLChar(chPrev) && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
916
i += HereDoc.DelimiterLength;
917
chPrev = styler.SafeGetCharAt(i - 1);
918
ch = styler.SafeGetCharAt(i);
920
styler.ColourTo(i - 1, state);
921
state = SCE_PL_DEFAULT;
922
backflag = BACK_NONE;
926
chNext = styler.SafeGetCharAt(i + 1);
928
} else if (state == SCE_PL_POD
929
|| state == SCE_PL_POD_VERB) {
930
if (isEOLChar(chPrev)) {
931
if (ch == ' ' || ch == '\t') {
932
styler.ColourTo(i - 1, state);
933
state = SCE_PL_POD_VERB;
935
styler.ColourTo(i - 1, state);
938
if (isMatch(styler, lengthDoc, i, "=cut")) {
939
styler.ColourTo(i - 1 + 4, state);
941
state = SCE_PL_DEFAULT;
942
ch = styler.SafeGetCharAt(i);
943
//chNext = styler.SafeGetCharAt(i + 1);
949
} else if (state == SCE_PL_SCALAR // variable names
950
|| state == SCE_PL_ARRAY
951
|| state == SCE_PL_HASH
952
|| state == SCE_PL_SYMBOLTABLE) {
953
if (ch == ':' && chNext == ':') { // skip ::
958
else if (isEndVar(ch)) {
959
if (i == (styler.GetStartSegment() + 1)) {
960
// Special variable: $(, $_ etc.
961
styler.ColourTo(i, state);
962
state = SCE_PL_DEFAULT;
964
styler.ColourTo(i - 1, state);
965
state = SCE_PL_DEFAULT;
969
} else if (state == SCE_PL_REGEX
970
|| state == SCE_PL_STRING_QR
972
if (!Quote.Up && !isspacechar(ch)) {
974
} else if (ch == '\\' && Quote.Up != '\\') {
975
// SG: Is it safe to skip *every* escaped char?
978
chNext = styler.SafeGetCharAt(i + 1);
980
if (ch == Quote.Down /*&& chPrev != '\\'*/) {
982
if (Quote.Count == 0) {
984
if (Quote.Up == Quote.Down) {
988
if (!isalpha(chNext)) {
989
if (Quote.Rep <= 0) {
990
styler.ColourTo(i, state);
991
state = SCE_PL_DEFAULT;
995
} else if (ch == Quote.Up /*&& chPrev != '\\'*/) {
997
} else if (!isalpha(chNext)) {
998
if (Quote.Rep <= 0) {
999
styler.ColourTo(i, state);
1000
state = SCE_PL_DEFAULT;
1005
} else if (state == SCE_PL_REGSUBST) {
1006
if (!Quote.Up && !isspacechar(ch)) {
1008
} else if (ch == '\\' && Quote.Up != '\\') {
1009
// SG: Is it safe to skip *every* escaped char?
1012
chNext = styler.SafeGetCharAt(i + 1);
1014
if (Quote.Count == 0 && Quote.Rep == 1) {
1015
/* We matched something like s(...) or tr{...}
1016
* and are looking for the next matcher characters,
1017
* which could be either bracketed ({...}) or non-bracketed
1020
* Number-signs are problematic. If they occur after
1021
* the close of the first part, treat them like
1022
* a Quote.Up char, even if they actually start comments.
1024
* If we find an alnum, we end the regsubst, and punt.
1026
* Eric Promislow ericp@activestate.com Aug 9,2000
1028
if (isspacechar(ch)) {
1031
else if (isalnum(ch)) {
1032
styler.ColourTo(i, state);
1033
state = SCE_PL_DEFAULT;
1038
} else if (ch == Quote.Down /*&& chPrev != '\\'*/) {
1040
if (Quote.Count == 0) {
1043
if (!isalpha(chNext)) {
1044
if (Quote.Rep <= 0) {
1045
styler.ColourTo(i, state);
1046
state = SCE_PL_DEFAULT;
1050
if (Quote.Up == Quote.Down) {
1053
} else if (ch == Quote.Up /*&& chPrev != '\\'*/) {
1055
} else if (!isalpha(chNext)) {
1056
if (Quote.Rep <= 0) {
1057
styler.ColourTo(i, state);
1058
state = SCE_PL_DEFAULT;
1063
} else if (state == SCE_PL_STRING_Q
1064
|| state == SCE_PL_STRING_QQ
1065
|| state == SCE_PL_STRING_QX
1066
|| state == SCE_PL_STRING_QW
1067
|| state == SCE_PL_STRING
1068
|| state == SCE_PL_CHARACTER
1069
|| state == SCE_PL_BACKTICKS
1071
if (!Quote.Down && !isspacechar(ch)) {
1073
} else if (ch == '\\' && Quote.Up != '\\') {
1076
chNext = styler.SafeGetCharAt(i + 1);
1077
} else if (ch == Quote.Down) {
1079
if (Quote.Count == 0) {
1081
if (Quote.Rep <= 0) {
1082
styler.ColourTo(i, state);
1083
state = SCE_PL_DEFAULT;
1086
if (Quote.Up == Quote.Down) {
1090
} else if (ch == Quote.Up) {
1095
if (state == SCE_PL_ERROR) {
1100
styler.ColourTo(lengthDoc - 1, state);
1103
// Examines the characters of the given line, from its first position up to
// one position before the start of the following line, looking for a '#'
// that is styled SCE_PL_COMMENTLINE while passing over spaces and tabs.
// The folder uses it to detect runs of consecutive comment lines.
// NOTE(review): the statements taken on each branch are not visible here;
// presumably true for the '#' case and false for any other character - confirm.
static bool IsCommentLine(int line, Accessor &styler) {
1104
int pos = styler.LineStart(line);
1105
int eol_pos = styler.LineStart(line + 1) - 1;
1106
for (int i = pos; i < eol_pos; i++) {
1107
char ch = styler[i];
1108
int style = styler.StyleAt(i);
1109
if (ch == '#' && style == SCE_PL_COMMENTLINE)
1111
else if (ch != ' ' && ch != '\t')
1117
// Folder for Perl documents. Walks the characters in
// [startPos, startPos + length) and stores a fold level for each line via
// styler.SetLevel(). Folding is driven by braces styled as operators and,
// depending on properties, by comment runs ("fold.comment"), POD sections
// ("fold.perl.pod") and package statements ("fold.perl.package");
// "fold.compact" controls whether lines without visible characters get
// SC_FOLDLEVELWHITEFLAG.
static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[],
1119
bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
1120
bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
1121
// Custom folding of POD and packages
1122
bool foldPOD = styler.GetPropertyInt("fold.perl.pod", 1) != 0;
1123
bool foldPackage = styler.GetPropertyInt("fold.perl.package", 1) != 0;
1124
unsigned int endPos = startPos + length;
1125
int visibleChars = 0;
1126
int lineCurrent = styler.GetLine(startPos);
1127
int levelPrev = SC_FOLDLEVELBASE;
1128
if (lineCurrent > 0)
1129
// the level a line passes on to the next one is kept in the upper
// 16 bits of its stored value (see "lev |= levelCurrent << 16" below)
levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
1130
int levelCurrent = levelPrev;
1131
char chNext = styler[startPos];
1132
char chPrev = styler.SafeGetCharAt(startPos - 1);
1133
int styleNext = styler.StyleAt(startPos);
1134
// Used at end of line to determine if the line was a package definition
1135
bool isPackageLine = false;
1136
bool isPodHeading = false;
1137
for (unsigned int i = startPos; i < endPos; i++) {
1139
chNext = styler.SafeGetCharAt(i + 1);
1140
int style = styleNext;
1141
styleNext = styler.StyleAt(i + 1);
1142
// a lone '\r' or any '\n' ends the line; the '\r' of a "\r\n" pair does not
bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1143
bool atLineStart = isEOLChar(chPrev) || i == 0;
1145
// Comment folding: compare this line with its neighbours to find the
// first and last line of a run of comment lines
if (foldComment && atEOL && IsCommentLine(lineCurrent, styler))
1147
if (!IsCommentLine(lineCurrent - 1, styler)
1148
&& IsCommentLine(lineCurrent + 1, styler))
1150
else if (IsCommentLine(lineCurrent - 1, styler)
1151
&& !IsCommentLine(lineCurrent+1, styler))
1154
// Brace folding: only braces the Perl lexer styled as operators count
if (style == SCE_PL_OPERATOR) {
1157
} else if (ch == '}') {
1161
// Custom POD folding
1162
if (foldPOD && atLineStart) {
1163
int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
1164
if (style == SCE_PL_POD) {
1165
if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
1167
else if (styler.Match(i, "=cut"))
1169
else if (styler.Match(i, "=head"))
1170
isPodHeading = true;
1171
} else if (style == SCE_PL_DATASECTION) {
1172
if (ch == '=' && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
1174
else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
1176
else if (styler.Match(i, "=head"))
1177
isPodHeading = true;
1178
// if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1179
// reset needed as level test is vs. SC_FOLDLEVELBASE
1180
else if (styler.Match(i, "__END__"))
1181
levelCurrent = SC_FOLDLEVELBASE;
1184
// Custom package folding
1185
if (foldPackage && atLineStart) {
1186
if (style == SCE_PL_WORD && styler.Match(i, "package")) {
1187
isPackageLine = true;
1192
int lev = levelPrev;
1194
lev = levelPrev - 1;
1195
lev |= SC_FOLDLEVELHEADERFLAG;
1196
isPodHeading = false;
1198
// Check if line was a package declaration
1199
// because packages need "special" treatment
1200
if (isPackageLine) {
1201
// a package line becomes a base-level fold header and the
// lines after it start one level deeper
lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
1202
levelCurrent = SC_FOLDLEVELBASE + 1;
1203
isPackageLine = false;
1205
lev |= levelCurrent << 16;
1206
if (visibleChars == 0 && foldCompact)
1207
lev |= SC_FOLDLEVELWHITEFLAG;
1208
if ((levelCurrent > levelPrev) && (visibleChars > 0))
1209
lev |= SC_FOLDLEVELHEADERFLAG;
1210
// only write the level when it differs from the stored one
if (lev != styler.LevelAt(lineCurrent)) {
1211
styler.SetLevel(lineCurrent, lev);
1214
levelPrev = levelCurrent;
1217
if (!isspacechar(ch))
1221
// Fill in the real level of the next line, keeping the current flags as they will be filled in later
1222
int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1223
styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1226
static const char * const perlWordListDesc[] = {
1231
LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc, "perl", FoldPerlDoc, perlWordListDesc);