~ubuntu-branches/ubuntu/utopic/geany/utopic

« back to all changes in this revision

Viewing changes to scintilla/LexPerl.cxx

  • Committer: Package Import Robot
  • Author(s): Chow Loong Jin
  • Date: 2011-12-10 07:43:26 UTC
  • mfrom: (3.3.7 sid)
  • Revision ID: package-import@ubuntu.com-20111210074326-s8yqbew5i20h33tf
Tags: 0.21-1ubuntu1
* Merge from Debian Unstable, remaining changes:
  - debian/patches/20_use_evince_viewer.patch:
     + use evince as viewer for pdf and dvi files
  - debian/patches/20_use_x_terminal_emulator.patch:
     + use x-terminal-emulator as terminal
  - debian/control
     + Add breaks on geany-plugins-common << 0.20
* Also fixes bugs:
  - Filter for MATLAB/Octave files filters everything (LP: 885505)

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
// Scintilla source code edit control
2
 
/** @file LexPerl.cxx
3
 
 ** Lexer for Perl.
4
 
 **/
5
 
// Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
6
 
// Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
7
 
// The License.txt file describes the conditions under which this software may be distributed.
8
 
 
9
 
#include <stdlib.h>
10
 
#include <string.h>
11
 
#include <ctype.h>
12
 
#include <stdio.h>
13
 
#include <stdarg.h>
14
 
 
15
 
#include "Platform.h"
16
 
 
17
 
#include "PropSet.h"
18
 
#include "Accessor.h"
19
 
#include "StyleContext.h"
20
 
#include "KeyWords.h"
21
 
#include "Scintilla.h"
22
 
#include "SciLexer.h"
23
 
#include "CharacterSet.h"
24
 
 
25
 
#ifdef SCI_NAMESPACE
26
 
using namespace Scintilla;
27
 
#endif
28
 
 
29
 
// Info for HERE document handling from perldata.pod (reformatted):
30
 
// ----------------------------------------------------------------
31
 
// A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
32
 
// Following a << you specify a string to terminate the quoted material, and
33
 
// all lines following the current line down to the terminating string are
34
 
// the value of the item.
35
 
// * The terminating string may be either an identifier (a word), or some
36
 
//   quoted text.
37
 
// * If quoted, the type of quotes you use determines the treatment of the
38
 
//   text, just as in regular quoting.
39
 
// * An unquoted identifier works like double quotes.
40
 
// * There must be no space between the << and the identifier.
41
 
//   (If you put a space it will be treated as a null identifier,
42
 
//    which is valid, and matches the first empty line.)
43
 
//   (This is deprecated, -w warns of this syntax)
44
 
// * The terminating string must appear by itself (unquoted and
45
 
//   with no surrounding whitespace) on the terminating line.
46
 
 
47
 
#define HERE_DELIM_MAX 256              // maximum length of HERE doc delimiter

// Sub-states used while lexing a numeric literal. The numeric ordering is
// relied upon by range comparisons, so do not renumber.
#define PERLNUM_BINARY          1       // order is significant: 1-4 cannot have a dot
#define PERLNUM_HEX             2
#define PERLNUM_OCTAL           3
#define PERLNUM_FLOAT_EXP       4       // exponent part only
#define PERLNUM_DECIMAL         5       // 1-5 are numbers; 6-7 are strings
#define PERLNUM_VECTOR          6
#define PERLNUM_V_VECTOR        7
#define PERLNUM_BAD             8       // malformed literal (e.g. invalid octal/binary digit)

// Lookback state for bareword disambiguation: records what kind of
// significant item was lexed last.
#define BACK_NONE               0       // neither operator nor keyword
#define BACK_OPERATOR           1       // whitespace/comments are insignificant
#define BACK_KEYWORD            2       // operators/keywords are needed for disambiguation
61
 
 
62
 
static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler)
63
 
{
64
 
        // old-style keyword matcher; needed because GetCurrent() needs
65
 
        // current segment to be committed, but we may abandon early...
66
 
        char s[100];
67
 
        unsigned int i, len = end - start;
68
 
        if (len > 30) { len = 30; }
69
 
        for (i = 0; i < len; i++, start++) s[i] = styler[start];
70
 
        s[i] = '\0';
71
 
        return keywords.InList(s);
72
 
}
73
 
 
74
 
static int disambiguateBareword(Accessor &styler, unsigned int bk, unsigned int fw,
75
 
                                int backFlag, unsigned int backPos, unsigned int endPos)
76
 
{
77
 
        // identifiers are recognized by Perl as barewords under some
78
 
        // conditions, the following attempts to do the disambiguation
79
 
        // by looking backward and forward; result in 2 LSB
80
 
        int result = 0;
81
 
        bool moreback = false;          // true if passed newline/comments
82
 
        bool brace = false;                     // true if opening brace found
83
 
        // if BACK_NONE, neither operator nor keyword, so skip test
84
 
        if (backFlag == BACK_NONE)
85
 
                return result;
86
 
        // first look backwards past whitespace/comments to set EOL flag
87
 
        // (some disambiguation patterns must be on a single line)
88
 
        if (backPos <= static_cast<unsigned int>(styler.LineStart(styler.GetLine(bk))))
89
 
                moreback = true;
90
 
        // look backwards at last significant lexed item for disambiguation
91
 
        bk = backPos - 1;
92
 
        int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
93
 
        if (ch == '{' && !moreback) {
94
 
                // {bareword: possible variable spec
95
 
                brace = true;
96
 
        } else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&')
97
 
                        // &bareword: subroutine call
98
 
                           || styler.Match(bk - 1, "->")
99
 
                        // ->bareword: part of variable spec
100
 
                           || styler.Match(bk - 2, "sub")) {
101
 
                        // sub bareword: subroutine declaration
102
 
                        // (implied BACK_KEYWORD, no keywords end in 'sub'!)
103
 
                result |= 1;
104
 
        }
105
 
        // next, scan forward after word past tab/spaces only;
106
 
        // if ch isn't one of '[{(,' we can skip the test
107
 
        if ((ch == '{' || ch == '(' || ch == '['|| ch == ',')
108
 
                && fw < endPos) {
109
 
                while (ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)),
110
 
                           IsASpaceOrTab(ch) && fw < endPos) {
111
 
                        fw++;
112
 
                }
113
 
                if ((ch == '}' && brace)
114
 
                        // {bareword}: variable spec
115
 
                        || styler.Match(fw, "=>")) {
116
 
                        // [{(, bareword=>: hash literal
117
 
                        result |= 2;
118
 
                }
119
 
        }
120
 
        return result;
121
 
}
122
 
 
123
 
static void skipWhitespaceComment(Accessor &styler, unsigned int &p)
124
 
{
125
 
        // when backtracking, we need to skip whitespace and comments
126
 
        int style;
127
 
        while ((p > 0) && (style = styler.StyleAt(p),
128
 
                   style == SCE_PL_DEFAULT || style == SCE_PL_COMMENTLINE))
129
 
                p--;
130
 
}
131
 
 
132
 
static int styleBeforeBracePair(Accessor &styler, unsigned int bk)
133
 
{
134
 
        // backtrack to find open '{' corresponding to a '}', balanced
135
 
        // return significant style to be tested for '/' disambiguation
136
 
        int braceCount = 1;
137
 
        if (bk == 0)
138
 
                return SCE_PL_DEFAULT;
139
 
        while (--bk > 0) {
140
 
                if (styler.StyleAt(bk) == SCE_PL_OPERATOR) {
141
 
                        int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
142
 
                        if (bkch == ';') {      // early out
143
 
                                break;
144
 
                        } else if (bkch == '}') {
145
 
                                braceCount++;
146
 
                        } else if (bkch == '{') {
147
 
                                if (--braceCount == 0) break;
148
 
                        }
149
 
                }
150
 
        }
151
 
        if (bk > 0 && braceCount == 0) {
152
 
                // balanced { found, bk > 0, skip more whitespace/comments
153
 
                bk--;
154
 
                skipWhitespaceComment(styler, bk);
155
 
                return styler.StyleAt(bk);
156
 
        }
157
 
        return SCE_PL_DEFAULT;
158
 
}
159
 
 
160
 
static int styleCheckIdentifier(Accessor &styler, unsigned int bk)
161
 
{
162
 
        // backtrack to classify sub-styles of identifier under test
163
 
        // return sub-style to be tested for '/' disambiguation
164
 
        if (styler.SafeGetCharAt(bk) == '>')    // inputsymbol, like <foo>
165
 
                return 1;
166
 
        // backtrack to check for possible "->" or "::" before identifier
167
 
        while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
168
 
                bk--;
169
 
        }
170
 
        while (bk > 0) {
171
 
                int bkstyle = styler.StyleAt(bk);
172
 
                if (bkstyle == SCE_PL_DEFAULT
173
 
                        || bkstyle == SCE_PL_COMMENTLINE) {
174
 
                        // skip whitespace, comments
175
 
                } else if (bkstyle == SCE_PL_OPERATOR) {
176
 
                        // test for "->" and "::"
177
 
                        if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::"))
178
 
                                return 2;
179
 
                } else
180
 
                        return 3;       // bare identifier
181
 
                bk--;
182
 
        }
183
 
        return 0;
184
 
}
185
 
 
186
 
static int inputsymbolScan(Accessor &styler, unsigned int pos, unsigned int endPos)
187
 
{
188
 
        // looks forward for matching > on same line; a bit ugly
189
 
        unsigned int fw = pos;
190
 
        while (++fw < endPos) {
191
 
                int fwch = static_cast<unsigned char>(styler.SafeGetCharAt(fw));
192
 
                if (fwch == '\r' || fwch == '\n') {
193
 
                        return 0;
194
 
                } else if (fwch == '>') {
195
 
                        if (styler.Match(fw - 2, "<=>"))        // '<=>' case
196
 
                                return 0;
197
 
                        return fw - pos;
198
 
                }
199
 
        }
200
 
        return 0;
201
 
}
202
 
 
203
 
static int podLineScan(Accessor &styler, unsigned int &pos, unsigned int endPos)
204
 
{
205
 
        // forward scan the current line to classify line for POD style
206
 
        int state = -1;
207
 
        while (pos <= endPos) {
208
 
                int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos));
209
 
                if (ch == '\n' || ch == '\r' || pos >= endPos) {
210
 
                        if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++;
211
 
                        break;
212
 
                }
213
 
                if (IsASpaceOrTab(ch)) {        // whitespace, take note
214
 
                        if (state == -1)
215
 
                                state = SCE_PL_DEFAULT;
216
 
                } else if (state == SCE_PL_DEFAULT) {   // verbatim POD line
217
 
                        state = SCE_PL_POD_VERB;
218
 
                } else if (state != SCE_PL_POD_VERB) {  // regular POD line
219
 
                        state = SCE_PL_POD;
220
 
                }
221
 
                pos++;
222
 
        }
223
 
        if (state == -1)
224
 
                state = SCE_PL_DEFAULT;
225
 
        return state;
226
 
}
227
 
 
228
 
static bool styleCheckSubPrototype(Accessor &styler, unsigned int bk)
229
 
{
230
 
        // backtrack to identify if we're starting a subroutine prototype
231
 
        // we also need to ignore whitespace/comments:
232
 
        // 'sub' [whitespace|comment] <identifier> [whitespace|comment]
233
 
        styler.Flush();
234
 
        skipWhitespaceComment(styler, bk);
235
 
        if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER) // check identifier
236
 
                return false;
237
 
        while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) {
238
 
                bk--;
239
 
        }
240
 
        skipWhitespaceComment(styler, bk);
241
 
        if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD // check "sub" keyword
242
 
                || !styler.Match(bk - 2, "sub"))        // assume suffix is unique!
243
 
                return false;
244
 
        return true;
245
 
}
246
 
 
247
 
// Compares a line of text against a per-line delimiter.
//
//   sref  the reference delimiter
//   s     the candidate line; MODIFIED IN PLACE: a single trailing '\r' is
//         replaced by '\0' so that CRLF-terminated lines compare equal
//
// Returns true when the (possibly trimmed) line equals sref exactly.
static bool isMatch(const char *sref, char *s)
{
        // strlen() yields size_t; keep that type to avoid a narrowing conversion
        const size_t len = strlen(s);
        if (len != 0 && s[len - 1] == '\r')
                s[len - 1] = '\0';
        return strcmp(sref, s) == 0;
}
255
 
 
256
 
static int actualNumStyle(int numberStyle) {
257
 
        if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
258
 
                return SCE_PL_STRING;
259
 
        } else if (numberStyle == PERLNUM_BAD) {
260
 
                return SCE_PL_ERROR;
261
 
        }
262
 
        return SCE_PL_NUMBER;
263
 
}
264
 
 
265
 
// Returns the closing delimiter that pairs with the opening delimiter ch:
// ')' for '(', ']' for '[', '}' for '{' and '>' for '<'. Any other character
// is its own closing delimiter and is returned unchanged.
static int opposite(int ch) {
        switch (ch) {
        case '(': return ')';
        case '[': return ']';
        case '{': return '}';
        case '<': return '>';
        default:  return ch;
        }
}
272
 
 
273
 
static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
274
 
                             WordList *keywordlists[], Accessor &styler) {
275
 
 
276
 
        WordList &keywords = *keywordlists[0];
277
 
 
278
 
        // keywords that forces /PATTERN/ at all times; should track vim's behaviour
279
 
        WordList reWords;
280
 
        reWords.Set("elsif if split while");
281
 
 
282
 
        // charset classes
283
 
        CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true);
284
 
        CharacterSet setWord(CharacterSet::setAlphaNum, "_", 0x80, true);
285
 
        CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");
286
 
        // lexing of "%*</" operators is non-trivial; these are missing in the set below
287
 
        CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~");
288
 
        CharacterSet setQDelim(CharacterSet::setNone, "qrwx");
289
 
        CharacterSet setModifiers(CharacterSet::setAlpha);
290
 
        CharacterSet setPreferRE(CharacterSet::setNone, "*/<%");
291
 
        // setArray and setHash also accepts chars for special vars like $_,
292
 
        // which are then truncated when the next char does not match setVar
293
 
        CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true);
294
 
        CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true);
295
 
        CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true);
296
 
        CharacterSet &setPOD = setModifiers;
297
 
        CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@");
298
 
        CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_");
299
 
        CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*];");
300
 
        // for format identifiers
301
 
        CharacterSet setFormatStart(CharacterSet::setAlpha, "_=");
302
 
        CharacterSet &setFormat = setHereDocDelim;
303
 
 
304
 
        // Lexer for perl often has to backtrack to start of current style to determine
305
 
        // which characters are being used as quotes, how deeply nested is the
306
 
        // start position and what the termination string is for HERE documents.
307
 
 
308
 
        class HereDocCls {      // Class to manage HERE doc sequence
309
 
        public:
310
 
                int State;              // 0: '<<' encountered
311
 
                                                // 1: collect the delimiter
312
 
                                                // 2: here doc text (lines after the delimiter)
313
 
                int Quote;              // the char after '<<'
314
 
                bool Quoted;            // true if Quote in ('\'','"','`')
315
 
                int DelimiterLength;    // strlen(Delimiter)
316
 
                char *Delimiter;        // the Delimiter, 256: sizeof PL_tokenbuf
317
 
                HereDocCls() {
318
 
                        State = 0;
319
 
                        Quote = 0;
320
 
                        Quoted = false;
321
 
                        DelimiterLength = 0;
322
 
                        Delimiter = new char[HERE_DELIM_MAX];
323
 
                        Delimiter[0] = '\0';
324
 
                }
325
 
                void Append(int ch) {
326
 
                        Delimiter[DelimiterLength++] = static_cast<char>(ch);
327
 
                        Delimiter[DelimiterLength] = '\0';
328
 
                }
329
 
                ~HereDocCls() {
330
 
                        delete []Delimiter;
331
 
                }
332
 
        };
333
 
        HereDocCls HereDoc;             // TODO: FIFO for stacked here-docs
334
 
 
335
 
        class QuoteCls {        // Class to manage quote pairs
336
 
                public:
337
 
                int Rep;
338
 
                int Count;
339
 
                int Up, Down;
340
 
                QuoteCls() {
341
 
                        this->New(1);
342
 
                }
343
 
                void New(int r = 1) {
344
 
                        Rep   = r;
345
 
                        Count = 0;
346
 
                        Up    = '\0';
347
 
                        Down  = '\0';
348
 
                }
349
 
                void Open(int u) {
350
 
                        Count++;
351
 
                        Up    = u;
352
 
                        Down  = opposite(Up);
353
 
                }
354
 
        };
355
 
        QuoteCls Quote;
356
 
 
357
 
        // additional state for number lexing
358
 
        int numState = PERLNUM_DECIMAL;
359
 
        int dotCount = 0;
360
 
 
361
 
        unsigned int endPos = startPos + length;
362
 
 
363
 
        // Backtrack to beginning of style if required...
364
 
        // If in a long distance lexical state, backtrack to find quote characters.
365
 
        // Includes strings (may be multi-line), numbers (additional state), format
366
 
        // bodies, as well as POD sections.
367
 
        if (initStyle == SCE_PL_HERE_Q
368
 
                || initStyle == SCE_PL_HERE_QQ
369
 
                || initStyle == SCE_PL_HERE_QX
370
 
                || initStyle == SCE_PL_FORMAT
371
 
        ) {
372
 
                int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;
373
 
                while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {
374
 
                        startPos--;
375
 
                }
376
 
                startPos = styler.LineStart(styler.GetLine(startPos));
377
 
                initStyle = styler.StyleAt(startPos - 1);
378
 
        }
379
 
        if (initStyle == SCE_PL_STRING_Q
380
 
                || initStyle == SCE_PL_STRING_QQ
381
 
                || initStyle == SCE_PL_STRING_QX
382
 
                || initStyle == SCE_PL_STRING_QR
383
 
                || initStyle == SCE_PL_STRING_QW
384
 
                || initStyle == SCE_PL_REGEX
385
 
                || initStyle == SCE_PL_REGSUBST
386
 
                || initStyle == SCE_PL_STRING
387
 
                || initStyle == SCE_PL_BACKTICKS
388
 
                || initStyle == SCE_PL_CHARACTER
389
 
                || initStyle == SCE_PL_NUMBER
390
 
                || initStyle == SCE_PL_IDENTIFIER
391
 
                || initStyle == SCE_PL_ERROR
392
 
                || initStyle == SCE_PL_SUB_PROTOTYPE
393
 
        ) {
394
 
                while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
395
 
                        startPos--;
396
 
                }
397
 
                initStyle = SCE_PL_DEFAULT;
398
 
        } else if (initStyle == SCE_PL_POD
399
 
                           || initStyle == SCE_PL_POD_VERB
400
 
        ) {
401
 
                // POD backtracking finds preceding blank lines and goes back past them
402
 
                int ln = styler.GetLine(startPos);
403
 
                if (ln > 0) {
404
 
                        initStyle = styler.StyleAt(styler.LineStart(--ln));
405
 
                        if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) {
406
 
                                while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT)
407
 
                                        ln--;
408
 
                        }
409
 
                        startPos = styler.LineStart(++ln);
410
 
                        initStyle = styler.StyleAt(startPos - 1);
411
 
                } else {
412
 
                        startPos = 0;
413
 
                        initStyle = SCE_PL_DEFAULT;
414
 
                }
415
 
        }
416
 
 
417
 
        // backFlag, backPos are additional state to aid identifier corner cases.
418
 
        // Look backwards past whitespace and comments in order to detect either
419
 
        // operator or keyword. Later updated as we go along.
420
 
        int backFlag = BACK_NONE;
421
 
        unsigned int backPos = startPos;
422
 
        if (backPos > 0) {
423
 
                backPos--;
424
 
                skipWhitespaceComment(styler, backPos);
425
 
                if (styler.StyleAt(backPos) == SCE_PL_OPERATOR)
426
 
                        backFlag = BACK_OPERATOR;
427
 
                else if (styler.StyleAt(backPos) == SCE_PL_WORD)
428
 
                        backFlag = BACK_KEYWORD;
429
 
                backPos++;
430
 
        }
431
 
 
432
 
        StyleContext sc(startPos, endPos - startPos, initStyle, styler, static_cast<char>(STYLE_MAX));
433
 
 
434
 
        for (; sc.More(); sc.Forward()) {
435
 
 
436
 
                // Determine if the current state should terminate.
437
 
                switch (sc.state) {
438
 
                        case SCE_PL_OPERATOR:
439
 
                                sc.SetState(SCE_PL_DEFAULT);
440
 
                                backFlag = BACK_OPERATOR;
441
 
                                backPos = sc.currentPos;
442
 
                                break;
443
 
                        case SCE_PL_IDENTIFIER:         // identifier, bareword, inputsymbol
444
 
                                if ((!setWord.Contains(sc.ch) && sc.ch != '\'')
445
 
                                        || sc.Match('.', '.')
446
 
                                        || sc.chPrev == '>') {  // end of inputsymbol
447
 
                                        sc.SetState(SCE_PL_DEFAULT);
448
 
                                }
449
 
                                break;
450
 
                        case SCE_PL_WORD:               // keyword, plus special cases
451
 
                                if (!setWord.Contains(sc.ch)) {
452
 
                                        char s[100];
453
 
                                        sc.GetCurrent(s, sizeof(s));
454
 
                                        if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) {
455
 
                                                sc.ChangeState(SCE_PL_DATASECTION);
456
 
                                        } else {
457
 
                                                if ((strcmp(s, "format") == 0)) {
458
 
                                                        sc.SetState(SCE_PL_FORMAT_IDENT);
459
 
                                                        HereDoc.State = 0;
460
 
                                                } else {
461
 
                                                        sc.SetState(SCE_PL_DEFAULT);
462
 
                                                }
463
 
                                                backFlag = BACK_KEYWORD;
464
 
                                                backPos = sc.currentPos;
465
 
                                        }
466
 
                                }
467
 
                                break;
468
 
                        case SCE_PL_SCALAR:
469
 
                        case SCE_PL_ARRAY:
470
 
                        case SCE_PL_HASH:
471
 
                        case SCE_PL_SYMBOLTABLE:
472
 
                                if (sc.Match(':', ':')) {       // skip ::
473
 
                                        sc.Forward();
474
 
                                } else if (!setVar.Contains(sc.ch)) {
475
 
                                        if (sc.LengthCurrent() == 1) {
476
 
                                                // Special variable: $(, $_ etc.
477
 
                                                sc.Forward();
478
 
                                        }
479
 
                                        sc.SetState(SCE_PL_DEFAULT);
480
 
                                }
481
 
                                break;
482
 
                        case SCE_PL_NUMBER:
483
 
                                // if no early break, number style is terminated at "(go through)"
484
 
                                if (sc.ch == '.') {
485
 
                                        if (sc.chNext == '.') {
486
 
                                                // double dot is always an operator (go through)
487
 
                                        } else if (numState <= PERLNUM_FLOAT_EXP) {
488
 
                                                // non-decimal number or float exponent, consume next dot
489
 
                                                sc.SetState(SCE_PL_OPERATOR);
490
 
                                                break;
491
 
                                        } else {        // decimal or vectors allows dots
492
 
                                                dotCount++;
493
 
                                                if (numState == PERLNUM_DECIMAL) {
494
 
                                                        if (dotCount <= 1)      // number with one dot in it
495
 
                                                                break;
496
 
                                                        if (IsADigit(sc.chNext)) {      // really a vector
497
 
                                                                numState = PERLNUM_VECTOR;
498
 
                                                                break;
499
 
                                                        }
500
 
                                                        // number then dot (go through)
501
 
                                                } else if (IsADigit(sc.chNext)) // vectors
502
 
                                                        break;
503
 
                                                // vector then dot (go through)
504
 
                                        }
505
 
                                } else if (sc.ch == '_') {
506
 
                                        // permissive underscoring for number and vector literals
507
 
                                        break;
508
 
                                } else if (numState == PERLNUM_DECIMAL) {
509
 
                                        if (sc.ch == 'E' || sc.ch == 'e') {     // exponent, sign
510
 
                                                numState = PERLNUM_FLOAT_EXP;
511
 
                                                if (sc.chNext == '+' || sc.chNext == '-') {
512
 
                                                        sc.Forward();
513
 
                                                }
514
 
                                                break;
515
 
                                        } else if (IsADigit(sc.ch))
516
 
                                                break;
517
 
                                        // number then word (go through)
518
 
                                } else if (numState == PERLNUM_HEX) {
519
 
                                        if (IsADigit(sc.ch, 16))
520
 
                                                break;
521
 
                                } else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
522
 
                                        if (IsADigit(sc.ch))    // vector
523
 
                                                break;
524
 
                                        if (setWord.Contains(sc.ch) && dotCount == 0) { // change to word
525
 
                                                sc.ChangeState(SCE_PL_IDENTIFIER);
526
 
                                                break;
527
 
                                        }
528
 
                                        // vector then word (go through)
529
 
                                } else if (IsADigit(sc.ch)) {
530
 
                                        if (numState == PERLNUM_FLOAT_EXP) {
531
 
                                                break;
532
 
                                        } else if (numState == PERLNUM_OCTAL) {
533
 
                                                if (sc.ch <= '7') break;
534
 
                                        } else if (numState == PERLNUM_BINARY) {
535
 
                                                if (sc.ch <= '1') break;
536
 
                                        }
537
 
                                        // mark invalid octal, binary numbers (go through)
538
 
                                        numState = PERLNUM_BAD;
539
 
                                        break;
540
 
                                }
541
 
                                // complete current number or vector
542
 
                                sc.ChangeState(actualNumStyle(numState));
543
 
                                sc.SetState(SCE_PL_DEFAULT);
544
 
                                break;
545
 
                        case SCE_PL_COMMENTLINE:
546
 
                                if (sc.atLineEnd) {
547
 
                                        sc.SetState(SCE_PL_DEFAULT);
548
 
                                }
549
 
                                break;
550
 
                        case SCE_PL_HERE_DELIM:
551
 
                                if (HereDoc.State == 0) { // '<<' encountered
552
 
                                        int delim_ch = sc.chNext;
553
 
                                        int ws_skip = 0;
554
 
                                        HereDoc.State = 1;      // pre-init HERE doc class
555
 
                                        HereDoc.Quote = sc.chNext;
556
 
                                        HereDoc.Quoted = false;
557
 
                                        HereDoc.DelimiterLength = 0;
558
 
                                        HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
559
 
                                        if (IsASpaceOrTab(delim_ch)) {
560
 
                                                // skip whitespace; legal only for quoted delimiters
561
 
                                                unsigned int i = sc.currentPos + 1;
562
 
                                                while ((i < endPos) && IsASpaceOrTab(delim_ch)) {
563
 
                                                        i++;
564
 
                                                        delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
565
 
                                                }
566
 
                                                ws_skip = i - sc.currentPos - 1;
567
 
                                        }
568
 
                                        if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') {
569
 
                                                // a quoted here-doc delimiter; skip any whitespace
570
 
                                                sc.Forward(ws_skip + 1);
571
 
                                                HereDoc.Quote = delim_ch;
572
 
                                                HereDoc.Quoted = true;
573
 
                                        } else if ((ws_skip == 0 && setNonHereDoc.Contains(sc.chNext))
574
 
                                                           || ws_skip > 0) {
575
 
                                                // left shift << or <<= operator cases
576
 
                                                // restore position if operator
577
 
                                                sc.ChangeState(SCE_PL_OPERATOR);
578
 
                                                sc.ForwardSetState(SCE_PL_DEFAULT);
579
 
                                                backFlag = BACK_OPERATOR;
580
 
                                                backPos = sc.currentPos;
581
 
                                                HereDoc.State = 0;
582
 
                                        } else {
583
 
                                                // specially handle initial '\' for identifier
584
 
                                                if (ws_skip == 0 && HereDoc.Quote == '\\')
585
 
                                                        sc.Forward();
586
 
                                                // an unquoted here-doc delimiter, no special handling
587
 
                                                // (cannot be prefixed by spaces/tabs), or
588
 
                                                // symbols terminates; deprecated zero-length delimiter
589
 
                                        }
590
 
                                } else if (HereDoc.State == 1) { // collect the delimiter
591
 
                                        backFlag = BACK_NONE;
592
 
                                        if (HereDoc.Quoted) { // a quoted here-doc delimiter
593
 
                                                if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
594
 
                                                        sc.ForwardSetState(SCE_PL_DEFAULT);
595
 
                                                } else if (!sc.atLineEnd) {
596
 
                                                        if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote
597
 
                                                                sc.Forward();
598
 
                                                        }
599
 
                                                        if (sc.ch != '\r') {    // skip CR if CRLF
600
 
                                                                HereDoc.Append(sc.ch);
601
 
                                                        }
602
 
                                                }
603
 
                                        } else { // an unquoted here-doc delimiter
604
 
                                                if (setHereDocDelim.Contains(sc.ch)) {
605
 
                                                        HereDoc.Append(sc.ch);
606
 
                                                } else {
607
 
                                                        sc.SetState(SCE_PL_DEFAULT);
608
 
                                                }
609
 
                                        }
610
 
                                        if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
611
 
                                                sc.SetState(SCE_PL_ERROR);
612
 
                                                HereDoc.State = 0;
613
 
                                        }
614
 
                                }
615
 
                                break;
616
 
                        case SCE_PL_HERE_Q:
617
 
                        case SCE_PL_HERE_QQ:
618
 
                        case SCE_PL_HERE_QX: {
619
 
                                // also implies HereDoc.State == 2
620
 
                                sc.Complete();
621
 
                                while (!sc.atLineEnd)
622
 
                                        sc.Forward();
623
 
                                char s[HERE_DELIM_MAX];
624
 
                                sc.GetCurrent(s, sizeof(s));
625
 
                                if (isMatch(HereDoc.Delimiter, s)) {
626
 
                                        sc.SetState(SCE_PL_DEFAULT);
627
 
                                        backFlag = BACK_NONE;
628
 
                                        HereDoc.State = 0;
629
 
                                }
630
 
                                } break;
631
 
                        case SCE_PL_POD:
632
 
                        case SCE_PL_POD_VERB: {
633
 
                                unsigned int fw = sc.currentPos;
634
 
                                int ln = styler.GetLine(fw);
635
 
                                if (sc.atLineStart && sc.Match("=cut")) {       // end of POD
636
 
                                        sc.SetState(SCE_PL_POD);
637
 
                                        sc.Forward(4);
638
 
                                        sc.SetState(SCE_PL_DEFAULT);
639
 
                                        styler.SetLineState(ln, SCE_PL_POD);
640
 
                                        break;
641
 
                                }
642
 
                                int pod = podLineScan(styler, fw, endPos);      // classify POD line
643
 
                                styler.SetLineState(ln, pod);
644
 
                                if (pod == SCE_PL_DEFAULT) {
645
 
                                        if (sc.state == SCE_PL_POD_VERB) {
646
 
                                                unsigned int fw2 = fw;
647
 
                                                while (fw2 <= endPos && pod == SCE_PL_DEFAULT) {
648
 
                                                        fw = fw2++;     // penultimate line (last blank line)
649
 
                                                        pod = podLineScan(styler, fw2, endPos);
650
 
                                                        styler.SetLineState(styler.GetLine(fw2), pod);
651
 
                                                }
652
 
                                                if (pod == SCE_PL_POD) {        // truncate verbatim POD early
653
 
                                                        sc.SetState(SCE_PL_POD);
654
 
                                                } else
655
 
                                                        fw = fw2;
656
 
                                        } else
657
 
                                                pod = SCE_PL_POD;
658
 
                                } else {
659
 
                                        if (pod == SCE_PL_POD_VERB      // still part of current paragraph
660
 
                                            && (styler.GetLineState(ln - 1) == SCE_PL_POD)) {
661
 
                                                pod = SCE_PL_POD;
662
 
                                                styler.SetLineState(ln, pod);
663
 
                                        } else if (pod == SCE_PL_POD
664
 
                                                           && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) {
665
 
                                                pod = SCE_PL_POD_VERB;
666
 
                                                styler.SetLineState(ln, pod);
667
 
                                        }
668
 
                                        sc.SetState(pod);
669
 
                                }
670
 
                                sc.Forward(fw - sc.currentPos); // commit style
671
 
                                } break;
672
 
                        case SCE_PL_REGEX:
673
 
                        case SCE_PL_STRING_QR:
674
 
                                if (Quote.Rep <= 0) {
675
 
                                        if (!setModifiers.Contains(sc.ch))
676
 
                                                sc.SetState(SCE_PL_DEFAULT);
677
 
                                } else if (!Quote.Up && !IsASpace(sc.ch)) {
678
 
                                        Quote.Open(sc.ch);
679
 
                                } else if (sc.ch == '\\' && Quote.Up != '\\') {
680
 
                                        sc.Forward();
681
 
                                } else if (sc.ch == Quote.Down) {
682
 
                                        Quote.Count--;
683
 
                                        if (Quote.Count == 0)
684
 
                                                Quote.Rep--;
685
 
                                } else if (sc.ch == Quote.Up) {
686
 
                                        Quote.Count++;
687
 
                                }
688
 
                                break;
689
 
                        case SCE_PL_REGSUBST:
690
 
                                if (Quote.Rep <= 0) {
691
 
                                        if (!setModifiers.Contains(sc.ch))
692
 
                                                sc.SetState(SCE_PL_DEFAULT);
693
 
                                } else if (!Quote.Up && !IsASpace(sc.ch)) {
694
 
                                        Quote.Open(sc.ch);
695
 
                                } else if (sc.ch == '\\' && Quote.Up != '\\') {
696
 
                                        sc.Forward();
697
 
                                } else if (Quote.Count == 0 && Quote.Rep == 1) {
698
 
                                        // We matched something like s(...) or tr{...}, Perl 5.10
699
 
                                        // appears to allow almost any character for use as the
700
 
                                        // next delimiters. Whitespace and comments are accepted in
701
 
                                        // between, but we'll limit to whitespace here.
702
 
                                        // For '#', if no whitespace in between, it's a delimiter.
703
 
                                        if (IsASpace(sc.ch)) {
704
 
                                                // Keep going
705
 
                                        } else if (sc.ch == '#' && IsASpaceOrTab(sc.chPrev)) {
706
 
                                                sc.SetState(SCE_PL_DEFAULT);
707
 
                                        } else {
708
 
                                                Quote.Open(sc.ch);
709
 
                                        }
710
 
                                } else if (sc.ch == Quote.Down) {
711
 
                                        Quote.Count--;
712
 
                                        if (Quote.Count == 0)
713
 
                                                Quote.Rep--;
714
 
                                        if (Quote.Up == Quote.Down)
715
 
                                                Quote.Count++;
716
 
                                } else if (sc.ch == Quote.Up) {
717
 
                                        Quote.Count++;
718
 
                                }
719
 
                                break;
720
 
                        case SCE_PL_STRING_Q:
721
 
                        case SCE_PL_STRING_QQ:
722
 
                        case SCE_PL_STRING_QX:
723
 
                        case SCE_PL_STRING_QW:
724
 
                        case SCE_PL_STRING:
725
 
                        case SCE_PL_CHARACTER:
726
 
                        case SCE_PL_BACKTICKS:
727
 
                                if (!Quote.Down && !IsASpace(sc.ch)) {
728
 
                                        Quote.Open(sc.ch);
729
 
                                } else if (sc.ch == '\\' && Quote.Up != '\\') {
730
 
                                        sc.Forward();
731
 
                                } else if (sc.ch == Quote.Down) {
732
 
                                        Quote.Count--;
733
 
                                        if (Quote.Count == 0)
734
 
                                                sc.ForwardSetState(SCE_PL_DEFAULT);
735
 
                                } else if (sc.ch == Quote.Up) {
736
 
                                        Quote.Count++;
737
 
                                }
738
 
                                break;
739
 
                        case SCE_PL_SUB_PROTOTYPE: {
740
 
                                int i = 0;
741
 
                                // forward scan; must all be valid proto characters
742
 
                                while (setSubPrototype.Contains(sc.GetRelative(i)))
743
 
                                        i++;
744
 
                                if (sc.GetRelative(i) == ')') { // valid sub prototype
745
 
                                        sc.Forward(i);
746
 
                                        sc.ForwardSetState(SCE_PL_DEFAULT);
747
 
                                } else {
748
 
                                        // abandon prototype, restart from '('
749
 
                                        sc.ChangeState(SCE_PL_OPERATOR);
750
 
                                        sc.SetState(SCE_PL_DEFAULT);
751
 
                                }
752
 
                                } break;
753
 
                        case SCE_PL_FORMAT: {
754
 
                                sc.Complete();
755
 
                                while (!sc.atLineEnd)
756
 
                                        sc.Forward();
757
 
                                char s[10];
758
 
                                sc.GetCurrent(s, sizeof(s));
759
 
                                if (isMatch(".", s))
760
 
                                        sc.SetState(SCE_PL_DEFAULT);
761
 
                                } break;
762
 
                        case SCE_PL_ERROR:
763
 
                                break;
764
 
                }
765
 
                // Needed for specific continuation styles (one follows the other)
766
 
                switch (sc.state) {
767
 
                        // continued from SCE_PL_WORD
768
 
                        case SCE_PL_FORMAT_IDENT:
769
 
                                // occupies HereDoc state 3 to avoid clashing with HERE docs
770
 
                                if (IsASpaceOrTab(sc.ch)) {             // skip whitespace
771
 
                                        sc.ChangeState(SCE_PL_DEFAULT);
772
 
                                        while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
773
 
                                                sc.Forward();
774
 
                                        sc.SetState(SCE_PL_FORMAT_IDENT);
775
 
                                }
776
 
                                if (setFormatStart.Contains(sc.ch)) {   // identifier or '='
777
 
                                        if (sc.ch != '=') {
778
 
                                                do {
779
 
                                                        sc.Forward();
780
 
                                                } while (setFormat.Contains(sc.ch));
781
 
                                        }
782
 
                                        while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
783
 
                                                sc.Forward();
784
 
                                        if (sc.ch == '=') {
785
 
                                                sc.ForwardSetState(SCE_PL_DEFAULT);
786
 
                                                HereDoc.State = 3;
787
 
                                        } else {
788
 
                                                // invalid identifier; inexact fallback, but hey
789
 
                                                sc.ChangeState(SCE_PL_IDENTIFIER);
790
 
                                                sc.SetState(SCE_PL_DEFAULT);
791
 
                                        }
792
 
                                } else {
793
 
                                        sc.ChangeState(SCE_PL_DEFAULT); // invalid indentifier
794
 
                                }
795
 
                                backFlag = BACK_NONE;
796
 
                                break;
797
 
                }
798
 
 
799
 
                // Must check end of HereDoc states here before default state is handled
800
 
                if (HereDoc.State == 1 && sc.atLineEnd) {
801
 
                        // Begin of here-doc (the line after the here-doc delimiter):
802
 
                        // Lexically, the here-doc starts from the next line after the <<, but the
803
 
                        // first line of here-doc seems to follow the style of the last EOL sequence
804
 
                        int st_new = SCE_PL_HERE_QQ;
805
 
                        HereDoc.State = 2;
806
 
                        if (HereDoc.Quoted) {
807
 
                                if (sc.state == SCE_PL_HERE_DELIM) {
808
 
                                        // Missing quote at end of string! We are stricter than perl.
809
 
                                        // Colour here-doc anyway while marking this bit as an error.
810
 
                                        sc.ChangeState(SCE_PL_ERROR);
811
 
                                }
812
 
                                switch (HereDoc.Quote) {
813
 
                                        case '\'': st_new = SCE_PL_HERE_Q ; break;
814
 
                                        case '"' : st_new = SCE_PL_HERE_QQ; break;
815
 
                                        case '`' : st_new = SCE_PL_HERE_QX; break;
816
 
                                }
817
 
                        } else {
818
 
                                if (HereDoc.Quote == '\\')
819
 
                                        st_new = SCE_PL_HERE_Q;
820
 
                        }
821
 
                        sc.SetState(st_new);
822
 
                }
823
 
                if (HereDoc.State == 3 && sc.atLineEnd) {
824
 
                        // Start of format body.
825
 
                        HereDoc.State = 0;
826
 
                        sc.SetState(SCE_PL_FORMAT);
827
 
                }
828
 
 
829
 
                // Determine if a new state should be entered.
830
 
                if (sc.state == SCE_PL_DEFAULT) {
831
 
                        if (IsADigit(sc.ch) ||
832
 
                                (IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) {
833
 
                                sc.SetState(SCE_PL_NUMBER);
834
 
                                backFlag = BACK_NONE;
835
 
                                numState = PERLNUM_DECIMAL;
836
 
                                dotCount = 0;
837
 
                                if (sc.ch == '0') {             // hex,bin,octal
838
 
                                        if (sc.chNext == 'x') {
839
 
                                                numState = PERLNUM_HEX;
840
 
                                        } else if (sc.chNext == 'b') {
841
 
                                                numState = PERLNUM_BINARY;
842
 
                                        } else if (IsADigit(sc.chNext)) {
843
 
                                                numState = PERLNUM_OCTAL;
844
 
                                        }
845
 
                                        if (numState != PERLNUM_DECIMAL) {
846
 
                                                sc.Forward();
847
 
                                        }
848
 
                                } else if (sc.ch == 'v') {              // vector
849
 
                                        numState = PERLNUM_V_VECTOR;
850
 
                                }
851
 
                        } else if (setWord.Contains(sc.ch)) {
852
 
                                // if immediately prefixed by '::', always a bareword
853
 
                                sc.SetState(SCE_PL_WORD);
854
 
                                if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') {
855
 
                                        sc.ChangeState(SCE_PL_IDENTIFIER);
856
 
                                }
857
 
                                unsigned int bk = sc.currentPos;
858
 
                                unsigned int fw = sc.currentPos + 1;
859
 
                                // first check for possible quote-like delimiter
860
 
                                if (sc.ch == 's' && !setWord.Contains(sc.chNext)) {
861
 
                                        sc.ChangeState(SCE_PL_REGSUBST);
862
 
                                        Quote.New(2);
863
 
                                } else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) {
864
 
                                        sc.ChangeState(SCE_PL_REGEX);
865
 
                                        Quote.New();
866
 
                                } else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) {
867
 
                                        sc.ChangeState(SCE_PL_STRING_Q);
868
 
                                        Quote.New();
869
 
                                } else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) {
870
 
                                        sc.ChangeState(SCE_PL_REGSUBST);
871
 
                                        Quote.New(2);
872
 
                                } else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) {
873
 
                                        sc.ChangeState(SCE_PL_REGSUBST);
874
 
                                        Quote.New(2);
875
 
                                        sc.Forward();
876
 
                                        fw++;
877
 
                                } else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext)
878
 
                                                   && !setWord.Contains(sc.GetRelative(2))) {
879
 
                                        if      (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ);
880
 
                                        else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX);
881
 
                                        else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR);
882
 
                                        else sc.ChangeState(SCE_PL_STRING_QW);  // sc.chNext == 'w'
883
 
                                        Quote.New();
884
 
                                        sc.Forward();
885
 
                                        fw++;
886
 
                                } else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition
887
 
                                                   !setWord.Contains(sc.chNext) ||
888
 
                                                   (IsADigit(sc.chPrev) && IsADigit(sc.chNext)))) {
889
 
                                        sc.ChangeState(SCE_PL_OPERATOR);
890
 
                                }
891
 
                                // if potentially a keyword, scan forward and grab word, then check
892
 
                                // if it's really one; if yes, disambiguation test is performed
893
 
                                // otherwise it is always a bareword and we skip a lot of scanning
894
 
                                if (sc.state == SCE_PL_WORD) {
895
 
                                        while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw))))
896
 
                                                fw++;
897
 
                                        if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) {
898
 
                                                sc.ChangeState(SCE_PL_IDENTIFIER);
899
 
                                        }
900
 
                                }
901
 
                                // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
902
 
                                // for quote-like delimiters/keywords, attempt to disambiguate
903
 
                                // to select for bareword, change state -> SCE_PL_IDENTIFIER
904
 
                                if (sc.state != SCE_PL_IDENTIFIER && bk > 0) {
905
 
                                        if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos))
906
 
                                                sc.ChangeState(SCE_PL_IDENTIFIER);
907
 
                                }
908
 
                                backFlag = BACK_NONE;
909
 
                        } else if (sc.ch == '#') {
910
 
                                sc.SetState(SCE_PL_COMMENTLINE);
911
 
                        } else if (sc.ch == '\"') {
912
 
                                sc.SetState(SCE_PL_STRING);
913
 
                                Quote.New();
914
 
                                Quote.Open(sc.ch);
915
 
                                backFlag = BACK_NONE;
916
 
                        } else if (sc.ch == '\'') {
917
 
                                if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) {
918
 
                                        // Archaic call
919
 
                                        sc.SetState(SCE_PL_IDENTIFIER);
920
 
                                } else {
921
 
                                        sc.SetState(SCE_PL_CHARACTER);
922
 
                                        Quote.New();
923
 
                                        Quote.Open(sc.ch);
924
 
                                }
925
 
                                backFlag = BACK_NONE;
926
 
                        } else if (sc.ch == '`') {
927
 
                                sc.SetState(SCE_PL_BACKTICKS);
928
 
                                Quote.New();
929
 
                                Quote.Open(sc.ch);
930
 
                                backFlag = BACK_NONE;
931
 
                        } else if (sc.ch == '$') {
932
 
                                sc.SetState(SCE_PL_SCALAR);
933
 
                                if (sc.chNext == '{') {
934
 
                                        sc.ForwardSetState(SCE_PL_OPERATOR);
935
 
                                } else if (IsASpace(sc.chNext)) {
936
 
                                        sc.ForwardSetState(SCE_PL_DEFAULT);
937
 
                                } else {
938
 
                                        sc.Forward();
939
 
                                        if (sc.Match('`', '`') || sc.Match(':', ':')) {
940
 
                                                sc.Forward();
941
 
                                        }
942
 
                                }
943
 
                                backFlag = BACK_NONE;
944
 
                        } else if (sc.ch == '@') {
945
 
                                sc.SetState(SCE_PL_ARRAY);
946
 
                                if (setArray.Contains(sc.chNext)) {
947
 
                                        // no special treatment
948
 
                                } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
949
 
                                        sc.Forward(2);
950
 
                                } else if (sc.chNext == '{' || sc.chNext == '[') {
951
 
                                        sc.ForwardSetState(SCE_PL_OPERATOR);
952
 
                                } else {
953
 
                                        sc.ChangeState(SCE_PL_OPERATOR);
954
 
                                }
955
 
                                backFlag = BACK_NONE;
956
 
                        } else if (setPreferRE.Contains(sc.ch)) {
957
 
                                // Explicit backward peeking to set a consistent preferRE for
958
 
                                // any slash found, so no longer need to track preferRE state.
959
 
                                // Find first previous significant lexed element and interpret.
960
 
                                // A few symbols share this code for disambiguation.
961
 
                                bool preferRE = false;
962
 
                                bool isHereDoc = sc.Match('<', '<');
963
 
                                bool hereDocSpace = false;              // for: SCALAR [whitespace] '<<'
964
 
                                unsigned int bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0;
965
 
                                unsigned int bkend;
966
 
                                sc.Complete();
967
 
                                styler.Flush();
968
 
                                if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
969
 
                                        hereDocSpace = true;
970
 
                                skipWhitespaceComment(styler, bk);
971
 
                                if (bk == 0) {
972
 
                                        // avoid backward scanning breakage
973
 
                                        preferRE = true;
974
 
                                } else {
975
 
                                        int bkstyle = styler.StyleAt(bk);
976
 
                                        int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
977
 
                                        switch(bkstyle) {
978
 
                                        case SCE_PL_OPERATOR:
979
 
                                                preferRE = true;
980
 
                                                if (bkch == ')' || bkch == ']') {
981
 
                                                        preferRE = false;
982
 
                                                } else if (bkch == '}') {
983
 
                                                        // backtrack by counting balanced brace pairs
984
 
                                                        // needed to test for variables like ${}, @{} etc.
985
 
                                                        bkstyle = styleBeforeBracePair(styler, bk);
986
 
                                                        if (bkstyle == SCE_PL_SCALAR
987
 
                                                                || bkstyle == SCE_PL_ARRAY
988
 
                                                                || bkstyle == SCE_PL_HASH
989
 
                                                                || bkstyle == SCE_PL_SYMBOLTABLE
990
 
                                                                || bkstyle == SCE_PL_OPERATOR) {
991
 
                                                                preferRE = false;
992
 
                                                        }
993
 
                                                } else if (bkch == '+' || bkch == '-') {
994
 
                                                        if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1))
995
 
                                                                && bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2)))
996
 
                                                        // exceptions for operators: unary suffixes ++, --
997
 
                                                        preferRE = false;
998
 
                                                }
999
 
                                                break;
1000
 
                                        case SCE_PL_IDENTIFIER:
1001
 
                                                preferRE = true;
1002
 
                                                bkstyle = styleCheckIdentifier(styler, bk);
1003
 
                                                if ((bkstyle == 1) || (bkstyle == 2)) {
1004
 
                                                        // inputsymbol or var with "->" or "::" before identifier
1005
 
                                                        preferRE = false;
1006
 
                                                } else if (bkstyle == 3) {
1007
 
                                                        // bare identifier, test cases follow:
1008
 
                                                        if (sc.ch == '/') {
1009
 
                                                                // if '/', /PATTERN/ unless digit/space immediately after '/'
1010
 
                                                                // if '//', always expect defined-or operator to follow identifier
1011
 
                                                                if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1012
 
                                                                        preferRE = false;
1013
 
                                                        } else if (sc.ch == '*' || sc.ch == '%') {
1014
 
                                                                if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1015
 
                                                                        preferRE = false;
1016
 
                                                        } else if (sc.ch == '<') {
1017
 
                                                                if (IsASpace(sc.chNext) || sc.chNext == '=')
1018
 
                                                                        preferRE = false;
1019
 
                                                        }
1020
 
                                                }
1021
 
                                                break;
1022
 
                                        case SCE_PL_SCALAR:             // for $var<< case:
1023
 
                                                if (isHereDoc && hereDocSpace)  // if SCALAR whitespace '<<', *always* a HERE doc
1024
 
                                                        preferRE = true;
1025
 
                                                break;
1026
 
                                        case SCE_PL_WORD:
1027
 
                                                preferRE = true;
1028
 
                                                // for HERE docs, always true
1029
 
                                                if (sc.ch == '/') {
1030
 
                                                        // adopt heuristics similar to vim-style rules:
1031
 
                                                        // keywords always forced as /PATTERN/: split, if, elsif, while
1032
 
                                                        // everything else /PATTERN/ unless digit/space immediately after '/'
1033
 
                                                        // for '//', defined-or favoured unless special keywords
1034
 
                                                        bkend = bk + 1;
1035
 
                                                        while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) {
1036
 
                                                                bk--;
1037
 
                                                        }
1038
 
                                                        if (isPerlKeyword(bk, bkend, reWords, styler))
1039
 
                                                                break;
1040
 
                                                        if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1041
 
                                                                preferRE = false;
1042
 
                                                } else if (sc.ch == '*' || sc.ch == '%') {
1043
 
                                                        if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1044
 
                                                                preferRE = false;
1045
 
                                                } else if (sc.ch == '<') {
1046
 
                                                        if (IsASpace(sc.chNext) || sc.chNext == '=')
1047
 
                                                                preferRE = false;
1048
 
                                                }
1049
 
                                                break;
1050
 
                                        // other styles uses the default, preferRE=false
1051
 
                                        case SCE_PL_POD:
1052
 
                                        case SCE_PL_HERE_Q:
1053
 
                                        case SCE_PL_HERE_QQ:
1054
 
                                        case SCE_PL_HERE_QX:
1055
 
                                                preferRE = true;
1056
 
                                                break;
1057
 
                                        }
1058
 
                                }
1059
 
                                backFlag = BACK_NONE;
1060
 
                                if (isHereDoc) {        // handle '<<', HERE doc
1061
 
                                        if (preferRE) {
1062
 
                                                sc.SetState(SCE_PL_HERE_DELIM);
1063
 
                                                HereDoc.State = 0;
1064
 
                                        } else {                // << operator
1065
 
                                                sc.SetState(SCE_PL_OPERATOR);
1066
 
                                                sc.Forward();
1067
 
                                        }
1068
 
                                } else if (sc.ch == '*') {      // handle '*', typeglob
1069
 
                                        if (preferRE) {
1070
 
                                                sc.SetState(SCE_PL_SYMBOLTABLE);
1071
 
                                                if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1072
 
                                                        sc.Forward(2);
1073
 
                                                } else if (sc.chNext == '{') {
1074
 
                                                        sc.ForwardSetState(SCE_PL_OPERATOR);
1075
 
                                                } else {
1076
 
                                                        sc.Forward();
1077
 
                                                }
1078
 
                                        } else {
1079
 
                                                sc.SetState(SCE_PL_OPERATOR);
1080
 
                                                if (sc.chNext == '*')   // exponentiation
1081
 
                                                        sc.Forward();
1082
 
                                        }
1083
 
                                } else if (sc.ch == '%') {      // handle '%', hash
1084
 
                                        if (preferRE) {
1085
 
                                                sc.SetState(SCE_PL_HASH);
1086
 
                                                if (setHash.Contains(sc.chNext)) {
1087
 
                                                        sc.Forward();
1088
 
                                                } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1089
 
                                                        sc.Forward(2);
1090
 
                                                } else if (sc.chNext == '{') {
1091
 
                                                        sc.ForwardSetState(SCE_PL_OPERATOR);
1092
 
                                                } else {
1093
 
                                                        sc.ChangeState(SCE_PL_OPERATOR);
1094
 
                                                }
1095
 
                                        } else {
1096
 
                                                sc.SetState(SCE_PL_OPERATOR);
1097
 
                                        }
1098
 
                                } else if (sc.ch == '<') {      // handle '<', inputsymbol
1099
 
                                        if (preferRE) {
1100
 
                                                // forward scan
1101
 
                                                int i = inputsymbolScan(styler, sc.currentPos, endPos);
1102
 
                                                if (i > 0) {
1103
 
                                                        sc.SetState(SCE_PL_IDENTIFIER);
1104
 
                                                        sc.Forward(i);
1105
 
                                                } else {
1106
 
                                                        sc.SetState(SCE_PL_OPERATOR);
1107
 
                                                }
1108
 
                                        } else {
1109
 
                                                sc.SetState(SCE_PL_OPERATOR);
1110
 
                                        }
1111
 
                                } else {                        // handle '/', regexp
1112
 
                                        if (preferRE) {
1113
 
                                                sc.SetState(SCE_PL_REGEX);
1114
 
                                                Quote.New();
1115
 
                                                Quote.Open(sc.ch);
1116
 
                                        } else {                // / and // operators
1117
 
                                                sc.SetState(SCE_PL_OPERATOR);
1118
 
                                                if (sc.chNext == '/') {
1119
 
                                                        sc.Forward();
1120
 
                                                }
1121
 
                                        }
1122
 
                                }
1123
 
                        } else if (sc.ch == '='         // POD
1124
 
                                           && setPOD.Contains(sc.chNext)
1125
 
                                           && sc.atLineStart) {
1126
 
                                sc.SetState(SCE_PL_POD);
1127
 
                                backFlag = BACK_NONE;
1128
 
                        } else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) {  // extended '-' cases
1129
 
                                unsigned int bk = sc.currentPos;
1130
 
                                unsigned int fw = 2;
1131
 
                                if (setSingleCharOp.Contains(sc.chNext) &&      // file test operators
1132
 
                                        !setWord.Contains(sc.GetRelative(2))) {
1133
 
                                        sc.SetState(SCE_PL_WORD);
1134
 
                                } else {
1135
 
                                        // nominally a minus and bareword; find extent of bareword
1136
 
                                        while (setWord.Contains(sc.GetRelative(fw)))
1137
 
                                                fw++;
1138
 
                                        sc.SetState(SCE_PL_OPERATOR);
1139
 
                                }
1140
 
                                // force to bareword for hash key => or {variable literal} cases
1141
 
                                if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) {
1142
 
                                        sc.ChangeState(SCE_PL_IDENTIFIER);
1143
 
                                }
1144
 
                                backFlag = BACK_NONE;
1145
 
                        } else if (sc.ch == '(' && sc.currentPos > 0) { // '(' or subroutine prototype
1146
 
                                sc.Complete();
1147
 
                                if (styleCheckSubPrototype(styler, sc.currentPos - 1)) {
1148
 
                                        sc.SetState(SCE_PL_SUB_PROTOTYPE);
1149
 
                                        backFlag = BACK_NONE;
1150
 
                                } else {
1151
 
                                        sc.SetState(SCE_PL_OPERATOR);
1152
 
                                }
1153
 
                        } else if (setPerlOperator.Contains(sc.ch)) {   // operators
1154
 
                                sc.SetState(SCE_PL_OPERATOR);
1155
 
                                if (sc.Match('.', '.')) {       // .. and ...
1156
 
                                        sc.Forward();
1157
 
                                        if (sc.chNext == '.') sc.Forward();
1158
 
                                }
1159
 
                        } else if (sc.ch == 4 || sc.ch == 26) {         // ^D and ^Z ends valid perl source
1160
 
                                sc.SetState(SCE_PL_DATASECTION);
1161
 
                        } else {
1162
 
                                // keep colouring defaults
1163
 
                                sc.Complete();
1164
 
                        }
1165
 
                }
1166
 
        }
1167
 
        sc.Complete();
1168
 
}
1169
 
 
1170
 
static bool IsCommentLine(int line, Accessor &styler) {
1171
 
        int pos = styler.LineStart(line);
1172
 
        int eol_pos = styler.LineStart(line + 1) - 1;
1173
 
        for (int i = pos; i < eol_pos; i++) {
1174
 
                char ch = styler[i];
1175
 
                int style = styler.StyleAt(i);
1176
 
                if (ch == '#' && style == SCE_PL_COMMENTLINE)
1177
 
                        return true;
1178
 
                else if (!IsASpaceOrTab(ch))
1179
 
                        return false;
1180
 
        }
1181
 
        return false;
1182
 
}
1183
 
 
1184
 
static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[],
1185
 
                        Accessor &styler) {
1186
 
        bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
1187
 
        bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
1188
 
        // Custom folding of POD and packages
1189
 
 
1190
 
        // property fold.perl.pod 
1191
 
        //      Enable folding Pod blocks when using the Perl lexer. 
1192
 
        bool foldPOD = styler.GetPropertyInt("fold.perl.pod", 1) != 0;
1193
 
 
1194
 
        // property fold.perl.package 
1195
 
        //      Enable folding packages when using the Perl lexer. 
1196
 
        bool foldPackage = styler.GetPropertyInt("fold.perl.package", 1) != 0;
1197
 
 
1198
 
        unsigned int endPos = startPos + length;
1199
 
        int visibleChars = 0;
1200
 
        int lineCurrent = styler.GetLine(startPos);
1201
 
        int levelPrev = SC_FOLDLEVELBASE;
1202
 
        if (lineCurrent > 0)
1203
 
                levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
1204
 
        int levelCurrent = levelPrev;
1205
 
        char chNext = styler[startPos];
1206
 
        char chPrev = styler.SafeGetCharAt(startPos - 1);
1207
 
        int styleNext = styler.StyleAt(startPos);
1208
 
        // Used at end of line to determine if the line was a package definition
1209
 
        bool isPackageLine = false;
1210
 
        bool isPodHeading = false;
1211
 
        for (unsigned int i = startPos; i < endPos; i++) {
1212
 
                char ch = chNext;
1213
 
                chNext = styler.SafeGetCharAt(i + 1);
1214
 
                int style = styleNext;
1215
 
                styleNext = styler.StyleAt(i + 1);
1216
 
                bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1217
 
                bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0;
1218
 
                // Comment folding
1219
 
                if (foldComment && atEOL && IsCommentLine(lineCurrent, styler))
1220
 
                {
1221
 
                        if (!IsCommentLine(lineCurrent - 1, styler)
1222
 
                                && IsCommentLine(lineCurrent + 1, styler))
1223
 
                                levelCurrent++;
1224
 
                        else if (IsCommentLine(lineCurrent - 1, styler)
1225
 
                                         && !IsCommentLine(lineCurrent+1, styler))
1226
 
                                levelCurrent--;
1227
 
                }
1228
 
                if (style == SCE_PL_OPERATOR) {
1229
 
                        if (ch == '{') {
1230
 
                                levelCurrent++;
1231
 
                        } else if (ch == '}') {
1232
 
                                levelCurrent--;
1233
 
                        }
1234
 
                }
1235
 
                // Custom POD folding
1236
 
                if (foldPOD && atLineStart) {
1237
 
                        int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
1238
 
                        if (style == SCE_PL_POD) {
1239
 
                                if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
1240
 
                                        levelCurrent++;
1241
 
                                else if (styler.Match(i, "=cut"))
1242
 
                                        levelCurrent--;
1243
 
                                else if (styler.Match(i, "=head"))
1244
 
                                        isPodHeading = true;
1245
 
                        } else if (style == SCE_PL_DATASECTION) {
1246
 
                                if (ch == '=' && isascii(chNext) && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
1247
 
                                        levelCurrent++;
1248
 
                                else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
1249
 
                                        levelCurrent--;
1250
 
                                else if (styler.Match(i, "=head"))
1251
 
                                        isPodHeading = true;
1252
 
                                // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1253
 
                                // reset needed as level test is vs. SC_FOLDLEVELBASE
1254
 
                                else if (styler.Match(i, "__END__"))
1255
 
                                        levelCurrent = SC_FOLDLEVELBASE;
1256
 
                        }
1257
 
                }
1258
 
                // Custom package folding
1259
 
                if (foldPackage && atLineStart) {
1260
 
                        if (style == SCE_PL_WORD && styler.Match(i, "package")) {
1261
 
                                isPackageLine = true;
1262
 
                        }
1263
 
                }
1264
 
 
1265
 
                if (atEOL) {
1266
 
                        int lev = levelPrev;
1267
 
                        if (isPodHeading) {
1268
 
                                lev = levelPrev - 1;
1269
 
                                lev |= SC_FOLDLEVELHEADERFLAG;
1270
 
                                isPodHeading = false;
1271
 
                        }
1272
 
                        // Check if line was a package declaration
1273
 
                        // because packages need "special" treatment
1274
 
                        if (isPackageLine) {
1275
 
                                lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
1276
 
                                levelCurrent = SC_FOLDLEVELBASE + 1;
1277
 
                                isPackageLine = false;
1278
 
                        }
1279
 
                        lev |= levelCurrent << 16;
1280
 
                        if (visibleChars == 0 && foldCompact)
1281
 
                                lev |= SC_FOLDLEVELWHITEFLAG;
1282
 
                        if ((levelCurrent > levelPrev) && (visibleChars > 0))
1283
 
                                lev |= SC_FOLDLEVELHEADERFLAG;
1284
 
                        if (lev != styler.LevelAt(lineCurrent)) {
1285
 
                                styler.SetLevel(lineCurrent, lev);
1286
 
                        }
1287
 
                        lineCurrent++;
1288
 
                        levelPrev = levelCurrent;
1289
 
                        visibleChars = 0;
1290
 
                }
1291
 
                if (!isspacechar(ch))
1292
 
                        visibleChars++;
1293
 
                chPrev = ch;
1294
 
        }
1295
 
        // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1296
 
        int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1297
 
        styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1298
 
}
1299
 
 
1300
 
// Descriptions of the keyword lists used by the Perl lexer: a single
// "Keywords" list, followed by a null entry that terminates the array.
static const char * const perlWordListDesc[] = { "Keywords", 0 };
1304
 
 
1305
 
LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc, "perl", FoldPerlDoc, perlWordListDesc, 8);