~ubuntu-branches/ubuntu/hardy/codeblocks/hardy-backports

« back to all changes in this revision

Viewing changes to src/sdk/wxscintilla/src/scintilla/src/LexPerl.cxx

  • Committer: Bazaar Package Importer
  • Author(s): Michael Casadevall
  • Date: 2008-07-17 04:39:23 UTC
  • Revision ID: james.westby@ubuntu.com-20080717043923-gmsy5cwkdjswghkm
Tags: upstream-8.02
Import upstream version 8.02

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
// Scintilla source code edit control
 
2
/** @file LexPerl.cxx
 
3
 ** Lexer for subset of Perl.
 
4
 **/
 
5
// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
 
6
// Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
 
7
// The License.txt file describes the conditions under which this software may be distributed.
 
8
 
 
9
#include <stdlib.h>
 
10
#include <string.h>
 
11
#include <ctype.h>
 
12
#include <stdio.h>
 
13
#include <stdarg.h>
 
14
 
 
15
#include "Platform.h"
 
16
 
 
17
#include "PropSet.h"
 
18
#include "Accessor.h"
 
19
#include "KeyWords.h"
 
20
#include "Scintilla.h"
 
21
#include "SciLexer.h"
 
22
 
 
23
// Sub-states used while lexing a numeric literal.
// Order is significant: 1-4 cannot have a dot; 1-5 are numbers; 6-7 are strings.
#define PERLNUM_BINARY 1
#define PERLNUM_HEX 2
#define PERLNUM_OCTAL 3
#define PERLNUM_FLOAT 4     // actually exponent part
#define PERLNUM_DECIMAL 5
#define PERLNUM_VECTOR 6
#define PERLNUM_V_VECTOR 7
#define PERLNUM_BAD 8

// Lookback state for bareword disambiguation: whitespace/comments are
// insignificant, operators/keywords are needed for disambiguation.
#define BACK_NONE 0
#define BACK_OPERATOR 1
#define BACK_KEYWORD 2

// Size of the buffer allocated for a here-doc delimiter.
#define HERE_DELIM_MAX 256
 
37
 
 
38
// Returns true when ch is a carriage return or a line feed.
static inline bool isEOLChar(char ch) {
	return ch == '\n' || ch == '\r';
}
 
41
 
 
42
// Returns true when ch is one of the letters in the set
// "rwxoRWXOezsfdlpSbctugkTBMAC".
// NOTE(review): presumably these are the letters of Perl's -X file test
// operators; the caller is outside this view, so confirm there.
static bool isSingleCharOp(char ch) {
	// A NUL must be rejected explicitly: searching a string for its own
	// terminator (or for an empty needle) always succeeds, which would
	// wrongly report '\0' as a member of the set.
	if (ch == '\0')
		return false;
	return strchr("rwxoRWXOezsfdlpSbctugkTBMAC", ch) != NULL;
}
 
48
 
 
49
// Returns true when ch is one of the punctuation characters this lexer
// treats as an operator.
// '%', '*', '<' and '/' are not listed here: per the original author's
// note, those characters are already tested before this function is called.
static inline bool isPerlOperator(char ch) {
	switch (ch) {
	case '^': case '&': case '\\':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '>': case ',':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		return false;
	}
}
 
61
 
 
62
static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 
63
        char s[100];
 
64
    unsigned int i, len = end - start;
 
65
    if (len > 30) { len = 30; }
 
66
        for (i = 0; i < len; i++, start++) s[i] = styler[start];
 
67
    s[i] = '\0';
 
68
        return keywords.InList(s);
 
69
}
 
70
 
 
71
// Returns true when ch is none of: an alphanumeric character, '#', '$',
// '_' or '\''.
// NOTE(review): going by the name, this marks the end of a variable name;
// the callers are outside this view.
static inline bool isEndVar(char ch) {
	// <ctype.h> classifiers require a value representable as unsigned char
	// (or EOF); a negative plain char would be undefined behaviour.
	const unsigned char uch = ch;
	if (isalnum(uch))
		return false;
	return ch != '#' && ch != '$' && ch != '_' && ch != '\'';
}
 
75
 
 
76
 
 
77
// Returns true when ch is alphanumeric or an underscore.
// ColourisePerlDoc negates this to decide whether the character following
// s, m, q, y, tr, qq, qr, qw or qx can open a quote-like construct.
static inline bool isNonQuote(char ch) {
	// <ctype.h> classifiers require a value representable as unsigned char
	// (or EOF); a negative plain char would be undefined behaviour.
	const unsigned char uch = ch;
	return ch == '_' || isalnum(uch);
}
 
80
 
 
81
static inline char actualNumStyle(int numberStyle) {
 
82
    if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
 
83
        return SCE_PL_STRING;
 
84
    } else if (numberStyle == PERLNUM_BAD) {
 
85
        return SCE_PL_ERROR;
 
86
    }
 
87
    return SCE_PL_NUMBER;
 
88
}
 
89
 
 
90
static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
 
91
        if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
 
92
                return false;
 
93
        }
 
94
        while (*val) {
 
95
                if (*val != styler[pos++]) {
 
96
                        return false;
 
97
                }
 
98
                val++;
 
99
        }
 
100
        return true;
 
101
}
 
102
 
 
103
// Returns the closing delimiter that pairs with an opening bracket
// ('(' -> ')', '[' -> ']', '{' -> '}', '<' -> '>').
// Any other character is returned unchanged, i.e. it closes itself.
static char opposite(char ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
 
114
 
 
115
static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
 
116
                             WordList *keywordlists[], Accessor &styler) {
 
117
 
 
118
        // Lexer for perl often has to backtrack to start of current style to determine
 
119
        // which characters are being used as quotes, how deeply nested is the
 
120
        // start position and what the termination string is for here documents
 
121
 
 
122
        WordList &keywords = *keywordlists[0];
 
123
 
 
124
        class HereDocCls {
 
125
        public:
 
126
                int State;              // 0: '<<' encountered
 
127
                // 1: collect the delimiter
 
128
                // 2: here doc text (lines after the delimiter)
 
129
                char Quote;             // the char after '<<'
 
130
                bool Quoted;            // true if Quote in ('\'','"','`')
 
131
                int DelimiterLength;    // strlen(Delimiter)
 
132
                char *Delimiter;        // the Delimiter, 256: sizeof PL_tokenbuf
 
133
                HereDocCls() {
 
134
                        State = 0;
 
135
            Quote = 0;
 
136
            Quoted = false;
 
137
                        DelimiterLength = 0;
 
138
                        Delimiter = new char[HERE_DELIM_MAX];
 
139
                        Delimiter[0] = '\0';
 
140
                }
 
141
                ~HereDocCls() {
 
142
                        delete []Delimiter;
 
143
                }
 
144
        };
 
145
        HereDocCls HereDoc;     // TODO: FIFO for stacked here-docs
 
146
 
 
147
        class QuoteCls {
 
148
                public:
 
149
                int  Rep;
 
150
                int  Count;
 
151
                char Up;
 
152
                char Down;
 
153
                QuoteCls() {
 
154
                        this->New(1);
 
155
                }
 
156
                void New(int r) {
 
157
                        Rep   = r;
 
158
                        Count = 0;
 
159
                        Up    = '\0';
 
160
                        Down  = '\0';
 
161
                }
 
162
                void Open(char u) {
 
163
                        Count++;
 
164
                        Up    = u;
 
165
                        Down  = opposite(Up);
 
166
                }
 
167
        };
 
168
        QuoteCls Quote;
 
169
 
 
170
        int state = initStyle;
 
171
        char numState = PERLNUM_DECIMAL;
 
172
        int dotCount = 0;
 
173
        unsigned int lengthDoc = startPos + length;
 
174
        //int sookedpos = 0; // these have no apparent use, see POD state
 
175
        //char sooked[100];
 
176
        //sooked[sookedpos] = '\0';
 
177
 
 
178
        // If in a long distance lexical state, seek to the beginning to find quote characters
 
179
        // Perl strings can be multi-line with embedded newlines, so backtrack.
 
180
        // Perl numbers have additional state during lexing, so backtrack too.
 
181
        if (state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX) {
 
182
                while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_HERE_DELIM)) {
 
183
                        startPos--;
 
184
                }
 
185
                startPos = styler.LineStart(styler.GetLine(startPos));
 
186
                state = styler.StyleAt(startPos - 1);
 
187
        }
 
188
        if ( state == SCE_PL_STRING_Q
 
189
        || state == SCE_PL_STRING_QQ
 
190
        || state == SCE_PL_STRING_QX
 
191
        || state == SCE_PL_STRING_QR
 
192
        || state == SCE_PL_STRING_QW
 
193
        || state == SCE_PL_REGEX
 
194
        || state == SCE_PL_REGSUBST
 
195
        || state == SCE_PL_STRING
 
196
        || state == SCE_PL_BACKTICKS
 
197
        || state == SCE_PL_CHARACTER
 
198
        || state == SCE_PL_NUMBER
 
199
        || state == SCE_PL_IDENTIFIER
 
200
    || state == SCE_PL_ERROR
 
201
        ) {
 
202
                while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) {
 
203
                        startPos--;
 
204
                }
 
205
                state = SCE_PL_DEFAULT;
 
206
        }
 
207
 
 
208
    // lookback at start of lexing to set proper state for backflag
 
209
    // after this, they are updated when elements are lexed
 
210
    int backflag = BACK_NONE;
 
211
    unsigned int backPos = startPos;
 
212
    if (backPos > 0) {
 
213
        backPos--;
 
214
        int sty = SCE_PL_DEFAULT;
 
215
        while ((backPos > 0) && (sty = styler.StyleAt(backPos),
 
216
               sty == SCE_PL_DEFAULT || sty == SCE_PL_COMMENTLINE))
 
217
            backPos--;
 
218
        if (sty == SCE_PL_OPERATOR)
 
219
            backflag = BACK_OPERATOR;
 
220
        else if (sty == SCE_PL_WORD)
 
221
            backflag = BACK_KEYWORD;
 
222
    }
 
223
 
 
224
        styler.StartAt(startPos);
 
225
        char chPrev = styler.SafeGetCharAt(startPos - 1);
 
226
        if (startPos == 0)
 
227
                chPrev = '\n';
 
228
        char chNext = styler[startPos];
 
229
        styler.StartSegment(startPos);
 
230
 
 
231
        for (unsigned int i = startPos; i < lengthDoc; i++) {
 
232
                char ch = chNext;
 
233
                // if the current character is not consumed due to the completion of an
 
234
                // earlier style, lexing can be restarted via a simple goto
 
235
        restartLexer:
 
236
                chNext = styler.SafeGetCharAt(i + 1);
 
237
                char chNext2 = styler.SafeGetCharAt(i + 2);
 
238
 
 
239
                if (styler.IsLeadByte(ch)) {
 
240
                        chNext = styler.SafeGetCharAt(i + 2);
 
241
                        chPrev = ' ';
 
242
                        i += 1;
 
243
                        continue;
 
244
                }
 
245
                if ((chPrev == '\r' && ch == '\n')) {   // skip on DOS/Windows
 
246
                        styler.ColourTo(i, state);
 
247
                        chPrev = ch;
 
248
                        continue;
 
249
                }
 
250
 
 
251
                if (HereDoc.State == 1 && isEOLChar(ch)) {
 
252
                        // Begin of here-doc (the line after the here-doc delimiter):
 
253
                        // Lexically, the here-doc starts from the next line after the >>, but the
 
254
                        // first line of here-doc seem to follow the style of the last EOL sequence
 
255
                        HereDoc.State = 2;
 
256
                        if (HereDoc.Quoted) {
 
257
                                if (state == SCE_PL_HERE_DELIM) {
 
258
                                        // Missing quote at end of string! We are stricter than perl.
 
259
                                        // Colour here-doc anyway while marking this bit as an error.
 
260
                                        state = SCE_PL_ERROR;
 
261
                                }
 
262
                                styler.ColourTo(i - 1, state);
 
263
                                switch (HereDoc.Quote) {
 
264
                                case '\'':
 
265
                                        state = SCE_PL_HERE_Q ;
 
266
                                        break;
 
267
                                case '"':
 
268
                                        state = SCE_PL_HERE_QQ;
 
269
                                        break;
 
270
                                case '`':
 
271
                                        state = SCE_PL_HERE_QX;
 
272
                                        break;
 
273
                                }
 
274
                        } else {
 
275
                                styler.ColourTo(i - 1, state);
 
276
                                switch (HereDoc.Quote) {
 
277
                                case '\\':
 
278
                                        state = SCE_PL_HERE_Q ;
 
279
                                        break;
 
280
                                default :
 
281
                                        state = SCE_PL_HERE_QQ;
 
282
                                }
 
283
                        }
 
284
                }
 
285
 
 
286
                if (state == SCE_PL_DEFAULT) {
 
287
                        if (isdigit(ch) || (isdigit(chNext) &&
 
288
                                (ch == '.' || ch == 'v'))) {
 
289
                                state = SCE_PL_NUMBER;
 
290
                backflag = BACK_NONE;
 
291
                                numState = PERLNUM_DECIMAL;
 
292
                                dotCount = 0;
 
293
                                if (ch == '0') {        // hex,bin,octal
 
294
                                        if (chNext == 'x') {
 
295
                                                numState = PERLNUM_HEX;
 
296
                                        } else if (chNext == 'b') {
 
297
                        numState = PERLNUM_BINARY;
 
298
                    } else if (isdigit(chNext)) {
 
299
                        numState = PERLNUM_OCTAL;
 
300
                    }
 
301
                    if (numState != PERLNUM_DECIMAL) {
 
302
                                                i++;
 
303
                                                ch = chNext;
 
304
                                                chNext = chNext2;
 
305
                    }
 
306
                                } else if (ch == 'v') { // vector
 
307
                                        numState = PERLNUM_V_VECTOR;
 
308
                                }
 
309
                        } else if (iswordstart(ch)) {
 
310
                // if immediately prefixed by '::', always a bareword
 
311
                state = SCE_PL_WORD;
 
312
                if (chPrev == ':' && styler.SafeGetCharAt(i - 2) == ':') {
 
313
                    state = SCE_PL_IDENTIFIER;
 
314
                }
 
315
                unsigned int kw = i + 1;
 
316
                // first check for possible quote-like delimiter
 
317
                                if (ch == 's' && !isNonQuote(chNext)) {
 
318
                                        state = SCE_PL_REGSUBST;
 
319
                                        Quote.New(2);
 
320
                                } else if (ch == 'm' && !isNonQuote(chNext)) {
 
321
                                        state = SCE_PL_REGEX;
 
322
                                        Quote.New(1);
 
323
                                } else if (ch == 'q' && !isNonQuote(chNext)) {
 
324
                                        state = SCE_PL_STRING_Q;
 
325
                                        Quote.New(1);
 
326
                                } else if (ch == 'y' && !isNonQuote(chNext)) {
 
327
                                        state = SCE_PL_REGSUBST;
 
328
                                        Quote.New(2);
 
329
                                } else if (ch == 't' && chNext == 'r' && !isNonQuote(chNext2)) {
 
330
                                        state = SCE_PL_REGSUBST;
 
331
                                        Quote.New(2);
 
332
                    kw++;
 
333
                                } else if (ch == 'q' && (chNext == 'q' || chNext == 'r' || chNext == 'w' || chNext == 'x') && !isNonQuote(chNext2)) {
 
334
                                        if      (chNext == 'q') state = SCE_PL_STRING_QQ;
 
335
                                        else if (chNext == 'x') state = SCE_PL_STRING_QX;
 
336
                                        else if (chNext == 'r') state = SCE_PL_STRING_QR;
 
337
                                        else if (chNext == 'w') state = SCE_PL_STRING_QW;
 
338
                                        Quote.New(1);
 
339
                    kw++;
 
340
                                } else if (ch == 'x' && (chNext == '=' ||       // repetition
 
341
                           (chNext != '_' && !isalnum(chNext)) ||
 
342
                           (isdigit(chPrev) && isdigit(chNext)))) {
 
343
                    state = SCE_PL_OPERATOR;
 
344
                }
 
345
                // if potentially a keyword, scan forward and grab word, then check
 
346
                // if it's really one; if yes, disambiguation test is performed
 
347
                // otherwise it is always a bareword and we skip a lot of scanning
 
348
                // note: keywords assumed to be limited to [_a-zA-Z] only
 
349
                if (state == SCE_PL_WORD) {
 
350
                    while (iswordstart(styler.SafeGetCharAt(kw))) kw++;
 
351
                    if (!isPerlKeyword(styler.GetStartSegment(), kw, keywords, styler)) {
 
352
                        state = SCE_PL_IDENTIFIER;
 
353
                    }
 
354
                }
 
355
                // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
 
356
                // for quote-like delimiters/keywords, attempt to disambiguate
 
357
                // to select for bareword, change state -> SCE_PL_IDENTIFIER
 
358
                if (state != SCE_PL_IDENTIFIER && i > 0) {
 
359
                    unsigned int j = i;
 
360
                    bool moreback = false;      // true if passed newline/comments
 
361
                    bool brace = false;         // true if opening brace found
 
362
                    char ch2;
 
363
                    // first look backwards past whitespace/comments for EOLs
 
364
                    // if BACK_NONE, neither operator nor keyword, so skip test
 
365
                    if (backflag != BACK_NONE) {
 
366
                        while (--j > backPos) {
 
367
                            if (isEOLChar(styler.SafeGetCharAt(j)))
 
368
                                moreback = true;
 
369
                        }
 
370
                        ch2 = styler.SafeGetCharAt(j);
 
371
                        if (ch2 == '{' && !moreback) {
 
372
                            // {bareword: possible variable spec
 
373
                            brace = true;
 
374
                        } else if ((ch2 == '&')
 
375
                                // &bareword: subroutine call
 
376
                                || (ch2 == '>' && styler.SafeGetCharAt(j - 1) == '-')
 
377
                                // ->bareword: part of variable spec
 
378
                                || (ch2 == 'b' && styler.Match(j - 2, "su"))) {
 
379
                                // sub bareword: subroutine declaration
 
380
                                // (implied BACK_KEYWORD, no keywords end in 'sub'!)
 
381
                            state = SCE_PL_IDENTIFIER;
 
382
                        }
 
383
                        // if status still ambiguous, look forward after word past
 
384
                        // tabs/spaces only; if ch2 isn't one of '[{(,' it can never
 
385
                        // match anything, so skip the whole thing
 
386
                        j = kw;
 
387
                        if (state != SCE_PL_IDENTIFIER
 
388
                            && (ch2 == '{' || ch2 == '(' || ch2 == '['|| ch2 == ',')
 
389
                            && kw < lengthDoc) {
 
390
                            while (ch2 = styler.SafeGetCharAt(j),
 
391
                                   (ch2 == ' ' || ch2 == '\t') && j < lengthDoc) {
 
392
                                j++;
 
393
                            }
 
394
                            if ((ch2 == '}' && brace)
 
395
                             // {bareword}: variable spec
 
396
                             || (ch2 == '=' && styler.SafeGetCharAt(j + 1) == '>')) {
 
397
                             // [{(, bareword=>: hash literal
 
398
                                state = SCE_PL_IDENTIFIER;
 
399
                            }
 
400
                        }
 
401
                    }
 
402
                }
 
403
                backflag = BACK_NONE;
 
404
                // an identifier or bareword
 
405
                if (state == SCE_PL_IDENTIFIER) {
 
406
                    if ((!iswordchar(chNext) && chNext != '\'')
 
407
                        || (chNext == '.' && chNext2 == '.')) {
 
408
                        // We need that if length of word == 1!
 
409
                        // This test is copied from the SCE_PL_WORD handler.
 
410
                        styler.ColourTo(i, SCE_PL_IDENTIFIER);
 
411
                        state = SCE_PL_DEFAULT;
 
412
                    }
 
413
                // a keyword
 
414
                } else if (state == SCE_PL_WORD) {
 
415
                    i = kw - 1;
 
416
                    if (ch == '_' && chNext == '_' &&
 
417
                        (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__")
 
418
                      || isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__"))) {
 
419
                        styler.ColourTo(i, SCE_PL_DATASECTION);
 
420
                        state = SCE_PL_DATASECTION;
 
421
                    } else {
 
422
                        styler.ColourTo(i, SCE_PL_WORD);
 
423
                        state = SCE_PL_DEFAULT;
 
424
                        backflag = BACK_KEYWORD;
 
425
                        backPos = i;
 
426
                    }
 
427
                    ch = styler.SafeGetCharAt(i);
 
428
                    chNext = styler.SafeGetCharAt(i + 1);
 
429
                // a repetition operator 'x'
 
430
                } else if (state == SCE_PL_OPERATOR) {
 
431
                    styler.ColourTo(i, SCE_PL_OPERATOR);
 
432
                    state = SCE_PL_DEFAULT;
 
433
                // quote-like delimiter, skip one char if double-char delimiter
 
434
                } else {
 
435
                    i = kw - 1;
 
436
                    chNext = styler.SafeGetCharAt(i + 1);
 
437
                }
 
438
                        } else if (ch == '#') {
 
439
                                state = SCE_PL_COMMENTLINE;
 
440
                        } else if (ch == '\"') {
 
441
                                state = SCE_PL_STRING;
 
442
                                Quote.New(1);
 
443
                                Quote.Open(ch);
 
444
                backflag = BACK_NONE;
 
445
                        } else if (ch == '\'') {
 
446
                                if (chPrev == '&') {
 
447
                                        // Archaic call
 
448
                                        styler.ColourTo(i, state);
 
449
                                } else {
 
450
                                        state = SCE_PL_CHARACTER;
 
451
                                        Quote.New(1);
 
452
                                        Quote.Open(ch);
 
453
                                }
 
454
                backflag = BACK_NONE;
 
455
                        } else if (ch == '`') {
 
456
                                state = SCE_PL_BACKTICKS;
 
457
                                Quote.New(1);
 
458
                                Quote.Open(ch);
 
459
                backflag = BACK_NONE;
 
460
                        } else if (ch == '$') {
 
461
                                if ((chNext == '{') || isspacechar(chNext)) {
 
462
                                        styler.ColourTo(i, SCE_PL_SCALAR);
 
463
                                } else {
 
464
                                        state = SCE_PL_SCALAR;
 
465
                                        if (chNext == '`' && chNext2 == '`') {
 
466
                                                i += 2;
 
467
                                                ch = styler.SafeGetCharAt(i);
 
468
                                                chNext = styler.SafeGetCharAt(i + 1);
 
469
                                        } else {
 
470
                                                i++;
 
471
                                                ch = chNext;
 
472
                                                chNext = chNext2;
 
473
                                        }
 
474
                                }
 
475
                backflag = BACK_NONE;
 
476
                        } else if (ch == '@') {
 
477
                                if (isalpha(chNext) || chNext == '#' || chNext == '$'
 
478
                                        || chNext == '_' || chNext == '+' || chNext == '-') {
 
479
                                        state = SCE_PL_ARRAY;
 
480
                                } else if (chNext != '{' && chNext != '[') {
 
481
                                        styler.ColourTo(i, SCE_PL_ARRAY);
 
482
                                } else {
 
483
                                        styler.ColourTo(i, SCE_PL_ARRAY);
 
484
                                }
 
485
                backflag = BACK_NONE;
 
486
                        } else if (ch == '%') {
 
487
                                if (isalpha(chNext) || chNext == '#' || chNext == '$'
 
488
                    || chNext == '_' || chNext == '!' || chNext == '^') {
 
489
                                        state = SCE_PL_HASH;
 
490
                    i++;
 
491
                    ch = chNext;
 
492
                    chNext = chNext2;
 
493
                                } else if (chNext == '{') {
 
494
                                        styler.ColourTo(i, SCE_PL_HASH);
 
495
                                } else {
 
496
                                        styler.ColourTo(i, SCE_PL_OPERATOR);
 
497
                                }
 
498
                backflag = BACK_NONE;
 
499
                        } else if (ch == '*') {
 
500
                char strch[2];
 
501
                strch[0] = chNext;
 
502
                strch[1] = '\0';
 
503
                                if (isalpha(chNext) || chNext == '_' ||
 
504
                    NULL != strstr("^/|,\\\";#%^:?<>)[]", strch)) {
 
505
                                        state = SCE_PL_SYMBOLTABLE;
 
506
                    i++;
 
507
                    ch = chNext;
 
508
                    chNext = chNext2;
 
509
                                } else if (chNext == '{') {
 
510
                                        styler.ColourTo(i, SCE_PL_SYMBOLTABLE);
 
511
                                } else {
 
512
                                        if (chNext == '*') {    // exponentiation
 
513
                                                i++;
 
514
                                                ch = chNext;
 
515
                                                chNext = chNext2;
 
516
                                        }
 
517
                                        styler.ColourTo(i, SCE_PL_OPERATOR);
 
518
                                }
 
519
                backflag = BACK_NONE;
 
520
                        } else if (ch == '/' || (ch == '<' && chNext == '<')) {
 
521
                                // Explicit backward peeking to set a consistent preferRE for
 
522
                                // any slash found, so no longer need to track preferRE state.
 
523
                                // Find first previous significant lexed element and interpret.
 
524
                // Test for HERE doc start '<<' shares this code, helps to
 
525
                // determine if it should be an operator.
 
526
                                bool preferRE = false;
 
527
                bool isHereDoc = (ch == '<');
 
528
                bool hereDocSpace = false;      // these are for corner case:
 
529
                bool hereDocScalar = false;     // SCALAR [whitespace] '<<'
 
530
                                unsigned int bk = (i > 0)? i - 1: 0;
 
531
                                char bkch;
 
532
                                styler.Flush();
 
533
                if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
 
534
                    hereDocSpace = true;
 
535
                                while ((bk > 0) && (styler.StyleAt(bk) == SCE_PL_DEFAULT ||
 
536
                                        styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) {
 
537
                                        bk--;
 
538
                                }
 
539
                                if (bk == 0) {
 
540
                                        // position 0 won't really be checked; rarely happens
 
541
                                        // hard to fix due to an unsigned index i
 
542
                                        preferRE = true;
 
543
                                } else {
 
544
                                        int bkstyle = styler.StyleAt(bk);
 
545
                                        bkch = styler.SafeGetCharAt(bk);
 
546
                                        switch(bkstyle) {
 
547
                                        case SCE_PL_OPERATOR:
 
548
                                                preferRE = true;
 
549
                                                if (bkch == ')' || bkch == ']') {
 
550
                                                        preferRE = false;
 
551
                                                } else if (bkch == '}') {
 
552
                                                        // backtrack further, count balanced brace pairs
 
553
                                                        // if a brace pair found, see if it's a variable
 
554
                                                        int braceCount = 1;
 
555
                                                        while (--bk > 0) {
 
556
                                                                bkstyle = styler.StyleAt(bk);
 
557
                                                                if (bkstyle == SCE_PL_OPERATOR) {
 
558
                                                                        bkch = styler.SafeGetCharAt(bk);
 
559
                                                                        if (bkch == ';') {      // early out
 
560
                                                                                break;
 
561
                                                                        } else if (bkch == '}') {
 
562
                                                                                braceCount++;
 
563
                                                                        } else if (bkch == '{') {
 
564
                                                                                if (--braceCount == 0)
 
565
                                                                                        break;
 
566
                                                                        }
 
567
                                                                }
 
568
                                                        }
 
569
                                                        if (bk == 0) {
 
570
                                                                // at beginning, true
 
571
                                                        } else if (braceCount == 0) {
 
572
                                                                // balanced { found, bk>0, skip more whitespace
 
573
                                                                if (styler.StyleAt(--bk) == SCE_PL_DEFAULT) {
 
574
                                                                        while (bk > 0) {
 
575
                                                                                bkstyle = styler.StyleAt(--bk);
 
576
                                                                                if (bkstyle != SCE_PL_DEFAULT)
 
577
                                                                                        break;
 
578
                                                                        }
 
579
                                                                }
 
580
                                                                bkstyle = styler.StyleAt(bk);
 
581
                                                                if (bkstyle == SCE_PL_SCALAR
 
582
                                                                 || bkstyle == SCE_PL_ARRAY
 
583
                                                                 || bkstyle == SCE_PL_HASH
 
584
                                                                 || bkstyle == SCE_PL_SYMBOLTABLE
 
585
                                                                 || bkstyle == SCE_PL_OPERATOR) {
 
586
                                                                        preferRE = false;
 
587
                                                                }
 
588
                                                        }
 
589
                                                }
 
590
                                                break;
 
591
                                        case SCE_PL_IDENTIFIER:
 
592
                                                preferRE = true;
 
593
                                                if (bkch == '>') {      // inputsymbol
 
594
                                                        preferRE = false;
 
595
                                                        break;
 
596
                                                }
 
597
                                                // backtrack to find "->" or "::" before identifier
 
598
                                                while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
 
599
                                                        bk--;
 
600
                                                }
 
601
                                                while (bk > 0) {
 
602
                                                        bkstyle = styler.StyleAt(bk);
 
603
                                                        if (bkstyle == SCE_PL_DEFAULT ||
 
604
                                                            bkstyle == SCE_PL_COMMENTLINE) {
 
605
                                                        } else if (bkstyle == SCE_PL_OPERATOR) {
 
606
                                                                // gcc 3.2.3 bloats if more compact form used
 
607
                                                                bkch = styler.SafeGetCharAt(bk);
 
608
                                                                if (bkch == '>') { // "->"
 
609
                                                                        if (styler.SafeGetCharAt(bk - 1) == '-') {
 
610
                                                                                preferRE = false;
 
611
                                                                                break;
 
612
                                                                        }
 
613
                                                                } else if (bkch == ':') { // "::"
 
614
                                                                        if (styler.SafeGetCharAt(bk - 1) == ':') {
 
615
                                                                                preferRE = false;
 
616
                                                                                break;
 
617
                                                                        }
 
618
                                                                }
 
619
                                                        } else {// bare identifier, usually a function call but Perl
 
620
                                                                // optimizes them as pseudo-constants, then the next
 
621
                                                                // '/' will be a divide; favour divide over regex
 
622
                                                                // if there is a whitespace after the '/'
 
623
                                                                if (isspacechar(chNext)) {
 
624
                                                                        preferRE = false;
 
625
                                                                }
 
626
                                                                break;
 
627
                                                        }
 
628
                                                        bk--;
 
629
                                                }
 
630
                                                break;
 
631
                    case SCE_PL_SCALAR:     // for $var<< case
 
632
                        hereDocScalar = true;
 
633
                        break;
 
634
                                        // other styles use the default, preferRE=false
 
635
                                        case SCE_PL_WORD:
 
636
                                        case SCE_PL_POD:
 
637
                                        case SCE_PL_POD_VERB:
 
638
                                        case SCE_PL_HERE_Q:
 
639
                                        case SCE_PL_HERE_QQ:
 
640
                                        case SCE_PL_HERE_QX:
 
641
                                                preferRE = true;
 
642
                                                break;
 
643
                                        }
 
644
                                }
 
645
                if (isHereDoc) {    // handle HERE doc
 
646
                    // if SCALAR whitespace '<<', *always* a HERE doc
 
647
                    if (preferRE || (hereDocSpace && hereDocScalar)) {
 
648
                        state = SCE_PL_HERE_DELIM;
 
649
                        HereDoc.State = 0;
 
650
                    } else {        // << operator
 
651
                                                i++;
 
652
                                                ch = chNext;
 
653
                                                chNext = chNext2;
 
654
                        styler.ColourTo(i, SCE_PL_OPERATOR);
 
655
                    }
 
656
                } else {            // handle regexp
 
657
                    if (preferRE) {
 
658
                        state = SCE_PL_REGEX;
 
659
                        Quote.New(1);
 
660
                        Quote.Open(ch);
 
661
                    } else {        // / operator
 
662
                        styler.ColourTo(i, SCE_PL_OPERATOR);
 
663
                    }
 
664
                }
 
665
                backflag = BACK_NONE;
 
666
                        } else if (ch == '<') {
 
667
                                // looks forward for matching > on same line
 
668
                                unsigned int fw = i + 1;
 
669
                                while (fw < lengthDoc) {
 
670
                                        char fwch = styler.SafeGetCharAt(fw);
 
671
                                        if (fwch == ' ') {
 
672
                                                if (styler.SafeGetCharAt(fw-1) != '\\' ||
 
673
                                                    styler.SafeGetCharAt(fw-2) != '\\')
 
674
                                                break;
 
675
                                        } else if (isEOLChar(fwch) || isspacechar(fwch)) {
 
676
                                                break;
 
677
                                        } else if (fwch == '>') {
 
678
                                                if ((fw - i) == 2 &&    // '<=>' case
 
679
                                                    styler.SafeGetCharAt(fw-1) == '=') {
 
680
                                                        styler.ColourTo(fw, SCE_PL_OPERATOR);
 
681
                                                } else {
 
682
                                                        styler.ColourTo(fw, SCE_PL_IDENTIFIER);
 
683
                                                }
 
684
                                                i = fw;
 
685
                                                ch = fwch;
 
686
                                                chNext = styler.SafeGetCharAt(i+1);
 
687
                                        }
 
688
                                        fw++;
 
689
                                }
 
690
                                styler.ColourTo(i, SCE_PL_OPERATOR);
 
691
                backflag = BACK_NONE;
 
692
                        } else if (ch == '='    // POD
 
693
                                   && isalpha(chNext)
 
694
                                   && (isEOLChar(chPrev))) {
 
695
                                state = SCE_PL_POD;
 
696
                backflag = BACK_NONE;
 
697
                                //sookedpos = 0;
 
698
                                //sooked[sookedpos] = '\0';
 
699
                        } else if (ch == '-'    // file test operators
 
700
                                   && isSingleCharOp(chNext)
 
701
                                   && !isalnum((chNext2 = styler.SafeGetCharAt(i+2)))) {
 
702
                                styler.ColourTo(i + 1, SCE_PL_WORD);
 
703
                                state = SCE_PL_DEFAULT;
 
704
                                i++;
 
705
                                ch = chNext;
 
706
                                chNext = chNext2;
 
707
                backflag = BACK_NONE;
 
708
                        } else if (isPerlOperator(ch)) {
 
709
                                if (ch == '.' && chNext == '.') { // .. and ...
 
710
                                        i++;
 
711
                                        if (chNext2 == '.') { i++; }
 
712
                                        state = SCE_PL_DEFAULT;
 
713
                                        ch = styler.SafeGetCharAt(i);
 
714
                                        chNext = styler.SafeGetCharAt(i + 1);
 
715
                                }
 
716
                                styler.ColourTo(i, SCE_PL_OPERATOR);
 
717
                backflag = BACK_OPERATOR;
 
718
                backPos = i;
 
719
                        } else {
 
720
                                // keep colouring defaults to make restart easier
 
721
                                styler.ColourTo(i, SCE_PL_DEFAULT);
 
722
                        }
 
723
                } else if (state == SCE_PL_NUMBER) {
 
724
                        if (ch == '.') {
 
725
                                if (chNext == '.') {
 
726
                                        // double dot is always an operator
 
727
                                        goto numAtEnd;
 
728
                                } else if (numState <= PERLNUM_FLOAT) {
 
729
                                        // non-decimal number or float exponent, consume next dot
 
730
                                        styler.ColourTo(i - 1, SCE_PL_NUMBER);
 
731
                                        styler.ColourTo(i, SCE_PL_OPERATOR);
 
732
                                        state = SCE_PL_DEFAULT;
 
733
                                } else { // decimal or vectors allows dots
 
734
                                        dotCount++;
 
735
                                        if (numState == PERLNUM_DECIMAL) {
 
736
                                                if (dotCount > 1) {
 
737
                                                        if (isdigit(chNext)) { // really a vector
 
738
                                                                numState = PERLNUM_VECTOR;
 
739
                                                        } else  // number then dot
 
740
                                                                goto numAtEnd;
 
741
                                                }
 
742
                                        } else { // vectors
 
743
                                                if (!isdigit(chNext))   // vector then dot
 
744
                                                        goto numAtEnd;
 
745
                                        }
 
746
                                }
 
747
                        } else if (ch == '_' && numState == PERLNUM_DECIMAL) {
 
748
                                if (!isdigit(chNext)) {
 
749
                                        goto numAtEnd;
 
750
                                }
 
751
                        } else if (isalnum(ch)) {
 
752
                                if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
 
753
                                        if (isalpha(ch)) {
 
754
                                                if (dotCount == 0) { // change to word
 
755
                                                        state = SCE_PL_IDENTIFIER;
 
756
                                                } else { // vector then word
 
757
                                                        goto numAtEnd;
 
758
                                                }
 
759
                                        }
 
760
                                } else if (numState == PERLNUM_DECIMAL) {
 
761
                                        if (ch == 'E' || ch == 'e') { // exponent
 
762
                                                numState = PERLNUM_FLOAT;
 
763
                                                if (chNext == '+' || chNext == '-') {
 
764
                                                        i++;
 
765
                                                        ch = chNext;
 
766
                                                        chNext = chNext2;
 
767
                                                }
 
768
                                        } else if (!isdigit(ch)) { // number then word
 
769
                                                goto numAtEnd;
 
770
                                        }
 
771
                                } else if (numState == PERLNUM_FLOAT) {
 
772
                                        if (!isdigit(ch)) { // float then word
 
773
                                                goto numAtEnd;
 
774
                                        }
 
775
                                } else if (numState == PERLNUM_OCTAL) {
 
776
                    if (!isdigit(ch))
 
777
                        goto numAtEnd;
 
778
                    else if (ch > '7')
 
779
                        numState = PERLNUM_BAD;
 
780
                } else if (numState == PERLNUM_BINARY) {
 
781
                    if (!isdigit(ch))
 
782
                        goto numAtEnd;
 
783
                    else if (ch > '1')
 
784
                        numState = PERLNUM_BAD;
 
785
                } else if (numState == PERLNUM_HEX) {
 
786
                    int ch2 = toupper(ch);
 
787
                    if (!isdigit(ch) && !(ch2 >= 'A' && ch2 <= 'F'))
 
788
                        goto numAtEnd;
 
789
                                } else {//(numState == PERLNUM_BAD) {
 
790
                    if (!isdigit(ch))
 
791
                        goto numAtEnd;
 
792
                }
 
793
                        } else {
 
794
                                // complete current number or vector
 
795
                        numAtEnd:
 
796
                                styler.ColourTo(i - 1, actualNumStyle(numState));
 
797
                                state = SCE_PL_DEFAULT;
 
798
                                goto restartLexer;
 
799
                        }
 
800
                } else if (state == SCE_PL_IDENTIFIER) {
 
801
                        if (!iswordstart(chNext) && chNext != '\'') {
 
802
                                styler.ColourTo(i, SCE_PL_IDENTIFIER);
 
803
                                state = SCE_PL_DEFAULT;
 
804
                                ch = ' ';
 
805
                        }
 
806
                } else {
 
807
                        if (state == SCE_PL_COMMENTLINE) {
 
808
                                if (isEOLChar(ch)) {
 
809
                                        styler.ColourTo(i - 1, state);
 
810
                                        state = SCE_PL_DEFAULT;
 
811
                                        goto restartLexer;
 
812
                                } else if (isEOLChar(chNext)) {
 
813
                                        styler.ColourTo(i, state);
 
814
                                        state = SCE_PL_DEFAULT;
 
815
                                }
 
816
                        } else if (state == SCE_PL_HERE_DELIM) {
 
817
                                //
 
818
                                // From perldata.pod:
 
819
                                // ------------------
 
820
                                // A line-oriented form of quoting is based on the shell ``here-doc''
 
821
                                // syntax.
 
822
                                // Following a << you specify a string to terminate the quoted material,
 
823
                                // and all lines following the current line down to the terminating
 
824
                                // string are the value of the item.
 
825
                                // The terminating string may be either an identifier (a word),
 
826
                                // or some quoted text.
 
827
                                // If quoted, the type of quotes you use determines the treatment of
 
828
                                // the text, just as in regular quoting.
 
829
                                // An unquoted identifier works like double quotes.
 
830
                                // There must be no space between the << and the identifier.
 
831
                                // (If you put a space it will be treated as a null identifier,
 
832
                                // which is valid, and matches the first empty line.)
 
833
                                // (This is deprecated, -w warns of this syntax)
 
834
                                // The terminating string must appear by itself (unquoted and with no
 
835
                                // surrounding whitespace) on the terminating line.
 
836
                                //
 
837
                                // From Bash info:
 
838
                                // ---------------
 
839
                                // Specifier format is: <<[-]WORD
 
840
                                // Optional '-' is for removal of leading tabs from here-doc.
 
841
                                // Whitespace acceptable after <<[-] operator.
 
842
                                //
 
843
                                if (HereDoc.State == 0) { // '<<' encountered
 
844
                    bool gotspace = false;
 
845
                    unsigned int oldi = i;
 
846
                    if (chNext == ' ' || chNext == '\t') {
 
847
                        // skip whitespace; legal for quoted delimiters
 
848
                        gotspace = true;
 
849
                        do {
 
850
                            i++;
 
851
                            chNext = styler.SafeGetCharAt(i + 1);
 
852
                        } while ((i + 1 < lengthDoc) && (chNext == ' ' || chNext == '\t'));
 
853
                        chNext2 = styler.SafeGetCharAt(i + 2);
 
854
                    }
 
855
                                        HereDoc.State = 1;
 
856
                                        HereDoc.Quote = chNext;
 
857
                                        HereDoc.Quoted = false;
 
858
                                        HereDoc.DelimiterLength = 0;
 
859
                                        HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
 
860
                                        if (chNext == '\'' || chNext == '"' || chNext == '`') {
 
861
                        // a quoted here-doc delimiter
 
862
                                                i++;
 
863
                                                ch = chNext;
 
864
                                                chNext = chNext2;
 
865
                                                HereDoc.Quoted = true;
 
866
                                        } else if (isspacechar(chNext) || isdigit(chNext) || chNext == '\\'
 
867
                                                || chNext == '=' || chNext == '$' || chNext == '@'
 
868
                        || ((isalpha(chNext) || chNext == '_') && gotspace)) {
 
869
                                                // left shift << or <<= operator cases
 
870
                        // restore position if operator
 
871
                        i = oldi;
 
872
                                                styler.ColourTo(i, SCE_PL_OPERATOR);
 
873
                                                state = SCE_PL_DEFAULT;
 
874
                                                HereDoc.State = 0;
 
875
                        goto restartLexer;
 
876
                                        } else {
 
877
                                                // an unquoted here-doc delimiter, no special handling
 
878
                        // (cannot be prefixed by spaces/tabs), or
 
879
                                                // symbols terminates; deprecated zero-length delimiter
 
880
                                        }
 
881
 
 
882
                                } else if (HereDoc.State == 1) { // collect the delimiter
 
883
                    backflag = BACK_NONE;
 
884
                                        if (HereDoc.Quoted) { // a quoted here-doc delimiter
 
885
                                                if (ch == HereDoc.Quote) { // closing quote => end of delimiter
 
886
                                                        styler.ColourTo(i, state);
 
887
                                                        state = SCE_PL_DEFAULT;
 
888
                                                } else {
 
889
                                                        if (ch == '\\' && chNext == HereDoc.Quote) { // escaped quote
 
890
                                                                i++;
 
891
                                                                ch = chNext;
 
892
                                                                chNext = chNext2;
 
893
                                                        }
 
894
                                                        HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
 
895
                                                        HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
 
896
                                                }
 
897
                                        } else { // an unquoted here-doc delimiter
 
898
                                                if (isalnum(ch) || ch == '_') {
 
899
                                                        HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
 
900
                                                        HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
 
901
                                                } else {
 
902
                                                        styler.ColourTo(i - 1, state);
 
903
                                                        state = SCE_PL_DEFAULT;
 
904
                                                        goto restartLexer;
 
905
                                                }
 
906
                                        }
 
907
                                        if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
 
908
                                                styler.ColourTo(i - 1, state);
 
909
                                                state = SCE_PL_ERROR;
 
910
                                                goto restartLexer;
 
911
                                        }
 
912
                                }
 
913
                        } else if (HereDoc.State == 2) {
 
914
                                // state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX
 
915
                                if (isEOLChar(chPrev) && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
 
916
                                        i += HereDoc.DelimiterLength;
 
917
                                        chPrev = styler.SafeGetCharAt(i - 1);
 
918
                                        ch = styler.SafeGetCharAt(i);
 
919
                                        if (isEOLChar(ch)) {
 
920
                                                styler.ColourTo(i - 1, state);
 
921
                                                state = SCE_PL_DEFAULT;
 
922
                        backflag = BACK_NONE;
 
923
                                                HereDoc.State = 0;
 
924
                                                goto restartLexer;
 
925
                                        }
 
926
                                        chNext = styler.SafeGetCharAt(i + 1);
 
927
                                }
 
928
                        } else if (state == SCE_PL_POD
 
929
                                || state == SCE_PL_POD_VERB) {
 
930
                                if (isEOLChar(chPrev)) {
 
931
                                        if (ch == ' ' || ch == '\t') {
 
932
                                                styler.ColourTo(i - 1, state);
 
933
                                                state = SCE_PL_POD_VERB;
 
934
                                        } else {
 
935
                                                styler.ColourTo(i - 1, state);
 
936
                                                state = SCE_PL_POD;
 
937
                                                if (ch == '=') {
 
938
                                                        if (isMatch(styler, lengthDoc, i, "=cut")) {
 
939
                                                                styler.ColourTo(i - 1 + 4, state);
 
940
                                                                i += 4;
 
941
                                                                state = SCE_PL_DEFAULT;
 
942
                                                                ch = styler.SafeGetCharAt(i);
 
943
                                                                //chNext = styler.SafeGetCharAt(i + 1);
 
944
                                                                goto restartLexer;
 
945
                                                        }
 
946
                                                }
 
947
                                        }
 
948
                                }
 
949
                        } else if (state == SCE_PL_SCALAR       // variable names
 
950
                                || state == SCE_PL_ARRAY
 
951
                                || state == SCE_PL_HASH
 
952
                                || state == SCE_PL_SYMBOLTABLE) {
 
953
                                if (ch == ':' && chNext == ':') {       // skip ::
 
954
                                        i++;
 
955
                                        ch = chNext;
 
956
                                        chNext = chNext2;
 
957
                                }
 
958
                                else if (isEndVar(ch)) {
 
959
                                        if (i == (styler.GetStartSegment() + 1)) {
 
960
                                                // Special variable: $(, $_ etc.
 
961
                                                styler.ColourTo(i, state);
 
962
                                                state = SCE_PL_DEFAULT;
 
963
                                        } else {
 
964
                                                styler.ColourTo(i - 1, state);
 
965
                                                state = SCE_PL_DEFAULT;
 
966
                                                goto restartLexer;
 
967
                                        }
 
968
                                }
 
969
                        } else if (state == SCE_PL_REGEX
 
970
                                || state == SCE_PL_STRING_QR
 
971
                                ) {
 
972
                                if (!Quote.Up && !isspacechar(ch)) {
 
973
                                        Quote.Open(ch);
 
974
                                } else if (ch == '\\' && Quote.Up != '\\') {
 
975
                                        // SG: Is it safe to skip *every* escaped char?
 
976
                                        i++;
 
977
                                        ch = chNext;
 
978
                                        chNext = styler.SafeGetCharAt(i + 1);
 
979
                                } else {
 
980
                                        if (ch == Quote.Down /*&& chPrev != '\\'*/) {
 
981
                                                Quote.Count--;
 
982
                                                if (Quote.Count == 0) {
 
983
                                                        Quote.Rep--;
 
984
                                                        if (Quote.Up == Quote.Down) {
 
985
                                                                Quote.Count++;
 
986
                                                        }
 
987
                                                }
 
988
                                                if (!isalpha(chNext)) {
 
989
                                                        if (Quote.Rep <= 0) {
 
990
                                                                styler.ColourTo(i, state);
 
991
                                                                state = SCE_PL_DEFAULT;
 
992
                                                                ch = ' ';
 
993
                                                        }
 
994
                                                }
 
995
                                        } else if (ch == Quote.Up /*&& chPrev != '\\'*/) {
 
996
                                                Quote.Count++;
 
997
                                        } else if (!isalpha(chNext)) {
 
998
                                                if (Quote.Rep <= 0) {
 
999
                                                        styler.ColourTo(i, state);
 
1000
                                                        state = SCE_PL_DEFAULT;
 
1001
                                                        ch = ' ';
 
1002
                                                }
 
1003
                                        }
 
1004
                                }
 
1005
                        } else if (state == SCE_PL_REGSUBST) {
 
1006
                                if (!Quote.Up && !isspacechar(ch)) {
 
1007
                                        Quote.Open(ch);
 
1008
                                } else if (ch == '\\' && Quote.Up != '\\') {
 
1009
                                        // SG: Is it safe to skip *every* escaped char?
 
1010
                                        i++;
 
1011
                                        ch = chNext;
 
1012
                                        chNext = styler.SafeGetCharAt(i + 1);
 
1013
                                } else {
 
1014
                                        if (Quote.Count == 0 && Quote.Rep == 1) {
 
1015
                                                /* We matched something like s(...) or tr{...}
 
1016
                                                * and are looking for the next matcher characters,
 
1017
                                                * which could be either bracketed ({...}) or non-bracketed
 
1018
                                                * (/.../).
 
1019
                                                *
 
1020
                                                * Number-signs are problematic.  If they occur after
 
1021
                                                * the close of the first part, treat them like
 
1022
                                                * a Quote.Up char, even if they actually start comments.
 
1023
                                                *
 
1024
                                                * If we find an alnum, we end the regsubst, and punt.
 
1025
                                                *
 
1026
                                                * Eric Promislow   ericp@activestate.com  Aug 9,2000
 
1027
                                                */
 
1028
                                                if (isspacechar(ch)) {
 
1029
                                                        // Keep going
 
1030
                                                }
 
1031
                                                else if (isalnum(ch)) {
 
1032
                                                        styler.ColourTo(i, state);
 
1033
                                                        state = SCE_PL_DEFAULT;
 
1034
                                                        ch = ' ';
 
1035
                                                } else {
 
1036
                                                        Quote.Open(ch);
 
1037
                                                }
 
1038
                                        } else if (ch == Quote.Down /*&& chPrev != '\\'*/) {
 
1039
                                                Quote.Count--;
 
1040
                                                if (Quote.Count == 0) {
 
1041
                                                        Quote.Rep--;
 
1042
                                                }
 
1043
                                                if (!isalpha(chNext)) {
 
1044
                                                        if (Quote.Rep <= 0) {
 
1045
                                                                styler.ColourTo(i, state);
 
1046
                                                                state = SCE_PL_DEFAULT;
 
1047
                                                                ch = ' ';
 
1048
                                                        }
 
1049
                                                }
 
1050
                                                if (Quote.Up == Quote.Down) {
 
1051
                                                        Quote.Count++;
 
1052
                                                }
 
1053
                                        } else if (ch == Quote.Up /*&& chPrev != '\\'*/) {
 
1054
                                                Quote.Count++;
 
1055
                                        } else if (!isalpha(chNext)) {
 
1056
                                                if (Quote.Rep <= 0) {
 
1057
                                                        styler.ColourTo(i, state);
 
1058
                                                        state = SCE_PL_DEFAULT;
 
1059
                                                        ch = ' ';
 
1060
                                                }
 
1061
                                        }
 
1062
                                }
 
1063
                        } else if (state == SCE_PL_STRING_Q
 
1064
                                || state == SCE_PL_STRING_QQ
 
1065
                                || state == SCE_PL_STRING_QX
 
1066
                                || state == SCE_PL_STRING_QW
 
1067
                                || state == SCE_PL_STRING
 
1068
                                || state == SCE_PL_CHARACTER
 
1069
                                || state == SCE_PL_BACKTICKS
 
1070
                                ) {
 
1071
                                if (!Quote.Down && !isspacechar(ch)) {
 
1072
                                        Quote.Open(ch);
 
1073
                                } else if (ch == '\\' && Quote.Up != '\\') {
 
1074
                                        i++;
 
1075
                                        ch = chNext;
 
1076
                                        chNext = styler.SafeGetCharAt(i + 1);
 
1077
                                } else if (ch == Quote.Down) {
 
1078
                                        Quote.Count--;
 
1079
                                        if (Quote.Count == 0) {
 
1080
                                                Quote.Rep--;
 
1081
                                                if (Quote.Rep <= 0) {
 
1082
                                                        styler.ColourTo(i, state);
 
1083
                                                        state = SCE_PL_DEFAULT;
 
1084
                                                        ch = ' ';
 
1085
                                                }
 
1086
                                                if (Quote.Up == Quote.Down) {
 
1087
                                                        Quote.Count++;
 
1088
                                                }
 
1089
                                        }
 
1090
                                } else if (ch == Quote.Up) {
 
1091
                                        Quote.Count++;
 
1092
                                }
 
1093
                        }
 
1094
                }
 
1095
                if (state == SCE_PL_ERROR) {
 
1096
                        break;
 
1097
                }
 
1098
                chPrev = ch;
 
1099
        }
 
1100
        styler.ColourTo(lengthDoc - 1, state);
 
1101
}
 
1102
 
 
1103
static bool IsCommentLine(int line, Accessor &styler) {
 
1104
        int pos = styler.LineStart(line);
 
1105
        int eol_pos = styler.LineStart(line + 1) - 1;
 
1106
        for (int i = pos; i < eol_pos; i++) {
 
1107
                char ch = styler[i];
 
1108
        int style = styler.StyleAt(i);
 
1109
                if (ch == '#' && style == SCE_PL_COMMENTLINE)
 
1110
                        return true;
 
1111
                else if (ch != ' ' && ch != '\t')
 
1112
                        return false;
 
1113
        }
 
1114
        return false;
 
1115
}
 
1116
 
 
1117
static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[],
 
1118
                            Accessor &styler) {
 
1119
        bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
 
1120
        bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
 
1121
        // Custom folding of POD and packages
 
1122
        bool foldPOD = styler.GetPropertyInt("fold.perl.pod", 1) != 0;
 
1123
        bool foldPackage = styler.GetPropertyInt("fold.perl.package", 1) != 0;
 
1124
        unsigned int endPos = startPos + length;
 
1125
        int visibleChars = 0;
 
1126
        int lineCurrent = styler.GetLine(startPos);
 
1127
        int levelPrev = SC_FOLDLEVELBASE;
 
1128
        if (lineCurrent > 0)
 
1129
                levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
 
1130
        int levelCurrent = levelPrev;
 
1131
        char chNext = styler[startPos];
 
1132
        char chPrev = styler.SafeGetCharAt(startPos - 1);
 
1133
        int styleNext = styler.StyleAt(startPos);
 
1134
        // Used at end of line to determine if the line was a package definition
 
1135
        bool isPackageLine = false;
 
1136
        bool isPodHeading = false;
 
1137
        for (unsigned int i = startPos; i < endPos; i++) {
 
1138
                char ch = chNext;
 
1139
                chNext = styler.SafeGetCharAt(i + 1);
 
1140
                int style = styleNext;
 
1141
                styleNext = styler.StyleAt(i + 1);
 
1142
                bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
 
1143
                bool atLineStart = isEOLChar(chPrev) || i == 0;
 
1144
        // Comment folding
 
1145
                if (foldComment && atEOL && IsCommentLine(lineCurrent, styler))
 
1146
        {
 
1147
            if (!IsCommentLine(lineCurrent - 1, styler)
 
1148
                && IsCommentLine(lineCurrent + 1, styler))
 
1149
                levelCurrent++;
 
1150
            else if (IsCommentLine(lineCurrent - 1, styler)
 
1151
                     && !IsCommentLine(lineCurrent+1, styler))
 
1152
                levelCurrent--;
 
1153
        }
 
1154
                if (style == SCE_C_OPERATOR) {
 
1155
                        if (ch == '{') {
 
1156
                                levelCurrent++;
 
1157
                        } else if (ch == '}') {
 
1158
                                levelCurrent--;
 
1159
                        }
 
1160
                }
 
1161
                // Custom POD folding
 
1162
                if (foldPOD && atLineStart) {
 
1163
                        int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
 
1164
                        if (style == SCE_PL_POD) {
 
1165
                                if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
 
1166
                                        levelCurrent++;
 
1167
                                else if (styler.Match(i, "=cut"))
 
1168
                                        levelCurrent--;
 
1169
                                else if (styler.Match(i, "=head"))
 
1170
                                        isPodHeading = true;
 
1171
                        } else if (style == SCE_PL_DATASECTION) {
 
1172
                if (ch == '=' && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
 
1173
                    levelCurrent++;
 
1174
                else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
 
1175
                    levelCurrent--;
 
1176
                else if (styler.Match(i, "=head"))
 
1177
                                        isPodHeading = true;
 
1178
                // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
 
1179
                // reset needed as level test is vs. SC_FOLDLEVELBASE
 
1180
                else if (styler.Match(i, "__END__"))
 
1181
                    levelCurrent = SC_FOLDLEVELBASE;
 
1182
            }
 
1183
                }
 
1184
                // Custom package folding
 
1185
                if (foldPackage && atLineStart) {
 
1186
                        if (style == SCE_PL_WORD && styler.Match(i, "package")) {
 
1187
                                isPackageLine = true;
 
1188
                        }
 
1189
                }
 
1190
 
 
1191
                if (atEOL) {
 
1192
                        int lev = levelPrev;
 
1193
                        if (isPodHeading) {
 
1194
                lev = levelPrev - 1;
 
1195
                lev |= SC_FOLDLEVELHEADERFLAG;
 
1196
                isPodHeading = false;
 
1197
                        }
 
1198
                        // Check if line was a package declaration
 
1199
                        // because packages need "special" treatment
 
1200
                        if (isPackageLine) {
 
1201
                                lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
 
1202
                                levelCurrent = SC_FOLDLEVELBASE + 1;
 
1203
                                isPackageLine = false;
 
1204
                        }
 
1205
            lev |= levelCurrent << 16;
 
1206
                        if (visibleChars == 0 && foldCompact)
 
1207
                                lev |= SC_FOLDLEVELWHITEFLAG;
 
1208
                        if ((levelCurrent > levelPrev) && (visibleChars > 0))
 
1209
                                lev |= SC_FOLDLEVELHEADERFLAG;
 
1210
                        if (lev != styler.LevelAt(lineCurrent)) {
 
1211
                                styler.SetLevel(lineCurrent, lev);
 
1212
                        }
 
1213
                        lineCurrent++;
 
1214
                        levelPrev = levelCurrent;
 
1215
                        visibleChars = 0;
 
1216
                }
 
1217
                if (!isspacechar(ch))
 
1218
                        visibleChars++;
 
1219
                chPrev = ch;
 
1220
        }
 
1221
        // Fill in the real level of the next line, keeping the current flags as they will be filled in later
 
1222
        int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
 
1223
        styler.SetLevel(lineCurrent, levelPrev | flagsNext);
 
1224
}
 
1225
 
 
1226
// Descriptions of the word lists used by the Perl lexer; the list is
// terminated by a null entry.
static const char * const perlWordListDesc[] = { "Keywords", 0 };
 
1230
 
 
1231
// Module object for the Perl lexer, constructed from the SCLEX_PERL id, the
// colouring function, the name "perl", the folding function and the word
// list descriptions defined in this file.
LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc, "perl", FoldPerlDoc, perlWordListDesc);
 
1232