~ubuntu-branches/debian/experimental/geany/experimental

« back to all changes in this revision

Viewing changes to scintilla/LexHTML.cxx

  • Committer: Bazaar Package Importer
  • Author(s): Damián Viano
  • Date: 2008-05-02 11:37:45 UTC
  • mfrom: (1.2.1 upstream) (3.1.6 hardy)
  • Revision ID: james.westby@ubuntu.com-20080502113745-xzp4g6dmovrpoj17
Tags: 0.14-1
New upstream release (Closes: #478126)

Show diffs side-by-side

added added

removed removed

Lines of Context:
19
19
#include "KeyWords.h"
20
20
#include "Scintilla.h"
21
21
#include "SciLexer.h"
 
22
#include "CharacterSet.h"
 
23
 
 
24
#ifdef SCI_NAMESPACE
 
25
using namespace Scintilla;
 
26
#endif
22
27
 
23
28
#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
24
29
#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
35
40
        return (ch < 0x80) && (isalnum(ch) || ch == '_');
36
41
}
37
42
 
 
43
inline bool IsOperator(int ch) {
 
44
        if (isascii(ch) && isalnum(ch))
 
45
                return false;
 
46
        // NOTE: '.' is treated as an operator by the test below, even though it is also used to make up numbers
 
47
        if (ch == '%' || ch == '^' || ch == '&' || ch == '*' ||
 
48
                ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
 
49
                ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
 
50
                ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
 
51
                ch == '<' || ch == '>' || ch == ',' || ch == '/' ||
 
52
                ch == '?' || ch == '!' || ch == '.' || ch == '~')
 
53
                return true;
 
54
        return false;
 
55
}
 
56
 
38
57
static inline int MakeLowerCase(int ch) {
39
58
        if (ch < 'A' || ch > 'Z')
40
59
                return ch;
66
85
                return eScriptJS;
67
86
        if (strstr(s, "php"))
68
87
                return eScriptPHP;
69
 
        if (strstr(s, "xml"))
 
88
        if (strstr(s, "xml")) {
 
89
                const char *xml = strstr(s, "xml");
 
90
                for (const char *t=s; t<xml; t++) {
 
91
                        if (!IsASpace(*t)) {
 
92
                                return prevValue;
 
93
                        }
 
94
                }
70
95
                return eScriptXML;
 
96
        }
71
97
 
72
98
        return prevValue;
73
99
}
102
128
}
103
129
 
104
130
static int statePrintForState(int state, script_mode inScriptType) {
105
 
        int StateToPrint;
 
131
        int StateToPrint = state;
106
132
 
107
 
        if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
108
 
                StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
109
 
        } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
110
 
                StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
111
 
        } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
112
 
                StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
113
 
        } else {
114
 
                StateToPrint = state;
 
133
        if (state >= SCE_HJ_START) {
 
134
                if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
 
135
                        StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
 
136
                } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
 
137
                        StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
 
138
                } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
 
139
                        StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
 
140
                }
115
141
        }
116
142
 
117
143
        return StateToPrint;
173
199
        bool allowTermination = !isStringState(state);
174
200
        if (allowTermination) {
175
201
                switch (state) {
 
202
                case SCE_HB_COMMENTLINE:
176
203
                case SCE_HPHP_COMMENT:
177
204
                case SCE_HP_COMMENTLINE:
178
205
                case SCE_HPA_COMMENTLINE:
222
249
 
223
250
static int classifyTagHTML(unsigned int start, unsigned int end,
224
251
                           WordList &keywords, Accessor &styler, bool &tagDontFold,
225
 
                           bool caseSensitive) {
 
252
                           bool caseSensitive, bool isXml) {
226
253
        char s[30 + 2];
227
254
        // Copy after the '<'
228
255
        unsigned int i = 0;
238
265
        s[i] = ' ';
239
266
        s[i+1] = '\0';
240
267
 
 
268
        // if the current language is XML, I can fold any tag
 
269
        // if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
241
270
        //...to find it in the list of no-container-tags
242
 
        // (There are many more. We will need a keywordlist in the property file for this)
243
 
        tagDontFold = (NULL != strstr("meta link img area br hr input ",s));
 
271
        tagDontFold = (!isXml) && (NULL != strstr("meta link img area br hr input ",s));
244
272
 
245
273
        //now we can remove the trailing space
246
274
        s[i] = '\0';
390
418
        return Result;
391
419
}
392
420
 
393
 
static inline bool ishtmlwordchar(char ch) {
 
421
static inline bool ishtmlwordchar(int ch) {
394
422
        return !isascii(ch) ||
395
423
                (isalnum(ch) || ch == '.' || ch == '-' || ch == '_' || ch == ':' || ch == '!' || ch == '#');
396
424
}
397
425
 
398
 
static inline bool issgmlwordchar(char ch) {
 
426
static inline bool issgmlwordchar(int ch) {
399
427
        return !isascii(ch) ||
400
428
                (isalnum(ch) || ch == '.' || ch == '_' || ch == ':' || ch == '!' || ch == '#' || ch == '[');
401
429
}
402
430
 
403
 
static inline bool IsPhpWordStart(const unsigned char ch) {
 
431
static inline bool IsPhpWordStart(int ch) {
404
432
        return (isascii(ch) && (isalpha(ch) || (ch == '_'))) || (ch >= 0x7f);
405
433
}
406
434
 
407
 
static inline bool IsPhpWordChar(char ch) {
 
435
static inline bool IsPhpWordChar(int ch) {
408
436
        return IsADigit(ch) || IsPhpWordStart(ch);
409
437
}
410
438
 
425
453
                   state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
426
454
}
427
455
 
428
 
static bool isLineEnd(char ch) {
 
456
static bool isLineEnd(int ch) {
429
457
        return ch == '\r' || ch == '\n';
430
458
}
431
459
 
432
 
static bool isOKBeforeRE(char ch) {
 
460
static bool isOKBeforeRE(int ch) {
433
461
        return (ch == '(') || (ch == '=') || (ch == ',');
434
462
}
435
463
 
457
485
}
458
486
 
459
487
static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
460
 
                                  Accessor &styler) {
 
488
                                  Accessor &styler, bool isXml) {
461
489
        WordList &keywords = *keywordlists[0];
462
490
        WordList &keywords2 = *keywordlists[1];
463
491
        WordList &keywords3 = *keywordlists[2];
465
493
        WordList &keywords5 = *keywordlists[4];
466
494
        WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
467
495
 
468
 
        // Lexer for HTML requires more lexical states (7 bits worth) than most lexers
469
 
        styler.StartAt(startPos, STYLE_MAX);
 
496
        // Lexer for HTML requires more lexical states (8 bits worth) than most lexers
 
497
        styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
470
498
        char prevWord[200];
471
499
        prevWord[0] = '\0';
472
500
        char phpStringDelimiter[200]; // PHP is not limited in length, we are
488
516
                length++;
489
517
                state = styler.StyleAt(startPos);
490
518
        }
491
 
        styler.StartAt(startPos, STYLE_MAX);
 
519
        styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
492
520
 
493
521
        int lineCurrent = styler.GetLine(startPos);
494
522
        int lineState;
515
543
        const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
516
544
        const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
517
545
 
 
546
        const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
 
547
        const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
 
548
        const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
 
549
 
518
550
        int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
519
551
        int levelCurrent = levelPrev;
520
552
        int visibleChars = 0;
521
553
 
522
 
        char chPrev = ' ';
523
 
        char ch = ' ';
524
 
        char chPrevNonWhite = ' ';
 
554
        int chPrev = ' ';
 
555
        int ch = ' ';
 
556
        int chPrevNonWhite = ' ';
525
557
        // look back to set chPrevNonWhite properly for better regex colouring
526
558
        if (scriptLanguage == eScriptJS && startPos > 0) {
527
559
                int back = startPos;
533
565
                                break;
534
566
                }
535
567
                if (style == SCE_HJ_SYMBOLS) {
536
 
                        chPrevNonWhite = styler.SafeGetCharAt(back);
 
568
                        chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
537
569
                }
538
570
        }
539
571
 
540
572
        styler.StartSegment(startPos);
541
573
        const int lengthDoc = startPos + length;
542
574
        for (int i = startPos; i < lengthDoc; i++) {
543
 
                const char chPrev2 = chPrev;
 
575
                const int chPrev2 = chPrev;
544
576
                chPrev = ch;
545
 
                if (!isspacechar(ch) && state != SCE_HJ_COMMENT &&
 
577
                if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
546
578
                        state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
547
579
                        chPrevNonWhite = ch;
548
 
                ch = styler[i];
549
 
                char chNext = styler.SafeGetCharAt(i + 1);
550
 
                const char chNext2 = styler.SafeGetCharAt(i + 2);
 
580
                ch = static_cast<unsigned char>(styler[i]);
 
581
                int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
 
582
                const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
551
583
 
552
584
                // Handle DBCS codepages
553
 
                if (styler.IsLeadByte(ch)) {
 
585
                if (styler.IsLeadByte(static_cast<char>(ch))) {
554
586
                        chPrev = ' ';
555
587
                        i += 1;
556
588
                        continue;
557
589
                }
558
590
 
559
 
                if ((!isspacechar(ch) || !foldCompact) && fold)
 
591
                if ((!IsASpace(ch) || !foldCompact) && fold)
560
592
                        visibleChars++;
561
593
 
562
594
                // decide which state to print for the current state (depending on the script tag)
644
676
                        case SCE_HJ_COMMENTDOC:
645
677
                        //case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
646
678
                        // the end of script marker from some JS interpreters.
 
679
                        case SCE_HB_COMMENTLINE:
 
680
                        case SCE_HBA_COMMENTLINE:
647
681
                        case SCE_HJ_DOUBLESTRING:
648
682
                        case SCE_HJ_SINGLESTRING:
649
683
                        case SCE_HJ_REGEX:
650
684
                        case SCE_HB_STRING:
 
685
                        case SCE_HBA_STRING:
651
686
                        case SCE_HP_STRING:
652
687
                        case SCE_HP_TRIPLE:
653
688
                        case SCE_HP_TRIPLEDOUBLE:
654
689
                                break;
655
690
                        default :
656
691
                                // check if the closing tag is a script tag
657
 
                                if (state == SCE_HJ_COMMENTLINE) {
 
692
                                if (state == SCE_HJ_COMMENTLINE || isXml) {
658
693
                                        char tag[7]; // room for the <script> tag
659
 
                                        char chr;       // current char
660
 
                                        int j=0;
661
 
                                        chr = styler.SafeGetCharAt(i+2);
662
 
                                        while (j < 6 && !isspacechar(chr)) {
 
694
                                        int j = 0;
 
695
                                        char chr = styler.SafeGetCharAt(i+2);
 
696
                                        while (j < 6 && !IsASpace(chr)) {
663
697
                                                tag[j++] = static_cast<char>(MakeLowerCase(chr));
664
698
                                                chr = styler.SafeGetCharAt(i+2+j);
665
699
                                        }
708
742
                                levelCurrent++;
709
743
                        }
710
744
                        // should be better
711
 
                        ch = styler.SafeGetCharAt(i);
 
745
                        ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
712
746
                        continue;
713
747
                }
714
748
 
747
781
                        if (foldHTMLPreprocessor)
748
782
                                levelCurrent++;
749
783
                        // should be better
750
 
                        ch = styler.SafeGetCharAt(i);
 
784
                        ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
751
785
                        continue;
752
786
                }
753
787
 
783
817
                             ((inScriptType == eNonHtmlPreProc)
784
818
                              || (inScriptType == eNonHtmlScriptPreProc)) && (
785
819
                                 ((scriptLanguage != eScriptNone) && stateAllowsTermination(state) && ((ch == '%') || (ch == '?')))
786
 
                             ) && (chNext == '>')) ||
 
820
                             ) && (chNext == '>') && (! isCommentASPState(state))) ||
787
821
                         ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
788
822
                        if (state == SCE_H_ASPAT) {
789
823
                                aspScript = segIsScriptingIndicator(styler,
862
896
                                styler.ColourTo(i - 1, StateToPrint);
863
897
                                state = SCE_H_SGML_SIMPLESTRING;
864
898
                        } else if ((ch == '-') && (chPrev == '-')) {
865
 
                                styler.ColourTo(i - 2, StateToPrint);
 
899
                                if (static_cast<int>(styler.GetStartSegment()) <= (i - 2)) {
 
900
                                        styler.ColourTo(i - 2, StateToPrint);
 
901
                                }
866
902
                                state = SCE_H_SGML_COMMENT;
867
903
                        } else if (isascii(ch) && isalpha(ch) && (chPrev == '%')) {
868
904
                                styler.ColourTo(i - 2, StateToPrint);
924
960
                                }
925
961
                                // find the length of the word
926
962
                                int size = 1;
927
 
                                while (ishtmlwordchar(styler.SafeGetCharAt(i + size)))
 
963
                                while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
928
964
                                        size++;
929
965
                                styler.ColourTo(i + size - 1, StateToPrint);
930
966
                                i += size - 1;
931
967
                                visibleChars += size - 1;
932
 
                                ch = styler.SafeGetCharAt(i);
 
968
                                ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
933
969
                                if (scriptLanguage == eScriptSGMLblock) {
934
970
                                        state = SCE_H_SGML_BLOCK_DEFAULT;
935
971
                                } else {
1012
1048
                        }
1013
1049
                        break;
1014
1050
                case SCE_H_TAGUNKNOWN:
1015
 
                        if (!ishtmlwordchar(ch) && !((ch == '/') && (chPrev == '<')) && ch != '[') {
 
1051
                        if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
1016
1052
                                int eClass = classifyTagHTML(styler.GetStartSegment(),
1017
 
                                        i - 1, keywords, styler, tagDontFold, caseSensitive);
 
1053
                                        i - 1, keywords, styler, tagDontFold, caseSensitive, isXml);
1018
1054
                                if (eClass == SCE_H_SCRIPT) {
1019
1055
                                        if (!tagClosing) {
1020
1056
                                                inScriptType = eNonHtmlScript;
1064
1100
                        }
1065
1101
                        break;
1066
1102
                case SCE_H_ATTRIBUTE:
1067
 
                        if (!ishtmlwordchar(ch) && ch != '/' && ch != '-') {
 
1103
                        if (!setAttributeContinue.Contains(ch)) {
1068
1104
                                if (inScriptType == eNonHtmlScript) {
1069
1105
                                        int scriptLanguagePrev = scriptLanguage;
1070
1106
                                        clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1137
1173
                                i++;
1138
1174
                                ch = chNext;
1139
1175
                                state = SCE_H_DEFAULT;
1140
 
                        } else if (ishtmlwordchar(ch)) {
 
1176
                        } else if (setHTMLWord.Contains(ch)) {
1141
1177
                                styler.ColourTo(i - 1, StateToPrint);
1142
1178
                                state = SCE_H_ATTRIBUTE;
1143
1179
                        }
1161
1197
                        }
1162
1198
                        break;
1163
1199
                case SCE_H_VALUE:
1164
 
                        if (!ishtmlwordchar(ch)) {
 
1200
                        if (!setHTMLWord.Contains(ch)) {
1165
1201
                                if (ch == '\"' && chPrev == '=') {
1166
1202
                                        // Should really test for being first character
1167
1203
                                        state = SCE_H_DOUBLESTRING;
1198
1234
                case SCE_HJ_DEFAULT:
1199
1235
                case SCE_HJ_START:
1200
1236
                case SCE_HJ_SYMBOLS:
1201
 
                        if (iswordstart(ch)) {
 
1237
                        if (IsAWordStart(ch)) {
1202
1238
                                styler.ColourTo(i - 1, StateToPrint);
1203
1239
                                state = SCE_HJ_WORD;
1204
1240
                        } else if (ch == '/' && chNext == '*') {
1227
1263
                                styler.ColourTo(i - 1, StateToPrint);
1228
1264
                                state = SCE_HJ_COMMENTLINE;
1229
1265
                                i += 2;
1230
 
                        } else if (isoperator(ch)) {
 
1266
                        } else if (IsOperator(ch)) {
1231
1267
                                styler.ColourTo(i - 1, StateToPrint);
1232
1268
                                styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1233
1269
                                state = SCE_HJ_DEFAULT;
1239
1275
                        }
1240
1276
                        break;
1241
1277
                case SCE_HJ_WORD:
1242
 
                        if (!iswordchar(ch)) {
 
1278
                        if (!IsAWordChar(ch)) {
1243
1279
                                classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1244
1280
                                //styler.ColourTo(i - 1, eHTJSKeyword);
1245
1281
                                state = SCE_HJ_DEFAULT;
1258
1294
                                        styler.ColourTo(i - 1, StateToPrint);
1259
1295
                                        state = SCE_HJ_COMMENTLINE;
1260
1296
                                        i += 2;
1261
 
                                } else if (isoperator(ch)) {
 
1297
                                } else if (IsOperator(ch)) {
1262
1298
                                        styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1263
1299
                                        state = SCE_HJ_DEFAULT;
1264
1300
                                }
1328
1364
                                        while (isascii(chNext) && islower(chNext)) {   // gobble regex flags
1329
1365
                                                i++;
1330
1366
                                                ch = chNext;
1331
 
                                                chNext = styler.SafeGetCharAt(i + 1);
 
1367
                                                chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1332
1368
                                        }
1333
1369
                                }
1334
1370
                                styler.ColourTo(i, StateToPrint);
1338
1374
                                if (chNext == '\\' || chNext == '/') {
1339
1375
                                        i++;
1340
1376
                                        ch = chNext;
1341
 
                                        chNext = styler.SafeGetCharAt(i + 1);
 
1377
                                        chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1342
1378
                                }
1343
1379
                        }
1344
1380
                        break;
1345
1381
                case SCE_HB_DEFAULT:
1346
1382
                case SCE_HB_START:
1347
 
                        if (iswordstart(ch)) {
 
1383
                        if (IsAWordStart(ch)) {
1348
1384
                                styler.ColourTo(i - 1, StateToPrint);
1349
1385
                                state = SCE_HB_WORD;
1350
1386
                        } else if (ch == '\'') {
1357
1393
                                   styler.SafeGetCharAt(i + 3) == '-') {
1358
1394
                                styler.ColourTo(i - 1, StateToPrint);
1359
1395
                                state = SCE_HB_COMMENTLINE;
1360
 
                        } else if (isoperator(ch)) {
 
1396
                        } else if (IsOperator(ch)) {
1361
1397
                                styler.ColourTo(i - 1, StateToPrint);
1362
1398
                                styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1363
1399
                                state = SCE_HB_DEFAULT;
1369
1405
                        }
1370
1406
                        break;
1371
1407
                case SCE_HB_WORD:
1372
 
                        if (!iswordchar(ch)) {
 
1408
                        if (!IsAWordChar(ch)) {
1373
1409
                                state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1374
1410
                                if (state == SCE_HB_DEFAULT) {
1375
1411
                                        if (ch == '\"') {
1376
1412
                                                state = SCE_HB_STRING;
1377
1413
                                        } else if (ch == '\'') {
1378
1414
                                                state = SCE_HB_COMMENTLINE;
1379
 
                                        } else if (isoperator(ch)) {
 
1415
                                        } else if (IsOperator(ch)) {
1380
1416
                                                styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1381
1417
                                                state = SCE_HB_DEFAULT;
1382
1418
                                        }
1409
1445
                        break;
1410
1446
                case SCE_HP_DEFAULT:
1411
1447
                case SCE_HP_START:
1412
 
                        if (iswordstart(ch)) {
 
1448
                        if (IsAWordStart(ch)) {
1413
1449
                                styler.ColourTo(i - 1, StateToPrint);
1414
1450
                                state = SCE_HP_WORD;
1415
1451
                        } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1426
1462
                                        state = SCE_HP_TRIPLEDOUBLE;
1427
1463
                                        ch = ' ';
1428
1464
                                        chPrev = ' ';
1429
 
                                        chNext = styler.SafeGetCharAt(i + 1);
 
1465
                                        chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1430
1466
                                } else {
1431
1467
                                        //                                      state = statePrintForState(SCE_HP_STRING,inScriptType);
1432
1468
                                        state = SCE_HP_STRING;
1438
1474
                                        state = SCE_HP_TRIPLE;
1439
1475
                                        ch = ' ';
1440
1476
                                        chPrev = ' ';
1441
 
                                        chNext = styler.SafeGetCharAt(i + 1);
 
1477
                                        chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1442
1478
                                } else {
1443
1479
                                        state = SCE_HP_CHARACTER;
1444
1480
                                }
1445
 
                        } else if (isoperator(ch)) {
 
1481
                        } else if (IsOperator(ch)) {
1446
1482
                                styler.ColourTo(i - 1, StateToPrint);
1447
1483
                                styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1448
1484
                        } else if ((ch == ' ') || (ch == '\t')) {
1453
1489
                        }
1454
1490
                        break;
1455
1491
                case SCE_HP_WORD:
1456
 
                        if (!iswordchar(ch)) {
 
1492
                        if (!IsAWordChar(ch)) {
1457
1493
                                classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1458
1494
                                state = SCE_HP_DEFAULT;
1459
1495
                                if (ch == '#') {
1464
1500
                                                state = SCE_HP_TRIPLEDOUBLE;
1465
1501
                                                ch = ' ';
1466
1502
                                                chPrev = ' ';
1467
 
                                                chNext = styler.SafeGetCharAt(i + 1);
 
1503
                                                chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1468
1504
                                        } else {
1469
1505
                                                state = SCE_HP_STRING;
1470
1506
                                        }
1474
1510
                                                state = SCE_HP_TRIPLE;
1475
1511
                                                ch = ' ';
1476
1512
                                                chPrev = ' ';
1477
 
                                                chNext = styler.SafeGetCharAt(i + 1);
 
1513
                                                chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1478
1514
                                        } else {
1479
1515
                                                state = SCE_HP_CHARACTER;
1480
1516
                                        }
1481
 
                                } else if (isoperator(ch)) {
 
1517
                                } else if (IsOperator(ch)) {
1482
1518
                                        styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1483
1519
                                }
1484
1520
                        }
1494
1530
                                if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1495
1531
                                        i++;
1496
1532
                                        ch = chNext;
1497
 
                                        chNext = styler.SafeGetCharAt(i + 1);
 
1533
                                        chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1498
1534
                                }
1499
1535
                        } else if (ch == '\"') {
1500
1536
                                styler.ColourTo(i, StateToPrint);
1506
1542
                                if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1507
1543
                                        i++;
1508
1544
                                        ch = chNext;
1509
 
                                        chNext = styler.SafeGetCharAt(i + 1);
 
1545
                                        chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1510
1546
                                }
1511
1547
                        } else if (ch == '\'') {
1512
1548
                                styler.ColourTo(i, StateToPrint);
1527
1563
                        break;
1528
1564
                        ///////////// start - PHP state handling
1529
1565
                case SCE_HPHP_WORD:
1530
 
                        if (!iswordchar(ch)) {
 
1566
                        if (!IsAWordChar(ch)) {
1531
1567
                                classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1532
1568
                                if (ch == '/' && chNext == '*') {
1533
1569
                                        i++;
1547
1583
                                        state = SCE_HPHP_SIMPLESTRING;
1548
1584
                                } else if (ch == '$' && IsPhpWordStart(chNext)) {
1549
1585
                                        state = SCE_HPHP_VARIABLE;
1550
 
                                } else if (isoperator(ch)) {
 
1586
                                } else if (IsOperator(ch)) {
1551
1587
                                        state = SCE_HPHP_OPERATOR;
1552
1588
                                } else {
1553
1589
                                        state = SCE_HPHP_DEFAULT;
1560
1596
                                && strchr(".xXabcdefABCDEF", ch) == NULL
1561
1597
                                && ((ch != '-' && ch != '+') || (chPrev != 'e' && chPrev != 'E'))) {
1562
1598
                                styler.ColourTo(i - 1, SCE_HPHP_NUMBER);
1563
 
                                if (isoperator(ch))
 
1599
                                if (IsOperator(ch))
1564
1600
                                        state = SCE_HPHP_OPERATOR;
1565
1601
                                else
1566
1602
                                        state = SCE_HPHP_DEFAULT;
1569
1605
                case SCE_HPHP_VARIABLE:
1570
1606
                        if (!IsPhpWordChar(ch)) {
1571
1607
                                styler.ColourTo(i - 1, SCE_HPHP_VARIABLE);
1572
 
                                if (isoperator(ch))
 
1608
                                if (IsOperator(ch))
1573
1609
                                        state = SCE_HPHP_OPERATOR;
1574
1610
                                else
1575
1611
                                        state = SCE_HPHP_DEFAULT;
1599
1635
                                styler.ColourTo(i - 1, StateToPrint);
1600
1636
                                state = SCE_HPHP_HSTRING_VARIABLE;
1601
1637
                        } else if (styler.Match(i, phpStringDelimiter)) {
1602
 
                                if (strlen(phpStringDelimiter) > 1)
1603
 
                                        i += strlen(phpStringDelimiter) - 1;
 
1638
                                const int psdLength = strlen(phpStringDelimiter);
 
1639
                                if ((psdLength > 1) && ((i + psdLength) < lengthDoc))
 
1640
                                        i += psdLength - 1;
1604
1641
                                styler.ColourTo(i, StateToPrint);
1605
1642
                                state = SCE_HPHP_DEFAULT;
1606
1643
                        }
1632
1669
                        styler.ColourTo(i - 1, StateToPrint);
1633
1670
                        if (IsADigit(ch) || (ch == '.' && IsADigit(chNext))) {
1634
1671
                                state = SCE_HPHP_NUMBER;
1635
 
                        } else if (iswordstart(ch)) {
 
1672
                        } else if (IsAWordStart(ch)) {
1636
1673
                                state = SCE_HPHP_WORD;
1637
1674
                        } else if (ch == '/' && chNext == '*') {
1638
1675
                                i++;
1652
1689
                                state = SCE_HPHP_SIMPLESTRING;
1653
1690
                        } else if (ch == '$' && IsPhpWordStart(chNext)) {
1654
1691
                                state = SCE_HPHP_VARIABLE;
1655
 
                        } else if (isoperator(ch)) {
 
1692
                        } else if (IsOperator(ch)) {
1656
1693
                                state = SCE_HPHP_OPERATOR;
1657
 
                        } else if ((state == SCE_HPHP_OPERATOR) && (isspacechar(ch))) {
 
1694
                        } else if ((state == SCE_HPHP_OPERATOR) && (IsASpace(ch))) {
1658
1695
                                state = SCE_HPHP_DEFAULT;
1659
1696
                        }
1660
1697
                        break;
1670
1707
                                state = SCE_HB_STRING;
1671
1708
                        } else if (ch == '\'') {
1672
1709
                                state = SCE_HB_COMMENTLINE;
1673
 
                        } else if (iswordstart(ch)) {
 
1710
                        } else if (IsAWordStart(ch)) {
1674
1711
                                state = SCE_HB_WORD;
1675
 
                        } else if (isoperator(ch)) {
 
1712
                        } else if (IsOperator(ch)) {
1676
1713
                                styler.ColourTo(i, SCE_HB_DEFAULT);
1677
1714
                        }
1678
1715
                } else if (state == SCE_HBA_DEFAULT) {    // One of the above succeeded
1680
1717
                                state = SCE_HBA_STRING;
1681
1718
                        } else if (ch == '\'') {
1682
1719
                                state = SCE_HBA_COMMENTLINE;
1683
 
                        } else if (iswordstart(ch)) {
 
1720
                        } else if (IsAWordStart(ch)) {
1684
1721
                                state = SCE_HBA_WORD;
1685
 
                        } else if (isoperator(ch)) {
 
1722
                        } else if (IsOperator(ch)) {
1686
1723
                                styler.ColourTo(i, SCE_HBA_DEFAULT);
1687
1724
                        }
1688
1725
                } else if (state == SCE_HJ_DEFAULT) {    // One of the above succeeded
1697
1734
                                state = SCE_HJ_DOUBLESTRING;
1698
1735
                        } else if ((ch == '\'') && (nonEmptySegment)) {
1699
1736
                                state = SCE_HJ_SINGLESTRING;
1700
 
                        } else if (iswordstart(ch)) {
 
1737
                        } else if (IsAWordStart(ch)) {
1701
1738
                                state = SCE_HJ_WORD;
1702
 
                        } else if (isoperator(ch)) {
 
1739
                        } else if (IsOperator(ch)) {
1703
1740
                                styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1704
1741
                        }
1705
1742
                }
1715
1752
        }
1716
1753
}
1717
1754
 
 
1755
static void ColouriseXMLDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
 
1756
                                  Accessor &styler) {
 
1757
        // Passing in true because we're lexing XML
 
1758
        ColouriseHyperTextDoc(startPos, length, initStyle, keywordlists,styler, true);
 
1759
}
 
1760
 
 
1761
static void ColouriseHTMLDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
 
1762
                                  Accessor &styler) {
 
1763
        // Passing in false because we're not lexing XML
 
1764
        ColouriseHyperTextDoc(startPos, length, initStyle, keywordlists,styler, false);
 
1765
}
 
1766
 
1718
1767
static bool isASPScript(int state) {
1719
1768
        return
1720
1769
                (state >= SCE_HJA_START && state <= SCE_HJA_REGEX) ||
1789
1838
                        sc.SetState(SCE_H_DEFAULT);
1790
1839
                }
1791
1840
        } else if (sc.state == SCE_H_TAGUNKNOWN) {
1792
 
                if (!ishtmlwordchar(static_cast<char>(sc.ch)) && !((sc.ch == '/') && (sc.chPrev == '<')) && sc.ch != '[') {
 
1841
                if (!ishtmlwordchar(sc.ch) && !((sc.ch == '/') && (sc.chPrev == '<')) && sc.ch != '[') {
1793
1842
                        char s[100];
1794
1843
                        sc.GetCurrentLowered(s, sizeof(s));
1795
1844
                        if (s[1] == '/') {
1812
1861
                        }
1813
1862
                }
1814
1863
        } else if (sc.state == SCE_H_ATTRIBUTE) {
1815
 
                if (!ishtmlwordchar(static_cast<char>(sc.ch))) {
 
1864
                if (!ishtmlwordchar(sc.ch)) {
1816
1865
                        char s[100];
1817
1866
                        sc.GetCurrentLowered(s, sizeof(s));
1818
1867
                        if (!keywordsTags.InList(s)) {
1864
1913
                } else if (sc.ch == '>') {
1865
1914
                        sc.SetState(SCE_H_TAG);
1866
1915
                        sc.ForwardSetState(SCE_H_DEFAULT);
1867
 
                } else if (ishtmlwordchar(static_cast<char>(sc.ch))) {
 
1916
                } else if (ishtmlwordchar(sc.ch)) {
1868
1917
                        sc.SetState(SCE_H_ATTRIBUTE);
1869
1918
                }
1870
1919
        }
1908
1957
 
1909
1958
static void ColouriseASPDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
1910
1959
                                  Accessor &styler) {
1911
 
        // Lexer for HTML requires more lexical states (7 bits worth) than most lexers
1912
 
        StyleContext sc(startPos, length, initStyle, styler, 0x7f);
 
1960
        // Lexer for HTML requires more lexical states (8 bits worth) than most lexers
 
1961
        StyleContext sc(startPos, length, initStyle, styler, static_cast<char>(STYLE_MAX));
1913
1962
        for (; sc.More(); sc.Forward()) {
1914
1963
                ColouriseASPPiece(sc, keywordlists);
1915
1964
        }
1992
2041
                        sc.SetState(SCE_HPHP_SIMPLESTRING);
1993
2042
                } else if (sc.ch == '$' && IsPhpWordStart(static_cast<char>(sc.chNext))) {
1994
2043
                        sc.SetState(SCE_HPHP_VARIABLE);
1995
 
                } else if (isoperator(static_cast<char>(sc.ch))) {
 
2044
                } else if (IsOperator(static_cast<char>(sc.ch))) {
1996
2045
                        sc.SetState(SCE_HPHP_OPERATOR);
1997
2046
                }
1998
2047
        }
2000
2049
 
2001
2050
static void ColourisePHPDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
2002
2051
                                  Accessor &styler) {
2003
 
        // Lexer for HTML requires more lexical states (7 bits worth) than most lexers
2004
 
        StyleContext sc(startPos, length, initStyle, styler, 0x7f);
 
2052
        // Lexer for HTML requires more lexical states (8 bits worth) than most lexers
 
2053
        StyleContext sc(startPos, length, initStyle, styler, static_cast<char>(STYLE_MAX));
2005
2054
        for (; sc.More(); sc.Forward()) {
2006
2055
                ColourisePHPPiece(sc, keywordlists);
2007
2056
        }
2011
2060
static void ColourisePHPScriptDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
2012
2061
                                               Accessor &styler) {
2013
2062
        if(startPos == 0) initStyle = SCE_HPHP_DEFAULT;
2014
 
                ColouriseHyperTextDoc(startPos,length,initStyle,keywordlists,styler);
 
2063
                ColouriseHTMLDoc(startPos,length,initStyle,keywordlists,styler);
2015
2064
}
2016
2065
 
2017
2066
static const char * const htmlWordListDesc[] = {
2034
2083
        0,
2035
2084
};
2036
2085
 
2037
 
LexerModule lmHTML(SCLEX_HTML, ColouriseHyperTextDoc, "hypertext", 0, htmlWordListDesc, 7);
2038
 
LexerModule lmXML(SCLEX_XML, ColouriseHyperTextDoc, "xml", 0, htmlWordListDesc, 7);
 
2086
LexerModule lmHTML(SCLEX_HTML, ColouriseHTMLDoc, "hypertext", 0, htmlWordListDesc, 8);
 
2087
LexerModule lmXML(SCLEX_XML, ColouriseXMLDoc, "xml", 0, htmlWordListDesc, 8);
2039
2088
// SCLEX_ASP and SCLEX_PHP should not be used in new code: use SCLEX_HTML instead.
2040
 
LexerModule lmASP(SCLEX_ASP, ColouriseASPDoc, "asp", 0, htmlWordListDesc, 7);
2041
 
LexerModule lmPHP(SCLEX_PHP, ColourisePHPDoc, "php", 0, htmlWordListDesc, 7);
2042
 
LexerModule lmPHPSCRIPT(SCLEX_PHPSCRIPT, ColourisePHPScriptDoc, "phpscript", 0, phpscriptWordListDesc, 7);
 
2089
LexerModule lmASP(SCLEX_ASP, ColouriseASPDoc, "asp", 0, htmlWordListDesc, 8);
 
2090
LexerModule lmPHP(SCLEX_PHP, ColourisePHPDoc, "php", 0, htmlWordListDesc, 8);
 
2091
LexerModule lmPHPSCRIPT(SCLEX_PHPSCRIPT, ColourisePHPScriptDoc, "phpscript", 0, phpscriptWordListDesc, 8);