~ivle-dev/ivle/codemirror

« back to all changes in this revision

Viewing changes to ivle/webapp/filesystem/browser/media/codemirror/contrib/python/js/parsepython.js

  • Committer: David Coles
  • Date: 2010-05-31 10:38:53 UTC
  • Revision ID: coles.david@gmail.com-20100531103853-8xypjpracvwy0qt4
Editor: Added CodeMirror-0.67 JavaScript code editor source from http://marijn.haverbeke.nl/codemirror/ (zlib-style licence)

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
var PythonParser = Editor.Parser = (function() {
 
2
    function wordRegexp(words) {
 
3
        return new RegExp("^(?:" + words.join("|") + ")$");
 
4
    }
 
5
    var DELIMITERCLASS = 'py-delimiter';
 
6
    var LITERALCLASS = 'py-literal';
 
7
    var ERRORCLASS = 'py-error';
 
8
    var OPERATORCLASS = 'py-operator';
 
9
    var IDENTIFIERCLASS = 'py-identifier';
 
10
    var STRINGCLASS = 'py-string';
 
11
    var BYTESCLASS = 'py-bytes';
 
12
    var UNICODECLASS = 'py-unicode';
 
13
    var RAWCLASS = 'py-raw';
 
14
    var NORMALCONTEXT = 'normal';
 
15
    var STRINGCONTEXT = 'string';
 
16
    var singleOperators = '+-*/%&|^~<>';
 
17
    var doubleOperators = wordRegexp(['==', '!=', '\\<=', '\\>=', '\\<\\>',
 
18
                                      '\\<\\<', '\\>\\>', '\\/\\/', '\\*\\*']);
 
19
    var singleDelimiters = '()[]{}@,:`=;';
 
20
    var doubleDelimiters = ['\\+=', '\\-=', '\\*=', '/=', '%=', '&=', '\\|=',
 
21
                            '\\^='];
 
22
    var tripleDelimiters = wordRegexp(['//=','\\>\\>=','\\<\\<=','\\*\\*=']);
 
23
    var singleStarters = singleOperators + singleDelimiters + '=!';
 
24
    var doubleStarters = '=<>*/';
 
25
    var identifierStarters = /[_A-Za-z]/;
 
26
 
 
27
    var wordOperators = wordRegexp(['and', 'or', 'not', 'is', 'in']);
 
28
    var commonkeywords = ['as', 'assert', 'break', 'class', 'continue',
 
29
                          'def', 'del', 'elif', 'else', 'except', 'finally',
 
30
                          'for', 'from', 'global', 'if', 'import',
 
31
                          'lambda', 'pass', 'raise', 'return',
 
32
                          'try', 'while', 'with', 'yield'];
 
33
    var commontypes = ['bool', 'classmethod', 'complex', 'dict', 'enumerate',
 
34
                       'float', 'frozenset', 'int', 'list', 'object',
 
35
                       'property', 'reversed', 'set', 'slice', 'staticmethod',
 
36
                       'str', 'super', 'tuple', 'type'];
 
37
    var py2 = {'types': ['basestring', 'buffer', 'file', 'long', 'unicode',
 
38
                         'xrange'],
 
39
               'keywords': ['exec', 'print'],
 
40
               'version': 2 };
 
41
    var py3 = {'types': ['bytearray', 'bytes', 'filter', 'map', 'memoryview',
 
42
                         'open', 'range', 'zip'],
 
43
               'keywords': ['nonlocal'],
 
44
               'version': 3};
 
45
 
 
46
    var py, keywords, types, stringStarters, stringTypes, config;
 
47
 
 
48
    function configure(conf) {
 
49
        if (!conf.hasOwnProperty('pythonVersion')) {
 
50
            conf.pythonVersion = 2;
 
51
        }
 
52
        if (!conf.hasOwnProperty('strictErrors')) {
 
53
            conf.strictErrors = true;
 
54
        }
 
55
        if (conf.pythonVersion != 2 && conf.pythonVersion != 3) {
 
56
            alert('CodeMirror: Unknown Python Version "' +
 
57
                  conf.pythonVersion +
 
58
                  '", defaulting to Python 2.x.');
 
59
            conf.pythonVersion = 2;
 
60
        }
 
61
        if (conf.pythonVersion == 3) {
 
62
            py = py3;
 
63
            stringStarters = /[\'\"rbRB]/;
 
64
            stringTypes = /[rb]/;
 
65
            doubleDelimiters.push('\\-\\>');
 
66
        } else {
 
67
            py = py2;
 
68
            stringStarters = /[\'\"RUru]/;
 
69
            stringTypes = /[ru]/;
 
70
        }
 
71
        config = conf;
 
72
        keywords = wordRegexp(commonkeywords.concat(py.keywords));
 
73
        types = wordRegexp(commontypes.concat(py.types));
 
74
        doubleDelimiters = wordRegexp(doubleDelimiters);
 
75
    }
 
76
 
 
77
    var tokenizePython = (function() {
 
78
        function normal(source, setState) {
 
79
            var stringDelim, threeStr, temp, type, word, possible = {};
 
80
            var ch = source.next();
 
81
            
 
82
            function filterPossible(token, styleIfPossible) {
 
83
                if (!possible.style && !possible.content) {
 
84
                    return token;
 
85
                } else if (typeof(token) == STRINGCONTEXT) {
 
86
                    token = {content: source.get(), style: token};
 
87
                }
 
88
                if (possible.style || styleIfPossible) {
 
89
                    token.style = styleIfPossible ? styleIfPossible : possible.style;
 
90
                }
 
91
                if (possible.content) {
 
92
                    token.content = possible.content + token.content;
 
93
                }
 
94
                possible = {};
 
95
                return token;
 
96
            }
 
97
 
 
98
            // Handle comments
 
99
            if (ch == '#') {
 
100
                while (!source.endOfLine()) {
 
101
                    source.next();
 
102
                }
 
103
                return 'py-comment';
 
104
            }
 
105
            // Handle special chars
 
106
            if (ch == '\\') {
 
107
                if (!source.endOfLine()) {
 
108
                    var whitespace = true;
 
109
                    while (!source.endOfLine()) {
 
110
                        if(!(/[\s\u00a0]/.test(source.next()))) {
 
111
                            whitespace = false;
 
112
                        }
 
113
                    }
 
114
                    if (!whitespace) {
 
115
                        return ERRORCLASS;
 
116
                    }
 
117
                }
 
118
                return 'py-special';
 
119
            }
 
120
            // Handle operators and delimiters
 
121
            if (singleStarters.indexOf(ch) != -1 || (ch == "." && !source.matches(/\d/))) {
 
122
                if (doubleStarters.indexOf(source.peek()) != -1) {
 
123
                    temp = ch + source.peek();
 
124
                    // It must be a double delimiter or operator or triple delimiter
 
125
                    if (doubleOperators.test(temp)) {
 
126
                        source.next();
 
127
                        var nextChar = source.peek();
 
128
                        if (nextChar && tripleDelimiters.test(temp + nextChar)) {
 
129
                            source.next();
 
130
                            return DELIMITERCLASS;
 
131
                        } else {
 
132
                            return OPERATORCLASS;
 
133
                        }
 
134
                    } else if (doubleDelimiters.test(temp)) {
 
135
                        source.next();
 
136
                        return DELIMITERCLASS;
 
137
                    }
 
138
                }
 
139
                // It must be a single delimiter or operator
 
140
                if (singleOperators.indexOf(ch) != -1 || ch == ".") {
 
141
                    return OPERATORCLASS;
 
142
                } else if (singleDelimiters.indexOf(ch) != -1) {
 
143
                    if (ch == '@' && source.matches(/\w/)) {
 
144
                        source.nextWhileMatches(/[\w\d_]/);
 
145
                        return {style:'py-decorator',
 
146
                                content: source.get()};
 
147
                    } else {
 
148
                        return DELIMITERCLASS;
 
149
                    }
 
150
                } else {
 
151
                    return ERRORCLASS;
 
152
                }
 
153
            }
 
154
            // Handle number literals
 
155
            if (/\d/.test(ch) || (ch == "." && source.matches(/\d/))) {
 
156
                if (ch === '0' && !source.endOfLine()) {
 
157
                    switch (source.peek()) {
 
158
                        case 'o':
 
159
                        case 'O':
 
160
                            source.next();
 
161
                            source.nextWhileMatches(/[0-7]/);
 
162
                            return filterPossible(LITERALCLASS, ERRORCLASS);
 
163
                        case 'x':
 
164
                        case 'X':
 
165
                            source.next();
 
166
                            source.nextWhileMatches(/[0-9A-Fa-f]/);
 
167
                            return filterPossible(LITERALCLASS, ERRORCLASS);
 
168
                        case 'b':
 
169
                        case 'B':
 
170
                            source.next();
 
171
                            source.nextWhileMatches(/[01]/);
 
172
                            return filterPossible(LITERALCLASS, ERRORCLASS);
 
173
                    }
 
174
                }
 
175
                source.nextWhileMatches(/\d/);
 
176
                if (ch != '.' && source.peek() == '.') {
 
177
                    source.next();
 
178
                    source.nextWhileMatches(/\d/);
 
179
                }
 
180
                // Grab an exponent
 
181
                if (source.matches(/e/i)) {
 
182
                    source.next();
 
183
                    if (source.peek() == '+' || source.peek() == '-') {
 
184
                        source.next();
 
185
                    }
 
186
                    if (source.matches(/\d/)) {
 
187
                        source.nextWhileMatches(/\d/);
 
188
                    } else {
 
189
                        return filterPossible(ERRORCLASS);
 
190
                    }
 
191
                }
 
192
                // Grab a complex number
 
193
                if (source.matches(/j/i)) {
 
194
                    source.next();
 
195
                }
 
196
 
 
197
                return filterPossible(LITERALCLASS);
 
198
            }
 
199
            // Handle strings
 
200
            if (stringStarters.test(ch)) {
 
201
                var peek = source.peek();
 
202
                var stringType = STRINGCLASS;
 
203
                if ((stringTypes.test(ch)) && (peek == '"' || peek == "'")) {
 
204
                    switch (ch.toLowerCase()) {
 
205
                        case 'b':
 
206
                            stringType = BYTESCLASS;
 
207
                            break;
 
208
                        case 'r':
 
209
                            stringType = RAWCLASS;
 
210
                            break;
 
211
                        case 'u':
 
212
                            stringType = UNICODECLASS;
 
213
                            break;
 
214
                    }
 
215
                    ch = source.next();
 
216
                    stringDelim = ch;
 
217
                    if (source.peek() != stringDelim) {
 
218
                        setState(inString(stringType, stringDelim));
 
219
                        return null;
 
220
                    } else {
 
221
                        source.next();
 
222
                        if (source.peek() == stringDelim) {
 
223
                            source.next();
 
224
                            threeStr = stringDelim + stringDelim + stringDelim;
 
225
                            setState(inString(stringType, threeStr));
 
226
                            return null;
 
227
                        } else {
 
228
                            return stringType;
 
229
                        }
 
230
                    }
 
231
                } else if (ch == "'" || ch == '"') {
 
232
                    stringDelim = ch;
 
233
                    if (source.peek() != stringDelim) {
 
234
                        setState(inString(stringType, stringDelim));
 
235
                        return null;
 
236
                    } else {
 
237
                        source.next();
 
238
                        if (source.peek() == stringDelim) {
 
239
                            source.next();
 
240
                            threeStr = stringDelim + stringDelim + stringDelim;
 
241
                            setState(inString(stringType, threeStr));
 
242
                            return null;
 
243
                        } else {
 
244
                            return stringType;
 
245
                        }
 
246
                    }
 
247
                }
 
248
            }
 
249
            // Handle Identifier
 
250
            if (identifierStarters.test(ch)) {
 
251
                source.nextWhileMatches(/[\w\d_]/);
 
252
                word = source.get();
 
253
                if (wordOperators.test(word)) {
 
254
                    type = OPERATORCLASS;
 
255
                } else if (keywords.test(word)) {
 
256
                    type = 'py-keyword';
 
257
                } else if (types.test(word)) {
 
258
                    type = 'py-type';
 
259
                } else {
 
260
                    type = IDENTIFIERCLASS;
 
261
                    while (source.peek() == '.') {
 
262
                        source.next();
 
263
                        if (source.matches(identifierStarters)) {
 
264
                            source.nextWhileMatches(/[\w\d]/);
 
265
                        } else {
 
266
                            type = ERRORCLASS;
 
267
                            break;
 
268
                        }
 
269
                    }
 
270
                    word = word + source.get();
 
271
                }
 
272
                return filterPossible({style: type, content: word});
 
273
            }
 
274
 
 
275
            // Register Dollar sign and Question mark as errors. Always!
 
276
            if (/\$\?/.test(ch)) {
 
277
                return filterPossible(ERRORCLASS);
 
278
            }
 
279
 
 
280
            return filterPossible(ERRORCLASS);
 
281
        }
 
282
 
 
283
        function inString(style, terminator) {
 
284
            return function(source, setState) {
 
285
                var matches = [];
 
286
                var found = false;
 
287
                while (!found && !source.endOfLine()) {
 
288
                    var ch = source.next(), newMatches = [];
 
289
                    // Skip escaped characters
 
290
                    if (ch == '\\') {
 
291
                        if (source.peek() == '\n') {
 
292
                            break;
 
293
                        }
 
294
                        ch = source.next();
 
295
                        ch = source.next();
 
296
                    }
 
297
                    if (ch == terminator.charAt(0)) {
 
298
                        matches.push(terminator);
 
299
                    }
 
300
                    for (var i = 0; i < matches.length; i++) {
 
301
                        var match = matches[i];
 
302
                        if (match.charAt(0) == ch) {
 
303
                            if (match.length == 1) {
 
304
                                setState(normal);
 
305
                                found = true;
 
306
                                break;
 
307
                            } else {
 
308
                                newMatches.push(match.slice(1));
 
309
                            }
 
310
                        }
 
311
                    }
 
312
                    matches = newMatches;
 
313
                }
 
314
                return style;
 
315
            };
 
316
        }
 
317
 
 
318
        return function(source, startState) {
 
319
            return tokenizer(source, startState || normal);
 
320
        };
 
321
    })();
 
322
 
 
323
    function parsePython(source, basecolumn) {
 
324
        if (!keywords) {
 
325
            configure({});
 
326
        }
 
327
        basecolumn = basecolumn || 0;
 
328
 
 
329
        var tokens = tokenizePython(source);
 
330
        var lastToken = null;
 
331
        var column = basecolumn;
 
332
        var context = {prev: null,
 
333
                       endOfScope: false,
 
334
                       startNewScope: false,
 
335
                       level: basecolumn,
 
336
                       next: null,
 
337
                       type: NORMALCONTEXT
 
338
                       };
 
339
 
 
340
        function pushContext(level, type) {
 
341
            type = type ? type : NORMALCONTEXT;
 
342
            context = {prev: context,
 
343
                       endOfScope: false,
 
344
                       startNewScope: false,
 
345
                       level: level,
 
346
                       next: null,
 
347
                       type: type
 
348
                       };
 
349
        }
 
350
 
 
351
        function popContext(remove) {
 
352
            remove = remove ? remove : false;
 
353
            if (context.prev) {
 
354
                if (remove) {
 
355
                    context = context.prev;
 
356
                    context.next = null;
 
357
                } else {
 
358
                    context.prev.next = context;
 
359
                    context = context.prev;
 
360
                }
 
361
            }
 
362
        }
 
363
 
 
364
        function indentPython(context) {
 
365
            var temp;
 
366
            return function(nextChars, currentLevel, direction) {
 
367
                if (direction === null || direction === undefined) {
 
368
                    if (nextChars) {
 
369
                        while (context.next) {
 
370
                            context = context.next;
 
371
                        }
 
372
                    }
 
373
                    return context.level;
 
374
                }
 
375
                else if (direction === true) {
 
376
                    if (currentLevel == context.level) {
 
377
                        if (context.next) {
 
378
                            return context.next.level;
 
379
                        } else {
 
380
                            return context.level;
 
381
                        }
 
382
                    } else {
 
383
                        temp = context;
 
384
                        while (temp.prev && temp.prev.level > currentLevel) {
 
385
                            temp = temp.prev;
 
386
                        }
 
387
                        return temp.level;
 
388
                    }
 
389
                } else if (direction === false) {
 
390
                    if (currentLevel > context.level) {
 
391
                        return context.level;
 
392
                    } else if (context.prev) {
 
393
                        temp = context;
 
394
                        while (temp.prev && temp.prev.level >= currentLevel) {
 
395
                            temp = temp.prev;
 
396
                        }
 
397
                        if (temp.prev) {
 
398
                            return temp.prev.level;
 
399
                        } else {
 
400
                            return temp.level;
 
401
                        }
 
402
                    }
 
403
                }
 
404
                return context.level;
 
405
            };
 
406
        }
 
407
 
 
408
        var iter = {
 
409
            next: function() {
 
410
                var token = tokens.next();
 
411
                var type = token.style;
 
412
                var content = token.content;
 
413
 
 
414
                if (lastToken) {
 
415
                    if (lastToken.content == 'def' && type == IDENTIFIERCLASS) {
 
416
                        token.style = 'py-func';
 
417
                    }
 
418
                    if (lastToken.content == '\n') {
 
419
                        var tempCtx = context;
 
420
                        // Check for a different scope
 
421
                        if (type == 'whitespace' && context.type == NORMALCONTEXT) {
 
422
                            if (token.value.length < context.level) {
 
423
                                while (token.value.length < context.level) {
 
424
                                    popContext();
 
425
                                }
 
426
 
 
427
                                if (token.value.length != context.level) {
 
428
                                    context = tempCtx;
 
429
                                    if (config.strictErrors) {
 
430
                                        token.style = ERRORCLASS;
 
431
                                    }
 
432
                                } else {
 
433
                                    context.next = null;
 
434
                                }
 
435
                            }
 
436
                        } else if (context.level !== basecolumn &&
 
437
                                   context.type == NORMALCONTEXT) {
 
438
                            while (basecolumn !== context.level) {
 
439
                                popContext();
 
440
                            }
 
441
 
 
442
                            if (context.level !== basecolumn) {
 
443
                                context = tempCtx;
 
444
                                if (config.strictErrors) {
 
445
                                    token.style = ERRORCLASS;
 
446
                                }
 
447
                            }
 
448
                        }
 
449
                    }
 
450
                }
 
451
 
 
452
                // Handle Scope Changes
 
453
                switch(type) {
 
454
                    case STRINGCLASS:
 
455
                    case BYTESCLASS:
 
456
                    case RAWCLASS:
 
457
                    case UNICODECLASS:
 
458
                        if (context.type !== STRINGCONTEXT) {
 
459
                            pushContext(context.level + 1, STRINGCONTEXT);
 
460
                        }
 
461
                        break;
 
462
                    default:
 
463
                        if (context.type === STRINGCONTEXT) {
 
464
                            popContext(true);
 
465
                        }
 
466
                        break;
 
467
                }
 
468
                switch(content) {
 
469
                    case '.':
 
470
                    case '@':
 
471
                        // These delimiters don't appear by themselves
 
472
                        if (content !== token.value) {
 
473
                            token.style = ERRORCLASS;
 
474
                        }
 
475
                        break;
 
476
                    case ':':
 
477
                        // Colons only delimit scope inside a normal scope
 
478
                        if (context.type === NORMALCONTEXT) {
 
479
                            context.startNewScope = context.level+indentUnit;
 
480
                        }
 
481
                        break;
 
482
                    case '(':
 
483
                    case '[':
 
484
                    case '{':
 
485
                        // These start a sequence scope
 
486
                        pushContext(column + content.length, 'sequence');
 
487
                        break;
 
488
                    case ')':
 
489
                    case ']':
 
490
                    case '}':
 
491
                        // These end a sequence scope
 
492
                        popContext(true);
 
493
                        break;
 
494
                    case 'pass':
 
495
                    case 'return':
 
496
                        // These end a normal scope
 
497
                        if (context.type === NORMALCONTEXT) {
 
498
                            context.endOfScope = true;
 
499
                        }
 
500
                        break;
 
501
                    case '\n':
 
502
                        // Reset our column
 
503
                        column = basecolumn;
 
504
                        // Make any scope changes
 
505
                        if (context.endOfScope) {
 
506
                            context.endOfScope = false;
 
507
                            popContext();
 
508
                        } else if (context.startNewScope !== false) {
 
509
                            var temp = context.startNewScope;
 
510
                            context.startNewScope = false;
 
511
                            pushContext(temp, NORMALCONTEXT);
 
512
                        }
 
513
                        // Newlines require an indentation function wrapped in a closure for proper context.
 
514
                        token.indentation = indentPython(context);
 
515
                        break;
 
516
                }
 
517
 
 
518
                // Keep track of current column for certain scopes.
 
519
                if (content != '\n') {
 
520
                    column += token.value.length;
 
521
                }
 
522
 
 
523
                lastToken = token;
 
524
                return token;
 
525
            },
 
526
 
 
527
            copy: function() {
 
528
                var _context = context, _tokenState = tokens.state;
 
529
                return function(source) {
 
530
                    tokens = tokenizePython(source, _tokenState);
 
531
                    context = _context;
 
532
                    return iter;
 
533
                };
 
534
            }
 
535
        };
 
536
        return iter;
 
537
    }
 
538
 
 
539
    return {make: parsePython,
 
540
            electricChars: "",
 
541
            configure: configure};
 
542
})();