1
/***********************************************************************
3
A JavaScript tokenizer / parser / beautifier / compressor.
5
This version is suitable for Node.js. With minimal changes (the
6
exports stuff) it should work on any JS platform.
8
This file contains the tokenizer/parser. It is a port to JavaScript
9
of parse-js [1], a JavaScript parser library written in Common Lisp
10
by Marijn Haverbeke. Thank you Marijn!
12
[1] http://marijn.haverbeke.nl/parse-js/
16
- tokenizer(code) -- returns a function. Call the returned
17
function to fetch the next token.
19
- parse(code) -- returns an AST of the given JavaScript code.
21
-------------------------------- (C) ---------------------------------
24
<mihai.bazon@gmail.com>
25
http://mihai.bazon.net/blog
27
Distributed under the BSD license:
29
Copyright 2010 (c) Mihai Bazon <mihai.bazon@gmail.com>
30
Based on parse-js (http://marijn.haverbeke.nl/parse-js/).
32
Redistribution and use in source and binary forms, with or without
33
modification, are permitted provided that the following conditions
36
* Redistributions of source code must retain the above
37
copyright notice, this list of conditions and the following
40
* Redistributions in binary form must reproduce the above
41
copyright notice, this list of conditions and the following
42
disclaimer in the documentation and/or other materials
43
provided with the distribution.
45
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER "AS IS" AND ANY
46
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE
49
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
50
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
51
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
52
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
53
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
54
TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
55
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58
***********************************************************************/
60
/* -----[ Tokenizer (constants) ]----- */
62
var KEYWORDS = array_to_hash([
90
var RESERVED_WORDS = array_to_hash([
123
var KEYWORDS_BEFORE_EXPRESSION = array_to_hash([
132
var KEYWORDS_ATOM = array_to_hash([
139
// Set of characters that start an operator token: next_token() hands any
// character found here to read_operator().
var OPERATOR_CHARS = array_to_hash(
    characters("+-*&%=<>!?|~^")
);
141
// Whole-string match for a hexadecimal literal: "0x"/"0X" followed by at
// least one hex digit. parse_js_number() strips the prefix and parses base 16.
var RE_HEX_NUMBER = /^0x[\da-f]+$/i;
142
// Whole-string match for a legacy octal literal: a leading "0" followed by
// one or more digits 0-7. parse_js_number() drops the "0" and parses base 8.
var RE_OCT_NUMBER = /^0[0-7]{1,}$/;
143
// Whole-string match for a decimal literal, tried by parse_js_number() after
// the hex and octal patterns.
//
// Accepted shapes: digits with an optional fraction ("42", "5.", "3.14"), or a
// fraction with no integer part (".5"), optionally followed by an exponent.
// The exponent is an optionally signed run of digits only: the previous
// pattern also allowed a dot inside the exponent (and matched "" and "."), so
// inputs such as "1e5.5" were accepted and parseFloat() silently truncated
// them to 100000 instead of the tokenizer reporting invalid syntax.
var RE_DEC_NUMBER = /^(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?$/i;
145
var OPERATORS = array_to_hash([
192
// Set of characters that skip_whitespace() steps over between tokens.
//
// Beyond the original set (space, NBSP, LF, CR, TAB, FF, VT and U+200B) this
// also covers the remaining characters ECMAScript treats as white space: the
// Unicode space separators U+1680, U+2000-U+200A, U+202F, U+205F, U+3000 and
// the byte order mark U+FEFF. Without them, valid source containing e.g. an
// ideographic space between tokens was rejected as an unexpected character.
// U+200C / U+200D are deliberately absent: is_identifier_char() accepts them
// as identifier characters.
var WHITESPACE_CHARS = array_to_hash(characters(
    " \u00a0\n\r\t\f\u000b\u200b" +
    "\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a" +
    "\u202f\u205f\u3000\ufeff"
));
194
// Punctuation after which token() marks a regexp literal as allowed
// (S.regex_allowed) for the following token.
var PUNC_BEFORE_EXPRESSION = array_to_hash(
    characters("[{}(,.;:")
);
196
// Single characters that next_token() emits directly as a "punc" token.
// "." is not in this set; next_token() routes it through handle_dot().
var PUNC_CHARS = array_to_hash(
    characters("[]{}(),;:")
);
198
// Set of the single-letter regexp flag characters: g, m, s, i and y.
var REGEXP_MODIFIERS = array_to_hash(
    characters("gmsiy")
);
200
/* -----[ Tokenizer ]----- */
202
// regexps adapted from http://xregexp.com/plugins/#unicode
204
letter: new RegExp("[\\u0041-\\u005A\\u0061-\\u007A\\u00AA\\u00B5\\u00BA\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u02C1\\u02C6-\\u02D1\\u02E0-\\u02E4\\u02EC\\u02EE\\u0370-\\u0374\\u0376\\u0377\\u037A-\\u037D\\u0386\\u0388-\\u038A\\u038C\\u038E-\\u03A1\\u03A3-\\u03F5\\u03F7-\\u0481\\u048A-\\u0523\\u0531-\\u0556\\u0559\\u0561-\\u0587\\u05D0-\\u05EA\\u05F0-\\u05F2\\u0621-\\u064A\\u066E\\u066F\\u0671-\\u06D3\\u06D5\\u06E5\\u06E6\\u06EE\\u06EF\\u06FA-\\u06FC\\u06FF\\u0710\\u0712-\\u072F\\u074D-\\u07A5\\u07B1\\u07CA-\\u07EA\\u07F4\\u07F5\\u07FA\\u0904-\\u0939\\u093D\\u0950\\u0958-\\u0961\\u0971\\u0972\\u097B-\\u097F\\u0985-\\u098C\\u098F\\u0990\\u0993-\\u09A8\\u09AA-\\u09B0\\u09B2\\u09B6-\\u09B9\\u09BD\\u09CE\\u09DC\\u09DD\\u09DF-\\u09E1\\u09F0\\u09F1\\u0A05-\\u0A0A\\u0A0F\\u0A10\\u0A13-\\u0A28\\u0A2A-\\u0A30\\u0A32\\u0A33\\u0A35\\u0A36\\u0A38\\u0A39\\u0A59-\\u0A5C\\u0A5E\\u0A72-\\u0A74\\u0A85-\\u0A8D\\u0A8F-\\u0A91\\u0A93-\\u0AA8\\u0AAA-\\u0AB0\\u0AB2\\u0AB3\\u0AB5-\\u0AB9\\u0ABD\\u0AD0\\u0AE0\\u0AE1\\u0B05-\\u0B0C\\u0B0F\\u0B10\\u0B13-\\u0B28\\u0B2A-\\u0B30\\u0B32\\u0B33\\u0B35-\\u0B39\\u0B3D\\u0B5C\\u0B5D\\u0B5F-\\u0B61\\u0B71\\u0B83\\u0B85-\\u0B8A\\u0B8E-\\u0B90\\u0B92-\\u0B95\\u0B99\\u0B9A\\u0B9C\\u0B9E\\u0B9F\\u0BA3\\u0BA4\\u0BA8-\\u0BAA\\u0BAE-\\u0BB9\\u0BD0\\u0C05-\\u0C0C\\u0C0E-\\u0C10\\u0C12-\\u0C28\\u0C2A-\\u0C33\\u0C35-\\u0C39\\u0C3D\\u0C58\\u0C59\\u0C60\\u0C61\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CBD\\u0CDE\\u0CE0\\u0CE1\\u0D05-\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D28\\u0D2A-\\u0D39\\u0D3D\\u0D60\\u0D61\\u0D7A-\\u0D7F\\u0D85-\\u0D96\\u0D9A-\\u0DB1\\u0DB3-\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0E01-\\u0E30\\u0E32\\u0E33\\u0E40-\\u0E46\\u0E81\\u0E82\\u0E84\\u0E87\\u0E88\\u0E8A\\u0E8D\\u0E94-\\u0E97\\u0E99-\\u0E9F\\u0EA1-\\u0EA3\\u0EA5\\u0EA7\\u0EAA\\u0EAB\\u0EAD-\\u0EB0\\u0EB2\\u0EB3\\u0EBD\\u0EC0-\\u0EC4\\u0EC6\\u0EDC\\u0EDD\\u0F00\\u0F40-\\u0F47\\u0F49-\\u0F6C\\u0F88-\\u0F8B\\u1000-\\u102A\\u103F\\u1050-\\u1055\\u105A-\\u105D\\u10
61\\u1065\\u1066\\u106E-\\u1070\\u1075-\\u1081\\u108E\\u10A0-\\u10C5\\u10D0-\\u10FA\\u10FC\\u1100-\\u1159\\u115F-\\u11A2\\u11A8-\\u11F9\\u1200-\\u1248\\u124A-\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u1380-\\u138F\\u13A0-\\u13F4\\u1401-\\u166C\\u166F-\\u1676\\u1681-\\u169A\\u16A0-\\u16EA\\u1700-\\u170C\\u170E-\\u1711\\u1720-\\u1731\\u1740-\\u1751\\u1760-\\u176C\\u176E-\\u1770\\u1780-\\u17B3\\u17D7\\u17DC\\u1820-\\u1877\\u1880-\\u18A8\\u18AA\\u1900-\\u191C\\u1950-\\u196D\\u1970-\\u1974\\u1980-\\u19A9\\u19C1-\\u19C7\\u1A00-\\u1A16\\u1B05-\\u1B33\\u1B45-\\u1B4B\\u1B83-\\u1BA0\\u1BAE\\u1BAF\\u1C00-\\u1C23\\u1C4D-\\u1C4F\\u1C5A-\\u1C7D\\u1D00-\\u1DBF\\u1E00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4\\u1FB6-\\u1FBC\\u1FBE\\u1FC2-\\u1FC4\\u1FC6-\\u1FCC\\u1FD0-\\u1FD3\\u1FD6-\\u1FDB\\u1FE0-\\u1FEC\\u1FF2-\\u1FF4\\u1FF6-\\u1FFC\\u2071\\u207F\\u2090-\\u2094\\u2102\\u2107\\u210A-\\u2113\\u2115\\u2119-\\u211D\\u2124\\u2126\\u2128\\u212A-\\u212D\\u212F-\\u2139\\u213C-\\u213F\\u2145-\\u2149\\u214E\\u2183\\u2184\\u2C00-\\u2C2E\\u2C30-\\u2C5E\\u2C60-\\u2C6F\\u2C71-\\u2C7D\\u2C80-\\u2CE4\\u2D00-\\u2D25\\u2D30-\\u2D65\\u2D6F\\u2D80-\\u2D96\\u2DA0-\\u2DA6\\u2DA8-\\u2DAE\\u2DB0-\\u2DB6\\u2DB8-\\u2DBE\\u2DC0-\\u2DC6\\u2DC8-\\u2DCE\\u2DD0-\\u2DD6\\u2DD8-\\u2DDE\\u2E2F\\u3005\\u3006\\u3031-\\u3035\\u303B\\u303C\\u3041-\\u3096\\u309D-\\u309F\\u30A1-\\u30FA\\u30FC-\\u30FF\\u3105-\\u312D\\u3131-\\u318E\\u31A0-\\u31B7\\u31F0-\\u31FF\\u3400\\u4DB5\\u4E00\\u9FC3\\uA000-\\uA48C\\uA500-\\uA60C\\uA610-\\uA61F\\uA62A\\uA62B\\uA640-\\uA65F\\uA662-\\uA66E\\uA67F-\\uA697\\uA717-\\uA71F\\uA722-\\uA788\\uA78B\\uA78C\\uA7FB-\\uA801\\uA803-\\uA805\\uA807-\\uA80A\\uA80C-\\uA822\\uA840-\\uA873\\uA882-\\uA8B3\\uA90A-\\uA925\\uA930-\\uA946\\uAA00-\\uAA28\\uAA40-\\uAA42\\uAA44-
\\uAA4B\\uAC00\\uD7A3\\uF900-\\uFA2D\\uFA30-\\uFA6A\\uFA70-\\uFAD9\\uFB00-\\uFB06\\uFB13-\\uFB17\\uFB1D\\uFB1F-\\uFB28\\uFB2A-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFB\\uFE70-\\uFE74\\uFE76-\\uFEFC\\uFF21-\\uFF3A\\uFF41-\\uFF5A\\uFF66-\\uFFBE\\uFFC2-\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7\\uFFDA-\\uFFDC]"),
205
non_spacing_mark: new RegExp("[\\u0300-\\u036F\\u0483-\\u0487\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7\\u0610-\\u061A\\u064B-\\u065E\\u0670\\u06D6-\\u06DC\\u06DF-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07A6-\\u07B0\\u07EB-\\u07F3\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-\\u082D\\u0900-\\u0902\\u093C\\u0941-\\u0948\\u094D\\u0951-\\u0955\\u0962\\u0963\\u0981\\u09BC\\u09C1-\\u09C4\\u09CD\\u09E2\\u09E3\\u0A01\\u0A02\\u0A3C\\u0A41\\u0A42\\u0A47\\u0A48\\u0A4B-\\u0A4D\\u0A51\\u0A70\\u0A71\\u0A75\\u0A81\\u0A82\\u0ABC\\u0AC1-\\u0AC5\\u0AC7\\u0AC8\\u0ACD\\u0AE2\\u0AE3\\u0B01\\u0B3C\\u0B3F\\u0B41-\\u0B44\\u0B4D\\u0B56\\u0B62\\u0B63\\u0B82\\u0BC0\\u0BCD\\u0C3E-\\u0C40\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55\\u0C56\\u0C62\\u0C63\\u0CBC\\u0CBF\\u0CC6\\u0CCC\\u0CCD\\u0CE2\\u0CE3\\u0D41-\\u0D44\\u0D4D\\u0D62\\u0D63\\u0DCA\\u0DD2-\\u0DD4\\u0DD6\\u0E31\\u0E34-\\u0E3A\\u0E47-\\u0E4E\\u0EB1\\u0EB4-\\u0EB9\\u0EBB\\u0EBC\\u0EC8-\\u0ECD\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F71-\\u0F7E\\u0F80-\\u0F84\\u0F86\\u0F87\\u0F90-\\u0F97\\u0F99-\\u0FBC\\u0FC6\\u102D-\\u1030\\u1032-\\u1037\\u1039\\u103A\\u103D\\u103E\\u1058\\u1059\\u105E-\\u1060\\u1071-\\u1074\\u1082\\u1085\\u1086\\u108D\\u109D\\u135F\\u1712-\\u1714\\u1732-\\u1734\\u1752\\u1753\\u1772\\u1773\\u17B7-\\u17BD\\u17C6\\u17C9-\\u17D3\\u17DD\\u180B-\\u180D\\u18A9\\u1920-\\u1922\\u1927\\u1928\\u1932\\u1939-\\u193B\\u1A17\\u1A18\\u1A56\\u1A58-\\u1A5E\\u1A60\\u1A62\\u1A65-\\u1A6C\\u1A73-\\u1A7C\\u1A7F\\u1B00-\\u1B03\\u1B34\\u1B36-\\u1B3A\\u1B3C\\u1B42\\u1B6B-\\u1B73\\u1B80\\u1B81\\u1BA2-\\u1BA5\\u1BA8\\u1BA9\\u1C2C-\\u1C33\\u1C36\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8\\u1CED\\u1DC0-\\u1DE6\\u1DFD-\\u1DFF\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2CEF-\\u2CF1\\u2DE0-\\u2DFF\\u302A-\\u302F\\u3099\\u309A\\uA66F\\uA67C\\uA67D\\uA6F0\\uA6F1\\uA802\\uA806\\uA80B\\uA825\\uA826\\uA8C4\\uA8E0-\\uA8F1\\uA926-\\uA92D\\uA947-\\uA951\\uA980-\\uA982\\uA9B3\\uA9B6-\\uA9B9\\uA9BC\\uAA29-\\uAA2E\\uAA31\\
uAA32\\uAA35\\uAA36\\uAA43\\uAA4C\\uAAB0\\uAAB2-\\uAAB4\\uAAB7\\uAAB8\\uAABE\\uAABF\\uAAC1\\uABE5\\uABE8\\uABED\\uFB1E\\uFE00-\\uFE0F\\uFE20-\\uFE26]"),
206
space_combining_mark: new RegExp("[\\u0903\\u093E-\\u0940\\u0949-\\u094C\\u094E\\u0982\\u0983\\u09BE-\\u09C0\\u09C7\\u09C8\\u09CB\\u09CC\\u09D7\\u0A03\\u0A3E-\\u0A40\\u0A83\\u0ABE-\\u0AC0\\u0AC9\\u0ACB\\u0ACC\\u0B02\\u0B03\\u0B3E\\u0B40\\u0B47\\u0B48\\u0B4B\\u0B4C\\u0B57\\u0BBE\\u0BBF\\u0BC1\\u0BC2\\u0BC6-\\u0BC8\\u0BCA-\\u0BCC\\u0BD7\\u0C01-\\u0C03\\u0C41-\\u0C44\\u0C82\\u0C83\\u0CBE\\u0CC0-\\u0CC4\\u0CC7\\u0CC8\\u0CCA\\u0CCB\\u0CD5\\u0CD6\\u0D02\\u0D03\\u0D3E-\\u0D40\\u0D46-\\u0D48\\u0D4A-\\u0D4C\\u0D57\\u0D82\\u0D83\\u0DCF-\\u0DD1\\u0DD8-\\u0DDF\\u0DF2\\u0DF3\\u0F3E\\u0F3F\\u0F7F\\u102B\\u102C\\u1031\\u1038\\u103B\\u103C\\u1056\\u1057\\u1062-\\u1064\\u1067-\\u106D\\u1083\\u1084\\u1087-\\u108C\\u108F\\u109A-\\u109C\\u17B6\\u17BE-\\u17C5\\u17C7\\u17C8\\u1923-\\u1926\\u1929-\\u192B\\u1930\\u1931\\u1933-\\u1938\\u19B0-\\u19C0\\u19C8\\u19C9\\u1A19-\\u1A1B\\u1A55\\u1A57\\u1A61\\u1A63\\u1A64\\u1A6D-\\u1A72\\u1B04\\u1B35\\u1B3B\\u1B3D-\\u1B41\\u1B43\\u1B44\\u1B82\\u1BA1\\u1BA6\\u1BA7\\u1BAA\\u1C24-\\u1C2B\\u1C34\\u1C35\\u1CE1\\u1CF2\\uA823\\uA824\\uA827\\uA880\\uA881\\uA8B4-\\uA8C3\\uA952\\uA953\\uA983\\uA9B4\\uA9B5\\uA9BA\\uA9BB\\uA9BD-\\uA9C0\\uAA2F\\uAA30\\uAA33\\uAA34\\uAA4D\\uAA7B\\uABE3\\uABE4\\uABE6\\uABE7\\uABE9\\uABEA\\uABEC]"),
207
// Character class of connector punctuation: the ASCII underscore (U+005F) plus
// U+203F, U+2040, U+2054, U+FE33, U+FE34, U+FE4D-U+FE4F and U+FF3F.
// is_unicode_connector_punctuation() tests single characters against it.
connector_punctuation: new RegExp("[\\u005F\\u203F\\u2040\\u2054\\uFE33\\uFE34\\uFE4D-\\uFE4F\\uFF3F]")
210
function is_letter(ch) {
211
return UNICODE.letter.test(ch);
214
function is_digit(ch) {
215
ch = ch.charCodeAt(0);
216
return ch >= 48 && ch <= 57; //XXX: find out if "UnicodeDigit" means something else than 0..9
219
function is_alphanumeric_char(ch) {
220
return is_digit(ch) || is_letter(ch);
223
function is_unicode_combining_mark(ch) {
224
return UNICODE.non_spacing_mark.test(ch) || UNICODE.space_combining_mark.test(ch);
227
function is_unicode_connector_punctuation(ch) {
228
return UNICODE.connector_punctuation.test(ch);
231
function is_identifier_start(ch) {
232
return ch == "$" || ch == "_" || is_letter(ch);
235
function is_identifier_char(ch) {
236
return is_identifier_start(ch)
237
|| is_unicode_combining_mark(ch)
239
|| is_unicode_connector_punctuation(ch)
240
|| ch == "\u200c" // zero-width non-joiner <ZWNJ>
241
|| ch == "\u200d" // zero-width joiner <ZWJ> (in my ECMA-262 PDF, this is also 200c)
245
function parse_js_number(num) {
246
if (RE_HEX_NUMBER.test(num)) {
247
return parseInt(num.substr(2), 16);
248
} else if (RE_OCT_NUMBER.test(num)) {
249
return parseInt(num.substr(1), 8);
250
} else if (RE_DEC_NUMBER.test(num)) {
251
return parseFloat(num);
255
function JS_Parse_Error(message, line, col, pos) {
256
this.message = message;
263
this.stack = ex.stack;
267
JS_Parse_Error.prototype.toString = function() {
268
return this.message + " (line: " + this.line + ", col: " + this.col + ", pos: " + this.pos + ")" + "\n\n" + this.stack;
271
function js_error(message, line, col, pos) {
272
throw new JS_Parse_Error(message, line, col, pos);
275
function is_token(token, type, val) {
276
return token.type == type && (val == null || token.value == val);
281
function tokenizer($TEXT) {
284
text : $TEXT.replace(/\r\n?|[\n\u2028\u2029]/g, "\n").replace(/^\uFEFF/, ''),
291
newline_before : false,
292
regex_allowed : false,
296
// Returns the character at the current read position without consuming it;
// charAt() yields "" once the position is past the end of the text.
function peek() {
    var here = S.pos;
    return S.text.charAt(here);
};
298
function next(signal_eof) {
299
var ch = S.text.charAt(S.pos++);
300
if (signal_eof && !ch)
303
S.newline_before = true;
316
function find(what, signal_eof) {
317
var pos = S.text.indexOf(what, S.pos);
318
if (signal_eof && pos == -1) throw EX_EOF;
322
function start_token() {
328
function token(type, value, is_comment) {
329
S.regex_allowed = ((type == "operator" && !HOP(UNARY_POSTFIX, value)) ||
330
(type == "keyword" && HOP(KEYWORDS_BEFORE_EXPRESSION, value)) ||
331
(type == "punc" && HOP(PUNC_BEFORE_EXPRESSION, value)));
338
nlb : S.newline_before
341
ret.comments_before = S.comments_before;
342
S.comments_before = [];
344
S.newline_before = false;
348
function skip_whitespace() {
349
while (HOP(WHITESPACE_CHARS, peek()))
353
function read_while(pred) {
354
var ret = "", ch = peek(), i = 0;
355
while (ch && pred(ch, i++)) {
362
function parse_error(err) {
363
js_error(err, S.tokline, S.tokcol, S.tokpos);
366
function read_num(prefix) {
367
var has_e = false, after_e = false, has_x = false, has_dot = prefix == ".";
368
var num = read_while(function(ch, i){
369
if (ch == "x" || ch == "X") {
370
if (has_x) return false;
373
if (!has_x && (ch == "E" || ch == "e")) {
374
if (has_e) return false;
375
return has_e = after_e = true;
378
if (after_e || (i == 0 && !prefix)) return true;
381
if (ch == "+") return after_e;
384
if (!has_dot && !has_x)
385
return has_dot = true;
388
return is_alphanumeric_char(ch);
392
var valid = parse_js_number(num);
394
return token("num", valid);
396
parse_error("Invalid syntax: " + num);
400
function read_escaped_char() {
403
case "n" : return "\n";
404
case "r" : return "\r";
405
case "t" : return "\t";
406
case "b" : return "\b";
407
case "v" : return "\u000b";
408
case "f" : return "\f";
409
case "0" : return "\0";
410
case "x" : return String.fromCharCode(hex_bytes(2));
411
case "u" : return String.fromCharCode(hex_bytes(4));
412
case "\n": return "";
417
function hex_bytes(n) {
420
var digit = parseInt(next(true), 16);
422
parse_error("Invalid hex-character pattern in string");
423
num = (num << 4) | digit;
428
function read_string() {
429
return with_eof_error("Unterminated string constant", function(){
430
var quote = next(), ret = "";
434
// read OctalEscapeSequence (XXX: deprecated if "strict mode")
435
// https://github.com/mishoo/UglifyJS/issues/178
436
var octal_len = 0, first = null;
437
ch = read_while(function(ch){
438
if (ch >= "0" && ch <= "7") {
443
else if (first <= "3" && octal_len <= 2) return ++octal_len;
444
else if (first >= "4" && octal_len <= 1) return ++octal_len;
448
if (octal_len > 0) ch = String.fromCharCode(parseInt(ch, 8));
449
else ch = read_escaped_char();
451
else if (ch == quote) break;
454
return token("string", ret);
458
function read_line_comment() {
460
var i = find("\n"), ret;
462
ret = S.text.substr(S.pos);
463
S.pos = S.text.length;
465
ret = S.text.substring(S.pos, i);
468
return token("comment1", ret, true);
471
function read_multiline_comment() {
473
return with_eof_error("Unterminated multiline comment", function(){
474
var i = find("*/", true),
475
text = S.text.substring(S.pos, i),
476
tok = token("comment2", text, true);
478
S.line += text.split("\n").length - 1;
479
S.newline_before = text.indexOf("\n") >= 0;
481
// https://github.com/mishoo/UglifyJS/issues/#issue/100
482
if (/^@cc_on/i.test(text)) {
483
warn("WARNING: at line " + S.line);
484
warn("*** Found \"conditional comment\": " + text);
485
warn("*** UglifyJS DISCARDS ALL COMMENTS. This means your code might no longer work properly in Internet Explorer.");
492
function read_name() {
493
var backslash = false, name = "", ch;
494
while ((ch = peek()) != null) {
496
if (ch == "\\") backslash = true, next();
497
else if (is_identifier_char(ch)) name += next();
501
if (ch != "u") parse_error("Expecting UnicodeEscapeSequence -- uXXXX");
502
ch = read_escaped_char();
503
if (!is_identifier_char(ch)) parse_error("Unicode char: " + ch.charCodeAt(0) + " is not valid in identifier");
511
function read_regexp() {
512
return with_eof_error("Unterminated regular expression", function(){
513
var prev_backslash = false, regexp = "", ch, in_class = false;
514
while ((ch = next(true))) if (prev_backslash) {
516
prev_backslash = false;
517
} else if (ch == "[") {
520
} else if (ch == "]" && in_class) {
523
} else if (ch == "/" && !in_class) {
525
} else if (ch == "\\") {
526
prev_backslash = true;
530
var mods = read_name();
531
return token("regexp", [ regexp, mods ]);
535
function read_operator(prefix) {
537
if (!peek()) return op;
538
var bigger = op + peek();
539
if (HOP(OPERATORS, bigger)) {
546
return token("operator", grow(prefix || next()));
549
function handle_slash() {
551
var regex_allowed = S.regex_allowed;
554
S.comments_before.push(read_line_comment());
555
S.regex_allowed = regex_allowed;
558
S.comments_before.push(read_multiline_comment());
559
S.regex_allowed = regex_allowed;
562
return S.regex_allowed ? read_regexp() : read_operator("/");
565
function handle_dot() {
567
return is_digit(peek())
569
: token("punc", ".");
572
function read_word() {
573
var word = read_name();
574
return !HOP(KEYWORDS, word)
575
? token("name", word)
576
: HOP(OPERATORS, word)
577
? token("operator", word)
578
: HOP(KEYWORDS_ATOM, word)
579
? token("atom", word)
580
: token("keyword", word);
583
function with_eof_error(eof_error, cont) {
587
if (ex === EX_EOF) parse_error(eof_error);
592
function next_token(force_regexp) {
594
return read_regexp();
598
if (!ch) return token("eof");
599
if (is_digit(ch)) return read_num();
600
if (ch == '"' || ch == "'") return read_string();
601
if (HOP(PUNC_CHARS, ch)) return token("punc", next());
602
if (ch == ".") return handle_dot();
603
if (ch == "/") return handle_slash();
604
if (HOP(OPERATOR_CHARS, ch)) return read_operator();
605
if (ch == "\\" || is_identifier_start(ch)) return read_word();
606
parse_error("Unexpected character '" + ch + "'");
609
next_token.context = function(nc) {
618
/* -----[ Parser (constants) ]----- */
620
var UNARY_PREFIX = array_to_hash([
632
// Operators accepted in postfix position by maybe_unary(). token() also
// consults this set: an operator listed here does not enable a following
// regexp literal.
var UNARY_POSTFIX = array_to_hash([
    "--",
    "++"
]);
634
var ASSIGNMENT = (function(a, ret, i){
635
while (i < a.length) {
636
ret[a[i]] = a[i].substr(0, a[i].length - 1);
641
["+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&="],
646
var PRECEDENCE = (function(a, ret){
647
for (var i = 0, n = 1; i < a.length; ++i, ++n) {
649
for (var j = 0; j < b.length; ++j) {
661
["==", "===", "!=", "!=="],
662
["<", ">", "<=", ">=", "in", "instanceof"],
670
// Statement node types that labeled_statement() checks a label's target
// against when the parser runs in exigent mode.
var STATEMENTS_WITH_LABELS = array_to_hash([
    "for",
    "do",
    "while",
    "switch"
]);
672
// Token types that expr_atom() turns directly into an atomic expression node.
var ATOMIC_START_TOKEN = array_to_hash([
    "atom",
    "num",
    "string",
    "regexp",
    "name"
]);
674
/* -----[ Parser ]----- */
676
function NodeWithToken(str, start, end) {
682
// A NodeWithToken stringifies to its `name` property.
NodeWithToken.prototype.toString = function() {
    return this.name;
};
684
function parse($TEXT, exigent_mode, embed_tokens) {
687
input : typeof $TEXT == "string" ? tokenizer($TEXT, true) : $TEXT,
698
function is(type, value) {
699
return is_token(S.token, type, value);
702
// Returns the lookahead token. The token is pulled from S.input() at most
// once and cached in S.peeked, so repeated calls return the same token.
function peek() {
    if (!S.peeked) {
        S.peeked = S.input();
    }
    return S.peeked;
};
719
function croak(msg, line, col, pos) {
720
var ctx = S.input.context();
722
line != null ? line : ctx.tokline,
723
col != null ? col : ctx.tokcol,
724
pos != null ? pos : ctx.tokpos);
727
function token_error(token, msg) {
728
croak(msg, token.line, token.col);
731
function unexpected(token) {
734
token_error(token, "Unexpected token: " + token.type + " (" + token.value + ")");
737
function expect_token(type, val) {
741
token_error(S.token, "Unexpected token " + S.token.type + ", expected " + type);
744
// Shorthand for expect_token() with the token type fixed to "punc";
// returns whatever expect_token() returns.
function expect(punc) {
    var kind = "punc";
    return expect_token(kind, punc);
};
746
function can_insert_semicolon() {
747
return !exigent_mode && (
748
S.token.nlb || is("eof") || is("punc", "}")
752
function semicolon() {
753
if (is("punc", ";")) next();
754
else if (!can_insert_semicolon()) unexpected();
758
return slice(arguments);
761
function parenthesised() {
763
var ex = expression();
768
function add_tokens(str, start, end) {
769
return str instanceof NodeWithToken ? str : new NodeWithToken(str, start, end);
772
function maybe_embed_tokens(parser) {
773
if (embed_tokens) return function() {
775
var ast = parser.apply(this, arguments);
776
ast[0] = add_tokens(ast[0], start, prev());
782
var statement = maybe_embed_tokens(function() {
783
if (is("operator", "/")) {
785
S.token = S.input(true); // force regexp
787
switch (S.token.type) {
793
return simple_statement();
796
return is_token(peek(), "punc", ":")
797
? labeled_statement(prog1(S.token.value, next, next))
798
: simple_statement();
801
switch (S.token.value) {
803
return as("block", block_());
806
return simple_statement();
815
switch (prog1(S.token.value, next)) {
817
return break_cont("break");
820
return break_cont("continue");
824
return as("debugger");
827
return (function(body){
828
expect_token("keyword", "while");
829
return as("do", prog1(parenthesised, semicolon), body);
830
})(in_loop(statement));
836
return function_(true);
842
if (S.in_function == 0)
843
croak("'return' outside of function");
847
: can_insert_semicolon()
849
: prog1(expression, semicolon));
852
return as("switch", parenthesised(), switch_block_());
855
return as("throw", prog1(expression, semicolon));
861
return prog1(var_, semicolon);
864
return prog1(const_, semicolon);
867
return as("while", parenthesised(), in_loop(statement));
870
return as("with", parenthesised(), statement());
878
function labeled_statement(label) {
879
S.labels.push(label);
880
var start = S.token, stat = statement();
881
if (exigent_mode && !HOP(STATEMENTS_WITH_LABELS, stat[0]))
884
return as("label", label, stat);
887
function simple_statement() {
888
return as("stat", prog1(expression, semicolon));
891
function break_cont(type) {
893
if (!can_insert_semicolon()) {
894
name = is("name") ? S.token.value : null;
898
if (!member(name, S.labels))
899
croak("Label " + name + " without matching loop or statement");
901
else if (S.in_loop == 0)
902
croak(type + " not inside a loop or switch");
904
return as(type, name);
910
if (!is("punc", ";")) {
911
init = is("keyword", "var")
912
? (next(), var_(true))
913
: expression(true, true);
914
if (is("operator", "in"))
917
return regular_for(init);
920
function regular_for(init) {
922
var test = is("punc", ";") ? null : expression();
924
var step = is("punc", ")") ? null : expression();
926
return as("for", init, test, step, in_loop(statement));
929
function for_in(init) {
930
var lhs = init[0] == "var" ? as("name", init[1][0]) : init;
932
var obj = expression();
934
return as("for-in", init, lhs, obj, in_loop(statement));
937
var function_ = maybe_embed_tokens(function(in_statement) {
938
var name = is("name") ? prog1(S.token.value, next) : null;
939
if (in_statement && !name)
942
return as(in_statement ? "defun" : "function",
946
while (!is("punc", ")")) {
947
if (first) first = false; else expect(",");
948
if (!is("name")) unexpected();
949
a.push(S.token.value);
958
var loop = S.in_loop;
968
var cond = parenthesised(), body = statement(), belse;
969
if (is("keyword", "else")) {
973
return as("if", cond, body, belse);
979
while (!is("punc", "}")) {
980
if (is("eof")) unexpected();
987
var switch_block_ = curry(in_loop, function(){
989
var a = [], cur = null;
990
while (!is("punc", "}")) {
991
if (is("eof")) unexpected();
992
if (is("keyword", "case")) {
995
a.push([ expression(), cur ]);
998
else if (is("keyword", "default")) {
1002
a.push([ null, cur ]);
1005
if (!cur) unexpected();
1006
cur.push(statement());
1014
var body = block_(), bcatch, bfinally;
1015
if (is("keyword", "catch")) {
1019
croak("Name expected");
1020
var name = S.token.value;
1023
bcatch = [ name, block_() ];
1025
if (is("keyword", "finally")) {
1027
bfinally = block_();
1029
if (!bcatch && !bfinally)
1030
croak("Missing catch/finally blocks");
1031
return as("try", body, bcatch, bfinally);
1034
function vardefs(no_in) {
1039
var name = S.token.value;
1041
if (is("operator", "=")) {
1043
a.push([ name, expression(false, no_in) ]);
1047
if (!is("punc", ","))
1054
function var_(no_in) {
1055
return as("var", vardefs(no_in));
1059
return as("const", vardefs());
1063
var newexp = expr_atom(false), args;
1064
if (is("punc", "(")) {
1066
args = expr_list(")");
1070
return subscripts(as("new", newexp, args), true);
1073
var expr_atom = maybe_embed_tokens(function(allow_calls) {
1074
if (is("operator", "new")) {
1079
switch (S.token.value) {
1082
return subscripts(prog1(expression, curry(expect, ")")), allow_calls);
1085
return subscripts(array_(), allow_calls);
1088
return subscripts(object_(), allow_calls);
1092
if (is("keyword", "function")) {
1094
return subscripts(function_(false), allow_calls);
1096
if (HOP(ATOMIC_START_TOKEN, S.token.type)) {
1097
var atom = S.token.type == "regexp"
1098
? as("regexp", S.token.value[0], S.token.value[1])
1099
: as(S.token.type, S.token.value);
1100
return subscripts(prog1(atom, next), allow_calls);
1105
function expr_list(closing, allow_trailing_comma, allow_empty) {
1106
var first = true, a = [];
1107
while (!is("punc", closing)) {
1108
if (first) first = false; else expect(",");
1109
if (allow_trailing_comma && is("punc", closing)) break;
1110
if (is("punc", ",") && allow_empty) {
1111
a.push([ "atom", "undefined" ]);
1113
a.push(expression(false));
1121
return as("array", expr_list("]", !exigent_mode, true));
1124
function object_() {
1125
var first = true, a = [];
1126
while (!is("punc", "}")) {
1127
if (first) first = false; else expect(",");
1128
if (!exigent_mode && is("punc", "}"))
1129
// allow trailing comma
1131
var type = S.token.type;
1133
var is_quoted = false;
1134
switch (S.token.type) {
1138
name = prog1(S.token.value, next);
1144
if (type == "name" && (name == "get" || name == "set") && !is("punc", ":")) {
1145
node = [ as_name(), function_(false), name ];
1148
node = [ name, expression(false) ];
1150
if (is_quoted) node.quoted = true;
1154
return as("object", a);
1157
function as_name() {
1158
switch (S.token.type) {
1163
return prog1(S.token.value, next);
1169
function subscripts(expr, allow_calls) {
1170
if (is("punc", ".")) {
1172
return subscripts(as("dot", expr, as_name()), allow_calls);
1174
if (is("punc", "[")) {
1176
return subscripts(as("sub", expr, prog1(expression, curry(expect, "]"))), allow_calls);
1178
if (allow_calls && is("punc", "(")) {
1180
return subscripts(as("call", expr, expr_list(")")), true);
1185
function maybe_unary(allow_calls) {
1186
if (is("operator") && HOP(UNARY_PREFIX, S.token.value)) {
1187
return make_unary("unary-prefix",
1188
prog1(S.token.value, next),
1189
maybe_unary(allow_calls));
1191
var val = expr_atom(allow_calls);
1192
while (is("operator") && HOP(UNARY_POSTFIX, S.token.value) && !S.token.nlb) {
1193
val = make_unary("unary-postfix", S.token.value, val);
1199
function make_unary(tag, op, expr) {
1200
if ((op == "++" || op == "--") && !is_assignable(expr))
1201
croak("Invalid use of " + op + " operator");
1202
return as(tag, op, expr);
1205
function expr_op(left, min_prec, no_in) {
1206
var op = is("operator") ? S.token.value : null;
1207
if (op && op == "in" && no_in) op = null;
1208
var prec = op != null ? PRECEDENCE[op] : null;
1209
if (prec != null && prec > min_prec) {
1211
var right = expr_op(maybe_unary(true), prec, no_in);
1212
return expr_op(as("binary", op, left, right), min_prec, no_in);
1217
function expr_ops(no_in) {
1218
return expr_op(maybe_unary(true), 0, no_in);
1221
function maybe_conditional(no_in) {
1222
var expr = expr_ops(no_in);
1223
if (is("operator", "?")) {
1225
var yes = expression(false);
1227
return as("conditional", expr, yes, expression(false, no_in));
1232
function is_assignable(expr) {
1233
if (!exigent_mode) return true;
1241
return expr[1] != "this";
1245
function maybe_assign(no_in) {
1246
var left = maybe_conditional(no_in), val = S.token.value;
1247
if (is("operator") && HOP(ASSIGNMENT, val)) {
1248
if (is_assignable(left)) {
1250
return as("assign", ASSIGNMENT[val], left, maybe_assign(no_in));
1252
croak("Invalid assignment");
1257
var expression = maybe_embed_tokens(function(commas, no_in) {
1258
if (arguments.length == 0)
1260
var expr = maybe_assign(no_in);
1261
if (commas && is("punc", ",")) {
1263
return as("seq", expr, expression(true, no_in));
1268
function in_loop(cont) {
1277
return as("toplevel", (function(a){
1279
a.push(statement());
1285
/* -----[ Utilities ]----- */
1288
var args = slice(arguments, 1);
1289
return function() { return f.apply(this, args.concat(slice(arguments))); };
1292
function prog1(ret) {
1293
if (ret instanceof Function)
1295
for (var i = 1, n = arguments.length; --n > 0; ++i)
1300
function array_to_hash(a) {
1302
for (var i = 0; i < a.length; ++i)
1307
function slice(a, start) {
1308
return Array.prototype.slice.call(a, start || 0);
1311
function characters(str) {
1312
return str.split("");
1315
function member(name, array) {
1316
for (var i = array.length; --i >= 0;)
1317
if (array[i] === name)
1322
function HOP(obj, prop) {
1323
return Object.prototype.hasOwnProperty.call(obj, prop);
1326
// Default logger: a no-op that ignores its arguments and returns undefined.
// read_multiline_comment() calls it when it encounters an @cc_on comment.
// NOTE(review): presumably swapped out through exports.set_logger -- confirm.
var warn = function() {
    return;
};
1328
/* -----[ Exports ]----- */
1330
exports.tokenizer = tokenizer;
1331
exports.parse = parse;
1332
exports.slice = slice;
1333
exports.curry = curry;
1334
exports.member = member;
1335
exports.array_to_hash = array_to_hash;
1336
exports.PRECEDENCE = PRECEDENCE;
1337
exports.KEYWORDS_ATOM = KEYWORDS_ATOM;
1338
exports.RESERVED_WORDS = RESERVED_WORDS;
1339
exports.KEYWORDS = KEYWORDS;
1340
exports.ATOMIC_START_TOKEN = ATOMIC_START_TOKEN;
1341
exports.OPERATORS = OPERATORS;
1342
exports.is_alphanumeric_char = is_alphanumeric_char;
1343
exports.set_logger = function(logger) {