# Copyright 2012 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""A JavaScript minifier.

It is far from being a complete JS parser, so there are many valid
JavaScript programs that will be ruined by it. Another strangeness is that
it accepts $ and % as parts of identifiers. It doesn't merge lines or strip
out blank lines in order to ease debugging. Variables at the top scope are
properties of the global object so we can't rename them. It is assumed that
you introduce variables with var as if JavaScript followed C++ scope rules
around curly braces, so the declaration must be above the first use.

Use as:
  minifier = JavaScriptMinifier()
  program1 = minifier.JSMinify(program1)
  program2 = minifier.JSMinify(program2)
"""

import re


class JavaScriptMinifier(object):
    """An object that you can feed code snippets to to get them minified.

    The minifier works line by line and keeps state between calls to
    JSMinify: whether we are inside a /* block comment */, the current
    curly-brace nesting depth, the renaming table of the current top-level
    scope, and the set of identifiers that must not be used as new names.

    Known limitation: comments are stripped before string literals are
    recognised, so a "//" or "/*" inside a string literal is treated as the
    start of a comment.
    """

    # Words that can never be used as a generated variable name.  The
    # original two-letter scheme only needed "do" and "in"; "if" was missing,
    # and longer names require the remaining reserved words as well.
    _RESERVED_WORDS = (
        "do", "if", "in",
        "abstract", "await", "boolean", "break", "byte", "case", "catch",
        "char", "class", "const", "continue", "debugger", "default",
        "delete", "double", "else", "enum", "export", "extends", "false",
        "final", "finally", "float", "for", "function", "goto",
        "implements", "import", "instanceof", "int", "interface", "let",
        "long", "native", "new", "null", "package", "private", "protected",
        "public", "return", "short", "static", "super", "switch",
        "synchronized", "this", "throw", "throws", "transient", "true",
        "try", "typeof", "var", "void", "volatile", "while", "with", "yield",
    )

    # A regexp that matches a literal string surrounded by "double quotes".
    # This regexp can handle embedded backslash-escaped characters including
    # embedded backslash-escaped double quotes.
    _DOUBLE_QUOTED_STRING = r'"(?:[^"\\]|\\.)*"'
    # A regexp that matches a literal string surrounded by 'single quotes'.
    _SINGLE_QUOTED_STRING = r"'(?:[^'\\]|\\.)*'"
    # A regexp that matches a regexp literal surrounded by /slashes/.
    # Don't allow a regexp to have a ) before the first ( since that's a
    # syntax error and it's probably just two unrelated slashes.
    # Also don't allow it to come after anything that can only be the
    # end of a primary expression.
    _SLASH_QUOTED_REGEXP = r"(?<![\w$'\")\]])/(?:(?=\()|(?:[^()/\\]|\\.)+)(?:\([^/\\]|\\.)*/"

    # The three kinds of literal whose contents must never be rewritten.
    _LITERALS = "|".join([_DOUBLE_QUOTED_STRING,
                          _SINGLE_QUOTED_STRING,
                          _SLASH_QUOTED_REGEXP])

    # Runs of spaces (capture 1 is a single space).
    _MULTI_SPACE_RE = re.compile(_LITERALS + "|" + "( )+")
    # Single spaces that do not have an identifier character both before and
    # after them.  % and $ are counted as identifier characters.  Capture 1
    # is either unmatched or the empty string.
    _SINGLE_SPACE_RE = re.compile(
        _LITERALS + "|" + r"(?<![a-zA-Z_0-9$%]) | (?![a-zA-Z_0-9$%])()")

    _BLOCK_COMMENT_RE = re.compile(r"/\*.*?\*/")
    _LINE_COMMENT_RE = re.compile(r"//.*")
    _IDENTIFIER_RE = re.compile(r"([a-zA-Z0-9_$%]+)")

    _FUNCTION_DECLARATION = (
        r"\bfunction"              # Function definition keyword...
        r"( [\w$%]+)?"             # ...optional function name...
        r"\([\w$%,]+\)\{")         # ...argument declarations.
    _FUNCTION_HEAD_RE = re.compile(r"(function\b[^(]*)\((.*)\)\{$")

    # Variable use.  Cannot follow a period.
    _VARIABLE_USE = r"(?<![.\w$%])[\w$%]+"
    # ...and normally cannot precede a colon either, see JSMinify.
    _NOT_BEFORE_COLON = r"(?![:\w$%])"

    _DECLARATION_PREFIX = "|".join([_LITERALS,
                                    r"\{",                  # Curly braces.
                                    r"\}",
                                    r"\bvar [\w$%,]+",      # var declarations.
                                    _FUNCTION_DECLARATION])
    # Used for lines without a question mark.
    _DECLARATION_RE = re.compile(
        _DECLARATION_PREFIX + "|" + _VARIABLE_USE + _NOT_BEFORE_COLON)
    # Used for lines that contain a question mark (possible ternary).
    _DECLARATION_TERNARY_RE = re.compile(
        _DECLARATION_PREFIX + "|" + _VARIABLE_USE)

    def __init__(self):
        # We prepopulate the list of identifiers that shouldn't be used.
        # These language keywords could otherwise be picked as generated
        # variable names and therefore we want to avoid them.
        self.seen_identifiers = dict.fromkeys(self._RESERVED_WORDS, True)
        # Index of the next candidate name within the current scope.
        self.identifier_counter = 0
        # True while we are between "/*" and "*/" across line boundaries.
        self.in_comment = False
        # Maps original variable names to short names for the current scope.
        self.map = {}
        # Current curly-brace depth; 0 means top level (nothing is renamed).
        self.nesting = 0

    def LookAtIdentifier(self, m):
        """Records identifiers or keywords that we see in use.

        (So we can avoid renaming variables to these strings.)

        Args:
          m: The match object returned by re.search.

        Returns:
          Nothing.
        """
        self.seen_identifiers[m.group(1)] = True

    def Push(self):
        """Called when we encounter a '{'."""
        self.nesting += 1

    def Pop(self):
        """Called when we encounter a '}'."""
        self.nesting -= 1
        # We treat each top-level opening brace as a single scope that can
        # span several sets of nested braces.
        if self.nesting == 0:
            self.map = {}
            self.identifier_counter = 0

    def Declaration(self, m):
        """Rewrites bits of the program selected by a regexp.

        These can be curly braces, literal strings, function declarations and
        var declarations.  (These last two must be on one line including the
        opening curly brace of the function for their variables to be
        renamed).

        Args:
          m: The match object returned by re.search.

        Returns:
          The string that should replace the match in the rewritten program.
        """
        matched_text = m.group(0)
        if matched_text == "{":
            self.Push()
            return matched_text
        if matched_text == "}":
            self.Pop()
            return matched_text
        # String and regexp literals pass through untouched.
        if matched_text.startswith(("\"", "'", "/")):
            return matched_text
        if matched_text.startswith("var "):
            var_names = matched_text[len("var "):].split(",")
            return "var " + ",".join(
                [self.FindNewName(name) for name in var_names])
        function_head = self._FUNCTION_HEAD_RE.match(matched_text)
        if function_head:
            up_to_args = function_head.group(1)
            args = function_head.group(2).split(",")
            # The arguments belong to the scope opened by the function's
            # brace, so enter it before allocating their names.
            self.Push()
            return (up_to_args + "(" +
                    ",".join([self.FindNewName(arg) for arg in args]) + "){")
        # Plain variable use: substitute the short name if one was assigned.
        return self.map.get(matched_text, matched_text)

    def CharFromNumber(self, number):
        """A single-digit base-52 encoding using a-zA-Z."""
        if number < 26:
            return chr(number + 97)
        number -= 26
        return chr(number + 65)

    def _NameFromCounter(self, counter):
        """Returns the counter-th candidate name: a..Z, aa..ZZ, aaa, ..."""
        chars = [self.CharFromNumber(counter % 52)]
        counter //= 52
        # Every further position is bijective base-52: there is no "zero"
        # digit, which is why 1 is subtracted before each extraction.
        while counter > 0:
            counter -= 1
            chars.append(self.CharFromNumber(counter % 52))
            counter //= 52
        chars.reverse()
        return "".join(chars)

    def FindNewName(self, var_name):
        """Finds a new short name for a variable.

        Enters it into the mapping table for this scope.  Names are one or
        two characters long for the first 2756 variables of a scope and grow
        longer after that.

        Args:
          var_name: The name of the variable before renaming.

        Returns:
          The new name of the variable.
        """
        # A variable that already has a short name in this scope keeps it.
        # NOTE(review): names ending in "_" get no special treatment here.
        if var_name in self.map:
            return self.map[var_name]
        # Top-level variables are properties of the global object, so they
        # are left alone.
        if self.nesting == 0:
            return var_name
        while True:
            new_identifier = self._NameFromCounter(self.identifier_counter)
            self.identifier_counter += 1
            if new_identifier not in self.seen_identifiers:
                break
        self.map[var_name] = new_identifier
        return new_identifier

    def RemoveSpaces(self, m):
        """Returns literal strings unchanged, replaces other inputs with group 1.

        Other inputs are replaced with the contents of capture 1.  This is
        either a single space or an empty string.

        Args:
          m: The match object returned by re.search.

        Returns:
          The string that should be inserted instead of the matched text.
        """
        entire_match = m.group(0)
        if re.match(r"'.*'$", entire_match):
            return entire_match
        if re.match(r'".*"$', entire_match):
            return entire_match
        if re.match(r"/.+/$", entire_match):
            return entire_match
        replacement = m.group(1)
        # Capture 1 is unmatched (None) for one of the single-space
        # alternatives; that means "delete the space".
        if replacement is None:
            return ""
        return replacement

    def _StripComments(self, line):
        """Removes comments from one line, tracking multi-line block comments.

        Args:
          line: One line of source text without its line terminator.

        Returns:
          The line without comments, or None if the whole line lies inside a
          block comment that does not end on this line.
        """
        if self.in_comment:
            end = line.find("*/")
            if end < 0:
                return None
            line = line[end + len("*/"):]
            self.in_comment = False
        line = self._BLOCK_COMMENT_RE.sub(" ", line)
        line = self._LINE_COMMENT_RE.sub("", line)
        start = line.find("/*")
        if start >= 0:
            # A block comment opens here and continues on a later line.
            line = line[:start]
            self.in_comment = True
        return line

    def JSMinify(self, text):
        """The main entry point.  Takes a text and returns a compressed version.

        The compressed version hopefully does the same thing.  Line breaks
        are preserved.

        Args:
          text: The text of the code snippet as a multiline string.

        Returns:
          The compressed text of the code snippet as a multiline string.
        """
        new_lines = []
        for line in text.split("\n"):
            line = self._StripComments(line.replace("\t", " "))
            if line is None:
                # Keep an empty line so that line numbers stay the same.
                new_lines.append("")
                continue
            # Strip leading and trailing spaces.
            line = line.strip(" ")
            # Replace multiple spaces with a single space.
            line = self._MULTI_SPACE_RE.sub(self.RemoveSpaces, line)
            # Strip single spaces unless they have an identifier character
            # both before and after the space.
            line = self._SINGLE_SPACE_RE.sub(self.RemoveSpaces, line)
            # Collect keywords and identifiers that are already in use.
            if self.nesting == 0:
                for identifier_match in self._IDENTIFIER_RE.finditer(line):
                    self.LookAtIdentifier(identifier_match)
            # Unfortunately the keyword-value syntax { key:value } makes the
            # key look like a variable where in fact it is a literal string.
            # We use the presence or absence of a question mark to try to
            # distinguish between this case and the ternary operator:
            # "condition ? iftrue : iffalse".
            if "?" in line:
                declaration_re = self._DECLARATION_TERNARY_RE
            else:
                declaration_re = self._DECLARATION_RE
            new_lines.append(declaration_re.sub(self.Declaration, line))

        return "\n".join(new_lines) + "\n"