1
// Copyright 2010 Google Inc. All Rights Reserved.
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
7
// http://www.apache.org/licenses/LICENSE-2.0
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
15
#ifndef PAGESPEED_KERNEL_JS_JS_MINIFY_H_
16
#define PAGESPEED_KERNEL_JS_JS_MINIFY_H_
18
#include "pagespeed/kernel/base/basictypes.h"
19
#include "pagespeed/kernel/base/source_map.h"
20
#include "pagespeed/kernel/base/string.h"
21
#include "pagespeed/kernel/base/string_util.h"
22
#include "pagespeed/kernel/js/js_keywords.h"
23
#include "pagespeed/kernel/js/js_tokenizer.h"
29
// Represents the kind of whitespace between two tokens:
30
// kNoWhitespace means that there is no whitespace between the tokens.
31
// kSpace means there's been at least one space/tab, but no linebreaks.
32
// kLinebreak means there's been at least one linebreak.
33
enum JsWhitespace {
  kNoWhitespace,  // Nothing separates the two tokens.
  kSpace,         // One or more spaces/tabs were seen, but no linebreak.
  kLinebreak      // One or more linebreaks were seen.
};
35
// This works like JsTokenizer, except that it only emits whitespace and
36
// comment tokens that are deemed necessary for the script to work. IE
37
// conditional compilation comments are kept; other comments are removed.
38
// Whitespace tokens are only emitted if they are necessary to separate other
39
// tokens or for semicolon insertion, and any that are emitted will be
40
// collapsed to a single whitespace character.
41
class JsMinifyingTokenizer {
43
// Creates a tokenizer that will tokenize the given input string (which must
44
// outlive the JsMinifyingTokenizer object).
45
JsMinifyingTokenizer(const JsTokenizerPatterns* patterns, StringPiece input);
47
// Version that sets source mappings as well.
48
// Note: Source Maps are only correct for ASCII text. Line and column numbers
49
// will be incorrect if there are multi-byte chars in input.
50
// TODO(sligocki): Fix this.
52
const JsTokenizerPatterns* patterns, StringPiece input,
53
net_instaweb::source_map::MappingVector* mappings);
55
~JsMinifyingTokenizer();
57
// Gets the next token type from the input,
58
JsKeywords::Type NextToken(StringPiece* token_out);
60
// True if an error has been encountered. All future calls to NextToken()
61
// will return JsKeywords::kEndOfInput with an empty token string.
62
bool has_error() const { return tokenizer_.has_error(); }
65
JsKeywords::Type NextTokenHelper(
66
StringPiece* token_out,
67
net_instaweb::source_map::Mapping* token_out_position);
69
// Determines whether we need to include whitespace to separate the given
70
// token from the previous token.
71
bool WhitespaceNeededBefore(JsKeywords::Type type, StringPiece token);
73
JsTokenizer tokenizer_;
74
JsWhitespace whitespace_; // Whitespace since the previous token.
75
JsKeywords::Type prev_type_;
76
StringPiece prev_token_;
77
JsKeywords::Type next_type_;
78
StringPiece next_token_;
79
net_instaweb::source_map::MappingVector* mappings_;
80
net_instaweb::source_map::Mapping current_position_;
81
net_instaweb::source_map::Mapping next_position_;
83
DISALLOW_COPY_AND_ASSIGN(JsMinifyingTokenizer);
86
// Minifies the given UTF8-encoded JavaScript code; returns true if the code
87
// parsed successfully, or false if a syntax error prevented complete
88
// minification. Even if this function returns false, the output string will
89
// still be fully populated from the input; the portion of the input up to the
90
// parse error will be minified, and the remainder will be passed through
93
// The input should be UTF8-encoded (or plain ASCII); the minifier does have
94
// some limited capability to tolerate invalid UTF8 bytes, so Latin1-encoded
95
// input will often work, but no guarantees are made.
96
bool MinifyUtf8Js(const JsTokenizerPatterns* patterns,
97
StringPiece input, GoogleString* output);
99
// Minify JS and returns a source mapping. The input should be UTF8-encoded
100
// (or plain ASCII); the minifier does have some limited capability to tolerate
101
// invalid UTF8 bytes, so Latin1-encoded input will often work, but no
102
// guarantees are made.
103
bool MinifyUtf8JsWithSourceMap(
104
const JsTokenizerPatterns* patterns,
105
StringPiece input, GoogleString* output,
106
net_instaweb::source_map::MappingVector* mappings);
108
///////////////////////////////////////////////////////////////////////////////
109
// Below is the old JsMinify implementation. It has several known issues that
110
// the newer implementation above fixes, but for now is still more widely
// used by existing clients.
113
// TODO(mdsteele): Deprecate these functions once we're more confident in the
114
// new implementation, and remove them once all clients are migrated.
115
///////////////////////////////////////////////////////////////////////////////
117
// Returns true if minification was successful, false otherwise.
118
bool MinifyJs(const StringPiece& input, GoogleString* out);
120
// Returns true if minification was successful, false otherwise.
121
bool GetMinifiedJsSize(const StringPiece& input, int* minimized_size);
123
// Returns true if minification and collapsing string was successful, false
124
// otherwise. This functin is a special use of js_minify. It minifies the JS
125
// and removes all the string literals. Example:
126
// origial: var x = 'asd \' lse'
128
bool MinifyJsAndCollapseStrings(const StringPiece& input, GoogleString* output);
129
// NOTE(review): by name and signature this looks like the size-only
// counterpart of MinifyJsAndCollapseStrings(), reporting through
// *minimized_size; the bool result presumably signals success like the other
// legacy entry points -- confirm against the implementation.
bool GetMinifiedStringCollapsedJsSize(const StringPiece& input,
                                      int* minimized_size);
134
} // namespace pagespeed
136
#endif // PAGESPEED_KERNEL_JS_JS_MINIFY_H_