2
* Copyright 2011 Google Inc.
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
* you may not use this file except in compliance with the License.
6
* You may obtain a copy of the License at
8
* http://www.apache.org/licenses/LICENSE-2.0
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
17
// Author: slamm@google.com (Stephen Lamm)
19
// Search for synchronous loads of Google Analytics similar to the following:
21
// <script type="text/javascript">
22
// var gaJsHost = (("https:" == document.location.protocol) ?
23
// "https://ssl." : "http://www.");
24
// document.write(unescape("%3Cscript src='" + gaJsHost +
25
// "google-analytics.com/ga.js' type='text/javascript'" +
26
// "%3E%3C/script%3E"));
28
// <script type="text/javascript">
30
// var pageTracker = _gat._getTracker("UA-XXXXX-X");
31
// pageTracker._trackPageview();
35
// Replace the document.write with a new snippet that loads ga.js
36
// asynchronously. Also, insert a replacement for _getTracker that
37
// converts any calls to the synchronous API to the asynchronous API.
38
// The _getTracker replacement is a new function that returns a mock
39
// tracker object. Anytime a synchronous API method is called, the
40
// mock tracker forwards it to a _gaq.push(...) call.
42
// An alternative approach would have been to find all the API calls and
43
// rewrite them to the asynchronous API. However, to be done properly,
44
// it would have had the added complication of using a JavaScript
48
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_GOOGLE_ANALYTICS_FILTER_H_
49
#define NET_INSTAWEB_REWRITER_PUBLIC_GOOGLE_ANALYTICS_FILTER_H_
53
#include "net/instaweb/htmlparse/public/empty_html_filter.h"
54
#include "net/instaweb/util/public/basictypes.h"
55
#include "net/instaweb/util/public/scoped_ptr.h"
56
#include "net/instaweb/util/public/string.h"
57
#include "net/instaweb/util/public/string_util.h"
59
namespace net_instaweb {
61
class HtmlCharactersNode;
62
class HtmlCommentNode;
64
class HtmlIEDirectiveNode;
70
// Edit a substring in a script element.
//
// A ScriptEditor bundles the script element, the characters node that holds
// the script text, and a (pos, len) range within that text. NewContents()
// takes replacement text and an output string; presumably it produces the
// script text with that range replaced -- confirm against the definition.
74
// Editor type tag with explicit value 0. Judging by the name, it marks the
// edit that targets the ga.js script-src load -- confirm against the full enum.
kGaJsScriptSrcLoad = 0,
78
// Constructor arguments mirror the stored fields below (element, characters
// node, position, length).
// NOTE(review): the first parameter is spelled `script_element_`, identical
// to the member name; confirm the definition does not rely on the shadowed
// name when initializing the member.
ScriptEditor(HtmlElement* script_element_,
79
HtmlCharactersNode* characters_node,
80
GoogleString::size_type pos,
81
GoogleString::size_type len,
84
// Returns the stored script element pointer.
HtmlElement* GetScriptElement() const { return script_element_; }
85
// Returns the stored characters node pointer.
HtmlCharactersNode* GetScriptCharactersNode() const {
86
return script_characters_node_;
88
// Returns the editor type tag (editor_type_).
Type GetType() const { return editor_type_; }
90
// Const method: takes the replacement text and an output string pointer.
// Presumably writes the edited script text into *contents, using pos_/len_
// to locate the substring being replaced -- definition not visible; confirm.
void NewContents(const StringPiece &replacement,
91
GoogleString* contents) const;
94
// Script element pointer returned by GetScriptElement().
HtmlElement* script_element_;
95
// Characters node pointer returned by GetScriptCharactersNode().
HtmlCharactersNode* script_characters_node_;
97
// Start offset and length of the substring to edit; presumably offsets into
// the characters node's contents (they share GoogleString::size_type with
// the matcher outputs) -- confirm.
GoogleString::size_type pos_;
98
GoogleString::size_type len_;
101
// Macro that, per its name, disallows copy construction and assignment.
DISALLOW_COPY_AND_ASSIGN(ScriptEditor);
105
// Filter <script> tags.
106
// Rewrite qualifying sync loads of Google Analytics as async loads.
//
// The filter derives from EmptyHtmlFilter and declares handlers for the HTML
// parse events listed below. Candidate edits are collected in
// script_editors_; two statistics variables (page_load_count_ and
// rewritten_count_) are kept alongside.
107
class GoogleAnalyticsFilter : public EmptyHtmlFilter {
109
// List of method names as StringPieces; used for both the "glue" methods
// and the "unhandled" methods held below.
typedef StringPieceVector MethodVector;
111
// Constructs the filter from the parser and statistics objects. No method
// vectors are passed; presumably default lists are installed -- confirm in
// the definition.
explicit GoogleAnalyticsFilter(HtmlParse* html_parse,
112
Statistics* statistics);
113
virtual ~GoogleAnalyticsFilter();
115
// The filter will take ownership of the method vectors.
// (Consistent with the scoped_ptr<MethodVector> members declared below.)
116
explicit GoogleAnalyticsFilter(HtmlParse* html_parse,
117
Statistics* statistics,
118
MethodVector* glue_methods,
119
MethodVector* unhandled_methods);
121
// Static hook taking the Statistics object; presumably registers the
// variables named by kPageLoadCount and kRewrittenCount -- confirm.
static void InitStats(Statistics* statistics);
123
// HTML parse event handlers (virtual; presumably overriding the
// EmptyHtmlFilter defaults).
virtual void StartDocument();
124
virtual void EndDocument();
125
virtual void StartElement(HtmlElement* element);
126
virtual void EndElement(HtmlElement* element);
128
virtual void Flush();
130
// Expected HTML Events in <script> elements.
131
virtual void Characters(HtmlCharactersNode* characters_node);
133
// Unexpected HTML Events in <script> elements.
134
virtual void Comment(HtmlCommentNode* comment);
135
virtual void Cdata(HtmlCdataNode* cdata);
136
virtual void IEDirective(HtmlIEDirectiveNode* directive);
138
// Returns the fixed filter name string.
virtual const char* Name() const { return "GoogleAnalytics"; }
140
// String constants defined elsewhere; presumably the statistics variable
// names backing page_load_count_ and rewritten_count_ -- confirm.
static const char kPageLoadCount[];
141
static const char kRewrittenCount[];
146
// Matcher for a synchronous load within `contents`. Returns a bool and has
// two size_type out-parameters; presumably *pos / *len receive the matched
// range on success -- confirm against the definition.
bool MatchSyncLoad(StringPiece contents,
147
GoogleString::size_type* pos,
148
GoogleString::size_type* len) const;
149
// Like MatchSyncLoad, but takes an additional start_pos; presumably the
// search for the synchronous tracker initialization begins there -- confirm.
bool MatchSyncInit(StringPiece contents,
150
GoogleString::size_type start_pos,
151
GoogleString::size_type* pos,
152
GoogleString::size_type* len) const;
153
// Returns a bool for `contents` starting from start_pos; presumably true
// when a method from unhandled_methods_ is called, in which case the rewrite
// is skipped (see the member comment below) -- confirm.
bool MatchUnhandledCalls(StringPiece contents,
154
GoogleString::size_type start_pos) const;
155
// Non-const helper with no arguments; presumably scans the current script
// and records candidate edits in script_editors_ -- confirm.
void FindRewritableScripts();
156
// Const helper that outputs through `buffer`; presumably appends/writes the
// JavaScript glue that maps the synchronous API onto the async API -- confirm.
void GetSyncToAsyncScript(GoogleString* buffer) const;
157
// Returns a bool; presumably true when the collected edits were applied and
// the page was rewritten to the async snippet -- confirm.
bool RewriteAsAsync();
161
// Collected editors, held as raw pointers.
// NOTE(review): ownership/cleanup of these pointers is not visible here --
// confirm they are deleted in the definition.
std::vector<ScriptEditor*> script_editors_;
163
scoped_ptr<MethodVector> glue_methods_; // methods to forward to async api
164
scoped_ptr<MethodVector> unhandled_methods_; // if found, skip rewrite
166
// Parser pointer; presumably the html_parse constructor argument -- confirm.
HtmlParse* html_parse_;
167
HtmlElement* script_element_; // NULL if not in script element
168
HtmlCharactersNode* script_characters_node_; // NULL if not found in script
170
// Statistics counters; presumably obtained from the Statistics object passed
// to the constructor -- confirm.
Variable* page_load_count_;
171
Variable* rewritten_count_;
173
// Macro that, per its name, disallows copy construction and assignment.
DISALLOW_COPY_AND_ASSIGN(GoogleAnalyticsFilter);
176
} // namespace net_instaweb
178
#endif // NET_INSTAWEB_REWRITER_PUBLIC_GOOGLE_ANALYTICS_FILTER_H_