2
* Copyright 2014 Google Inc.
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
* you may not use this file except in compliance with the License.
6
* You may obtain a copy of the License at
8
* http://www.apache.org/licenses/LICENSE-2.0
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
17
// Author: stevensr@google.com (Ryan Stevens)
19
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_MOBILIZE_REWRITE_FILTER_H_
20
#define NET_INSTAWEB_REWRITER_PUBLIC_MOBILIZE_REWRITE_FILTER_H_
24
#include "net/instaweb/htmlparse/public/empty_html_filter.h"
25
#include "net/instaweb/htmlparse/public/html_name.h"
26
#include "net/instaweb/util/public/basictypes.h"
27
#include "pagespeed/kernel/base/string_util.h"
29
namespace net_instaweb {
31
class HtmlCharactersNode;
37
// A mobile role and its associated HTML attribute value.
// NOTE(review): the line that opens this type and the Level enumerators that
// the per-role comments below describe are not visible here; confirm against
// the full header before relying on the enumerator order.
40
// Tags which aren't explicitly tagged with a data-mobile-role attribute,
41
// but we want to keep anyway, such as <style> or <script> tags in the body.
43
// The page header, such as <h1> or logos.
45
// Nav sections of the page. The HTML of nav blocks will be completely
46
// rewritten to be mobile friendly by deleting unwanted elements in the
// block.
49
// Main content of the page.
51
// Any block that isn't one of the above. Marginal content is put at the end
52
// and remains pretty much untouched with respect to modifying HTML or
// styling.
55
// Elements without a data-mobile-role attribute, or with an unknown
56
// attribute value, will be kInvalid.
60
// Table of roles, sized by kInvalid and indexed by Level (see
// StringFromLevel below), so kInvalid itself has no entry.
static const MobileRole kMobileRoles[kInvalid];
62
// Builds a role from a level and its attribute-value string.
// NOTE(review): the initializer list / body is not visible here.
MobileRole(Level level, const char* value)
66
// Looks up the role matching the given attribute value.
// NOTE(review): presumably returns NULL when the value is unknown -- confirm
// in the .cc file.
static const MobileRole* FromString(const StringPiece& mobile_role);
67
// Maps an attribute value to its Level.
// NOTE(review): presumably yields kInvalid for unknown values, per the
// kInvalid comment above -- confirm in the .cc file.
static Level LevelFromString(const StringPiece& mobile_role);
68
// Returns the attribute-value string stored in kMobileRoles for `level`, or
// NULL when `level` is not below kInvalid (i.e. has no table entry).
static const char* StringFromLevel(Level level) {
69
return (level < kInvalid) ? kMobileRoles[level].value : NULL;
73
// The attribute-value string for this role; both the pointer and the
// characters are const.
const char* const value; // Set to a static string in cc.
76
// Rewrite HTML to be mobile-friendly based on "data-mobile-role" attributes in
77
// the HTML tags. To reorganize the DOM, the filter puts containers at the end
78
// of the body into which we move tagged elements. The containers are later
79
// removed after the filter is done processing the document body. The filter
80
// applies the following transformations:
81
// - Add mobile <style> and <meta name="viewport"...> tags to the head.
82
// - Remove all table tags (but keep the content). Almost all tables in desktop
83
// HTML are for formatting, not displaying data, and they tend not to resize
84
// well for mobile. The easiest thing to do is to simply strip out the
85
// formatting and hope the content reflows properly.
86
// - Reorder body of the HTML DOM elements based on mobile role. Any elements
87
// which don't have an important parent will get removed, except for a
88
// special set of "keeper" tags (like <script> or <style>). The keeper tags
89
// are retained because they are often necessary for the website to work
90
// properly, and because they have no visible appearance on the page.
91
// - Remove all elements from inside data-mobile-role="navigational" elements
92
// except in a special set of nav tags (notably <a>). Nav sections often do
93
// not resize well due to fixed width formatting and drop-down menus, so it
94
// is often necessary to pull out what you want, instead of shuffling around
// what is there.
98
// - TODO (stevensr): This script does not handle flush windows in the body.
99
// - TODO (stevensr): It would be nice to tweak the table-xform behavior via
100
// options. Also, there has been mention that removing tables across flush
101
// windows could be problematic. This should be addressed at some point.
102
// - TODO (stevensr): Enable this filter only for mobile UAs, and have a query
103
// param option to turn it on for all UAs for debugging.
104
// - TODO (stevensr): Write pcache entry if rewriting page fails. We should
105
// then probably inject some JS to auto-refresh the page so the user does not
106
// see the badly rewritten result.
107
// - TODO (stevensr): Add a separate wildcard option to allow/disallow URLs
108
// from using this filter. Of course sites can use our existing Allow and
109
// Disallow directives but that turns off all optimizations, and this one is
110
// one that might be extra finicky (e.g. don't touch my admin pages).
111
// - TODO (stevensr): Turn on css_move_to_head_filter.cc to reorder elements
112
// we inject into the head.
113
// HTML filter that rewrites a page to be mobile-friendly; derives from
// EmptyHtmlFilter and declares the parser event handlers it needs.
// NOTE(review): access specifiers and closing braces are not visible in this
// view of the class; confirm member visibility against the full header.
class MobilizeRewriteFilter : public EmptyHtmlFilter {
115
// String constants declared here and defined elsewhere.
// NOTE(review): the names parallel the Variable* counters below, so these are
// presumably the statistics variable names used by InitStats -- confirm.
static const char kPagesMobilized[];
116
static const char kKeeperBlocks[];
117
static const char kHeaderBlocks[];
118
static const char kNavigationalBlocks[];
119
static const char kContentBlocks[];
120
static const char kMarginalBlocks[];
121
static const char kDeletedElements[];
123
// `explicit` prevents implicit conversion from a RewriteDriver*.
// NOTE(review): the driver is presumably retained in driver_ -- confirm.
explicit MobilizeRewriteFilter(RewriteDriver* rewrite_driver);
124
virtual ~MobilizeRewriteFilter();
126
// Static hook taking the Statistics object.
// NOTE(review): presumably registers the counters named above -- confirm.
static void InitStats(Statistics* statistics);
128
// Parser event handlers (document start/end, element start/end, text nodes).
virtual void StartDocument();
129
virtual void EndDocument();
130
virtual void StartElement(HtmlElement* element);
131
virtual void EndElement(HtmlElement* element);
132
virtual void Characters(HtmlCharactersNode* characters);
133
// Returns the fixed filter name "MobilizeRewrite".
virtual const char* Name() const { return "MobilizeRewrite"; }
136
// Helpers for the element handlers and for managing the reorder containers
// described in the class comment. Implementations live outside this header.
void HandleStartTagInBody(HtmlElement* element);
137
void HandleEndTagInBody(HtmlElement* element);
138
void AddStyleAndViewport(HtmlElement* element);
139
void AddReorderContainers(HtmlElement* element);
140
void RemoveReorderContainers();
141
bool IsReorderContainer(HtmlElement* element);
142
HtmlElement* MobileRoleToContainer(MobileRole::Level level);
143
MobileRole::Level GetMobileRole(HtmlElement* element);
145
// True while important_element_depth_ is positive.
bool InImportantElement() {
146
return (important_element_depth_ > 0);
149
// Tests whether `keyword` appears in `sorted_list`, an array of `len`
// keywords.
// NOTE(review): the parameter name implies the list must be sorted
// (presumably for a binary search) -- confirm in the .cc file.
bool CheckForKeyword(
150
const HtmlName::Keyword* sorted_list, int len, HtmlName::Keyword keyword);
151
void LogMovedBlock(MobileRole::Level level);
153
RewriteDriver* driver_;
154
std::vector<HtmlName::Keyword> nav_keyword_stack_;
155
std::vector<HtmlElement*> mobile_role_containers_;
156
// Nesting-depth counters; InImportantElement() reads the first one.
// NOTE(review): presumably incremented/decremented in the start/end element
// handlers -- confirm.
int important_element_depth_;
157
int body_element_depth_;
158
int nav_element_depth_;
159
bool reached_reorder_containers_;
161
bool added_containers_;
164
// Number of web pages we have mobilized.
165
Variable* num_pages_mobilized_;
166
// Number of blocks of each mobile role encountered and reordered.
167
Variable* num_keeper_blocks_;
168
Variable* num_header_blocks_;
169
Variable* num_navigational_blocks_;
170
Variable* num_content_blocks_;
171
Variable* num_marginal_blocks_;
172
// Number of elements deleted.
173
Variable* num_elements_deleted_;
175
// Used for overriding default behavior in testing.
176
friend class MobilizeRewriteFilterTest;
177
// Style content we are injecting into the page. Usually points to a static
178
// asset, but MobilizeRewriteFilterTest will override this with something
179
// small to simplify testing.
180
const char* style_css_;
182
// NOTE(review): project macro, presumably from basictypes.h, that disables
// copy construction and assignment -- confirm.
DISALLOW_COPY_AND_ASSIGN(MobilizeRewriteFilter);
185
} // namespace net_instaweb
187
#endif // NET_INSTAWEB_REWRITER_PUBLIC_MOBILIZE_REWRITE_FILTER_H_