2
* Copyright 2010 Google Inc.
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
* you may not use this file except in compliance with the License.
6
* You may obtain a copy of the License at
8
* http://www.apache.org/licenses/LICENSE-2.0
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
17
// Author: sligocki@google.com (Shawn Ligocki)
19
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_CSS_FILTER_H_
20
#define NET_INSTAWEB_REWRITER_PUBLIC_CSS_FILTER_H_
22
#include "net/instaweb/htmlparse/public/html_element.h"
23
#include "net/instaweb/rewriter/public/css_hierarchy.h"
24
#include "net/instaweb/rewriter/public/css_resource_slot.h"
25
#include "net/instaweb/rewriter/public/css_url_encoder.h"
26
#include "net/instaweb/rewriter/public/output_resource_kind.h"
27
#include "net/instaweb/rewriter/public/resource.h"
28
#include "net/instaweb/rewriter/public/server_context.h"
29
#include "net/instaweb/rewriter/public/resource_slot.h"
30
#include "net/instaweb/rewriter/public/rewrite_filter.h"
31
#include "net/instaweb/rewriter/public/rewrite_options.h"
32
#include "net/instaweb/rewriter/public/single_rewrite_context.h"
33
#include "net/instaweb/util/public/basictypes.h"
34
#include "net/instaweb/util/public/google_url.h"
35
#include "net/instaweb/util/public/scoped_ptr.h"
36
#include "net/instaweb/util/public/string.h"
37
#include "net/instaweb/util/public/string_util.h"
45
namespace net_instaweb {
47
class AssociationTransformer;
49
class CssImageRewriter;
51
class HtmlCharactersNode;
52
class ImageCombineFilter;
53
class ImageRewriteFilter;
55
class OutputPartitions;
56
class ResourceContext;
59
class RewriteDomainTransformer;
62
class UrlSegmentEncoder;
65
// Find and parse all CSS in the page and apply transformations including:
66
// minification, combining, refactoring, and optimizing sub-resources.
68
// Currently only does basic minification.
70
// Note that CssCombineFilter currently does combining (although there is a bug)
71
// but CssFilter will eventually replace this.
73
// Currently only deals with inline <style> tags and external <link> resources.
74
// It does not consider style= attributes on arbitrary elements.
75
class CssFilter : public RewriteFilter {
79
CssFilter(RewriteDriver* driver,
80
// TODO(sligocki): Temporary pattern until we figure out a better
81
// way to do this without passing all filters around everywhere.
82
CacheExtender* cache_extender,
83
ImageRewriteFilter* image_rewriter,
84
ImageCombineFilter* image_combiner);
87
// May be called multiple times, in case there are multiple statistics
89
static void InitStats(Statistics* statistics);
91
// Initialize & Terminate must be paired.
92
static void Initialize();
93
static void Terminate();
95
// Add this filter's related options to the given vector.
96
static void AddRelatedOptions(StringPieceVector* target);
98
// Note: AtExitManager needs to be initialized or you get a nasty error:
99
// Check failed: false. Tried to RegisterCallback without an AtExitManager.
100
// This is called by Initialize.
101
static void InitializeAtExitManager();
103
virtual void StartDocumentImpl();
104
virtual void StartElementImpl(HtmlElement* element);
105
virtual void Characters(HtmlCharactersNode* characters);
106
virtual void EndElementImpl(HtmlElement* element);
108
// Returns this filter's name, the fixed string "CssFilter".
virtual const char* Name() const { return "CssFilter"; }
109
// Returns this filter's id, RewriteOptions::kCssFilterId.
virtual const char* id() const { return RewriteOptions::kCssFilterId; }
110
virtual void EncodeUserAgentIntoResourceContext(
111
ResourceContext* context) const;
113
static const char kBlocksRewritten[];
114
static const char kParseFailures[];
115
static const char kFallbackRewrites[];
116
static const char kFallbackFailures[];
117
static const char kRewritesDropped[];
118
static const char kTotalBytesSaved[];
119
static const char kTotalOriginalBytes[];
120
static const char kUses[];
121
static const char kCharsetMismatch[];
122
static const char kInvalidUrl[];
123
static const char kLimitExceeded[];
124
static const char kMinifyFailed[];
125
static const char kRecursion[];
126
static const char kComplexQueries[];
128
RewriteContext* MakeNestedFlatteningContextInNewSlot(
129
const ResourcePtr& resource, const GoogleString& location,
130
CssFilter::Context* rewriter, RewriteContext* parent,
131
CssHierarchy* hierarchy);
133
virtual const RewriteOptions::Filter* RelatedFilters(int* num_filters) const {
134
*num_filters = merged_filters_size_;
135
return merged_filters_;
137
virtual const StringPieceVector* RelatedOptions() const {
138
return related_options_;
142
virtual RewriteContext* MakeRewriteContext();
143
virtual const UrlSegmentEncoder* encoder() const;
144
virtual RewriteContext* MakeNestedRewriteContext(
145
RewriteContext* parent, const ResourceSlotPtr& slot);
148
friend class Context;
149
friend class CssFlattenImportsContext; // for statistics
150
friend class CssHierarchy; // for statistics
154
kAttributeWithoutUrls,
158
Context* MakeContext(RewriteDriver* driver,
159
RewriteContext* parent);
161
// Starts the asynchronous rewrite process for inline CSS 'text'.
162
void StartInlineRewrite(HtmlCharactersNode* text);
164
// Starts the asynchronous rewrite process for inline CSS inside the given
165
// element's given style attribute.
166
void StartAttributeRewrite(HtmlElement* element,
167
HtmlElement::Attribute* style,
168
InlineCssKind inline_css_kind);
170
// Starts the asynchronous rewrite process for external CSS referenced by
171
// attribute 'src' of 'link'.
172
void StartExternalRewrite(HtmlElement* link, HtmlElement::Attribute* src);
174
ResourceSlot* MakeSlotForInlineCss(HtmlElement* element,
175
const StringPiece& content);
176
CssFilter::Context* StartRewriting(const ResourceSlotPtr& slot);
178
// Get the charset of the HTML being parsed which can be specified in the
179
// driver's headers, defaulting to ISO-8859-1 if it isn't. Then, if a charset
180
// is specified in the given element, check that they agree, and if not
181
// return false and set the failure reason, otherwise return true and assign
182
// the first charset to '*charset'.
183
bool GetApplicableCharset(const HtmlElement* element,
184
GoogleString* charset,
185
GoogleString* failure_reason) const;
187
// Get the media specified in the given element, if any. Returns true if
188
// media were found, false if not.
189
bool GetApplicableMedia(const HtmlElement* element,
190
StringVector* media) const;
192
bool in_style_element_; // Are we in a style element?
193
// This is meaningless if in_style_element_ is false:
194
HtmlElement* style_element_; // The element we are in.
196
// The charset extracted from a meta tag, if any.
197
GoogleString meta_tag_charset_;
199
// Filters we delegate to.
200
CacheExtender* cache_extender_;
201
ImageRewriteFilter* image_rewrite_filter_;
202
ImageCombineFilter* image_combiner_;
205
// # of CSS blocks (CSS files, <style> blocks or style= attributes)
206
// successfully rewritten.
207
Variable* num_blocks_rewritten_;
208
// # of CSS blocks that rewriter failed to parse.
209
Variable* num_parse_failures_;
210
// # of CSS blocks that failed to be parsed, but were rewritten in the
212
Variable* num_fallback_rewrites_;
213
// # of CSS blocks that failed to be rewritten in the fallback path.
214
Variable* num_fallback_failures_;
215
// # of CSS rewrites which were not applied because they made the CSS larger
216
// and did not rewrite any images in it/flatten any other CSS files into it.
217
Variable* num_rewrites_dropped_;
218
// # of bytes saved from rewriting CSS (including minification and the
219
// increase of bytes from longer image URLs and the increase of bytes
220
// from @import flattening).
221
// TODO(sligocki): This should consider the input size to be the input sizes
222
// of all CSS files flattened into this one. Currently it does not.
223
UpDownCounter* total_bytes_saved_;
224
// Sum of original bytes of all successfully rewritten CSS blocks.
225
// total_bytes_saved_ / total_original_bytes_ should be the
226
// average percentage reduction of CSS block size.
227
Variable* total_original_bytes_;
228
// # of uses of rewritten CSS (updating <link> href= attributes,
229
// <style> contents or style= attributes).
231
// # of times CSS was not flattened because of a charset mismatch.
232
Variable* num_flatten_imports_charset_mismatch_;
233
// # of times CSS was not flattened because of an invalid @import URL.
234
Variable* num_flatten_imports_invalid_url_;
235
// # of times CSS was not flattened because the resulting CSS was too big.
236
Variable* num_flatten_imports_limit_exceeded_;
237
// # of times CSS was not flattened because minification failed.
238
Variable* num_flatten_imports_minify_failed_;
239
// # of times CSS was not flattened because of recursive imports.
240
Variable* num_flatten_imports_recursion_;
241
// # of times CSS was not flattened because it had complex media queries.
242
Variable* num_flatten_imports_complex_queries_;
244
CssUrlEncoder encoder_;
246
// The filters related to this filter.
247
static const RewriteOptions::Filter* merged_filters_;
248
static int merged_filters_size_;
250
// The options related to this filter.
251
static StringPieceVector* related_options_;
253
DISALLOW_COPY_AND_ASSIGN(CssFilter);
256
// Context used by CssFilter under async flow.
257
class CssFilter::Context : public SingleRewriteContext {
259
Context(CssFilter* filter, RewriteDriver* driver,
260
RewriteContext* parent,
261
CacheExtender* cache_extender,
262
ImageRewriteFilter* image_rewriter,
263
ImageCombineFilter* image_combiner,
264
ResourceContext* context);
267
// Setup rewriting for inline, attribute, or external CSS.
268
void SetupInlineRewrite(HtmlElement* style_element, HtmlCharactersNode* text);
269
void SetupAttributeRewrite(HtmlElement* element,
270
HtmlElement::Attribute* src,
271
InlineCssKind inline_css_kind);
272
void SetupExternalRewrite(HtmlElement* element,
273
const GoogleUrl& base_gurl,
274
const GoogleUrl& trim_gurl);
276
// Starts nested rewrite jobs for any imports or images contained in the CSS.
277
// Marked public, so that it's accessible from CssHierarchy.
278
void RewriteCssFromNested(RewriteContext* parent, CssHierarchy* hierarchy);
280
// Specialization to absolutify URLs in input resource in case of rewrite
281
// fail or deadline exceeded.
282
virtual bool SendFallbackResponse(StringPiece output_url_base,
283
StringPiece input_contents,
284
AsyncFetch* async_fetch,
285
MessageHandler* handler);
287
// Returns a pointer to the slot_factory_ member, which this context holds by
// value.
CssResourceSlotFactory* slot_factory() { return &slot_factory_; }
289
// Returns a mutable pointer to the hierarchy_ member, which this context
// holds by value.
CssHierarchy* mutable_hierarchy() { return &hierarchy_; }
292
virtual void Render();
293
virtual void Harvest();
294
virtual bool Partition(OutputPartitions* partitions,
295
OutputResourceVector* outputs);
296
virtual void RewriteSingle(const ResourcePtr& input,
297
const OutputResourcePtr& output);
298
// Returns the id reported by the filter this context refers to via filter_.
virtual const char* id() const { return filter_->id(); }
299
// This context always reports its output kind as kRewrittenResource.
virtual OutputResourceKind kind() const { return kRewrittenResource; }
300
virtual GoogleString CacheKeySuffix() const;
301
virtual const UrlSegmentEncoder* encoder() const;
303
// Implements UserAgentCacheKey method of RewriteContext.
304
virtual GoogleString UserAgentCacheKey(
305
const ResourceContext* resource_context) const;
308
void GetCssBaseUrlToUse(const ResourcePtr& input_resource,
309
GoogleUrl* css_base_gurl_to_use);
311
void GetCssTrimUrlToUse(const ResourcePtr& input_resource,
312
const StringPiece& output_url_base,
313
GoogleUrl* css_base_gurl_to_use);
315
void GetCssTrimUrlToUse(const ResourcePtr& input_resource,
316
const OutputResourcePtr& output_resource,
317
GoogleUrl* css_base_gurl_to_use);
319
bool RewriteCssText(const GoogleUrl& css_base_gurl,
320
const GoogleUrl& css_trim_gurl,
321
const StringPiece& in_text,
323
bool text_is_declarations,
324
MessageHandler* handler);
326
// Starts nested rewrite jobs for any imports or images contained in the CSS.
327
void RewriteCssFromRoot(const GoogleUrl& css_base_gurl,
328
const GoogleUrl& css_trim_gurl,
329
const StringPiece& in_text, int64 in_text_size,
330
bool has_unparseables, Css::Stylesheet* stylesheet);
332
// Fall back to using CssTagScanner to find the URLs and rewrite them
333
// that way. Like RewriteCssFromRoot, output is written into output
334
// resource in Harvest(). Called if CSS Parser fails to parse doc.
335
// Returns whether or not fallback rewriting succeeds. Fallback can fail
336
// if URLs in CSS are not parseable.
337
bool FallbackRewriteUrls(const GoogleUrl& css_base_gurl,
338
const GoogleUrl& css_trim_gurl,
339
const StringPiece& in_text);
341
// Tries to write a (potentially edited) stylesheet out to out_text,
342
// and returns whether we should consider the result as an improvement.
343
bool SerializeCss(int64 in_text_size,
344
const Css::Stylesheet* stylesheet,
345
const GoogleUrl& css_base_gurl,
346
const GoogleUrl& css_trim_gurl,
347
bool previously_optimized,
348
bool stylesheet_is_declarations,
350
GoogleString* out_text,
351
MessageHandler* handler);
353
// Used by the asynchronous rewrite callbacks (RewriteSingle + Harvest) to
354
// determine if what is being rewritten is a style attribute or a stylesheet,
355
// since an attribute comprises only declarations, unlike a stylesheet.
356
bool IsInlineAttribute() const {
357
return (rewrite_inline_attribute_ != NULL);
360
// Determine the appropriate image inlining threshold based upon whether we're
361
// in an html file (<style> tag or style= attribute) or in an external css
363
int64 ImageInlineMaxBytes() const;
366
scoped_ptr<CssImageRewriter> css_image_rewriter_;
367
CssResourceSlotFactory slot_factory_;
368
CssHierarchy hierarchy_;
372
// Are we performing a fallback rewrite?
374
// Transformer used by CssTagScanner to rewrite URLs if we failed to
375
// parse CSS. This will only be defined if CSS parsing failed.
376
scoped_ptr<AssociationTransformer> fallback_transformer_;
377
// Backup transformer for AssociationTransformer. Absolutifies URLs and
378
// rewrites their domains as necessary if they can't be cache extended.
379
scoped_ptr<RewriteDomainTransformer> absolutifier_;
381
// The element containing the CSS being rewritten, either a style element
382
// (inline), a link element (external), or anything with a style attribute.
383
HtmlElement* rewrite_element_;
385
// Style element containing inline CSS (see StartInlineRewrite) -or-
386
// any element with a style attribute (see StartAttributeRewrite), or
387
// NULL if we're rewriting external stuff.
388
HtmlElement* rewrite_inline_element_;
390
// Node with inline CSS to rewrite, or NULL if we're rewriting external stuff.
391
HtmlCharactersNode* rewrite_inline_char_node_;
393
// The style attribute associated with rewrite_inline_element_. Mutually
394
// exclusive with rewrite_inline_char_node_ since style elements cannot
395
// have style attributes.
396
HtmlElement::Attribute* rewrite_inline_attribute_;
398
// Indicates the kind of inline CSS we are rewriting (<style> vs. style=,
399
// and whether we've noticed any URLs). Only valid if the other
400
// rewrite_inline_ fields reflect us doing inline rewriting.
401
InlineCssKind rewrite_inline_css_kind_;
403
// Information needed for nested rewrites or finishing up serialization.
405
GoogleUrl initial_css_base_gurl_;
406
GoogleUrl initial_css_trim_gurl_;
407
scoped_ptr<GoogleUrl> base_gurl_for_fallback_;
408
scoped_ptr<GoogleUrl> trim_gurl_for_fallback_;
409
ResourcePtr input_resource_;
410
OutputResourcePtr output_resource_;
412
DISALLOW_COPY_AND_ASSIGN(Context);
415
} // namespace net_instaweb
417
#endif // NET_INSTAWEB_REWRITER_PUBLIC_CSS_FILTER_H_