2
* Copyright 2010 Google Inc.
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
* you may not use this file except in compliance with the License.
6
* You may obtain a copy of the License at
8
* http://www.apache.org/licenses/LICENSE-2.0
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
17
// Authors: jmarantz@google.com (Joshua Marantz)
18
// jefftk@google.com (Jeff Kaufman)
19
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_RESOURCE_TAG_SCANNER_H_
20
#define NET_INSTAWEB_REWRITER_PUBLIC_RESOURCE_TAG_SCANNER_H_
22
#include <cstddef> // for NULL
25
#include "net/instaweb/htmlparse/public/html_element.h"
26
#include "net/instaweb/http/public/semantic_type.h"
28
namespace net_instaweb {
32
namespace resource_tag_scanner {
34
// Couples a url-valued attribute with the semantic category assigned to it.
// NOTE(review): in this view the constructor header and the closing brace of
// the struct are not visible; only the visible lines are annotated.
struct UrlCategoryPair {
35
// The url-valued attribute itself (non-owning pointer, presumably into the
// element's attribute list -- TODO confirm ownership with the implementation).
HtmlElement::Attribute* url;
36
// Semantic category associated with |url|.
semantic_type::Category category;
40
// Appears to be the tail of a constructor initializer list: |category| starts
// out as semantic_type::kUndefined.
category(semantic_type::kUndefined) {}
43
// Sequence of (attribute, category) pairs; this is the type of the
// |attributes| parameter of ScanElement().
typedef std::vector<UrlCategoryPair> UrlCategoryVector;
45
// If the attribute is url-valued, determine its semantic category. Return
46
// kUndefined otherwise.
48
// Supported patterns are:
52
// <link rel="stylesheet" href=...>
55
// <link rel="icon" href=...>
56
// <link rel="apple-touch-icon" href=...>
57
// <link rel="apple-touch-icon-precomposed" href=...>
58
// <link rel="apple-touch-startup-image" href=...>
59
// <body background=...>
60
// <td background=...>
61
// <th background=...>
62
// <table background=...>
63
// <thead background=...>
64
// <tbody background=...>
65
// <tfoot background=...>
66
// <input type="image" src=...>
73
// <blockquote cite=...>
78
// <button formaction=...>
79
// <input formaction=...>
80
// <frame longdesc=...>
81
// <iframe longdesc=...>
88
// <html manifest=...>
95
// 1. Because we don't parse the codebase attribute of applet or object elements
96
// it's not safe for us to manipulate any of their other url-valued
// attributes.
99
// 2. The base tag is dealt with elsewhere, but here we skip it. It's not safe
100
// to make any changes to it, so this is safe. It's also not safe to make
101
// changes to <head profile=...> because one use of a profile is as a
102
// globally unique name which a browser or other agent recognizes and
103
// interprets without accessing, so we skip it too.
105
// 3. While usemap, an attribute of img, input, and object, is technically a
106
// url, it always has the form #name or #id, which means there's nothing we
107
// can do to improve it and there's no harm in leaving it out.
109
// These exceptions aside, we should support all url-valued attributes in
111
// http://dev.w3.org/html5/spec/section-index.html#attributes-1
112
// http://www.w3.org/TR/REC-html40/index/attributes.html
114
// Parameters (all taken by pointer-to-const):
//   element   - presumably the element that carries |attribute|; the
//               supported patterns are element/attribute combinations.
//   attribute - the attribute to categorize.
//   options   - NOTE(review): role is not documented in this header; confirm
//               against the implementation how it affects categorization.
// Returns the attribute's semantic category, or kUndefined when the attribute
// is not url-valued.
semantic_type::Category CategorizeAttribute(
115
const HtmlElement* element,
116
const HtmlElement::Attribute* attribute,
117
const RewriteOptions* options);
119
// Examines an HTML element to determine if it's a link to any sort of resource,
120
// extracting out the HREF, SRC, or other URL attributes and identifying their
121
// categories. Because, for example, a LINK tag can be an image, stylesheet, or
122
// nearly anything else, it's not sufficient for callers to assume they can
123
// figure out the category from the element's name.
125
// See CategorizeAttribute for the meaning of "url-valued attribute".
127
// Attributes that we can't decode, such as non-ascii urls, will be skipped.
129
// Attributes are returned in left-to-right order.
130
// Parameters:
//   element    - the HTML element to examine. Taken as a non-const pointer;
//                presumably because the reported UrlCategoryPair entries hold
//                non-const HtmlElement::Attribute pointers -- TODO confirm.
//   options    - NOTE(review): role is not documented in this header; confirm
//                against the implementation.
//   attributes - output: receives the url-valued attributes that were found,
//                each paired with its category.
//                NOTE(review): whether existing contents are cleared first is
//                not visible here -- confirm before reusing a vector.
void ScanElement(HtmlElement* element, const RewriteOptions* options,
131
UrlCategoryVector* attributes);
133
} // namespace resource_tag_scanner
134
} // namespace net_instaweb
136
#endif // NET_INSTAWEB_REWRITER_PUBLIC_RESOURCE_TAG_SCANNER_H_