2
* Copyright 2010 Google Inc.
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
* you may not use this file except in compliance with the License.
6
* You may obtain a copy of the License at
8
* http://www.apache.org/licenses/LICENSE-2.0
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
17
// Author: sligocki@google.com (Shawn Ligocki)
18
// jmarantz@google.com (Joshua Marantz)
20
// Resources are created by a RewriteDriver. Input resources are
21
// read from URLs or the file system. Output resources are constructed
22
// programmatically, usually by transforming one or more existing
23
// resources. Both input and output resources inherit from this class
24
// so they can be used interchangeably in successive rewrite passes.
26
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_RESOURCE_H_
27
#define NET_INSTAWEB_REWRITER_PUBLIC_RESOURCE_H_
31
#include "base/logging.h"
32
#include "net/instaweb/http/public/http_value.h"
33
#include "net/instaweb/http/public/meta_data.h"
34
#include "net/instaweb/http/public/request_context.h"
35
#include "net/instaweb/http/public/response_headers.h"
36
#include "net/instaweb/util/public/basictypes.h"
37
#include "net/instaweb/util/public/ref_counted_ptr.h"
38
#include "net/instaweb/util/public/string.h"
39
#include "net/instaweb/util/public/string_util.h"
40
#include "pagespeed/kernel/base/callback.h"
42
namespace net_instaweb {
53
typedef RefCountedPtr<Resource> ResourcePtr;
54
typedef std::vector<ResourcePtr> ResourceVector;
56
class Resource : public RefCounted<Resource> {
65
// This enumerates possible follow-up behaviors when a requested resource was
67
enum NotCacheablePolicy {
68
kLoadEvenIfNotCacheable,
69
kReportFailureIfNotCacheable,
72
// This enumerates different states of the fetched response.
73
enum FetchResponseStatus {
76
kFetchStatusUncacheable,
83
Resource(const RewriteDriver* driver, const ContentType* type);
85
// Common methods across all derivations
86
// Accessor for the ServerContext pointer stored in server_context_.
ServerContext* server_context() const {
  return server_context_;
}
88
// Checks if the contents are loaded and valid and also if the resource is
89
// up-to-date and cacheable by a proxy like us.
90
virtual bool IsValidAndCacheable() const;
92
// Whether the domain on which the resource is present is explicitly
93
// authorized or not. Unauthorized resources can be created for the purpose
94
// of inlining content into the HTML.
95
bool is_authorized_domain() { return is_authorized_domain_; }
96
void set_is_authorized_domain(bool is_authorized) {
97
is_authorized_domain_ = is_authorized;
100
// Answers question: Are we allowed to rewrite the contents now?
101
// Checks if valid and cacheable and if it has a no-transform header.
102
// rewrite_uncacheable is used to answer question whether the resource can be
103
// optimized even if it is not cacheable.
104
// If a resource cannot be rewritten, the reason is appended to *reason.
105
bool IsSafeToRewrite(bool rewrite_uncacheable, GoogleString* reason) const;
106
bool IsSafeToRewrite(bool rewrite_uncacheable) const {
107
// TODO(jmaessen): Convert all remaining call sites to use a reason.
108
GoogleString reason_ignored;
109
return IsSafeToRewrite(rewrite_uncacheable, &reason_ignored);
112
// TODO(sligocki): Do we need these or can we just use IsValidAndCacheable
114
// True once response_headers_ reports a non-zero status code.
bool loaded() const {
  const bool has_status_code = (response_headers_.status_code() != 0);
  return has_status_code;
}
115
bool HttpStatusOk() const {
116
return (response_headers_.status_code() == HttpStatus::kOK);
119
// Loads contents of resource asynchronously, calling callback when
120
// done. If the resource contents are already loaded into the object,
121
// the callback will be called directly, rather than asynchronously. The
122
// resource will be passed to the callback, with its contents and headers
125
// This is implemented in terms of LoadAndCallback, taking care of the case
126
// where the resource is already loaded.
127
void LoadAsync(NotCacheablePolicy not_cacheable_policy,
128
const RequestContextPtr& request_context,
129
AsyncCallback* callback);
131
// If the resource is about to expire from the cache, re-fetches the
132
// resource in background to try to prevent it from expiring.
134
// Base implementation does nothing, since most subclasses of this do not
136
virtual void RefreshIfImminentlyExpiring();
138
// Computes (with non-trivial cost) a hash of contents of a loaded resource.
139
// Precondition: IsValidAndCacheable().
140
// Warning: this uses contents_hasher_ and not the primary hasher,
141
// unlike the hashes computed by OutputResource for naming purposes on
143
GoogleString ContentsHash() const;
145
// Adds a new InputInfo object representing this resource to CachedResult,
146
// assigning the index supplied.
147
void AddInputInfoToPartition(HashHint suggest_include_content_hash,
148
int index, CachedResult* partition);
150
// Set CachedResult's input info used for expiration validation.
151
// If include_content_hash is kIncludeInputHash, and it makes sense for
152
// the Resource type to check if resource changed based on content hash
153
// (e.g. it would be pointless for data:), the hash of resource's
154
// contents should also be set on 'input'.
156
// Default one sets resource type as CACHED and sets an expiration timestamp,
157
// last modified, date, and, if requested, content hash.
158
// If a derived class has a different criterion for validity, override
160
virtual void FillInPartitionInputInfo(HashHint suggest_include_content_hash,
163
void FillInPartitionInputInfoFromResponseHeaders(
164
const ResponseHeaders& headers,
167
// Returns 0 if resource is not cacheable.
168
// TODO(sligocki): Look through callsites and make sure this is being
169
// interpreted correctly.
170
int64 CacheExpirationTimeMs() const;
172
StringPiece contents() const {
174
bool got_contents = value_.ExtractContents(&val);
175
CHECK(got_contents) << "Resource contents read before loading: " << url();
178
// Mutable access: hands out the address of the response_headers_ member.
ResponseHeaders* response_headers() {
  return &response_headers_;
}
179
// Read-only access: hands out the address of the response_headers_ member.
const ResponseHeaders* response_headers() const {
  return &response_headers_;
}
180
// Accessor for the ContentType pointer stored in type_.
const ContentType* type() const {
  return type_;
}
181
virtual void SetType(const ContentType* type);
183
// Note: this is empty if the header is not specified.
184
// Returns charset_ converted to a StringPiece.
StringPiece charset() const {
  return charset_;
}
185
// Stores |c| into charset_ via StringPiece::CopyToString.
void set_charset(StringPiece c) {
  c.CopyToString(&charset_);
}
187
// Gets the absolute URL of the resource
188
virtual GoogleString url() const = 0;
190
// Gets the cache key for resource. This may be different from URL
191
// if the resource is e.g. UA-dependent.
192
virtual GoogleString cache_key() const {
196
// Computes the content-type (and charset) based on response_headers and
197
// extension, and sets it via SetType.
198
void DetermineContentType();
200
// We define a new Callback type here because we need to
201
// pass in the Resource to the Done callback so it can
202
// collect the fetched data.
203
class AsyncCallback {
205
explicit AsyncCallback(const ResourcePtr& resource) : resource_(resource) {}
207
virtual ~AsyncCallback();
208
virtual void Done(bool lock_failure, bool resource_ok) = 0;
210
// Gives back, by const reference, the ResourcePtr this callback was
// constructed with (held in resource_).
const ResourcePtr& resource() {
  return resource_;
}
213
ResourcePtr resource_;
214
DISALLOW_COPY_AND_ASSIGN(AsyncCallback);
217
// An AsyncCallback for a freshen. The Done() callback in the default
218
// implementation deletes itself.
219
class FreshenCallback : public AsyncCallback {
221
explicit FreshenCallback(const ResourcePtr& resource)
222
: AsyncCallback(resource) {}
224
virtual ~FreshenCallback();
225
// Returns NULL by default. Subclasses should override this if they want this
226
// to be updated based on the response fetched while freshening.
227
// Base implementation: there is no InputInfo to hand out, so yield NULL.
virtual InputInfo* input_info() {
  return NULL;
}
229
// This is called with resource_ok = true only if the hash of the fetched
230
// response is the same as the hash in input_info()->input_content_hash().
231
virtual void Done(bool lock_failure, bool resource_ok) {
236
DISALLOW_COPY_AND_ASSIGN(FreshenCallback);
239
// Links in the HTTP contents and header from a fetched value.
240
// The contents are linked by sharing. The HTTPValue also
241
// contains a serialization of the headers, and this routine
242
// parses them into response_headers_ and return whether that was
244
bool Link(HTTPValue* source, MessageHandler* handler);
246
// Freshen a soon-to-expire resource so that we minimize the number
247
// of cache misses when serving live traffic.
248
// Note that callback may be NULL, and all subclasses must handle this.
249
virtual void Freshen(FreshenCallback* callback, MessageHandler* handler);
251
// Links the stale fallback value that can be used in case a fetch fails.
252
void LinkFallbackValue(HTTPValue* value);
254
// Overwrites is_background_fetch_ with |x|.
void set_is_background_fetch(bool x) {
  is_background_fetch_ = x;
}
255
// Reads back the is_background_fetch_ flag.
bool is_background_fetch() const {
  return is_background_fetch_;
}
257
FetchResponseStatus fetch_response_status() {
258
return fetch_response_status_;
261
void set_fetch_response_status(FetchResponseStatus x) {
262
fetch_response_status_ = x;
265
// Returns whether this type of resource should use the HTTP Cache. This
266
// method is based on properties of the class, not the resource itself, and
267
// helps short-circuit pointless cache lookups for file-based and data URLs.
268
virtual bool UseHttpCache() const = 0;
272
REFCOUNT_FRIEND_DECLARATION(Resource);
273
friend class ServerContext;
274
friend class ReadAsyncHttpCacheCallback; // uses LoadAndCallback
275
friend class RewriteDriver; // for ReadIfCachedWithStatus
276
friend class UrlReadAsyncFetchCallback;
278
// Load the resource asynchronously, storing ResponseHeaders and
279
// contents in object. Calls 'callback' when finished. The
280
// ResourcePtr used to construct 'callback' must be the same as the
281
// resource used to invoke this method.
283
// Setting not_cacheable_policy to kLoadEvenIfNotCacheable will permit it
284
// to consider loading to be successful on Cache-Control:private and
285
// Cache-Control:no-cache resources. It should not affect /whether/ the
286
// callback gets invoked, only whether it gets true or false.
287
virtual void LoadAndCallback(NotCacheablePolicy not_cacheable_policy,
288
const RequestContextPtr& request_context,
289
AsyncCallback* callback) = 0;
291
// Overwrites enable_cache_purge_ with |x|.
void set_enable_cache_purge(bool x) {
  enable_cache_purge_ = x;
}
292
// Reads back the VaryOption held in respect_vary_.
ResponseHeaders::VaryOption respect_vary() const {
  return respect_vary_;
}
293
// Overwrites respect_vary_ with |x|.
void set_respect_vary(ResponseHeaders::VaryOption x) {
  respect_vary_ = x;
}
294
void set_proactive_resource_freshening(bool x) {
295
proactive_resource_freshening_ = x;
298
void set_disable_rewrite_on_no_transform(bool x) {
299
disable_rewrite_on_no_transform_ = x;
301
ServerContext* server_context_;
303
const ContentType* type_;
304
GoogleString charset_;
305
HTTPValue value_; // contains contents and meta-data
306
ResponseHeaders response_headers_;
308
// A stale value that can be used in case we aren't able to fetch a fresh
309
// version of the resource. Note that this should only be used if it is not
311
HTTPValue fallback_value_;
314
// Minimalist constructor for DummyResource with server_context_ == NULL
315
// used in association_transformer_test.cc
317
friend class DummyResource;
319
// The status of the fetched response.
320
FetchResponseStatus fetch_response_status_;
322
// Indicates whether we are trying to load the resource for a background
323
// rewrite or to serve a user request.
324
// Note that by default, we assume that every fetch is triggered in the
325
// background and is not user-facing unless we explicitly set
326
// is_background_fetch_ to false.
327
bool is_background_fetch_;
328
bool enable_cache_purge_;
329
bool proactive_resource_freshening_;
330
bool disable_rewrite_on_no_transform_;
331
bool is_authorized_domain_;
332
ResponseHeaders::VaryOption respect_vary_;
334
DISALLOW_COPY_AND_ASSIGN(Resource);
337
// Sometimes some portions of URL space need to be handled differently
338
// by dedicated resource subclasses. ResourceProvider callbacks are used
339
// to teach RewriteDriver about these, so it knows not to build regular
340
// UrlInputResource objects.
341
typedef Callback2<const GoogleUrl&, bool*> ResourceUrlClaimant;
343
} // namespace net_instaweb
345
#endif // NET_INSTAWEB_REWRITER_PUBLIC_RESOURCE_H_