2
* Copyright 2010 Google Inc.
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
* you may not use this file except in compliance with the License.
6
* You may obtain a copy of the License at
8
* http://www.apache.org/licenses/LICENSE-2.0
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
17
// Author: sligocki@google.com (Shawn Ligocki)
19
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_REWRITE_DRIVER_FACTORY_H_
20
#define NET_INSTAWEB_REWRITER_PUBLIC_REWRITE_DRIVER_FACTORY_H_
25
#include "net/instaweb/util/public/basictypes.h"
26
#include "net/instaweb/util/public/function.h"
27
#include "net/instaweb/util/public/null_statistics.h"
28
#include "net/instaweb/util/public/scoped_ptr.h"
29
#include "net/instaweb/util/public/string.h"
30
#include "net/instaweb/util/public/string_util.h"
32
// Forward declaration only; the full definition of JsTokenizerPatterns is not
// needed by this header because it is only used through a pointer.
namespace pagespeed {
namespace js {
struct JsTokenizerPatterns;
}  // namespace js
}  // namespace pagespeed
34
namespace net_instaweb {
37
class CacheHtmlInfoFinder;
38
class CriticalCssFinder;
39
class CriticalImagesFinder;
40
class CriticalLineInfoFinder;
41
class CriticalSelectorFinder;
43
class FlushEarlyInfoFinder;
44
class ExperimentMatcher;
47
class NamedLockManager;
51
class QueuedWorkerPool;
55
class RewriteOptionsManager;
59
class StaticAssetManager;
63
class UrlAsyncFetcher;
65
class UsageDataReporter;
66
class UserAgentMatcher;
67
class UserAgentNormalizer;
69
// Manages the construction and ownership of most objects needed to create
70
// RewriteDrivers. If you have your own versions of these classes (specific
71
// implementations of UrlAsyncFetcher, Hasher, etc.) you can make your own
72
// subclass of RewriteDriverFactory to use these by default.
73
class RewriteDriverFactory {
75
// Helper for users of defer_cleanup; see below.
76
template<class T> class Deleter;
78
enum WorkerPoolCategory {
81
kLowPriorityRewriteWorkers,
82
// Make sure to insert new values above this line.
86
// Takes ownership of thread_system.
87
RewriteDriverFactory(const ProcessContext& process_context,
88
ThreadSystem* thread_system);
90
// Initializes default options we want to hard-code into the
91
// base-class to get consistency across deployments. Subclasses
92
// that override NewRewriteOptions() should call this method from
93
// their constructor. It is safe to call this multiple times.
94
void InitializeDefaultOptions();
96
virtual ~RewriteDriverFactory();
98
// The RewriteDriverFactory will create objects of default type through the
99
// New* method from derived classes. Here are the objects that can be
100
// replaced before creating the RewriteDriver.
101
// Note: RewriteDriver takes ownership of these.
102
void set_html_parse_message_handler(MessageHandler* message_handler);
103
void set_message_handler(MessageHandler* message_handler);
104
void set_file_system(FileSystem* file_system);
105
void set_hasher(Hasher* hasher);
106
void set_nonce_generator(NonceGenerator* nonce_generator);
107
void set_url_namer(UrlNamer* url_namer);
108
void set_signature(SHA1Signature* signature);
109
void set_timer(Timer* timer);
110
void set_usage_data_reporter(UsageDataReporter* reporter);
112
// Set up a directory for slurped files for HTML and resources. If
113
// read_only is true, then it will only read from these files, and
114
// this will eliminate the usage of any other url_fetcher. If
115
// read_only is false, then the existing url fetcher will be used as
116
// a fallback if the slurped file is not found, and slurped files will
117
// be subsequently written so they don't have to be fetched from
118
// the Internet again.
120
// You must set the slurp directory prior to calling ComputeUrlAsyncFetcher.
121
void set_slurp_directory(const StringPiece& directory);
122
void set_slurp_read_only(bool read_only);
123
// Setter for the slurp "print URLs" flag.
// NOTE(review): presumably stores into slurp_print_urls_; the definition is
// not in this header, so confirm against the implementation file.
void set_slurp_print_urls(bool print_urls);
125
// Setting HTTP caching on causes both the fetcher and the async
126
// fetcher to return cached versions.
127
void set_force_caching(bool u) {
  // Plain store into the flag; nothing else is touched here.
  force_caching_ = u;
}
129
// You can call set_base_url_async_fetcher to set up real async fetching
130
// for real serving or for modeling of live traffic.
132
// These fetchers may be used directly when serving traffic, or they
133
// may be aggregated with other fetchers (e.g. for slurping).
135
// You cannot set the base URL fetcher once ComputeUrlAsyncFetcher has
137
void set_base_url_async_fetcher(UrlAsyncFetcher* url_fetcher);
138
// Takes ownership of distributed_fetcher.
139
void set_base_distributed_async_fetcher(UrlAsyncFetcher* distributed_fetcher);
140
bool set_filename_prefix(StringPiece p);
142
// Determines whether Slurping is enabled.
143
bool slurping_enabled() const { return !slurp_directory_.empty(); }
145
MessageHandler* html_parse_message_handler();
146
MessageHandler* message_handler();
147
FileSystem* file_system();
148
NonceGenerator* nonce_generator();
149
// TODO(sligocki): Remove hasher() and force people to make a NewHasher when
152
UrlNamer* url_namer();
153
UserAgentMatcher* user_agent_matcher();
154
StaticAssetManager* static_asset_manager();
155
SHA1Signature* signature();
156
RewriteOptions* default_options() {
  // Exposes the raw pointer held by default_options_; get() does not give up
  // the scoped_ptr's ownership.
  RewriteOptions* options = default_options_.get();
  return options;
}
157
virtual RewriteOptionsManager* NewRewriteOptionsManager();
159
// These accessors are *not* thread-safe until after the first call, as they
160
// do unlocked lazy initialization, so they must be called at least once prior
161
// to starting threads. Normally this is done by CreateServerContext() or
162
// InitServerContext().
164
NamedLockManager* lock_manager();
165
QueuedWorkerPool* WorkerPool(WorkerPoolCategory pool);
166
Scheduler* scheduler();
167
UsageDataReporter* usage_data_reporter();
168
const pagespeed::js::JsTokenizerPatterns* js_tokenizer_patterns() const {
169
return js_tokenizer_patterns_;
171
const std::vector<const UserAgentNormalizer*>& user_agent_normalizers();
173
// Computes URL fetchers using the base fetcher, and optionally,
174
// slurp_directory and slurp_read_only. These are not thread-safe;
175
// they must be called once prior to spawning threads, e.g. via
176
// CreateServerContext.
177
virtual UrlAsyncFetcher* ComputeUrlAsyncFetcher();
178
virtual UrlAsyncFetcher* ComputeDistributedFetcher();
180
// Threadsafe mechanism to create a managed ServerContext. The
181
// ServerContext is owned by the factory, and should not be
182
// deleted directly. Currently it is not possible to delete a
183
// server context except by deleting the entire factory.
185
// Implemented in terms of NewServerContext().
186
ServerContext* CreateServerContext();
188
// Initializes a ServerContext that has been new'd directly. This
189
// allows 2-phase initialization if required. There is no need to
190
// call this if you use CreateServerContext.
191
void InitServerContext(ServerContext* server_context);
193
// Called from InitServerContext, but virtualized separately as it is
194
// platform-specific. This method must call on the server context:
195
// set_http_cache, set_metadata_cache, set_filesystem_metadata_cache, and
196
// MakePropertyCaches.
197
virtual void SetupCaches(ServerContext* server_context) = 0;
199
// Returns true if this platform uses beacon-based measurements to make
200
// run-time decisions. This is used to determine how to configure various
201
// beacon-based filters.
202
virtual bool UseBeaconResultsInFilters() const = 0;
204
// Provides an optional hook for adding rewrite passes to the HTML filter
205
// chain. This should be used for filters that are specific to a particular
206
// RewriteDriverFactory implementation.
207
virtual void AddPlatformSpecificRewritePasses(RewriteDriver* driver);
209
// Provides an optional hook for adding rewriters to the .pagespeed. resource
210
// decoding chain. This should be used for rewriters that are specific to a
211
// particular RewriteDriverFactory implementation. The caller should only use
212
// the resulting driver for reconstructing a .pagespeed. resource, not for
213
// transforming HTML. Therefore, implementations should add any
214
// platform-specific rewriter whose id might appear in a .pagespeed. URL.
215
// This should be done independent of RewriteOptions, since we only store
216
// a single decoding driver globally to save memory.
217
virtual void AddPlatformSpecificDecodingPasses(RewriteDriver* driver);
219
// Provides an optional hook for customizing the RewriteDriver object
220
// using the options set on it. This is called before
221
// RewriteDriver::AddFilters() and AddPlatformSpecificRewritePasses().
222
virtual void ApplyPlatformSpecificConfiguration(RewriteDriver* driver);
224
ThreadSystem* thread_system() {
  // Exposes the raw pointer held by thread_system_; get() does not give up
  // the scoped_ptr's ownership.
  ThreadSystem* threads = thread_system_.get();
  return threads;
}
226
// Returns the set of directories that we (or our subclasses) have created
228
const StringSet& created_directories() const {
229
return created_directories_;
232
bool async_rewrites() {
  // Hard-wired: this accessor unconditionally reports true.
  const bool always_async = true;
  return always_async;
}
234
// Collection of global statistics objects. This is thread-unsafe:
235
// it must be called prior to spawning threads, and after any calls
236
// to SetStatistics. Failing that, it will be initialized in the
237
// first call to InitServerContext(), which is thread-safe.
238
RewriteStats* rewrite_stats();
240
// statistics (default is NullStatistics). This can be overridden by calling
241
// SetStatistics, either from subclasses or externally.
242
Statistics* statistics() {
  // statistics_ is a raw pointer member; hand it back unchanged.
  Statistics* current = statistics_;
  return current;
}
244
// Initializes statistics variables. This must be done at process
245
// startup to enable shared memory segments in Apache to be set up.
246
static void InitStats(Statistics* statistics);
248
// Initializes static variables. Initialize/Terminate calls must be paired.
249
static void Initialize();
250
static void Terminate();
252
// Does *not* take ownership of Statistics.
253
void SetStatistics(Statistics* stats);
255
// Clean up all the factory-owned resources: fetchers, pools,
256
// Server Contexts, the Drivers owned by the Server Contexts,
257
// and worker threads.
258
virtual void ShutDown();
260
// Registers the directory as having been created by us.
261
void AddCreatedDirectory(const GoogleString& dir);
263
// Creates a new empty RewriteOptions object, with no default settings.
264
// Generally configurations go factory's default_options() ->
265
// ServerContext::global_options() -> RewriteDriverFactory,
266
// but this method just provides a blank set of options.
267
virtual RewriteOptions* NewRewriteOptions();
269
// Creates a new empty RewriteOptions object meant for use for
270
// custom options from queries or headers. Default implementation just
271
// forwards to NewRewriteOptions().
272
virtual RewriteOptions* NewRewriteOptionsForQuery();
274
// get/set the version placed into the X-[Mod-]Page(s|-S)peed header.
275
const GoogleString& version_string() const {
  // Returned by const reference, so no copy of the string is made here.
  return version_string_;
}
276
void set_version_string(const StringPiece& version_string) {
277
version_string.CopyToString(&version_string_);
280
// Causes the given function to be Run after all the threads are shutdown,
281
// in order to do any needed resource cleanups. The Deleter<T> template below
282
// may be useful for object deletion cleanups.
283
void defer_cleanup(Function* f) {
  // Appended to the end of deferred_cleanups_, so registration order is kept.
  deferred_cleanups_.push_back(f);
}
285
// Queues an object for deletion at the last phase of RewriteDriverFactory
287
template<class T> void TakeOwnership(T* obj) {
288
defer_cleanup(new RewriteDriverFactory::Deleter<T>(obj));
291
// Base method that returns true if the given ip is a debug ip.
292
virtual bool IsDebugClient(const GoogleString& ip) const {
296
// Creates an ExperimentMatcher, which is used to match clients or sessions to
297
// a specific experiment.
298
virtual ExperimentMatcher* NewExperimentMatcher();
300
// Returns the preferred webp image quality vector for client options.
301
const std::vector<int>* preferred_webp_qualities() {
302
return &preferred_webp_qualities_;
305
// Returns the preferred jpeg image quality vector for client options.
306
const std::vector<int>* preferred_jpeg_qualities() {
307
return &preferred_jpeg_qualities_;
310
// Returns true if the correct number of WebP qualities are parsed and set.
311
bool SetPreferredWebpQualities(const StringPiece& qualities);
313
// Returns true if the correct number of JPEG qualities are parsed and set.
314
bool SetPreferredJpegQualities(const StringPiece& qualities);
317
bool FetchersComputed() const;
318
virtual void StopCacheActivity();
319
StringPiece filename_prefix();
321
// Used by subclasses to indicate that a ServerContext has been
322
// terminated. Returns true if this was the last server context
323
// known to this factory.
324
bool TerminateServerContext(ServerContext* server_context);
326
// Implementors of RewriteDriverFactory must supply default definitions
327
// for each of these methods, although they may be overridden via set_
328
// methods above. These methods all instantiate objects and transfer
329
// ownership to the caller.
330
virtual UrlAsyncFetcher* DefaultAsyncUrlFetcher() = 0;
331
virtual MessageHandler* DefaultHtmlParseMessageHandler() = 0;
332
virtual MessageHandler* DefaultMessageHandler() = 0;
333
virtual FileSystem* DefaultFileSystem() = 0;
334
virtual NonceGenerator* DefaultNonceGenerator();
335
virtual Timer* DefaultTimer();
336
virtual SHA1Signature* DefaultSignature();
338
virtual Hasher* NewHasher() = 0;
340
// Creates a new ServerContext* object. ServerContext itself must be
341
// overridden per Factory as it has at least one pure virtual method.
342
virtual ServerContext* NewServerContext() = 0;
344
// Create a new ServerContext used for decoding only. Unlike NewServerContext,
345
// the resulting ServerContext should not be fresh, but should have some of
346
// its platform dependencies injected --- but just enough for decoding URLs,
347
// and not full operation. At the time of writing it needs the timer,
348
// url namer, hasher, message handler, and stats; expensive stuff like
349
// cache backends is not needed, however.
351
// You may find InitStubDecodingServerContext() useful for doing that, as it
352
// will inject all of these from what's available in 'this'.
353
virtual ServerContext* NewDecodingServerContext() = 0;
355
virtual UrlAsyncFetcher* DefaultDistributedUrlFetcher() {
  // The base-class version supplies no distributed fetcher.
  return NULL;
}
357
virtual CriticalCssFinder* DefaultCriticalCssFinder();
358
virtual CriticalImagesFinder* DefaultCriticalImagesFinder(
359
ServerContext* server_context);
360
virtual CriticalSelectorFinder* DefaultCriticalSelectorFinder(
361
ServerContext* server_context);
363
// Default implementation returns NULL.
364
virtual CacheHtmlInfoFinder* DefaultCacheHtmlInfoFinder(
365
PropertyCache* cache, ServerContext* server_context);
367
// Default implementation returns NULL.
368
virtual FlushEarlyInfoFinder* DefaultFlushEarlyInfoFinder();
370
// Default implementation returns a valid CriticalLineInfoFinder.
371
virtual CriticalLineInfoFinder* DefaultCriticalLineInfoFinder(
372
ServerContext* server_context);
374
// They may also supply a custom lock manager. The default implementation
375
// will use the file system.
376
virtual NamedLockManager* DefaultLockManager();
378
// They may also supply a custom Url namer. The default implementation
379
// performs sharding and appends '.pagespeed.<filter>.<hash>.<extension>'.
380
virtual UrlNamer* DefaultUrlNamer();
382
virtual UserAgentMatcher* DefaultUserAgentMatcher();
383
virtual UsageDataReporter* DefaultUsageDataReporter();
385
// Provides an optional hook to add user-agent normalizers specific to
386
// needs of a specific RewriteDriverFactory implementation. The new entries
387
// should be appended to the end of *out (without clearing it), and should
388
// still be owned by the RewriteDriverFactory subclass.
390
// Default implementation does nothing.
391
virtual void AddPlatformSpecificUserAgentNormalizers(
392
std::vector<const UserAgentNormalizer*>* out);
394
// Subclasses can override this to create an appropriately-sized thread
395
// pool for their environment. The default implementation will always
396
// make one with a single thread.
397
virtual QueuedWorkerPool* CreateWorkerPool(WorkerPoolCategory pool,
400
// Subclasses can override this method to request load-shedding to happen
401
// if the low-priority work pool has too many inactive sequences queued up
402
// waiting (the returned value will be a threshold beyond which things
403
// will start getting dropped). The default implementation returns
404
// kNoLoadShedding, which disables the feature. See also
405
// QueuedWorkerPool::set_load_shedding_threshold
406
virtual int LowPriorityLoadSheddingThreshold() const;
408
// Subclasses can override this to create an appropriate Scheduler
409
// subclass if the default isn't acceptable.
410
virtual Scheduler* CreateScheduler();
412
// Called before creating the url fetchers.
413
virtual void FetcherSetupHooks();
415
// Override this if you want to change what directory locks go into
416
// when using the default filesystem-based lock manager. The default is
418
virtual StringPiece LockFilePrefix();
420
// Initializes the StaticAssetManager.
421
// The base-class version below has an empty body, i.e. it does nothing with
// static_asset_manager; being virtual, it can be overridden by subclasses.
virtual void InitStaticAssetManager(
422
StaticAssetManager* static_asset_manager) {}
424
// Sets up enough of platform dependencies in 'context' to be able to use
425
// it for decoding URLs, based on this object's values and some stubs.
426
void InitStubDecodingServerContext(ServerContext* context);
429
void RebuildDecodingDriverForTests(ServerContext* server_context);
432
// Creates a StaticAssetManager instance. Default implementation creates an
433
// instance that disables serving of filter javascript via gstatic
434
// (gstatic.com is the domain google uses for serving static content).
435
StaticAssetManager* DefaultStaticAssetManager();
437
void SetupSlurpDirectories();
438
void Init(); // helper-method for constructors.
440
void InitDecodingDriver(ServerContext* server_context);
442
scoped_ptr<MessageHandler> html_parse_message_handler_;
443
scoped_ptr<MessageHandler> message_handler_;
444
scoped_ptr<FileSystem> file_system_;
445
UrlAsyncFetcher* url_async_fetcher_;
446
UrlAsyncFetcher* distributed_async_fetcher_;
447
scoped_ptr<UrlAsyncFetcher> base_url_async_fetcher_;
448
scoped_ptr<UrlAsyncFetcher> base_distributed_async_fetcher_;
449
scoped_ptr<Hasher> hasher_;
450
scoped_ptr<NonceGenerator> nonce_generator_;
451
scoped_ptr<SHA1Signature> signature_;
452
scoped_ptr<UrlNamer> url_namer_;
453
scoped_ptr<UserAgentMatcher> user_agent_matcher_;
455
// Lazily filled-in list of UA normalizers, including the default ones
456
// this class adds, and any additional ones added by user_agent_normalizers()
457
// calling subclass' AddPlatformSpecificUserAgentNormalizers on this.
458
std::vector<const UserAgentNormalizer*> user_agent_normalizers_;
459
scoped_ptr<StaticAssetManager> static_asset_manager_;
460
scoped_ptr<Timer> timer_;
461
scoped_ptr<Scheduler> scheduler_;
462
scoped_ptr<UsageDataReporter> usage_data_reporter_;
463
// RE2 patterns needed for JsTokenizer.
464
const pagespeed::js::JsTokenizerPatterns* js_tokenizer_patterns_;
466
GoogleString filename_prefix_;
467
GoogleString slurp_directory_;
469
bool slurp_read_only_;
470
bool slurp_print_urls_;
472
// protected by server_context_mutex_;
473
typedef std::set<ServerContext*> ServerContextSet;
474
ServerContextSet server_contexts_;
475
scoped_ptr<AbstractMutex> server_context_mutex_;
477
// Stores options with hard-coded defaults and adjustments from
478
// the core system, subclasses, and command-line.
479
scoped_ptr<RewriteOptions> default_options_;
481
// Keep around a RewriteDriver just for decoding resource URLs, using
482
// the default options. This is possible because the id->RewriteFilter
483
// table is fully constructed independent of the options; we however
484
// still inject options into some of the Decode methods since we also
485
// need to honor things like forbids. We also have a special
486
// ServerContext just for it, to avoid connecting it to any particular
488
scoped_ptr<ServerContext> decoding_server_context_;
489
scoped_ptr<RewriteDriver> decoding_driver_;
491
// Manage locks for output resources.
492
scoped_ptr<NamedLockManager> lock_manager_;
494
scoped_ptr<ThreadSystem> thread_system_;
496
// Default statistics implementation which can be overridden by children
497
// by calling SetStatistics().
498
NullStatistics null_statistics_;
499
Statistics* statistics_;
501
StringSet created_directories_;
503
std::vector<QueuedWorkerPool*> worker_pools_;
505
// These must be initialized after the RewriteDriverFactory subclass has been
506
// constructed so it can use the statistics() override.
507
scoped_ptr<RewriteStats> rewrite_stats_;
509
// To assist with subclass destruction-order, subclasses can register
510
// functions to run late in the destructor.
511
std::vector<Function*> deferred_cleanups_;
513
// Version string to put into HTTP response headers.
514
// TODO(sligocki): Remove. Redundant with RewriteOptions::x_header_value().
515
GoogleString version_string_;
517
// The hostname we're running on. Used to set the same field in ServerContext.
518
GoogleString hostname_;
520
// Image qualities used for client options.
521
// Each vector contains 5 integers used as recompression qualities for
522
// quality preference and screen resolution combinations.
523
// Note that the default values cannot be changed in Apache currently.
524
std::vector<int> preferred_webp_qualities_;
525
std::vector<int> preferred_jpeg_qualities_;
527
DISALLOW_COPY_AND_ASSIGN(RewriteDriverFactory);
530
// Helper for users of RewriteDriverFactory::defer_cleanup --- instantiates
531
// into objects that call the appropriate delete operator when Run.
532
template<class T> class RewriteDriverFactory::Deleter : public Function {
534
explicit Deleter(T* obj)
    : obj_(obj) {
  // Only records the pointer; nothing is deleted at construction time.
}
535
virtual void Run() {
  // Applies the delete operator to the stored pointer.
  delete obj_;
}
538
DISALLOW_COPY_AND_ASSIGN(Deleter);
541
} // namespace net_instaweb
543
#endif // NET_INSTAWEB_REWRITER_PUBLIC_REWRITE_DRIVER_FACTORY_H_