2
* Copyright 2011 Google Inc.
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
* you may not use this file except in compliance with the License.
6
* You may obtain a copy of the License at
8
* http://www.apache.org/licenses/LICENSE-2.0
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
17
// Author: sligocki@google.com (Shawn Ligocki)
19
// NOTE: This interface is actively under development and may be
20
// changed extensively. Contact us at mod-pagespeed-discuss@googlegroups.com
21
// if you are interested in using it.
23
#ifndef NET_INSTAWEB_AUTOMATIC_PUBLIC_PROXY_FETCH_H_
24
#define NET_INSTAWEB_AUTOMATIC_PUBLIC_PROXY_FETCH_H_
30
#include "net/instaweb/automatic/public/html_detector.h"
31
#include "net/instaweb/http/public/async_fetch.h"
32
#include "net/instaweb/http/public/meta_data.h"
33
#include "net/instaweb/http/public/request_context.h"
34
#include "net/instaweb/http/public/user_agent_matcher.h"
35
#include "net/instaweb/util/public/queued_worker_pool.h"
36
#include "net/instaweb/util/public/basictypes.h"
37
#include "net/instaweb/util/public/fallback_property_page.h"
38
#include "net/instaweb/util/public/gtest_prod.h"
39
#include "net/instaweb/util/public/property_cache.h"
40
#include "net/instaweb/util/public/scoped_ptr.h"
41
#include "net/instaweb/util/public/string.h"
42
#include "net/instaweb/util/public/string_util.h"
44
namespace net_instaweb {
47
class CacheUrlAsyncFetcher;
52
class ProxyFetchPropertyCallbackCollector;
55
class ResponseHeaders;
60
// Factory for creating and starting ProxyFetches. Must outlive all
61
// ProxyFetches it creates.
//
// NOTE(review): this copy of the header contains bare numeric lines and
// appears to be missing short lines (access specifiers, closing braces).
// That looks like extraction residue - confirm against the canonical header.
62
class ProxyFetchFactory {
64
// Constructs a factory from the given ServerContext.
explicit ProxyFetchFactory(ServerContext* server_context);
67
// Convenience method that calls CreateNewProxyFetch and then StartFetch() on
68
// the resulting fetch.
69
void StartNewProxyFetch(
70
const GoogleString& url,
71
AsyncFetch* async_fetch,
72
RewriteDriver* driver,
73
ProxyFetchPropertyCallbackCollector* property_callback,
74
AsyncFetch* original_content_fetch);
76
// Creates a new proxy fetch and passes it to the fetcher to start it. If the
77
// UrlNamer doesn't authorize this url it calls CleanUp() on the driver,
78
// Detach() on the property callback, Done() on the async_fetch and
79
// original_content_fetch, and returns NULL.
81
// If you're using a fetcher for the original request content you should use
82
// StartNewProxyFetch() instead. CreateNewProxyFetch is for callers who will
83
// not be calling StartFetch() and instead will call HeadersComplete(),
84
// Write(), Flush(), and Done() as they get data in from another source.
85
ProxyFetch* CreateNewProxyFetch(
86
const GoogleString& url,
87
AsyncFetch* async_fetch,
88
RewriteDriver* driver,
89
ProxyFetchPropertyCallbackCollector* property_callback,
90
AsyncFetch* original_content_fetch);
92
// Initiates the PropertyCache lookup. See ngx_pagespeed.cc or
93
// proxy_interface.cc for example usage.
// Returns the collector for the lookup; added_page_property_callback is an
// output parameter (presumably set when a page-level callback was added -
// confirm in the .cc file).
94
static ProxyFetchPropertyCallbackCollector* InitiatePropertyCacheLookup(
95
bool is_resource_fetch,
96
const GoogleUrl& request_url,
97
ServerContext* server_context,
98
RewriteOptions* options,
99
AsyncFetch* async_fetch,
100
const bool requires_blink_cohort,
101
bool* added_page_property_callback);
103
// Returns the message handler held in handler_.
MessageHandler* message_handler() const { return handler_; }
106
// ProxyFetch is a friend so that it can call the registration methods below.
friend class ProxyFetch;
108
// Helps track the status of in-flight ProxyFetches. These are intended for
109
// use only by ProxyFetch.
111
// TODO(jmarantz): Enumerate outstanding fetches in server status page.
112
void RegisterNewFetch(ProxyFetch* proxy_fetch);
113
void RegisterFinishedFetch(ProxyFetch* proxy_fetch);
115
ServerContext* server_context_;
117
MessageHandler* handler_;
119
// Presumably guards outstanding_proxy_fetches_ - confirm in the .cc file.
scoped_ptr<AbstractMutex> outstanding_proxy_fetches_mutex_;
120
// The set of ProxyFetches tracked via RegisterNewFetch() and
// RegisterFinishedFetch() (assumed from the names - confirm).
std::set<ProxyFetch*> outstanding_proxy_fetches_;
122
DISALLOW_COPY_AND_ASSIGN(ProxyFetchFactory);
125
// Tracks a single property-cache lookup. These lookups are initiated
126
// immediately upon handling the request, in parallel with determining
127
// domain-specific RewriteOptions and fetching the HTTP headers for the HTML.
129
// Request handling can proceed in parallel with the property-cache lookups,
130
// including RewriteOptions lookup and initiating the HTTP fetch. However,
131
// handling incoming bytes will be blocked waiting for property-cache lookups
// to complete.
//
// NOTE(review): this copy of the header contains bare numeric lines and
// appears to be missing short lines (access specifiers, closing braces).
// That looks like extraction residue - confirm against the canonical header.
133
class ProxyFetchPropertyCallback : public PropertyPage {
135
// Constructs a callback for one lookup. `collector` is the
// ProxyFetchPropertyCallbackCollector this callback reports to (see
// collector_ below); ownership of `mutex` is not visible here - confirm.
ProxyFetchPropertyCallback(PageType page_type,
136
PropertyCache* property_cache,
137
const StringPiece& url,
138
const StringPiece& options_signature_hash,
139
UserAgentMatcher::DeviceType device_type,
140
ProxyFetchPropertyCallbackCollector* collector,
141
AbstractMutex* mutex);
143
// Returns page_type_.
PageType page_type() const { return page_type_; }
145
// Delegates to collector_'s IsCacheValid.
146
virtual bool IsCacheValid(int64 write_timestamp_ms) const;
148
// Completion notification for this lookup. Presumably forwards to
// collector_->Done(this) - confirm in the .cc file.
virtual void Done(bool success);
152
UserAgentMatcher::DeviceType device_type_;
153
// The collector this callback belongs to; IsCacheValid() delegates to it.
ProxyFetchPropertyCallbackCollector* collector_;
155
DISALLOW_COPY_AND_ASSIGN(ProxyFetchPropertyCallback);
158
// Tracks a collection of property-cache lookups occurring in parallel.
159
// Sequence is used to execute various functions in an orderly fashion to
160
// avoid any kind of race between Done(), ConnectProxyFetch(), Detach() and
161
// AddPostLookupTask(). When any function is called, it is added to the
162
// sequence and the added function will be executed immediately if the
// sequence is
163
// free, otherwise it will wait for its turn.
//
// NOTE(review): the flow diagram below has lost its layout (collapsed
// whitespace and missing lines) in this copy - restore it from the
// canonical header.
166
// InitiatePropertyCacheLookup-->AddPostLookupTask-->Initiate Html Fetch
167
// | (Added to Sequence) |
170
// (Added to Sequence) -------------------
172
// ConnectProxyFetch() Detach()
173
// (Added to Sequence)
175
// This will also wait for RequestHeadersComplete() to be called before
176
// invoking any post-completion callbacks (but not before canceling them).
//
// NOTE(review): this copy of the header contains bare numeric lines and
// appears to be missing short lines (access specifiers, closing braces).
// That looks like extraction residue - confirm against the canonical header.
178
class ProxyFetchPropertyCallbackCollector {
180
ProxyFetchPropertyCallbackCollector(ServerContext* server_context,
181
const StringPiece& url,
182
const RequestContextPtr& req_ctx,
183
const RewriteOptions* options,
184
UserAgentMatcher::DeviceType device_type);
185
virtual ~ProxyFetchPropertyCallbackCollector();
187
// Add a callback to be handled by this collector.
188
// Transfers ownership of the callback to the collector.
189
void AddCallback(ProxyFetchPropertyCallback* callback);
191
// Must be called once request headers have been resolved from configuration.
192
// Gates successful post-lookup callback invocation.
193
void RequestHeadersComplete();
195
// In our flow, we initiate the property-cache lookup prior to
196
// creating a proxy-fetch, so that RewriteOptions lookup can proceed
197
// in parallel. If/when we determine that ProxyFetch is associated
198
// with HTML content, we connect it to this callback. Note that if
199
// the property cache lookups have completed, this will result in
200
// a direct call into proxy_fetch->PropertyCacheComplete.
201
void ConnectProxyFetch(ProxyFetch* proxy_fetch);
203
// If for any reason we decide *not* to initiate a ProxyFetch for a
204
// request, then we need to 'detach' this request so that we can
205
// delete it once it completes, rather than waiting for a
206
// ProxyFetch to be inserted. The status code of the response is passed from
207
// ProxyFetch to the Collector. In case the status code is unknown then pass
208
// RewriteDriver::kStatusCodeUnknown.
209
void Detach(HttpStatus::Code status_code);
211
// Returns the actual property page, or NULL if fallback_property_page_ is
// NULL.
212
PropertyPage* property_page() {
213
return fallback_property_page_ == NULL ?
214
NULL : fallback_property_page_->actual_property_page();
217
// Returns the fallback property page. Ownership stays with this collector.
218
FallbackPropertyPage* fallback_property_page() {
219
return fallback_property_page_.get();
222
// Returns the collected PropertyPage with the corresponding page_type.
223
// Ownership of the object is transferred to the caller.
224
PropertyPage* ReleasePropertyPage(
225
ProxyFetchPropertyCallback::PageType page_type);
227
// Releases the ownership of fallback property page to the caller.
228
FallbackPropertyPage* ReleaseFallbackPropertyPage() {
229
return fallback_property_page_.release();
232
// In our flow, property-page will be available via RewriteDriver only after
233
// ProxyFetch is set. But there may be instances where the result may be
234
// required even before proxy-fetch is created. Any task that depends on the
235
// PropertyCache result will be executed as soon as PropertyCache lookup is
236
// done and RequestHeadersComplete() has been called.
238
// func is guaranteed to execute after PropertyCache lookup has completed, as
239
// long as ProxyFetch is not set before PropertyCache lookup is done. One
240
// should use PropertyCache result via RewriteDriver if some other thread can
241
// initiate SetProxyFetch().
242
void AddPostLookupTask(Function* func);
244
// If options_ is NULL returns true. Else, returns true if (url_,
245
// write_timestamp_ms) is valid as per the URL cache invalidation entries
// (presumably those held in options_ - confirm; the original sentence is
// cut off in this copy).
247
bool IsCacheValid(int64 write_timestamp_ms) const;
249
// Called by a ProxyFetchPropertyCallback when the former is complete.
250
void Done(ProxyFetchPropertyCallback* callback);
252
// Returns request_context_.
const RequestContextPtr& request_context() { return request_context_; }
254
// Returns DeviceType from device property page.
// (The accessor itself simply returns device_type_.)
255
UserAgentMatcher::DeviceType device_type() { return device_type_; }
258
friend class ProxyFetchPropertyCallbackCollectorTest;
// Execute* counterparts of the public entry points above. Presumably these
// are the bodies run on sequence_, per the class comment - confirm in the
// .cc file.
259
void ExecuteDone(ProxyFetchPropertyCallback* callback);
260
void ExecuteAddPostLookupTask(Function* func);
261
void ExecuteConnectProxyFetch(ProxyFetch* proxy_fetch);
262
void ExecuteDetach(HttpStatus::Code status_code);
263
void ExecuteRequestHeadersComplete();
265
void RunPostLookupsAndCleanupIfSafe();
267
// Updates the status code of response in property cache.
268
void UpdateStatusCodeInPropertyCache();
270
std::set<ProxyFetchPropertyCallback*> pending_callbacks_;
271
// NOTE(review): the member name for this map is missing in this copy.
std::map<ProxyFetchPropertyCallback::PageType, PropertyPage*>
273
scoped_ptr<AbstractMutex> mutex_;
274
ServerContext* const server_context_;
275
QueuedWorkerPool::Sequence* const sequence_;
276
const GoogleString url_;
277
const RequestContextPtr request_context_;
278
const UserAgentMatcher::DeviceType device_type_;
279
bool is_options_valid_; // protected by mutex_.
280
// Unless guarded by mutex_, the fields are only accessed by code serialized
// (presumably on sequence_ - confirm; the original sentence is cut off in
// this copy).
284
bool request_headers_ok_;
285
ProxyFetch* proxy_fetch_;
286
std::vector<Function*> post_lookup_task_vector_;
287
const RewriteOptions* options_; // protected by mutex_.
288
HttpStatus::Code status_code_; // status_code_ of the response.
289
scoped_ptr<FallbackPropertyPage> fallback_property_page_;
291
DISALLOW_COPY_AND_ASSIGN(ProxyFetchPropertyCallbackCollector);
294
// Manages a single fetch of an HTML or resource file from the original server.
295
// If it is an HTML file, it is rewritten.
296
// Fetch is initialized by calling ProxyFetchFactory::StartNewProxyFetch().
297
// For fetching pagespeed rewritten resources, use ResourceFetch.
298
// This is only meant to be used by ProxyInterface.
300
// Takes ownership of custom_options.
302
// The ProxyFetch passes through non-HTML directly to base_writer.
304
// For HTML, the sequence is this:
305
// 1. HeadersComplete is called, allowing us to establish we've got HTML.
306
// 2. Some number of calls to Write occur.
307
// 3. Optional: Flush is called, followed by more Writes. Repeat.
308
// 4. Done is called.
309
// These virtual methods are called from some arbitrary thread, e.g. a
310
// dedicated fetcher thread. We use a QueuedWorkerPool::Sequence to
311
// offload them to a worker-thread. This implementation bundles together
312
// multiple Writes, and depending on the timing, may move Flushes to
313
// follow Writes and collapse multiple Flushes into one.
//
// NOTE(review): this copy of the header contains bare numeric lines and
// appears to be missing short lines (access specifiers, closing braces and
// some short declarations). That looks like extraction residue - confirm
// against the canonical header.
314
class ProxyFetch : public SharedAsyncFetch {
316
// These strings identify sync-points for reproducing races between
317
// PropertyCache lookup completion and Origin HTML Fetch completion.
318
static const char kCollectorConnectProxyFetchFinish[];
319
static const char kCollectorDetachFinish[];
320
static const char kCollectorDoneFinish[];
321
static const char kCollectorFinish[];
322
static const char kCollectorDetachStart[];
323
static const char kCollectorRequestHeadersCompleteFinish[];
325
// These strings identify sync-points for introducing races between
326
// PropertyCache lookup completion and HeadersComplete.
327
static const char kHeadersSetupRaceAlarmQueued[];
328
static const char kHeadersSetupRaceDone[];
329
static const char kHeadersSetupRaceFlush[];
330
static const char kHeadersSetupRacePrefix[];
331
static const char kHeadersSetupRaceWait[];
333
// Number of milliseconds to wait, in a test, for an event that we
334
// are hoping does not occur, specifically an inappropriate call to
335
// base_fetch()->HeadersComplete() while we are still mutating
336
// response headers in SetupForHtml.
338
// This is used only for testing.
339
static const int kTestSignalTimeoutMs = 200;
342
// protected interface from AsyncFetch.
343
virtual void HandleHeadersComplete();
344
virtual bool HandleWrite(const StringPiece& content, MessageHandler* handler);
345
virtual bool HandleFlush(MessageHandler* handler);
346
virtual void HandleDone(bool success);
347
virtual bool IsCachedResultValid(const ResponseHeaders& headers);
350
friend class ProxyFetchFactory;
351
friend class ProxyFetchPropertyCallbackCollector;
352
friend class MockProxyFetch;
353
FRIEND_TEST(ProxyFetchTest, TestInhibitParsing);
355
// Called by ProxyFetchPropertyCallbackCollector when all property-cache
356
// fetches are complete. This function takes ownership of collector.
357
virtual void PropertyCacheComplete(
358
ProxyFetchPropertyCallbackCollector* collector);
360
// If cross_domain is true, we're requested under a domain different from
361
// the underlying host, using proxy mode in UrlNamer.
// NOTE(review): some constructor parameters (including cross_domain) are
// missing from this copy of the declaration.
362
ProxyFetch(const GoogleString& url,
364
ProxyFetchPropertyCallbackCollector* property_cache_callback,
365
AsyncFetch* async_fetch,
366
AsyncFetch* original_content_fetch,
367
RewriteDriver* driver,
368
ServerContext* server_context,
370
ProxyFetchFactory* factory);
371
virtual ~ProxyFetch();
373
const RewriteOptions* Options();
375
// Once we have decided this is HTML, begin parsing and set headers.
// NOTE(review): the declaration this comment describes is missing here.
378
// Adds a pagespeed header to response_headers if enabled.
379
void AddPagespeedHeader();
381
// Sets up driver_, registering the writer and start parsing url.
382
// Returns whether we started parsing successfully or not.
// NOTE(review): the declaration this comment describes is missing here.
385
// Start the fetch which includes preparing the request.
// NOTE(review): the declaration this comment describes is missing here.
388
// Actually do the fetch, called from callback of StartFetch.
389
// "prepare_success" represents whether the request was prepared
// successfully.
391
void DoFetch(bool prepare_success);
393
// Handles buffered HTML writes, flushes, and done calls
394
// in the QueuedWorkerPool::Sequence sequence_.
395
void ExecuteQueued();
397
// Schedules the task to run any buffered work, if needed. Assumes mutex
// is held (presumably mutex_ - confirm; the original sentence is cut off in
// this copy).
399
void ScheduleQueueExecutionIfNeeded();
401
// Frees up the RewriteDriver (via FinishParse or Cleanup),
402
// calls the callback (nulling out callback_ to ensure that we don't
403
// do it again), notifies the ProxyInterface that the fetch is
404
// complete, and deletes the ProxyFetch.
405
void Finish(bool success);
407
// Used to wrap up the FinishParseAsync invocation.
408
void CompleteFinishParse(bool success);
410
// Callback we give to ExecuteFlushIfRequestedAsync to notify us when
411
// it's done with its work.
// NOTE(review): the declaration this comment describes is missing here.
414
// Management functions for idle_alarm_. Must only be called from
// (sentence cut off in this copy; the idle_alarm_ field comment below says
// it must only be accessed from the thread context of sequence_).
417
// Cancels any previous alarm.
418
void CancelIdleAlarm();
420
// Cancels previous alarm and starts next one.
421
void QueueIdleAlarm();
423
// Handler for the alarm; run in sequence_.
424
void HandleIdleAlarm();
427
ServerContext* server_context_;
430
scoped_ptr<CacheUrlAsyncFetcher> cache_fetcher_;
432
// True if we're handling a cross-domain request in proxy mode, which
433
// should do some additional checking.
// NOTE(review): the field this comment describes is missing here.
436
// Does page claim to be "Content-Type: text/html"? (It may be lying)
// NOTE(review): the field this comment describes is missing here.
439
// Has a call to StartParse succeeded? We'll only do this if we actually
440
// decide it is HTML.
// NOTE(review): the field this comment describes is missing here.
443
// Has a call to RewriteDriver::ParseText been made yet.
444
bool parse_text_called_;
446
// Tracks whether Done() has been called.
// NOTE(review): the field this comment describes is missing here.
449
HtmlDetector html_detector_;
451
// Tracks a set of outstanding property-cache lookups. This is NULLed
452
// when the property-cache completes or when we detach it. We use
453
// this to detach the callback if we decide we don't care about the
454
// property-caches because we discovered we are not working with HTML.
455
ProxyFetchPropertyCallbackCollector* property_cache_callback_;
457
// Fetch where raw original headers and contents are sent.
458
// To contrast, base_fetch() is sent rewritten contents and headers.
459
// If NULL, original_content_fetch_ is ignored.
460
AsyncFetch* original_content_fetch_;
462
// ProxyFetch is responsible for getting RewriteDrivers from the pool and
463
// putting them back.
464
RewriteDriver* driver_;
466
// True if we have queued up ExecuteQueued but did not
// (sentence cut off in this copy; presumably "did not execute it yet" -
// confirm).
468
bool queue_run_job_created_;
470
// As the UrlAsyncFetcher calls our Write & Flush methods, we collect
471
// the text in text_queue, and note the Flush call in
472
// network_flush_requested_, returning control to the fetcher as quickly
473
// as possible so it can continue to process incoming network traffic.
475
// We offload the handling of the incoming text events to a
476
// QueuedWorkerPool::Sequence. Note that we may receive a new chunk
477
// of text while we are still processing an old chunk. The sequentiality
478
// is preserved by QueuedWorkerPool::Sequence.
480
// The Done callback is also indirected through this Sequence.
481
scoped_ptr<AbstractMutex> mutex_;
482
StringStarVector text_queue_;
483
bool network_flush_outstanding_;
484
QueuedWorkerPool::Sequence* sequence_;
486
// done_outstanding_ will be true if we got called with ::Done but didn't
487
// invoke Finish yet.
488
bool done_outstanding_;
490
// Finish is true if we started Finish, perhaps doing FinishParseAsync.
491
// Accessed only from within context of sequence_.
// NOTE(review): the field this comment describes is missing here.
494
// done_result_ is used to store the result of ::Done if we're deferring
495
// handling it until the driver finishes handling a Flush.
// NOTE(review): the done_result_ declaration is missing here.
498
// We may also end up receiving new events in between calling FlushAsync
499
// and getting the callback called. In that case, we want to hold off
500
// on actually dispatching things queued up above.
501
bool waiting_for_flush_to_finish_;
503
// Alarm used to keep track of inactivity, in order to help issue
504
// flushes. Must only be accessed from the thread context of sequence_.
505
QueuedAlarm* idle_alarm_;
507
ProxyFetchFactory* factory_;
509
// Set to true if this proxy_fetch is the result of a distributed fetch.
510
bool distributed_fetch_;
512
DISALLOW_COPY_AND_ASSIGN(ProxyFetch);
515
} // namespace net_instaweb
517
#endif // NET_INSTAWEB_AUTOMATIC_PUBLIC_PROXY_FETCH_H_