2
* Copyright 2012 Google Inc.
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
* you may not use this file except in compliance with the License.
6
* You may obtain a copy of the License at
8
* http://www.apache.org/licenses/LICENSE-2.0
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
17
// Author: piatek@google.com (Michael Piatek)
19
#ifndef NET_INSTAWEB_HTTP_PUBLIC_REQUEST_CONTEXT_H_
20
#define NET_INSTAWEB_HTTP_PUBLIC_REQUEST_CONTEXT_H_
24
#include "base/logging.h"
25
#include "pagespeed/kernel/base/basictypes.h"
26
#include "pagespeed/kernel/base/ref_counted_ptr.h"
27
#include "pagespeed/kernel/base/scoped_ptr.h"
28
#include "pagespeed/kernel/base/string.h"
29
#include "pagespeed/kernel/base/string_util.h"
30
#include "pagespeed/kernel/http/http_options.h"
32
namespace net_instaweb {
34
class AbstractLogRecord;
41
// Reference-counted handle through which RequestContext objects are shared.
typedef RefCountedPtr<RequestContext> RequestContextPtr;
43
// A class which wraps state associated with a request.
45
// This object should be reference counted, wrapped in a RequestContextPtr. We
46
// use reference counting because, depending on the timing of asynchronous
47
// rewrites, RPC calls, and so on, a RequestContext may outlive the original
48
// HTTP request serving, or not. Reference counting avoids the complexity of
49
// explicit transfer of ownership in these cases.
50
class RequestContext : public RefCounted<RequestContext> {
52
// Types of split html request.
53
enum SplitRequestType {
59
// |logging_mutex| will be passed to the request context's AbstractLogRecord,
60
// which will take ownership of it. If you will be doing logging in a real
61
// (threaded) environment, pass in a real mutex. If not, a NullMutex is fine.
62
// |timer| will be passed to the TimingInfo, which will *not* take ownership.
63
// Passing NULL for |timer| is allowed.
64
RequestContext(const HttpOptions& options, AbstractMutex* logging_mutex,
66
// If you use this constructor, you MUST set_options() later.
67
RequestContext(AbstractMutex* logging_mutex, Timer* timer);
69
// TODO(marq): Move this test context factory to a test-specific file.
70
// Makes a request context for running tests.
71
// Note: Test RequestContexts do not pay attention to options.
72
static RequestContextPtr NewTestRequestContext(ThreadSystem* thread_system) {
73
return NewTestRequestContextWithTimer(thread_system, NULL);
75
static RequestContextPtr NewTestRequestContextWithTimer(
76
ThreadSystem* thread_system, Timer* timer);
77
// Makes a request context for running tests around the supplied log record.
// NOTE(review): presumably the new context takes ownership of |log_record|
// (log_record_ is held in a scoped_ptr) -- confirm against the .cc file.
static RequestContextPtr NewTestRequestContext(AbstractLogRecord* log_record);
79
// Creates a new, unowned AbstractLogRecord, for use by some subordinate
80
// action. Also useful in case of background activity where logging is
81
// required after the response is written out, e.g., blink flow.
82
virtual AbstractLogRecord* NewSubordinateLogRecord(
83
AbstractMutex* logging_mutex);
85
// The root trace context is associated with the user request which we
86
// are attempting to serve. If this is a request with constituent resources
87
// that we rewrite, there may be several dependent fetches synthesized
88
// by PSOL during rewrites. Those are traced separately.
89
RequestTrace* root_trace_context() {
  // Hand out a raw pointer; the scoped_ptr member remains the holder.
  RequestTrace* const root = root_trace_context_.get();
  return root;
}
90
// Takes ownership of the given context.
91
void set_root_trace_context(RequestTrace* x);
93
// Creates a new RequestTrace associated with a request depending on the
94
// root user request; e.g., a subresource fetch for an HTML page.
96
// This implementation is a no-op. Subclasses should customize this based
97
// on their underlying tracing system. A few interface notes:
98
// - The caller is not responsible for releasing memory or managing the
99
// lifecycle of the RequestTrace.
100
// - A call to CreateDependentTraceContext() need not be matched by a call
101
// to ReleaseDependentTraceContext(). Cleanup should be automatic and
102
// managed by RequestContext subclass implementations.
103
virtual RequestTrace* CreateDependentTraceContext(const StringPiece& label) {
107
// Releases this object's reference to the given context and frees memory.
108
// Calls to CreateDependentTraceContext need not be matched by
109
// calls to this function. If a dependent trace span is not released when
110
// the request context reference count drops to zero, this object will clean
111
// all dependent traces.
113
// Note that automatic cleanup of dependent traces is provided for safety.
114
// To provide meaningful performance statistics, cleanup should be
115
// coupled with the completion of the event being traced.
117
// Subclasses should customize this based on their underlying tracing system.
118
virtual void ReleaseDependentTraceContext(RequestTrace* t);
120
// The log record for this request, created when the request context is.
121
virtual AbstractLogRecord* log_record();
123
// Determines whether this request is using the SPDY protocol.
124
bool using_spdy() const {
  return using_spdy_;
}
125
// Records whether this request is using the SPDY protocol.
void set_using_spdy(bool x) {
  using_spdy_ = x;
}
127
// The minimal private suffix for the hostname specified in this request.
128
// This should be calculated from the hostname by considering the list of
129
// public suffixes and including one additional component. So if a host is
130
// "a.b.c.d.e.f.g" and "e.f.g" is on the public suffix list then the minimal
131
// private suffix is "d.e.f.g".
133
// There are two ways of specifying the host -- with the Host header, or on
134
// the initial request line. The caller should make sure to look in both
137
// If a system doesn't want to fragment the cache by minimal private suffix,
138
// it may set value to the empty string.
139
const GoogleString& minimal_private_suffix() const {
140
return minimal_private_suffix_;
142
void set_minimal_private_suffix(StringPiece minimal_private_suffix) {
143
minimal_private_suffix.CopyToString(&minimal_private_suffix_);
146
// Indicates whether the request-headers tell us that a browser can
147
// render webp images.
148
void set_accepts_webp(bool x) {
  accepts_webp_ = x;
}
149
// Returns the flag last stored by set_accepts_webp().
bool accepts_webp() const {
  return accepts_webp_;
}
151
// Indicates the type of split html request.
152
SplitRequestType split_request_type() const {
153
return split_request_type_;
155
void set_split_request_type(SplitRequestType type) {
156
split_request_type_ = type;
159
int64 request_id() const {
162
void set_request_id(int64 x) {
166
const GoogleString& sticky_query_parameters_token() const {
167
return sticky_query_parameters_token_;
169
void set_sticky_query_parameters_token(StringPiece x) {
170
x.CopyToString(&sticky_query_parameters_token_);
173
// Authorizes a particular external domain to be fetched from. The caller of
174
// this method MUST ensure that the domain is not some internal site within
175
// the firewall/LAN hosting the server. Note that this doesn't affect
177
// TODO(morlovich): It's not clear that this is the appropriate mechanism
178
// for all the authorizations --- we may want to scope this to a request
180
void AddSessionAuthorizedFetchOrigin(const GoogleString& origin) {
181
session_authorized_fetch_origins_.insert(origin);
184
// Returns true for exactly the origins that were authorized for this
185
// particular session by calls to AddSessionAuthorizedFetchOrigin()
186
bool IsSessionAuthorizedFetchOrigin(const GoogleString& origin) const {
187
return session_authorized_fetch_origins_.find(origin)
188
!= session_authorized_fetch_origins_.end();
191
// Prepare the AbstractLogRecord for a subsequent call to WriteLog. This
192
// might include propagating information collected in the RequestContext,
193
// TimingInfo for example, to the underlying logging infrastructure.
194
void PrepareLogRecordForOutput();
196
// Writes the log for background rewriting to disk.
197
void WriteBackgroundRewriteLog();
199
// Return the log record for background rewrites. If it doesn't exist, create
201
AbstractLogRecord* GetBackgroundRewriteLog(
202
ThreadSystem* thread_system,
204
bool log_url_indices,
205
int max_rewrite_info_log_size);
207
// TimingInfo tracks various event timestamps over the lifetime of a request.
208
// The timeline looks (roughly) like the following, with the associated
210
// - Request Received/Context created: Init
212
// - Trigger: RequestStarted
214
// - Start Processing: ProcessingStarted
215
// - Lookup Properties?: PropertyCacheLookup*
217
// - Start parsing?: ParsingStarted
218
// - First byte sent to client: FirstByteReturned.
219
// - Finish: RequestFinished
220
// NOTE: This class is thread safe.
223
// Initialize the TimingInfo with the specified Timer. Sets init_ts_ to
224
// Timer::NowMs, from which GetElapsedMs is based.
225
// NOTE: Timer and mutex are not owned by TimingInfo.
226
TimingInfo(Timer* timer, AbstractMutex* mutex);
228
// This should be called when the request "starts", potentially after
229
// queuing. It denotes the request "start time", which "elapsed" timing
230
// values are relative to.
231
void RequestStarted();
233
// This should be called once the options are available and PSOL can start
234
// doing meaningful work.
235
void ProcessingStarted() {
  // Stamp the processing-start timestamp with the current time.
  SetToNow(&processing_start_ts_ms_);
}
237
// This should be called if/when HTML parsing begins.
238
void ParsingStarted() {
  // Stamp the parsing-start timestamp with the current time.
  SetToNow(&parsing_start_ts_ms_);
}
240
// Called when the first byte is sent back to the user.
241
void FirstByteReturned();
243
// This should be called when a PropertyCache lookup is initiated.
244
void PropertyCacheLookupStarted() {
245
SetToNow(&pcache_lookup_start_ts_ms_);
248
// This should be called when a PropertyCache lookup completes.
249
void PropertyCacheLookupFinished() {
  // Stamp the pcache-lookup-end timestamp with the current time.
  SetToNow(&pcache_lookup_end_ts_ms_);
}
251
// Called when the request is finished, i.e. the response has been sent to
253
void RequestFinished() {
  // Stamp the request-end timestamp with the current time.
  SetToNow(&end_ts_ms_);
}
255
// Fetch related timing events.
256
// Note: Only the first call to FetchStarted will have an effect,
257
// subsequent calls are silent no-ops.
258
// TODO(gee): Fetch and cache timing is busted for reconstructing resources
259
// with multiple inputs.
261
// Called when the response headers of the fetch have been received.
// NOTE(review): presumably stamps fetch_header_ts_ms_ while holding mu_ --
// confirm against the .cc file.
void FetchHeaderReceived();
262
// Called when the fetch has completed.
// NOTE(review): presumably stamps fetch_end_ts_ms_ while holding mu_ --
// confirm against the .cc file.
void FetchFinished();
264
// TODO(gee): I'd really prefer these to be start/end calls, but the
265
// WriteThroughCache design pattern will not allow for this.
266
// Records the HTTP cache latency, in milliseconds. The value is read back
// via GetHTTPCacheLatencyMs().
// NOTE(review): presumably stored in http_cache_latency_ms_ under mu_ --
// confirm against the .cc file.
void SetHTTPCacheLatencyMs(int64 latency_ms);
267
// Records the L2 HTTP cache latency, in milliseconds. The value is read back
// via GetL2HTTPCacheLatencyMs().
// NOTE(review): presumably stored in l2http_cache_latency_ms_ under mu_ --
// confirm against the .cc file.
void SetL2HTTPCacheLatencyMs(int64 latency_ms);
269
// Milliseconds since Init.
270
int64 GetElapsedMs() const;
272
// Milliseconds from request start to processing start.
273
bool GetTimeToStartProcessingMs(int64* elapsed_ms) const {
274
return GetTimeFromStart(processing_start_ts_ms_, elapsed_ms);
277
// Milliseconds spent "processing": end time - start time - fetch time.
278
// TODO(gee): This naming is somewhat misleading since it is from request
279
// start not processing start. Leaving as is for historical reasons, at
280
// least for the time being.
281
bool GetProcessingElapsedMs(int64* elapsed_ms) const;
283
// Milliseconds from request start to pcache lookup start.
284
bool GetTimeToPropertyCacheLookupStartMs(int64* elapsed_ms) const {
285
return GetTimeFromStart(pcache_lookup_start_ts_ms_, elapsed_ms);
288
// Milliseconds from request start to pcache lookup end.
289
bool GetTimeToPropertyCacheLookupEndMs(int64* elapsed_ms) const {
290
return GetTimeFromStart(pcache_lookup_end_ts_ms_, elapsed_ms);
293
// HTTP Cache latencies.
294
bool GetHTTPCacheLatencyMs(int64* latency_ms) const;
295
bool GetL2HTTPCacheLatencyMs(int64* latency_ms) const;
297
// Milliseconds from request start to fetch start.
298
bool GetTimeToStartFetchMs(int64* elapsed_ms) const;
300
// Milliseconds from fetch start to header received.
301
bool GetFetchHeaderLatencyMs(int64* latency_ms) const;
303
// Milliseconds from fetch start to fetch end.
304
bool GetFetchLatencyMs(int64* latency_ms) const;
306
// Milliseconds from receiving the request (Init) to responding with the
307
// first byte of data.
308
bool GetTimeToFirstByte(int64* latency_ms) const;
310
// Milliseconds from request start to parse start.
311
bool GetTimeToStartParseMs(int64* elapsed_ms) const {
312
return GetTimeFromStart(parsing_start_ts_ms_, elapsed_ms);
315
// Timestamp, in milliseconds, taken when this TimingInfo was initialized.
int64 init_ts_ms() const {
  return init_ts_ms_;
}
317
// Timestamp, in milliseconds, of the request start.
// NOTE(review): presumably negative until RequestStarted() has been called
// -- confirm against the .cc file.
int64 start_ts_ms() const {
  return start_ts_ms_;
}
322
// Set "ts_ms" to NowMs().
323
void SetToNow(int64* ts_ms) const;
325
// Sets "elapsed_ms" to "ts_ms" - start_ts_ms_ and returns true on success.
326
// Returns false if either start_ts_ms_ or "ts_ms" has not been set (< 0).
327
bool GetTimeFromStart(int64 ts_ms, int64* elapsed_ms) const;
332
// Event Timestamps. These should appear in (roughly) chronological order.
333
// These need not be protected by mu_ as they are only accessed by a single
334
// thread at any given time, and subsequent accesses are made through
335
// paths which are synchronized by other locks (pcache callback collector,
339
int64 processing_start_ts_ms_;
340
int64 pcache_lookup_start_ts_ms_;
341
int64 pcache_lookup_end_ts_ms_;
342
int64 parsing_start_ts_ms_;
345
AbstractMutex* mu_; // Not owned by TimingInfo.
346
// The following members are protected by mu_.
347
int64 fetch_start_ts_ms_;
348
int64 fetch_header_ts_ms_;
349
int64 fetch_end_ts_ms_;
350
int64 first_byte_ts_ms_;
353
int64 http_cache_latency_ms_;
354
int64 l2http_cache_latency_ms_;
356
DISALLOW_COPY_AND_ASSIGN(TimingInfo);
359
// Read-only access to the event timestamps tracked for this request.
const TimingInfo& timing_info() const {
  return timing_info_;
}
360
// Mutable access to the event timestamps tracked for this request. The
// pointer refers to a member of this object; the caller does not own it.
TimingInfo* mutable_timing_info() {
  TimingInfo* const info = &timing_info_;
  return info;
}
362
void set_options(const HttpOptions& options) {
363
DCHECK(!options_set_);
367
// This allows changing options already set.
368
// TODO(sligocki): It would be nice if we could make sure options are only
369
// set once. Is it worth the complexity to force that to be true?
370
void ResetOptions(const HttpOptions& options) {
374
const HttpOptions& options() const {
375
DCHECK(options_set_);
380
// TODO(gee): Fix this, it sucks.
381
// The default constructor will not create a LogRecord. Subclass constructors
382
// must do this explicitly.
383
RequestContext(const HttpOptions& options, AbstractMutex* mutex,
384
Timer* timer, AbstractLogRecord* log_record);
385
// Destructors in refcounted classes should be protected.
386
virtual ~RequestContext();
387
REFCOUNT_FRIEND_DECLARATION(RequestContext);
391
scoped_ptr<AbstractLogRecord> log_record_;
393
TimingInfo timing_info_;
395
// Logs tracing events associated with the root request.
396
scoped_ptr<RequestTrace> root_trace_context_;
398
// Log for recording background rewritings.
399
scoped_ptr<AbstractLogRecord> background_rewrite_log_record_;
401
// Origins added by AddSessionAuthorizedFetchOrigin() and consulted by
// IsSessionAuthorizedFetchOrigin().
StringSet session_authorized_fetch_origins_;
405
// Backing store for minimal_private_suffix() /
// set_minimal_private_suffix(); see the comment on the accessors.
GoogleString minimal_private_suffix_;
407
SplitRequestType split_request_type_;
410
// The token specified by query parameter or header that must match the
411
// configured value for options to be converted to cookies.
412
GoogleString sticky_query_parameters_token_;
415
HttpOptions options_;
417
DISALLOW_COPY_AND_ASSIGN(RequestContext);
420
} // namespace net_instaweb
422
#endif // NET_INSTAWEB_HTTP_PUBLIC_REQUEST_CONTEXT_H_