2
* Copyright 2012 Google Inc.
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
* you may not use this file except in compliance with the License.
6
* You may obtain a copy of the License at
8
* http://www.apache.org/licenses/LICENSE-2.0
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
17
// Author: marq@google.com (Mark Cogan)
19
#ifndef NET_INSTAWEB_HTTP_PUBLIC_LOG_RECORD_H_
20
#define NET_INSTAWEB_HTTP_PUBLIC_LOG_RECORD_H_
23
// TODO(gee): Should this be in public? Do we really care?
24
#include "net/instaweb/util/enums.pb.h"
25
#include "net/instaweb/http/public/logging_proto.h"
26
#include "net/instaweb/http/public/logging_proto_impl.h"
27
// TODO(gee): Hmm, this sort of sucks.
28
#include "net/instaweb/http/public/request_context.h" // TimingInfo
29
#include "net/instaweb/rewriter/image_types.pb.h"
30
#include "net/instaweb/util/public/basictypes.h"
31
#include "net/instaweb/util/public/gtest_prod.h"
32
#include "net/instaweb/util/public/scoped_ptr.h"
33
#include "net/instaweb/util/public/string.h"
34
#include "net/instaweb/util/public/string_util.h"
36
// If your .cc file needs to use the types declared in logging_proto.h,
37
// you must also include net/instaweb/http/public/logging_proto_impl.h
38
// See that header file for an explanation of why this is necessary.
41
namespace net_instaweb {
45
// This class is a wrapper around a protobuf used to collect logging
46
// information. It also provides a simple aggregation mechanism for
47
// collecting the ids of applied rewriters.
49
// Care and feeding of log records:
50
// (1) All logging must be done through log records. No class should
51
// have static members of any logging proto class. Log records
52
// can either create the logging protos, or will take ownership of them.
53
// (2) All access and manipulation of log data must be guarded by the log
54
// record's mutex. Commonly repeated logging operations should be factored
55
// into functions in this class (and be so guarded therein).
56
// (3) In most cases, log records should be created and owned by request
59
// Subclasses may wrap some other type of protobuf; they must still provide
60
// access to a LoggingInfo instance, however.
61
class AbstractLogRecord {
63
// Construct an AbstractLogRecord with a new LoggingInfo proto and caller-
64
// supplied mutex. This class takes ownership of the mutex.
65
explicit AbstractLogRecord(AbstractMutex* mutex);
66
virtual ~AbstractLogRecord();
68
// For compatibility with older logging methods, returns a comma-joined string
69
// concatenating the sorted coalesced rewriter ids of APPLIED_OK entries in
70
// the rewriter_info array. Each id will appear once in the string if any
71
// number of successful rewrites for that id have been logged.
72
GoogleString AppliedRewritersString();
74
// Create a new rewriter logging submessage for |rewriter_id|, returning a
75
// pointer to it for later access. Note that this can return NULL if the
76
// size of rewriter_info has grown too large. It is the caller's
77
// responsibility to handle this safely.
78
RewriterInfo* NewRewriterInfo(const char* rewriter_id);
80
// Creates a new rewriter logging submessage for |rewriter_id|,
81
// and sets its status.
82
void SetRewriterLoggingStatus(
83
const char* rewriter_id, RewriterApplication::Status status);
85
// Creates a new rewriter logging submessage for |rewriter_id|,
86
// sets status and the url index.
// This overload is a thin inline wrapper: it forwards all three arguments to
// SetRewriterLoggingStatusHelper() and drops the RewriterInfo* that the helper
// returns.
87
void SetRewriterLoggingStatus(
88
const char* rewriter_id, const GoogleString& url,
89
RewriterApplication::Status status) {
90
// Delegate to the shared helper; its return value is not used here because
// this overload returns void.
SetRewriterLoggingStatusHelper(rewriter_id, url, status);
93
// Log the HTML level status for a filter. This should be called only once
94
// per filter, at the point where it is determined the filter is either
96
void LogRewriterHtmlStatus(const char* rewriter_id,
97
RewriterHtmlApplication::Status status);
99
// Log the status of a rewriter application on a resource.
100
// TODO(gee): I'd really prefer rewriter_id was an enum.
101
void LogRewriterApplicationStatus(
102
const char* rewriter_id, RewriterApplication::Status status);
104
// TODO(gee): Deprecate raw access to proto.
105
// Return the LoggingInfo proto wrapped by this class. Calling code must
106
// guard any reads and writes to this using mutex().
107
virtual LoggingInfo* logging_info() = 0;
109
// TODO(huibao): Rename LogImageBackgroundRewriteActivity() to make it clear
110
// that it will log even when the rewriting finishes in the line-of-request.
112
// Log image rewriting activity, which may not finish when the request
113
// processing is done. The outcome is a new log record with request type
114
// set to "BACKGROUND_REWRITE".
115
void LogImageBackgroundRewriteActivity(
116
RewriterApplication::Status status,
117
const GoogleString& url,
121
bool is_recompressed,
122
ImageType original_image_type,
123
ImageType optimized_image_type,
127
bool is_resized_using_rendered_dimensions,
131
// Atomically sets is_html_response in the logging proto.
132
void SetIsHtml(bool is_html);
134
// Updates the cohort info to set the found to true for the given
136
virtual void AddFoundPropertyToCohortInfo(
137
int page_type, const GoogleString& cohort,
138
const GoogleString& property) = 0;
140
// Updates the cohort info to set the retrieved to true for the given
142
virtual void AddRetrievedPropertyToCohortInfo(
143
int page_type, const GoogleString& cohort,
144
const GoogleString& property) = 0;
146
// Updates the cohort info to update the cache key state.
147
virtual void SetCacheStatusForCohortInfo(
148
int page_type, const GoogleString& cohort,
149
bool found, int key_state) = 0;
151
// Mutex-guarded log mutation convenience methods. The rule of thumb is that
152
// if a single-field update to a logging proto occurs multiple times, it
153
// should be factored out into a method on this class.
154
void SetBlinkRequestFlow(int flow);
155
void SetCacheHtmlRequestFlow(int flow);
156
void SetIsOriginalResourceCacheable(bool cacheable);
158
// Override SetBlinkInfoImpl if necessary.
159
void SetBlinkInfo(const GoogleString& user_agent);
161
// Override SetCacheHtmlLoggingInfoImpl if necessary.
162
void SetCacheHtmlLoggingInfo(const GoogleString& user_agent);
164
// Log a RewriterInfo for the flush early filter.
165
void LogFlushEarlyActivity(
167
const GoogleString& url,
168
RewriterApplication::Status status,
169
FlushEarlyResourceInfo::ContentType content_type,
170
FlushEarlyResourceInfo::ResourceType resource_type,
171
bool is_bandwidth_affected,
174
// Log a RewriterInfo for the image rewrite filter.
175
virtual void LogImageRewriteActivity(
177
const GoogleString& url,
178
RewriterApplication::Status status,
179
bool is_image_inlined,
180
bool is_critical_image,
181
bool is_url_rewritten,
183
bool try_low_res_src_insertion,
184
bool low_res_src_inserted,
185
ImageType low_res_image_type,
186
int low_res_data_size) = 0;
188
// TODO(gee): Change the callsites.
189
void LogJsDisableFilter(const char* id, bool has_pagespeed_no_defer);
191
void LogLazyloadFilter(const char* id,
192
RewriterApplication::Status status,
193
bool is_blacklisted, bool is_critical);
195
// Mutex-guarded log-writing operations. Derived classes should override
196
// *Impl methods. Returns false if the log write attempt failed.
199
// Return the mutex associated with this instance. Calling code should
200
// guard reads and writes of AbstractLogRecords using this mutex.
// The pointer comes from mutex_.get(), so the mutex_ scoped_ptr keeps
// ownership; callers must not delete the returned object.
201
AbstractMutex* mutex() { return mutex_.get(); }
203
// Sets the maximum number of RewriterInfo submessages that can accumulate in
204
// the LoggingInfo proto wrapped by this class.
205
void SetRewriterInfoMaxSize(int x);
207
// Sets whether urls should be logged. This could potentially generate a lot
208
// of logs data, so this should be switched on only for debugging.
209
void SetAllowLoggingUrls(bool allow_logging_urls);
211
// Sets whether URL indices should be logged for every rewriter application
213
void SetLogUrlIndices(bool log_url_indices);
215
// Sets the number of critical images in HTML.
216
void SetNumHtmlCriticalImages(int num_html_critical_images);
218
// Sets the number of critical images in CSS.
219
void SetNumCssCriticalImages(int num_css_critical_images);
221
// Sets image related statistics.
222
virtual void SetImageStats(int num_img_tags, int num_inlined_img_tags,
223
int num_critical_images_used) = 0;
225
// Sets the number of external resources on an HTML page.
226
virtual void SetResourceCounts(int num_external_css, int num_scripts) = 0;
228
// Sets critical CSS related byte counts (all uncompressed).
229
void SetCriticalCssInfo(int critical_inlined_bytes,
230
int original_external_bytes,
233
// Log information related to the user agent and device making the request.
234
virtual void LogDeviceInfo(
236
bool supports_image_inlining,
237
bool supports_lazyload_images,
238
bool supports_critical_images_beacon,
239
bool supports_deferjs,
240
bool supports_webp_in_place,
241
bool supports_webp_rewritten_urls,
242
bool supports_webplossless_alpha,
244
bool supports_split_html,
245
bool can_preload_resources) = 0;
247
// Log whether the request is an XmlHttpRequest.
248
void LogIsXhr(bool is_xhr);
250
// Sets initial information for background rewrite log.
251
virtual void SetBackgroundRewriteInfo(
253
bool log_url_indices,
254
int max_rewrite_info_log_size);
256
// Set timing information in the logging implementation.
// The default body here is empty, so |timing_info| is ignored unless a
// subclass overrides this method.
257
virtual void SetTimingInfo(const RequestContext::TimingInfo& timing_info) {}
260
// Implements setting Blink specific log information; base impl is a no-op.
// NOTE(review): the comment on SetBlinkInfo() names this method as the hook to
// override; presumably SetBlinkInfo() calls it -- confirm in the .cc file.
261
virtual void SetBlinkInfoImpl(const GoogleString& user_agent) {}
263
// Implements setting Cache Html specific log information; the default body
// here is empty, so |user_agent| is ignored unless a subclass overrides it.
// NOTE(review): presumably invoked by SetCacheHtmlLoggingInfo() -- confirm in
// the .cc file.
264
virtual void SetCacheHtmlLoggingInfoImpl(const GoogleString& user_agent) {}
265
// Implements writing a log. Pure virtual in this class, so concrete
// subclasses must define it. Returns false if
267
virtual bool WriteLogImpl() = 0;
269
// Helper function which creates a new rewriter logging submessage for
270
// |rewriter_id|, sets status and the url index. It is intended to be called
271
// only inside logging code.
272
RewriterInfo* SetRewriterLoggingStatusHelper(
273
const char* rewriter_id, const GoogleString& url,
274
RewriterApplication::Status status);
277
// Called on construction.
281
const GoogleString& url, RewriteResourceInfo* rewrite_resource_info);
283
// Fill LoggingInfo proto with information collected from LogRewriterStatus
285
void PopulateRewriterStatusCounts();
287
// This must be set. Implementation constructors must minimally default this
289
scoped_ptr<AbstractMutex> mutex_;
291
// The maximum number of rewrite info logs stored for a single request.
292
int rewriter_info_max_size_;
294
// Allow urls to be logged.
295
bool allow_logging_urls_;
297
// Allow url indices to be logged.
298
bool log_url_indices_;
300
// Map which maintains the url to index for logging urls.
301
StringIntMap url_index_map_;
303
// Stats collected from calls to LogRewrite.
304
typedef std::map<RewriterApplication::Status, int> RewriteStatusCountMap;
305
// Per-entry statistics record stored as the mapped value of RewriterStatsMap
// (keyed by GoogleString; presumably the rewriter id -- confirm in the .cc).
struct RewriterStatsInternal {
306
// HTML-level status for this entry; starts as UNKNOWN_STATUS (see the
// constructor below).
RewriterHtmlApplication::Status html_status;
308
// RewriterApplication::Status -> count.
309
RewriteStatusCountMap status_counts;
311
// Default constructor: marks the HTML status as unknown and leaves
// status_counts default-constructed (an empty map).
RewriterStatsInternal()
312
: html_status(RewriterHtmlApplication::UNKNOWN_STATUS) {}
314
typedef std::map<GoogleString, RewriterStatsInternal> RewriterStatsMap;
315
RewriterStatsMap rewriter_stats_;
317
DISALLOW_COPY_AND_ASSIGN(AbstractLogRecord);
320
// Simple AbstractLogRecord implementation which owns a LoggingInfo protobuf.
321
// Concrete AbstractLogRecord whose LoggingInfo is held in a scoped_ptr member.
// Every pure-virtual logging hook from the base class that is defined inline
// below has an empty body, i.e. it accepts its arguments and records nothing.
class LogRecord : public AbstractLogRecord {
323
// Takes the mutex that guards this record; defined out of line.
explicit LogRecord(AbstractMutex* mutex);
325
virtual ~LogRecord();
327
// Returns the raw pointer held by logging_info_; the scoped_ptr keeps
// ownership.
LoggingInfo* logging_info() { return logging_info_.get(); }
329
// No-op: image statistics are not recorded by this implementation.
virtual void SetImageStats(int num_img_tags, int num_inlined_img_tags,
330
int num_critical_images_used) {}
332
// No-op: resource counts are not recorded by this implementation.
virtual void SetResourceCounts(int num_external_css, int num_scripts) {}
334
// No-op: cohort "found" information is not recorded by this implementation.
virtual void AddFoundPropertyToCohortInfo(
335
int page_type, const GoogleString& cohort,
336
const GoogleString& property) {}
338
// No-op: cohort "retrieved" information is not recorded by this
// implementation.
virtual void AddRetrievedPropertyToCohortInfo(
339
int page_type, const GoogleString& cohort,
340
const GoogleString& property) {}
342
// No-op: cohort cache status is not recorded by this implementation.
void SetCacheStatusForCohortInfo(
343
int page_type, const GoogleString& cohort, bool found, int key_state) {}
345
// No-op: image rewrite activity is not recorded by this implementation.
virtual void LogImageRewriteActivity(
347
const GoogleString& url,
348
RewriterApplication::Status status,
349
bool is_image_inlined,
350
bool is_critical_image,
351
bool is_url_rewritten,
353
bool try_low_res_src_insertion,
354
bool low_res_src_inserted,
355
ImageType low_res_image_type,
356
int low_res_data_size) {}
358
// No-op: device information is not recorded by this implementation.
virtual void LogDeviceInfo(
360
bool supports_image_inlining,
361
bool supports_lazyload_images,
362
bool supports_critical_images_beacon,
363
bool supports_deferjs,
364
bool supports_webp_in_place,
365
bool supports_webp_rewritten_urls,
366
bool supports_webplossless_alpha,
368
bool supports_split_html,
369
bool can_preload_resources) {}
371
// Performs no write and always reports success.
bool WriteLogImpl() { return true; }
374
// The LoggingInfo proto exposed through logging_info(); owned by this object.
scoped_ptr<LoggingInfo> logging_info_;
377
// TODO(gee): I'm pretty sure the functionality can be provided by the previous
378
// ALR implementation, but for the time being leave this around to make the
379
// refactoring as limited as possible.
380
// AbstractLogRecord that copies logging_info() when in WriteLog. This should
381
// be useful for testing any logging flow where an owned subordinate log record
383
// LogRecord variant that, when the log is written, copies its own LoggingInfo
// into a caller-provided LoggingInfo that it does not own.
class CopyOnWriteLogRecord : public LogRecord {
385
// |logging_mutex| is handed to the LogRecord base constructor.
// |logging_info| is the destination proto for WriteLogImpl(); only the pointer
// is stored, so the pointee must remain valid whenever WriteLogImpl() runs.
CopyOnWriteLogRecord(AbstractMutex* logging_mutex, LoggingInfo* logging_info)
386
: LogRecord(logging_mutex), logging_info_copy_(logging_info) {}
389
// Overwrites *logging_info_copy_ with the current contents of this record's
// logging_info() via CopyFrom().
virtual bool WriteLogImpl() {
390
logging_info_copy_->CopyFrom(*logging_info());
395
LoggingInfo* logging_info_copy_; // Not owned by us.
397
DISALLOW_COPY_AND_ASSIGN(CopyOnWriteLogRecord);
400
} // namespace net_instaweb
402
#endif // NET_INSTAWEB_HTTP_PUBLIC_LOG_RECORD_H_