2
* Copyright 2012 Google Inc.
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
* you may not use this file except in compliance with the License.
6
* You may obtain a copy of the License at
8
* http://www.apache.org/licenses/LICENSE-2.0
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
17
// Author: jmarantz@google.com (Joshua Marantz)
20
// Implements a cache that can be used to store multiple properties on
21
// a key. This can be useful if the origin data associated with the
22
// key is not cacheable itself, but we think some properties of it
23
// might be reasonably stable. The cache can optionally track how
24
// frequently the properties change, so that when a property is read,
25
// the reader can gauge how stable it is. It also will manage
26
// time-based expirations of property-cache data (NYI).
28
// It supports properties with widely varying update frequencies,
29
// though these must be specified by the programmer by grouping
30
// objects of similar frequency in a Cohort.
33
// PropertyCache -- adds property semantics & grouping to the raw
34
// name/value Cache Interface.
36
// PropertyValue -- a single name/value pair with stability
37
// metadata, so that users of the PropertyValue can find out whether
38
// the property being measured appears to be stable.
40
// PropertyCache::Cohort -- labels a group of PropertyValues that
41
// are expected to have similar write-frequency. Properties are
42
// grouped together to minimize the number of cache lookups and
43
// puts. But we do not want to put all values into a single Cohort
44
// to avoid having fast-changing properties stomp on a slow-changing
45
// properties that share the same cache entry. Thus we initiate
46
// lookups for all Cohorts immediately on receiving a URL, but
47
// we write back each Cohort independently, under programmer control.
49
// The concurrent read of all Cohorts can be implemented on top of
50
// a batched cache lookup if the platform supports it, to reduce
53
// Note that the Cohort* is simply a label, and doesn't hold the
54
// properties or the data.
56
// PropertyPage -- this tracks all the PropertyValues in all the
57
// Cohorts for a key (e.g., an HTML page URL). Generally a
58
// PropertyPage must be read prior to being written, so that
59
// unmodified PropertyValues in a Cohort are not erased by updating
60
// a single Cohort property. The page executes a Read/Modify/Write
61
// sequence, but there is no locking. Multiple processes & threads
62
// are potentially writing entries to the cache simultaneously, so
63
// there can be races which might stomp on writes for individual
64
// properties in a Cohort.
66
// The value of aggregating multiple properties into a Cohort is
67
// to reduce the query-traffic on caches.
69
// Let's study an example for URL "http://..." with two Cohorts,
70
// "dom_metrics" and "render_data", where we expect dom_metrics to be
71
// updated very frequently. In dom_metrics we have (not that this is
72
// useful) "num_divs" and "num_a_tags". In "render_data" we have
73
// "critical_image_list" and "referenced_resources". When we get a
74
// request for "http://example.com/index.html" we'll make a batched
77
// "prop/http://example.com/index.html@dom_metrics".
78
// "prop/http://example.com/index.html@render_data".
80
// Within the values for
81
// "prop/http://example.com/index.html@dom_metrics"
82
// we'll have a 2-element array of Property values for "num_divs" and
83
// "num_a_tags". We'll write to that cache entry; possibly every
84
// time http://example.com/index.html is rewritten, so that we can track
85
// how stable the number of divs and a_tags is, so that rewriters that
86
// might wish to exploit advance knowledge of how many tags are going to
87
// be in the document can determine how reliable that information is.
89
// In the future we might track real-time & limit the frequency of
90
// updates for a given entry.
92
#ifndef NET_INSTAWEB_UTIL_PUBLIC_PROPERTY_CACHE_H_
93
#define NET_INSTAWEB_UTIL_PUBLIC_PROPERTY_CACHE_H_
98
#include "net/instaweb/http/public/request_context.h"
99
#include "net/instaweb/util/public/basictypes.h"
100
#include "net/instaweb/util/public/cache_interface.h"
101
#include "net/instaweb/util/public/ref_counted_ptr.h"
102
#include "net/instaweb/util/public/scoped_ptr.h"
103
#include "net/instaweb/util/public/string.h"
104
#include "net/instaweb/util/public/string_util.h"
106
namespace net_instaweb {
108
class AbstractLogRecord;
110
class AbstractPropertyStoreGetCallback;
111
class PropertyCacheValues;
112
class PropertyValueProtobuf;
119
typedef std::vector<PropertyPage*> PropertyPageStarVector;
121
// Holds the value & stability-metadata for a property.
122
class PropertyValue {
124
StringPiece value() const;
125
// Reports the valid_ flag, i.e. whether this PropertyValue currently holds a
// value.
bool has_value() const {
  return valid_;
}
127
// The timestamp of the last time this data was written (in
128
// milliseconds since 1970).
129
int64 write_timestamp_ms() const;
131
// Determines whether a read was completed. Thus was_read() can be true
132
// even if !has_value().
133
// Accessor for the was_read_ flag.
// NOTE(review): unlike has_value(), this accessor is not const-qualified;
// consider adding const so it can be called through a const PropertyValue.
bool was_read() {
  return was_read_;
}
135
// Determines whether this property is sufficiently stable to be considered
136
// useful. E.g. if 30% of the time a property is wrong, then it probably
137
// cannot be relied upon for making optimization decisions.
138
bool IsStable(int stable_hit_per_thousand_threshold) const;
140
// Returns true if the value has not changed for the last num_writes_unchanged
141
// writes and false otherwise.
142
bool IsRecentlyConstant(int num_writes_unchanged) const;
144
// Returns true if the index of least set bit for value is less than given
145
// index. The results are undefined when index is > 64.
146
static bool IsIndexOfLeastSetBitSmaller(uint64 value, int index);
149
friend class PropertyCache;
150
friend class PropertyPage;
152
// PropertyValues are managed by PropertyPage.
156
// Stores the flag that was_read() later reports.
void set_was_read(bool was_read) {
  was_read_ = was_read;
}
158
// Initializes the value based on a parsed protobuf from the physical cache.
159
void InitFromProtobuf(const PropertyValueProtobuf& value);
161
// Updates the value of a property, tracking stability so future
162
// Readers can get a sense of how stable it is. This is called from
163
// PropertyPage::UpdateValue only.
165
// Updating the value here buffers it in a protobuf, but does not commit
166
// it to the cache. PropertyPage::WriteCohort() is required to commit.
167
void SetValue(const StringPiece& value, int64 now_ms);
169
// Exposes the PropertyValueProtobuf held by proto_. The scoped_ptr keeps
// ownership, so the returned pointer must not be deleted by the caller.
PropertyValueProtobuf* protobuf() {
  return proto_.get();
}
171
scoped_ptr<PropertyValueProtobuf> proto_;
176
DISALLOW_COPY_AND_ASSIGN(PropertyValue);
179
// Adds property-semantics to a raw cache API.
180
class PropertyCache {
182
// A Cohort is a set of properties that update at roughly the
183
// same expected frequency. The PropertyCache object keeps track of
184
// the known set of Cohorts but does not actually keep any data for
185
// them. The data only arrives when we do a lookup.
188
explicit Cohort(StringPiece name) {
189
name.CopyToString(&name_);
191
// The cohort name stored in name_, returned by const reference (no copy).
const GoogleString& name() const {
  return name_;
}
196
DISALLOW_COPY_AND_ASSIGN(Cohort);
199
typedef std::vector<const Cohort*> CohortVector;
201
// Does not take ownership of the property_store, timer, stats, or threads
203
PropertyCache(PropertyStore* property_store,
206
ThreadSystem* threads);
209
// Reads all the PropertyValues in all the known Cohorts from
210
// cache, calling PropertyPage::Done when done. It is essential
211
// that the Cohorts are established prior to calling this function.
212
void Read(PropertyPage* property_page) const;
214
// Reads all the PropertyValues in the specified Cohorts from
215
// cache, calling PropertyPage::Done when done.
216
void ReadWithCohorts(const CohortVector& cohort_list,
217
PropertyPage* property_page) const;
219
// Returns all the cohorts from cache.
220
// Hands back a copy of cohort_list_ (the return type is by value, so the
// vector of Cohort pointers is copied on each call).
const CohortVector GetAllCohorts() const {
  return cohort_list_;
}
222
// Determines whether a value that was read is reasonably stable.
223
bool IsStable(const PropertyValue* property) const {
224
return property->IsStable(mutations_per_1000_writes_threshold_);
227
// Determines whether a value is expired relative to the specified TTL.
229
// It is an error (DCHECK) to call this method when !property->has_value().
231
// Note: we could also store the TTL in the cache-value itself. That would
232
// be useful if we derived the TTL from the data or other transients. But
233
// our envisioned usage has the TTL coming from a configuration that is
234
// available at read-time, so for now we just use that.
235
bool IsExpired(const PropertyValue* property_value, int64 ttl_ms) const;
237
void set_mutations_per_1000_writes_threshold(int x) {
238
mutations_per_1000_writes_threshold_ = x;
241
// Establishes a new Cohort for this property cache. Note that you must call
242
// InitCohortStats prior to calling AddCohort.
243
const Cohort* AddCohort(const StringPiece& cohort_name);
245
// Returns the specified Cohort* or NULL if not found. Cohorts must
246
// be established at startup time, via AddCohort before any pages
247
// are processed via Read & Write.
248
const Cohort* GetCohort(const StringPiece& cohort_name) const;
250
// Allows turning off all reads/writes with a switch. Writes to a
251
// disabled cache are ignored. Reads cause Done(false) to be called
253
// Stores the on/off switch that enabled() reports.
void set_enabled(bool x) {
  enabled_ = x;
}
255
// Indicates if the property cache is enabled.
256
// Current state of the switch set via set_enabled().
bool enabled() const {
  return enabled_;
}
258
// Initialize stats for the specified cohort.
259
static void InitCohortStats(const GoogleString& cohort,
260
Statistics* statistics);
262
// Creates stats prefix for the given cohort.
263
static GoogleString GetStatsPrefix(const GoogleString& cohort_name);
265
// Returns timer pointer.
266
// Accessor for timer_. Per the constructor comment, the cache does not take
// ownership of the timer.
Timer* timer() const {
  return timer_;
}
268
// Accessor for thread_system_. Per the constructor comment, the cache does
// not take ownership of the thread system.
ThreadSystem* thread_system() const {
  return thread_system_;
}
270
// Accessor for property_store_. Per the constructor comment, the cache does
// not take ownership of the store. Const-qualified so it can be called on a
// const PropertyCache, consistent with timer() and thread_system().
PropertyStore* property_store() const { return property_store_; }
272
// TODO(jmarantz): add some statistics tracking for stomps, stability, etc.
275
PropertyStore* property_store_;
278
ThreadSystem* thread_system_;
280
int mutations_per_1000_writes_threshold_;
281
typedef std::map<GoogleString, Cohort*> CohortMap;
283
// For MultiRead to scan all cohorts.
284
CohortVector cohort_list_;
287
DISALLOW_COPY_AND_ASSIGN(PropertyCache);
290
// Abstract interface for implementing a PropertyPage.
291
class AbstractPropertyPage {
293
virtual ~AbstractPropertyPage();
294
// Gets a property given the property name. The property can then be
295
// mutated, prior to the PropertyPage being written back to the cache.
296
virtual PropertyValue* GetProperty(
297
const PropertyCache::Cohort* cohort,
298
const StringPiece& property_name) = 0;
300
// Updates the value of a property, tracking stability & discarding
301
// writes when the existing data is more up-to-date.
302
virtual void UpdateValue(
303
const PropertyCache::Cohort* cohort, const StringPiece& property_name,
304
const StringPiece& value) = 0;
306
// Updates a Cohort of properties into the cache. It is a
307
// programming error (dcheck-fail) to Write a PropertyPage that
308
// was not read first. It is fine to Write after a failed Read.
309
virtual void WriteCohort(const PropertyCache::Cohort* cohort) = 0;
311
// This function returns the cache state for a given cohort.
312
virtual CacheInterface::KeyState GetCacheState(
313
const PropertyCache::Cohort* cohort) = 0;
315
// Deletes a property given the property name.
316
virtual void DeleteProperty(const PropertyCache::Cohort* cohort,
317
const StringPiece& property_name) = 0;
321
// Holds the property values associated with a single key. See more
322
// extensive comment for PropertyPage above.
323
class PropertyPage : public AbstractPropertyPage {
325
// The cache type associated with this callback.
328
kPropertyCacheFallbackPage,
329
kDevicePropertyCachePage,
332
virtual ~PropertyPage();
334
// Gets a property given the property name. The property can then be
335
// mutated, prior to the PropertyPage being written back to the cache.
337
// The returned PropertyValue object is owned by the PropertyPage and
338
// should not be deleted by the caller.
340
// This function creates the PropertyValue if it didn't already
341
// exist, either from a previous call or a cache-read.
343
// It is a programming error to call GetProperty on a PropertyPage
344
// that has not yet been read.
346
// Note that all the properties in all the Cohorts on a Page are read
347
// via PropertyCache::Read. This allows cache implementations that support
348
// batching to do so on the read. However, properties are written back to
349
// cache one Cohort at a time, via PropertyCache::WriteCohort.
350
virtual PropertyValue* GetProperty(const PropertyCache::Cohort* cohort,
351
const StringPiece& property_name);
353
// Updates the value of a property, tracking stability & discarding
354
// writes when the existing data is more up-to-date.
355
virtual void UpdateValue(
356
const PropertyCache::Cohort* cohort, const StringPiece& property_name,
357
const StringPiece& value);
359
// Updates a Cohort of properties into the cache. It is a
360
// programming error (dcheck-fail) to Write a PropertyPage that
361
// was not read first. It is fine to Write after a failed Read.
363
// Even if a PropertyValue was not changed since it was read, Write
364
// should be called periodically to update stability metrics.
365
virtual void WriteCohort(const PropertyCache::Cohort* cohort);
367
// This function returns the cache state for a given cohort.
369
// It is a programming error to call GetCacheState on a PropertyPage
370
// that has not yet been read.
371
CacheInterface::KeyState GetCacheState(const PropertyCache::Cohort* cohort);
373
// This function sets the cache state for a given cohort. This is used by test
374
// code and CacheCallback to populate the state.
375
void SetCacheState(const PropertyCache::Cohort* cohort,
376
CacheInterface::KeyState x);
378
// Deletes a property given the property name.
380
// This function deletes the PropertyValue if it already exists, otherwise
381
// it is a no-op function.
383
// It is a programming error to call DeleteProperty on a PropertyPage
384
// that has not yet been read.
386
// This function actually does not commit it to cache.
387
void DeleteProperty(const PropertyCache::Cohort* cohort,
388
const StringPiece& property_name);
390
AbstractLogRecord* log_record() {
391
return request_context_->log_record();
394
// Read the property page from cache.
395
void Read(const PropertyCache::CohortVector& cohort_list);
397
// Abort the reading of PropertyPage.
400
// Called immediately after the underlying cache lookup is done, from
401
// PropertyCache::CacheInterfaceCallback::Done().
402
// Default validity hook: accepts every entry, ignoring write_timestamp_ms.
// Declared virtual so a subclass can override it.
virtual bool IsCacheValid(int64 write_timestamp_ms) const {
  return true;
}
404
// Populate PropertyCacheValues to the respective cohort in PropertyPage.
405
void AddValueFromProtobuf(const PropertyCache::Cohort* cohort,
406
const PropertyValueProtobuf& proto);
408
// Returns the type of the page.
409
// Accessor for page_type_. Const-qualified because it only reads a member,
// so it can be called through a const PropertyPage.
PageType page_type() const { return page_type_; }
411
// Returns true if cohort present in the PropertyPage.
412
bool IsCohortPresent(const PropertyCache::Cohort* cohort);
414
// Finishes lookup for all the cohorts and call PropertyPage::Done() as fast
416
void FastFinishLookup();
418
// Generates PropertyCacheValues object from all the properties in the given
420
// Returns false if the cohort does not exist in the PropertyPage or no
421
// property is present in the cohort.
422
bool EncodePropertyCacheValues(const PropertyCache::Cohort* cohort,
423
PropertyCacheValues* values);
426
// The Page takes ownership of the mutex.
427
// TODO(pulkitg): Instead of passing full PropertyCache object, just pass
428
// objects which PropertyPage needs.
429
PropertyPage(PageType page_type,
431
StringPiece options_signature_hash,
432
StringPiece cache_key_suffix,
433
const RequestContextPtr& request_context,
434
AbstractMutex* mutex,
435
PropertyCache* property_cache);
437
// Called as a result of PropertyCache::Read when the data is available.
438
virtual void Done(bool success) = 0;
441
void SetupCohorts(const PropertyCache::CohortVector& cohort_list);
443
// Returns true if for the given cohort any property is deleted.
444
bool HasPropertyValueDeleted(const PropertyCache::Cohort* cohort);
446
void CallDone(bool success) {
451
typedef std::map<GoogleString, PropertyValue*> PropertyMap;
453
struct PropertyMapStruct {
454
explicit PropertyMapStruct(AbstractLogRecord* log)
455
: has_deleted_property(false),
459
bool has_deleted_property;
460
AbstractLogRecord* log_record;
461
CacheInterface::KeyState cache_state;
464
typedef std::map<const PropertyCache::Cohort*, PropertyMapStruct*>
466
CohortDataMap cohort_data_map_;
467
scoped_ptr<AbstractMutex> mutex_;
469
GoogleString options_signature_hash_;
470
GoogleString cache_key_suffix_;
471
RequestContextPtr request_context_;
473
PropertyCache* property_cache_; // Owned by the caller.
474
// AbstractPropertyStoreCallback is safe to use until
475
// AbstractPropertyStoreCallback::DeleteWhenDone() which is called in
476
// PropertyPage destructor, so property_store_callback_ lives longer than
478
AbstractPropertyStoreGetCallback* property_store_callback_;
481
DISALLOW_COPY_AND_ASSIGN(PropertyPage);
484
} // namespace net_instaweb
486
#endif // NET_INSTAWEB_UTIL_PUBLIC_PROPERTY_CACHE_H_