2
* Copyright 2010 Google Inc.
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
* you may not use this file except in compliance with the License.
6
* You may obtain a copy of the License at
8
* http://www.apache.org/licenses/LICENSE-2.0
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
17
// Author: jmarantz@google.com (Joshua Marantz)
19
#ifndef PAGESPEED_KERNEL_CACHE_LRU_CACHE_BASE_H_
20
#define PAGESPEED_KERNEL_CACHE_LRU_CACHE_BASE_H_
24
#include <utility> // for pair
26
#include "base/logging.h"
27
#include "pagespeed/kernel/base/basictypes.h"
28
#include "pagespeed/kernel/base/rde_hash_map.h"
29
#include "pagespeed/kernel/base/string.h"
30
#include "pagespeed/kernel/base/string_hash.h"
33
namespace net_instaweb {
35
// Implements a general purpose in-memory least-recently used (LRU)
36
// cache, using strings as keys and arbitrary values as objects.
37
// This implementation is not thread-safe, and must be combined with
38
// an externally managed mutex to make it so.
40
// This is templated on ValueType, which holds the value, and on ValueHelper,
43
// // Computes the size of a value. Used to track resource consumption.
44
// size_t size(const ValueType&) const;
46
// // Determines whether two values are equal.
47
// bool Equal(const ValueType& a, const ValueType& b) const;
49
// // Called when objects are evicted. Note that destruction does
50
// // not imply eviction.
51
// void EvictNotify(const ValueType&);
53
// // Determines whether a new_value should supercede old_value on a Put.
54
// bool ShouldReplace(const ValueType& old_value,
55
// const ValueType& new_value) const;
57
// ValueType must support copy-construction and assign-by-value.
58
template<class ValueType, class ValueHelper>
60
typedef std::pair<GoogleString, ValueType> KeyValuePair;
61
typedef std::list<KeyValuePair*> EntryList;
62
// STL guarantees lifetime of list itererators as long as the node is in list.
63
typedef typename EntryList::iterator ListNode;
65
typedef rde::hash_map<GoogleString, ListNode, CasePreserveStringHash> Map;
70
explicit Iterator(const typename EntryList::const_reverse_iterator& iter)
74
void operator++() { ++iter_; }
75
bool operator==(const Iterator& src) const { return iter_ == src.iter_; }
76
bool operator!=(const Iterator& src) const { return iter_ != src.iter_; }
78
const GoogleString& Key() const {
79
const KeyValuePair* key_value_pair = *iter_;
80
return key_value_pair->first;
82
const ValueType& Value() const {
83
const KeyValuePair* key_value_pair = *iter_;
84
return key_value_pair->second;
88
typename EntryList::const_reverse_iterator iter_;
90
// Implicit copy and assign are OK.
93
LRUCacheBase(size_t max_size, ValueHelper* value_helper)
94
: max_bytes_in_cache_(max_size),
95
current_bytes_in_cache_(0),
96
value_helper_(value_helper) {
103
// Resets the max size in the cache. This does not take effect immediately;
104
// e.g. if you are shrinking the cache size, this call will not evict
105
// anything. Only when something new is put into the cache will we evict.
106
void set_max_bytes_in_cache(size_t max_size) {
107
max_bytes_in_cache_ = max_size;
110
// Returns a pointer to the stored value, or NULL if not found, freshening
111
// the entry in the lru-list. Note: this pointer is safe to use until the
112
// next call to Put or Delete in the cache.
113
ValueType* GetFreshen(const GoogleString& key) {
114
ValueType* value = NULL;
115
typename Map::iterator p = map_.find(key);
116
if (p != map_.end()) {
117
ListNode cell = p->second;
118
KeyValuePair* key_value = *cell;
119
p->second = Freshen(cell);
120
// Note: it's safe to assume the list iterator will remain valid so that
121
// the caller can do what's necessary with the pointer.
122
// http://stackoverflow.com/questions/759274/
123
// what-is-the-lifetime-and-validity-of-c-iterators
124
value = &key_value->second;
132
ValueType* GetNoFreshen(const GoogleString& key) const {
133
ValueType* value = NULL;
134
typename Map::const_iterator p = map_.find(key);
135
if (p != map_.end()) {
136
ListNode cell = p->second;
137
KeyValuePair* key_value = *cell;
138
// See the above comment about stl::list::iterator lifetime.
139
value = &key_value->second;
147
// Puts an object into the cache. The value is copied using the assignment
149
void Put(const GoogleString& key, ValueType* new_value) {
150
// Just do one map operation, calling the awkward 'insert' which returns
151
// a pair. The bool indicates whether a new value was inserted, and the
152
// iterator provides access to the element, whether it's new or old.
154
// If the key is already in the map, this will give us access to the value
155
// cell, and the uninitialized cell will not be used.
157
std::pair<typename Map::iterator, bool> iter_found =
158
map_.insert(typename Map::value_type(key, cell));
159
bool found = !iter_found.second;
160
typename Map::iterator map_iter = iter_found.first;
161
bool need_to_insert = true;
163
cell = map_iter->second;
164
KeyValuePair* key_value = *cell;
166
// Protect the element that we are rewriting by erasing
167
// it from the entry_list prior to calling EvictIfNecessary,
168
// which can't find it if it isn't in the list.
169
if (!value_helper_->ShouldReplace(key_value->second, *new_value)) {
170
need_to_insert = false;
172
if (value_helper_->Equal(*new_value, key_value->second)) {
173
map_iter->second = Freshen(cell);
174
need_to_insert = false;
175
++num_identical_reinserts_;
178
CHECK_GE(current_bytes_in_cache_, EntrySize(key_value));
179
current_bytes_in_cache_ -= EntrySize(key_value);
181
lru_ordered_list_.erase(cell);
186
if (need_to_insert) {
187
// At this point, if we were doing a replacement, then the value
188
// is removed from the list, so we can treat replacements and new
189
// insertions the same way. In both cases, the new key is in the map
190
// as a result of the call to map_.insert above.
192
if (EvictIfNecessary(key.size() + value_helper_->size(*new_value))) {
193
// The new value fits. Put it in the LRU-list.
194
KeyValuePair* kvp = new KeyValuePair(map_iter->first, *new_value);
195
lru_ordered_list_.push_front(kvp);
196
map_iter->second = lru_ordered_list_.begin();
199
// The new value was too big to fit. Remove it from the map.
200
// it's already removed from the list. We have failed. We
201
// could potentially log this somewhere or keep a stat.
202
map_.erase(map_iter);
207
void Delete(const GoogleString& key) {
208
typename Map::iterator p = map_.find(key);
209
if (p != map_.end()) {
210
ListNode cell = p->second;
211
KeyValuePair* key_value = *cell;
212
lru_ordered_list_.erase(cell);
213
CHECK_GE(current_bytes_in_cache_, EntrySize(key_value));
214
current_bytes_in_cache_ -= EntrySize(key_value);
219
// TODO(jmarantz): count number of misses on a 'delete' request?
223
void MergeStats(const LRUCacheBase& src) {
224
current_bytes_in_cache_ += src.current_bytes_in_cache_;
225
num_evictions_ += src.num_evictions_;
226
num_hits_ += src.num_hits_;
227
num_misses_ += src.num_misses_;
228
num_inserts_ += src.num_inserts_;
229
num_identical_reinserts_ += src.num_identical_reinserts_;
230
num_deletes_ += src.num_deletes_;
233
// Total size in bytes of keys and values stored.
234
size_t size_bytes() const { return current_bytes_in_cache_; }
237
size_t max_bytes_in_cache() const { return max_bytes_in_cache_; }
239
// Number of elements stored
240
size_t num_elements() const { return map_.size(); }
242
size_t num_evictions() const { return num_evictions_; }
243
size_t num_hits() const { return num_hits_; }
244
size_t num_misses() const { return num_misses_; }
245
size_t num_inserts() const { return num_inserts_; }
246
size_t num_identical_reinserts() const { return num_identical_reinserts_; }
247
size_t num_deletes() const { return num_deletes_; }
249
// Sanity check the cache data structures.
251
CHECK_EQ(static_cast<size_t>(map_.size()), lru_ordered_list_.size());
253
size_t bytes_used = 0;
255
// Walk forward through the list, making sure the map and list elements
256
// point to each other correctly.
257
for (ListNode cell = lru_ordered_list_.begin(), e = lru_ordered_list_.end();
258
cell != e; ++cell, ++count) {
259
KeyValuePair* key_value = *cell;
260
typename Map::iterator map_iter = map_.find(key_value->first);
261
CHECK(map_iter != map_.end());
262
CHECK(map_iter->first == key_value->first);
263
CHECK(map_iter->second == cell);
264
bytes_used += EntrySize(key_value);
266
CHECK_EQ(count, static_cast<size_t>(map_.size()));
267
CHECK_EQ(current_bytes_in_cache_, bytes_used);
268
CHECK_LE(current_bytes_in_cache_, max_bytes_in_cache_);
270
// Walk backward through the list, making sure it's coherent as well.
272
for (typename EntryList::reverse_iterator cell = lru_ordered_list_.rbegin(),
273
e = lru_ordered_list_.rend(); cell != e; ++cell, ++count) {
275
CHECK_EQ(count, static_cast<size_t>(map_.size()));
278
// Clear the entire cache. Used primarily for testing. Note that this
279
// will not clear the stats, however it will update current_bytes_in_cache_.
281
current_bytes_in_cache_ = 0;
283
for (ListNode p = lru_ordered_list_.begin(), e = lru_ordered_list_.end();
285
KeyValuePair* key_value = *p;
288
lru_ordered_list_.clear();
292
// Clear the stats -- note that this will not clear the content.
298
num_identical_reinserts_ = 0;
302
// Iterators for walking cache entires from oldest to youngest.
303
Iterator Begin() const { return Iterator(lru_ordered_list_.rbegin()); }
304
Iterator End() const { return Iterator(lru_ordered_list_.rend()); }
307
// TODO(jmarantz): consider accounting for overhead for list cells, map
309
size_t EntrySize(KeyValuePair* kvp) const {
310
return kvp->first.size() + value_helper_->size(kvp->second);
313
ListNode Freshen(ListNode cell) {
314
if (cell != lru_ordered_list_.begin()) {
315
lru_ordered_list_.splice(lru_ordered_list_.begin(),
320
return lru_ordered_list_.begin();
323
bool EvictIfNecessary(size_t bytes_needed) {
325
if (bytes_needed < max_bytes_in_cache_) {
326
while (bytes_needed + current_bytes_in_cache_ > max_bytes_in_cache_) {
327
KeyValuePair* key_value = lru_ordered_list_.back();
328
lru_ordered_list_.pop_back();
329
CHECK_GE(current_bytes_in_cache_, EntrySize(key_value));
330
current_bytes_in_cache_ -= EntrySize(key_value);
331
value_helper_->EvictNotify(key_value->second);
332
map_.erase(key_value->first);
336
current_bytes_in_cache_ += bytes_needed;
342
// TODO(jmarantz): convert most of these to 'int'.
343
size_t max_bytes_in_cache_;
344
size_t current_bytes_in_cache_;
345
size_t num_evictions_;
346
mutable size_t num_hits_;
347
mutable size_t num_misses_;
349
size_t num_identical_reinserts_;
351
EntryList lru_ordered_list_;
353
ValueHelper* value_helper_;
355
DISALLOW_COPY_AND_ASSIGN(LRUCacheBase);
358
} // namespace net_instaweb
360
#endif // PAGESPEED_KERNEL_CACHE_LRU_CACHE_BASE_H_