#
# Copyright (C) 2006 Robey Pointer <robey@lag.net>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#

"""
a cache for chewed-up "change" data structures, which are basically just a
different way of storing a revision.  the cache improves lookup times 10x
over bazaar's xml revision structure, so it's currently still worth doing.

once a revision is committed in bazaar, it never changes, so once we have
cached a change, it's good forever.
"""
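
# A typical lifecycle, as a rough sketch (the `history` object stands in for
# loggerhead's History, which must provide `log`, `get_revision_history`,
# and `get_changes_uncached`; the path and revid are illustrative):
#
#     cache = ChangeCache(history, '/path/to/cache')
#     cache.check_rebuild(max_time=3600)        # fill in missing entries
#     changes = cache.get_changes(['some-revid'])
#     cache.close()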

import cPickle
import os
import time

from loggerhead import util
from loggerhead.lockfile import LockFile


with_lock = util.with_lock('_lock', 'ChangeCache')

SQLITE_INTERFACE = os.environ.get('SQLITE_INTERFACE', 'sqlite3')

if SQLITE_INTERFACE == 'pysqlite2':
    from pysqlite2 import dbapi2
    _param_marker = '?'
elif SQLITE_INTERFACE == 'sqlite':
    import sqlite as dbapi2
    _param_marker = '%s'
elif SQLITE_INTERFACE == 'sqlite3':
    from sqlite3 import dbapi2
    _param_marker = '?'
else:
    raise AssertionError("bad sqlite interface %r!?" % SQLITE_INTERFACE)
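
# For example, to force the older python-sqlite bindings instead of the
# sqlite3 default, set the environment variable before starting loggerhead
# (the command shown is illustrative):
#
#     SQLITE_INTERFACE=sqlite python start-loggerhead.py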

_select_stmt = ("select data from revisiondata where revid = ?"
                ).replace('?', _param_marker)
_insert_stmt = ("insert into revisiondata (revid, data) "
                "values (?, ?)").replace('?', _param_marker)
_update_stmt = ("update revisiondata set data = ? where revid = ?"
                ).replace('?', _param_marker)

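
# FakeShelf provides a minimal shelve-like interface (get/add/update/count)
# over a single sqlite table, storing each object as a pickled blob keyed
# by revision id.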
class FakeShelf(object):

    def __init__(self, filename):
        create_table = not os.path.exists(filename)
        self.connection = dbapi2.connect(filename)
        self.cursor = self.connection.cursor()
        if create_table:
            self._create_table()

    def _create_table(self):
        self.cursor.execute(
            "create table RevisionData "
            "(revid binary primary key, data binary)")
        self.connection.commit()

    def _serialize(self, obj):
        return dbapi2.Binary(cPickle.dumps(obj, protocol=2))

    def _unserialize(self, data):
        return cPickle.loads(str(data))

    def get(self, revid):
        self.cursor.execute(_select_stmt, (revid,))
        filechange = self.cursor.fetchone()
        if filechange is None:
            return None
        else:
            return self._unserialize(filechange[0])

    def add(self, revid_obj_pairs, commit=True):
        for (r, d) in revid_obj_pairs:
            self.cursor.execute(_insert_stmt, (r, self._serialize(d)))
        if commit:
            self.connection.commit()

    def update(self, revid_obj_pairs, commit=True):
        for (r, d) in revid_obj_pairs:
            self.cursor.execute(_update_stmt, (self._serialize(d), r))
        if commit:
            self.connection.commit()

    def count(self):
        self.cursor.execute("select count(*) from revisiondata")
        return self.cursor.fetchone()[0]

    def close(self, commit=False):
        if commit:
            self.connection.commit()
        self.connection.close()
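
# A minimal round-trip sketch (the filename and revid are illustrative):
#
#     shelf = FakeShelf('/tmp/example.sql')
#     shelf.add([('revid-1', {'message': 'hello'})])
#     shelf.get('revid-1')    # -> {'message': 'hello'}
#     shelf.close()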

class ChangeCache(object):

    def __init__(self, history, cache_path):
        self.history = history
        self.log = history.log

        if not os.path.exists(cache_path):
            os.mkdir(cache_path)

        self._changes_filename = os.path.join(cache_path, 'changes.sql')

        # use a lockfile since the cache folder could be shared across
        # different processes.
        self._lock = LockFile(os.path.join(cache_path, 'lock'))
        self._closed = False

        ## # this is fluff; don't slow down startup time with it.
        ## # but it is racy in tests :(
        ## def log_sizes():
        ##     self.log.info('Using change cache %s; %d entries.' % (cache_path, self.size()))
        ## threading.Thread(target=log_sizes).start()

    def _cache(self):
        return FakeShelf(self._changes_filename)

    @with_lock
    def close(self):
        self.log.debug('Closing cache file.')
        self._closed = True

    @with_lock
    def closed(self):
        return self._closed

    @with_lock
    def flush(self):
        pass

    @with_lock
    def get_changes(self, revid_list):
        """
        get a list of changes by their revision_ids.  any changes missing
        from the cache are fetched by calling L{History.get_changes_uncached}
        and inserted into the cache before returning.
        """
        out = []
        missing_revids = []
        missing_revid_indices = []
        cache = self._cache()
        for revid in revid_list:
            entry = cache.get(revid)
            if entry is not None:
                out.append(entry)
            else:
                missing_revids.append(revid)
                missing_revid_indices.append(len(out))
                out.append(None)
        if missing_revids:
            missing_entries = self.history.get_changes_uncached(missing_revids)
            missing_entry_dict = {}
            for entry in missing_entries:
                missing_entry_dict[entry.revid] = entry
            revid_entry_pairs = []
            for i, revid in zip(missing_revid_indices, missing_revids):
                out[i] = entry = missing_entry_dict.get(revid)
                if entry is not None:
                    revid_entry_pairs.append((revid, entry))
            cache.add(revid_entry_pairs)
        return filter(None, out)

    @with_lock
    def full(self):
        cache = self._cache()
        last_revid = util.to_utf8(self.history.last_revid)
        revision_history = self.history.get_revision_history()
        return (cache.count() >= len(revision_history)
                and cache.get(last_revid) is not None)

    @with_lock
    def size(self):
        return self._cache().count()

    def check_rebuild(self, max_time=3600):
        """
        check if we need to fill in any missing pieces of the cache.  pull in
        any missing changes, but don't work any longer than C{max_time}
        seconds.
        """
        if self.closed() or self.full():
            return

        self.log.info('Building revision cache...')
        start_time = time.time()
        last_update = time.time()
        count = 0

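        # walk the revision history in batches of `jump` revisions,
        # flushing periodically and sleeping between batches so other
        # processes get a chance at the lock.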
        work = list(self.history.get_revision_history())
        jump = 100
        for i in xrange(0, len(work), jump):
            r = work[i:i + jump]
            # must call into history so we grab the branch lock
            # (otherwise, lock inversion)
            self.history.get_changes(r)
            if self.closed():
                self.flush()
                return
            count += jump
            now = time.time()
            if now - start_time > max_time:
                self.log.info('Cache rebuilding will pause for now.')
                self.flush()
                return
            if now - last_update > 60:
                self.log.info('Revision cache rebuilding continues: %d/%d' % (min(count, len(work)), len(work)))
                last_update = time.time()
                self.flush()
            # give someone else a chance at the lock
            time.sleep(1)
        self.log.info('Revision cache rebuild completed.')
        self.flush()

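
# FileChangeCache caches the per-file change lists for each revision, using
# the same read-through pattern as ChangeCache.get_changes above: look up
# cached entries, fetch the misses from history, and store them.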
class FileChangeCache(object):

    def __init__(self, history, cache_path):
        self.history = history

        if not os.path.exists(cache_path):
            os.mkdir(cache_path)

        self._changes_filename = os.path.join(cache_path, 'filechanges.sql')

        # use a lockfile since the cache folder could be shared across
        # different processes.
        self._lock = LockFile(os.path.join(cache_path, 'filechange-lock'))

    @with_lock
    def get_file_changes(self, entries):
        out = []
        missing_entries = []
        missing_entry_indices = []
        cache = FakeShelf(self._changes_filename)
        for entry in entries:
            changes = cache.get(entry.revid)
            if changes is not None:
                out.append(changes)
            else:
                missing_entries.append(entry)
                missing_entry_indices.append(len(out))
                out.append(None)
        if missing_entries:
            missing_changes = self.history.get_file_changes_uncached(missing_entries)
            revid_changes_pairs = []
            for i, entry, changes in zip(
                missing_entry_indices, missing_entries, missing_changes):
                revid_changes_pairs.append((entry.revid, changes))
                out[i] = changes
            cache.add(revid_changes_pairs)
        return out