~craighewetson-deactivatedaccount/bzr-search/index-fileids

« back to all changes in this revision

Viewing changes to index.py

Committer: Robert Collins
Date: 2008-08-27 08:37:30 UTC
Revision ID: robertc@robertcollins.net-20080827083730-pcxlpnwaga8y67lt

Add log acceleration.

files modified:
NEWS

__init__.py

index.py

setup.py

tests/test_index.py

Show diffs side-by-side

added added

removed removed

index.py

1292

else:

1293

value = self._bisect_nodes[candidate_key]

1294

yield (self, candidate_key, value)

1295

1296

# Fallback log search filter that make_disable_search_filter() delegates to.
# It starts out as None; whatever assigns the real callable is not in this
# section -- NOTE(review): presumably set when the plugin replaces bzrlib's
# own search filter; confirm against the registration code.
_original_make_search_filter = None

1297

1298

1299

def make_disable_search_filter(branch, generate_delta, search, log_rev_iterator):
    """Disable search filtering if bzr-search will be active.

    This filter replaces the default search filter, using the original filter
    if a bzr-search filter cannot be used.

    :param branch: The branch being logged.
    :param generate_delta: Whether to generate a delta for each revision.
    :param search: A user text search string. When it is None or empty the
        original filter is used without consulting the search index.
    :param log_rev_iterator: An input iterator containing all revisions that
        could be displayed, in lists.
    :return: An iterator over ((rev_id, revno, merge_depth), rev, delta).
    """
    # query_from_regex() calls string methods on its argument, so only try the
    # bzr-search path when there is actually some search text to convert.
    if search:
        # Converting the regex is cheap; do it before opening the index so
        # an unconvertible regex never touches the index at all.
        query = query_from_regex(search)
        if query:
            try:
                open_index_branch(branch)
            except errors.NoSearchIndex:
                # No index for this branch: fall through to the original
                # filter below.
                pass
            else:
                # A usable query and an index both exist, so the revisions
                # are passed through unfiltered (the "disable" case).
                return log_rev_iterator
    return _original_make_search_filter(branch, generate_delta, search,
        log_rev_iterator)

1321

1322

1323

def make_log_search_filter(branch, generate_delta, search, log_rev_iterator):
    """Filter revisions by using a search index.

    This filter looks up revids in the search index along with the search
    string, if the search string regex can be converted into a bzr-search
    query.

    :param branch: The branch being logged.
    :param generate_delta: Whether to generate a delta for each revision.
    :param search: A user text search string. When it is None or empty,
        log_rev_iterator is returned unchanged.
    :param log_rev_iterator: An input iterator containing all revisions that
        could be displayed, in lists.
    :return: An iterator over ((rev_id, revno, merge_depth), rev, delta).
    """
    # Without any search text there is nothing to filter on, and
    # query_from_regex() would fail calling string methods on None.
    if not search:
        return log_rev_iterator
    # Can we possibly search on this regex?
    query = query_from_regex(search)
    if not query:
        return log_rev_iterator
    try:
        index = open_index_branch(branch)
    except errors.NoSearchIndex:
        # No search index for this branch: leave the revisions unfiltered.
        return log_rev_iterator
    return _filter_log(index, query, log_rev_iterator)

1346

1347

1348

def _filter_log(index, query, log_rev_iterator):

1349

"""Filter log_rev_iterator's revision ids on query in index."""

1350

rev_ids = set()

1351

# TODO: we could lazy evaluate the search, for each revision we see - this

1352

# would allow searches that hit everything to be less-than-completely

1353

# evaluated before the first result is shown. OTOH knowing a miss will

1354

# require reading the entire search anyhow. Note that we can do better -

1355

# if we looked up the document id of the revision, we could search explicitly

1356

# for the document id in the search up front, and do many small searches. This is

1357

# likely better in terms of memory use. Needs refactoring etc.

1358

for result in index.search(query):

1359

if type(result) != RevisionHit:

1360

continue

1361

rev_ids.add(result.revision_key[0])

1362

for batch in log_rev_iterator:

1363

new_revs = []

1364

for item in batch:

1365

if item[0][0] in rev_ids:

1366

new_revs.append(item)

1367

yield new_revs

1368

1369

1370

def query_from_regex(regex):
    """Convert a regex into a bzr-search query.

    Only the most trivial form is recognised: a single term wrapped in word
    boundary markers, i.e. ``\\bTERM\\b``. Anything else is reported as not
    convertible.

    :param regex: The user's regex string; may be None or empty.
    :return: A query of the form ``[(term,)]``, or None when the regex cannot
        be converted.
    """
    # No regex at all (None or ""): there is nothing to convert.
    if not regex:
        return None
    # Most trivial implementation ever
    boundary = "\\b"
    if regex.count(boundary) != 2:
        return None
    # The slice below drops two characters from each end, which is only
    # correct when the two markers really are the first and last things in
    # the regex (otherwise e.g. 'x\bfoo\bx' would be cut to 'bfoo\').
    if not (regex.startswith(boundary) and regex.endswith(boundary)):
        return None
    term = regex[2:-2]
    if term.count(boundary) != 0:
        return None
    # Any additional whitespace implies something we can't search on:
    _ensure_regexes()
    if _tokeniser_re.search(term):
        return None
    return [(term,)]

Older »