~ubuntu-branches/ubuntu/oneiric/loggerhead/oneiric : revision 11

1

2

#

3

# This program is free software; you can redistribute it and/or modify

4

# it under the terms of the GNU General Public License as published by

5

# the Free Software Foundation; either version 2 of the License, or

6

# (at your option) any later version.

7

#

8

# This program is distributed in the hope that it will be useful,

9

# but WITHOUT ANY WARRANTY; without even the implied warranty of

10

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11

# GNU General Public License for more details.

12

#

13

# You should have received a copy of the GNU General Public License

14

# along with this program; if not, write to the Free Software

15

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

16

17

"""Store history information in a database."""

18

19

try:

20

from sqlite3 import dbapi2

21

except ImportError:

22

from pysqlite2 import dbapi2

23

24

from collections import defaultdict, deque

25

import time

26

27

from bzrlib import (

28

debug,

29

errors,

30

lru_cache,

31

revision,

32

static_tuple,

33

trace,

34

ui,

35

)

36

37

from loggerhead import history_db_schema as schema

38

39

40

NULL_PARENTS = (revision.NULL_REVISION,)

41

42

43

def _n_params(n):

44

"""Create a query string representing N parameters.

45

46

n=1 => ?

47

n=2 => ?, ?

48

etc.

49

"""

50

return ', '.join('?'*n)

51

52

53

def _add_n_params(query, n):

54

"""Add n parameters to the query string.

55

56

the query should have a single '%s' in it to be expanded.

57

"""

58

return query % (_n_params(n),)

59

60

61

def _get_result_for_many(cursor, query, params):

62

"""Get the results for a query with a lot of parameters.

63

64

SQLite has a limit on how many parameters can be passed (often for a good

65

reason). However, we don't want to have to think about it right now. So

66

this just loops over params based on a maximum allowed per query. Then

67

return the whole result in one list.

68

69

:param query: An SQL query that should contain something like:

70

"WHERE foo IN (%s)"

71

:param params: A list of parameters to supply to the function.

72

"""

73

res = []

74

MAX_COUNT = 200

75

for start in xrange(0, len(params), MAX_COUNT):

76

next_params = params[start:start+MAX_COUNT]

77

res.extend(

78

cursor.execute(_add_n_params(query, len(next_params)),

79

next_params).fetchall())

80

return res

81

82

83

class Importer(object):

84

"""Import data from bzr into the history_db."""

85

86

_MAINLINE_PARENT_RANGE_LEN = 100

87

88

def __init__(self, db_path, a_branch, tip_revision_id=None,

89

incremental=False, validate=False):

90

db_conn = dbapi2.connect(db_path)

91

self._incremental = incremental

92

self._validate = validate

93

self._db_conn = db_conn

94

self._ensure_schema()

95

self._cursor = self._db_conn.cursor()

96

self._branch = a_branch

97

if tip_revision_id is None:

98

self._branch_tip_rev_id = a_branch.last_revision()

99

else:

100

self._branch_tip_rev_id = tip_revision_id

101

self._branch_tip_key = (self._branch_tip_rev_id,)

102

self._graph = None

103

if not self._incremental:

104

self._ensure_graph()

105

self._rev_id_to_db_id = {}

106

self._db_id_to_rev_id = {}

107

self._stats = defaultdict(lambda: 0)

108

# Map child_id => [parent_db_ids]

109

self._db_parent_map = {}

110

111

def set_max_cache_size(self, size):

112

"""Tell SQLite how many megabytes to cache internally."""

113

page_size = self._db_conn.execute('PRAGMA page_size').fetchone()[0]

114

self._db_conn.execute("PRAGMA cache_size = %d"

115

% (size / page_size,));

116

117

def _ensure_schema(self):

118

if not schema.is_initialized(self._db_conn, dbapi2.OperationalError):

119

schema.create_sqlite_db(self._db_conn)

120

if 'history_db' in debug.debug_flags:

121

trace.note('history_db initialized database')

122

# We know we can't do this incrementally, because nothing has

123

# existed before...

124

#self._incremental = False

125

126

def _ensure_revisions(self, revision_ids):

127

schema.ensure_revisions(self._cursor, revision_ids,

128

self._rev_id_to_db_id,

129

self._db_id_to_rev_id, self._graph)

130

131

def _ensure_graph(self):

132

if self._graph is not None:

133

return

134

repo = self._branch.repository

135

self._graph = repo.revisions.get_known_graph_ancestry(

136

[self._branch_tip_key])

137

138

def _is_imported(self, tip_rev_id):

139

res = self._cursor.execute(

140

"SELECT tip_revision FROM dotted_revno JOIN revision"

141

" ON dotted_revno.tip_revision = revision.db_id"

142

" WHERE revision_id = ?"

143

" AND tip_revision = merged_revision",

144

(tip_rev_id,)).fetchone()

145

return (res is not None)

146

147

def _insert_nodes(self, tip_rev_id, nodes):

148

"""Insert all of the nodes mentioned into the database."""

149

self._stats['_insert_node_calls'] += 1

150

rev_to_db = self._rev_id_to_db_id

151

tip_db_id = rev_to_db[tip_rev_id]

152

self._stats['total_nodes_inserted'] += len(nodes)

153

revno_entries = []

154

st = static_tuple.StaticTuple

155

def build_revno_info():

156

for dist, node in enumerate(nodes):

157

# TODO: Do we need to track the 'end_of_merge' and 'merge_depth'

158

# fields?

159

db_id = rev_to_db[node.key[0]]

160

revno_entries.append((tip_db_id,

161

db_id,

162

'.'.join(map(str, node.revno)),

163

node.end_of_merge,

164

node.merge_depth,

165

dist))

166

build_revno_info()

167

try:

168

schema.create_dotted_revnos(self._cursor, revno_entries)

169

except dbapi2.IntegrityError:

170

# Revisions already exist

171

return False

172

return True

173

174

def _update_parents(self, nodes):

175

"""Update parent information for all these nodes."""

176

# Get the keys and their parents

177

parent_keys = self._graph.get_parent_keys

178

parent_map = dict([(n.key[0], [p[0] for p in parent_keys(n.key)])

179

for n in nodes])

180

self._insert_parent_map(parent_map)

181

182

def _insert_parent_map(self, parent_map):

183

"""Insert all the entries in this parent map into the parent table."""

184

r_to_d = self._rev_id_to_db_id

185

def _ensure_parent_map_keys():

186

rev_ids = set([r for r in parent_map if r not in r_to_d])

187

rev_ids_update = rev_ids.update

188

for parent_keys in parent_map.itervalues():

189

rev_ids_update([p for p in parent_keys if p not in r_to_d])

190

self._ensure_revisions(rev_ids)

191

_ensure_parent_map_keys()

192

data = []

193

stuple = static_tuple.StaticTuple.from_sequence

194

for rev_id, parent_ids in parent_map.iteritems():

195

db_id = r_to_d[rev_id]

196

if db_id in self._db_parent_map:

197

# This has already been imported, skip it

198

continue

199

parent_db_ids = stuple([r_to_d[p_id] for p_id in parent_ids])

200

self._db_parent_map[db_id] = parent_db_ids

201

for idx, parent_db_id in enumerate(parent_db_ids):

202

data.append((db_id, parent_db_id, idx))

203

# Inserting the data in db-sorted order actually improves perf a fair

204

# amount. ~10%. My guess is that it keeps locality for uniqueness

205

# checks, etc.

206

data.sort()

207

self._cursor.executemany("INSERT OR IGNORE INTO parent"

208

" (child, parent, parent_idx)"

209

"VALUES (?, ?, ?)", data)

210

211

def do_import(self):

212

if revision.is_null(self._branch_tip_rev_id):

213

return

214

merge_sorted = self._import_tip(self._branch_tip_rev_id)

215

self._db_conn.commit()

216

217

def _get_merge_sorted_tip(self, tip_revision_id):

218

if self._incremental:

219

self._update_ancestry(tip_revision_id)

220

self._ensure_revisions([tip_revision_id])

221

tip_db_id = self._rev_id_to_db_id[tip_revision_id]

222

inc_merger = _IncrementalMergeSort(self, tip_db_id)

223

merge_sorted = inc_merger.topo_order()

224

# Map db_ids back to the keys that self._graph would generate

225

# Assert that the result is valid

226

if self._validate:

227

self._ensure_graph()

228

actual_ms = self._graph.merge_sort((tip_revision_id,))

229

actual_ms_iter = iter(actual_ms)

230

else:

231

actual_ms_iter = None

232

233

def assert_is_equal(x, y):

234

if x != y:

235

import pdb; pdb.set_trace()

236

db_to_rev = self._db_id_to_rev_id

237

for node in merge_sorted:

238

try:

239

node.key = (db_to_rev[node.key],)

240

except KeyError: # Look this one up in the db

241

rev_res = self._cursor.execute(

242

"SELECT revision_id FROM revision WHERE db_id = ?",

243

(node.key,)).fetchone()

244

rev_id = rev_res[0]

245

self._db_id_to_rev_id[node.key] = rev_id

246

self._rev_id_to_db_id[rev_id] = node.key

247

node.key = (rev_id,)

248

if actual_ms_iter is None:

249

continue

250

actual_node = actual_ms_iter.next()

251

assert_is_equal(node.key, actual_node.key)

252

assert_is_equal(node.revno, actual_node.revno)

253

assert_is_equal(node.merge_depth, actual_node.merge_depth)

254

assert_is_equal(node.end_of_merge, actual_node.end_of_merge)

255

if actual_ms_iter is not None:

256

try:

257

actual_node = actual_ms_iter.next()

258

except StopIteration:

259

# no problem they both say they've finished

260

pass

261

else:

262

# The next revision must have already been imported

263

assert self._is_imported(actual_node.key[0])

264

else:

265

merge_sorted = self._graph.merge_sort((tip_revision_id,))

266

return merge_sorted

267

268

def _import_tip(self, tip_revision_id, suppress_progress_and_commit=False):

269

if suppress_progress_and_commit:

270

pb = None

271

else:

272

pb = ui.ui_factory.nested_progress_bar()

273

if pb is not None:

274

pb.update('getting merge_sorted')

275

merge_sorted = self._get_merge_sorted_tip(tip_revision_id)

276

if not self._incremental:

277

# If _incremental all the revisions will have already been ensured

278

# by the _update_ancestry code.

279

if pb is not None:

280

pb.update('allocating revisions', 0,

281

len(merge_sorted))

282

self._ensure_revisions([n.key[0] for n in merge_sorted])

283

if pb is not None:

284

pb.update('updating parents', 0,

285

len(merge_sorted))

286

self._update_parents(merge_sorted)

287

try:

288

last_mainline_rev_id = None

289

new_nodes = []

290

for idx, node in enumerate(merge_sorted):

291

if pb is not None and idx & 0xFF == 0:

292

pb.update('importing', idx, len(merge_sorted))

293

if last_mainline_rev_id is None:

294

assert not new_nodes

295

assert node.merge_depth == 0, \

296

"We did not start at a mainline?"

297

last_mainline_rev_id = node.key[0]

298

new_nodes.append(node)

299

continue

300

if node.merge_depth == 0:

301

# We're at a new mainline. Insert the nodes for the

302

# previous mainline. If this has already been inserted, we

303

# assume all previous ones are also. Safe as long as we

304

# wait to commit() until we insert all parents.

305

if not self._insert_nodes(last_mainline_rev_id, new_nodes):

306

# This data has already been imported.

307

new_nodes = []

308

break

309

last_mainline_rev_id = node.key[0]

310

new_nodes = []

311

new_nodes.append(node)

312

if new_nodes:

313

assert last_mainline_rev_id is not None

314

self._insert_nodes(last_mainline_rev_id, new_nodes)

315

new_nodes = []

316

self._build_one_mainline(tip_revision_id)

317

finally:

318

if pb is not None:

319

pb.finished()

320

return merge_sorted

321

322

def _update_ancestry(self, new_tip_rev_id):

323

"""Walk the parents of this tip, updating 'revision' and 'parent'

324

325

self._rev_id_to_db_id will be updated.

326

"""

327

(known, parent_map,

328

children) = self._find_known_ancestors(new_tip_rev_id)

329

self._compute_gdfo_and_insert(known, children, parent_map)

330

self._insert_parent_map(parent_map)

331

# This seems to slow things down a fair amount. On bzrtools, we end up

332

# calling it 75 times, and it ends up taking 800ms. vs a total rutime

333

# of 1200ms otherwise.

334

# self._db_conn.commit()

335

336

def _find_known_ancestors(self, new_tip_rev_id):

337

"""Starting at tip, find ancestors we already have"""

338

needed = [new_tip_rev_id]

339

all_needed = set(new_tip_rev_id)

340

children = {}

341

parent_map = {}

342

known = {}

343

pb = ui.ui_factory.nested_progress_bar()

344

try:

345

while needed:

346

pb.update('Finding ancestry', len(all_needed), len(all_needed))

347

rev_id = needed.pop()

348

if rev_id in known:

349

# We may add particular parents multiple times, just ignore

350

# them once they've been found

351

continue

352

res = self._cursor.execute(

353

"SELECT gdfo FROM revision WHERE revision_id = ?",

354

[rev_id]).fetchone()

355

if res is not None:

356

known[rev_id] = res[0]

357

continue

358

# We don't have this entry recorded yet, add the parents to the

359

# search

360

pmap = self._branch.repository.get_parent_map([rev_id])

361

parent_map.update(pmap)

362

parent_ids = pmap.get(rev_id, None)

363

if parent_ids is None or parent_ids == NULL_PARENTS:

364

# We can insert this rev directly, because we know its

365

# gdfo, as it has no parents.

366

parent_map[rev_id] = ()

367

self._cursor.execute("INSERT INTO revision (revision_id, gdfo)"

368

" VALUES (?, ?)", (rev_id, 1))

369

# Wrap around to populate known quickly

370

needed.append(rev_id)

371

if parent_ids is None:

372

# This is a ghost, add it to the table

373

self._cursor.execute("INSERT INTO ghost (db_id)"

374

" SELECT db_id FROM revision"

375

" WHERE revision_id = ?",

376

(rev_id,))

377

continue

378

for parent_id in pmap[rev_id]:

379

if parent_id not in known:

380

if parent_id not in all_needed:

381

needed.append(parent_id)

382

all_needed.add(parent_id)

383

children.setdefault(parent_id, []).append(rev_id)

384

finally:

385

pb.finished()

386

return known, parent_map, children

387

388

def _compute_gdfo_and_insert(self, known, children, parent_map):

389

# At this point, we should have walked to all known parents, and should

390

# be able to build up the gdfo and parent info for all keys.

391

pending = [(gdfo, rev_id) for rev_id, gdfo in known.iteritems()]

392

while pending:

393

gdfo, rev_id = pending.pop()

394

for child_id in children.get(rev_id, []):

395

if child_id in known:

396

# XXX: Already numbered?

397

assert known[child_id] > gdfo

398

continue

399

parent_ids = parent_map[child_id]

400

max_gdfo = -1

401

for parent_id in parent_ids:

402

try:

403

this_gdfo = known[parent_id]

404

except KeyError:

405

# One parent hasn't been computed yet

406

break

407

if this_gdfo > max_gdfo:

408

max_gdfo = this_gdfo

409

else:

410

# All parents have their gdfo known

411

# assert gdfo == max_gdfo

412

child_gdfo = max_gdfo + 1

413

known[child_id] = child_gdfo

414

self._cursor.execute(

415

"INSERT INTO revision (revision_id, gdfo)"

416

" VALUES (?, ?)",

417

(child_id, child_gdfo))

418

# Put this into the pending queue so that *its* children

419

# also get updated

420

pending.append((child_gdfo, child_id))

421

if self._graph is not None:

422

for rev_id, gdfo in known.iteritems():

423

assert gdfo == self._graph._nodes[(rev_id,)].gdfo

424

425

def _get_db_id(self, revision_id):

426

db_res = self._cursor.execute('SELECT db_id FROM revision'

427

' WHERE revision_id = ?',

428

[revision_id]).fetchone()

429

if db_res is None:

430

return None

431

return db_res[0]

432

433

def _update_dotted(self, new_tip_rev_id):

434

"""We have a new 'tip' revision, Update the dotted_revno table."""

435

# Just make sure the db has valid info for all the existing entries

436

self._update_ancestry(new_tip_rev_id)

437

438

def _get_mainline_range_count(self, head_db_id):

439

"""Does the given head_db_id already have a range defined using it."""

440

res = self._cursor.execute("SELECT pkey, count, tail"

441

" FROM mainline_parent_range"

442

" WHERE head = ?"

443

" ORDER BY count DESC LIMIT 1",

444

[head_db_id]).fetchone()

445

if res is None:

446

return None, None, None

447

return res

448

449

def _get_mainline_range(self, range_key):

450

"""Get the revisions in the mainline range specified."""

451

res = self._cursor.execute("SELECT revision FROM mainline_parent"

452

" WHERE range = ?"

453

" ORDER BY dist DESC", [range_key])

454

return [r[0] for r in res.fetchall()]

455

456

def _get_lh_parent_db_id(self, revision_db_id):

457

parent_res = self._cursor.execute("""

458

SELECT parent.parent

459

FROM parent

460

WHERE parent.child = ?

461

AND parent_idx = 0

462

LIMIT 1 -- hint to the db, should always be only 1

463

""", (revision_db_id,)).fetchone()

464

# self._stats['lh_parent_step'] += 1

465

if parent_res is None:

466

return None

467

return parent_res[0]

468

469

def _insert_range(self, range_db_ids, tail_db_id):

470

head_db_id = range_db_ids[0]

471

self._cursor.execute("INSERT INTO mainline_parent_range"

472

" (head, tail, count) VALUES (?, ?, ?)",

473

(head_db_id, tail_db_id, len(range_db_ids)))

474

# Note: This works for sqlite, does it work for pgsql?

475

range_key = self._cursor.lastrowid

476

self._stats['ranges_inserted'] += 1

477

# Note that 'tail' is explicitly not included in the range

478

self._stats['revs_in_ranges'] += len(range_db_ids)

479

self._cursor.executemany(

480

"INSERT INTO mainline_parent (range, revision, dist)"

481

" VALUES (?, ?, ?)",

482

[(range_key, d, idx) for idx, d in enumerate(range_db_ids)])

483

484

def _build_one_mainline(self, head_rev_id):

485

# 1) Walk backward until you find an existing entry in the

486

# mainline_parent_range table (or you reach the end)

487

# 2) If the range has less than X revisions, include it in the

488

# revisions to be added

489

# 3) chop the list into X revision sections, and insert them

490

#

491

# This should ensure that any given ancestry has at most 1 section

492

# which has less than X revisions, and it should preserve convergence.

493

self._ensure_revisions([head_rev_id])

494

cur_db_id = self._rev_id_to_db_id[head_rev_id]

495

range_db_ids = []

496

while cur_db_id is not None:

497

(range_key, next_count,

498

tail) = self._get_mainline_range_count(cur_db_id)

499

if range_key is not None:

500

# This tip is already present in mainline_parent_range

501

# table.

502

if (range_db_ids

503

and next_count < self._MAINLINE_PARENT_RANGE_LEN):

504

range_db_ids.extend(self._get_mainline_range(range_key))

505

cur_db_id = tail

506

break

507

else:

508

range_db_ids.append(cur_db_id)

509

cur_db_id = self._get_lh_parent_db_id(cur_db_id)

510

# We now have a list of db ids that need to be split up into

511

# ranges.

512

while range_db_ids:

513

tail_db_ids = range_db_ids[-self._MAINLINE_PARENT_RANGE_LEN:]

514

del range_db_ids[-self._MAINLINE_PARENT_RANGE_LEN:]

515

self._insert_range(tail_db_ids, cur_db_id)

516

cur_db_id = tail_db_ids[0]

517

518

519

class _MergeSortNode(object):

520

"""A simple object that represents one entry in the merge sorted graph."""

521

522

__slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge',

523

'_left_parent', '_left_pending_parent',

524

'_pending_parents', '_is_first',

525

)

526

527

def __init__(self, key, merge_depth, left_parent, pending_parents,

528

is_first):

529

self.key = key

530

self.merge_depth = merge_depth

531

self.revno = None

532

self.end_of_merge = None

533

self._left_parent = left_parent

534

self._left_pending_parent = left_parent

535

self._pending_parents = pending_parents

536

self._is_first = is_first

537

538

def __repr__(self):

539

return '%s(%s, %s, %s, %s [%s %s %s %s])' % (

540

self.__class__.__name__,

541

self.key, self.revno, self.end_of_merge, self.merge_depth,

542

self._left_parent, self._left_pending_parent,

543

self._pending_parents, self._is_first)

544

545

546

class _IncrementalMergeSort(object):

547

"""Context for importing partial history."""

548

# Note: all of the ids in this object are database ids. the revision_ids

549

# should have already been imported before we get to this step.

550

551

def __init__(self, importer, tip_db_id):

552

self._importer = importer

553

self._tip_db_id = tip_db_id

554

self._mainline_db_ids = None

555

self._imported_mainline_id = None

556

self._cursor = importer._cursor

557

self._stats = importer._stats

558

559

# db_id => gdfo

560

self._known_gdfo = {}

561

# db_ids that we know are ancestors of mainline_db_ids that are not

562

# ancestors of pre_mainline_id

563

self._interesting_ancestor_ids = set()

564

565

# Information from the dotted_revno table for revisions that are in the

566

# already-imported mainline.

567

self._imported_dotted_revno = {}

568

# What dotted revnos have been loaded

569

self._known_dotted = set()

570

# This is the gdfo of the current mainline revision search tip. This is

571

# the threshold such that

572

self._imported_gdfo = None

573

574

# Revisions that we are walking, to see if they are interesting, or

575

# already imported.

576

self._search_tips = None

577

# mainline revno => number of child branches

578

self._revno_to_branch_count = {}

579

# (revno, branch_num) => oldest seen child

580

self._branch_to_child_count = {}

581

582

self._depth_first_stack = None

583

self._scheduled_stack = None

584

self._seen_parents = None

585

# Map from db_id => parent_ids

586

self._parent_map = self._importer._db_parent_map

587

588

# We just populate all known ghosts here.

589

# TODO: Ghosts are expected to be rare. If we find a case where probing

590

# for them at runtime is better than grabbing them all at once,

591

# re-evaluate this decision.

592

self._ghosts = None

593

594

def _find_needed_mainline(self):

595

"""Find mainline revisions that need to be filled out.

596

597

:return: ([mainline_not_imported], most_recent_imported)

598

"""

599

db_id = self._tip_db_id

600

needed = []

601

while db_id is not None and not self._is_imported_db_id(db_id):

602

needed.append(db_id)

603

db_id = self._importer._get_lh_parent_db_id(db_id)

604

self._mainline_db_ids = needed

605

self._interesting_ancestor_ids.update(self._mainline_db_ids)

606

self._imported_mainline_id = db_id

607

608

def _get_initial_search_tips(self):

609

"""Grab the right-hand parents of all the interesting mainline.

610

611

We know we already searched all of the left-hand parents, so just grab

612

the right-hand parents.

613

"""

614

# TODO: Split this into a loop, since sqlite has a maximum number of

615

# parameters.

616

res = _get_result_for_many(self._cursor,

617

"SELECT parent, gdfo FROM parent, revision"

618

" WHERE parent.parent = revision.db_id"

619

" AND parent_idx != 0"

620

" AND child IN (%s)",

621

self._mainline_db_ids)

622

self._search_tips = set([r[0] for r in res])

623

self._stats['num_search_tips'] += len(self._search_tips)

624

self._known_gdfo.update(res)

625

# We know that we will eventually need at least 1 step of the mainline

626

# (it gives us the basis for numbering everything). We do it now,

627

# because it increases the 'cheap' filtering we can do right away.

628

self._stats['step mainline initial'] += 1

629

self._step_mainline()

630

ghost_res = self._cursor.execute("SELECT db_id FROM ghost").fetchall()

631

self._ghosts = set([g[0] for g in ghost_res])

632

633

def _is_imported_db_id(self, tip_db_id):

634

res = self._cursor.execute(

635

"SELECT count(*) FROM dotted_revno"

636

" WHERE tip_revision = ?"

637

" AND tip_revision = merged_revision",

638

(tip_db_id,)).fetchone()

639

return res[0] > 0

640

641

def _split_search_tips_by_gdfo(self, unknown):

642

"""For these search tips, mark ones 'interesting' based on gdfo.

643

644

All search tips are ancestors of _mainline_db_ids. So if their gdfo

645

indicates that they could not be merged into already imported

646

revisions, then we know automatically that they are

647

new-and-interesting. Further, if they are present in

648

_imported_dotted_revno, then we know they are not interesting, and

649

we will stop searching them.

650

651

Otherwise, we don't know for sure which category they fall into, so

652

we return them for further processing.

653

654

:return: still_unknown - search tips that aren't known to be

655

interesting, and aren't known to be in the ancestry of already

656

imported revisions.

657

"""

658

still_unknown = []

659

min_gdfo = None

660

for db_id in unknown:

661

if (db_id in self._imported_dotted_revno

662

or db_id == self._imported_mainline_id):

663

# This should be removed as a search tip, we know it isn't

664

# interesting, it is an ancestor of an imported revision

665

self._stats['split already imported'] += 1

666

self._search_tips.remove(db_id)

667

continue

668

gdfo = self._known_gdfo[db_id]

669

if gdfo >= self._imported_gdfo:

670

self._stats['split gdfo'] += 1

671

self._interesting_ancestor_ids.add(db_id)

672

else:

673

still_unknown.append(db_id)

674

return still_unknown

675

676

def _split_interesting_using_children(self, unknown_search_tips):

677

"""Find children of these search tips.

678

679

For each search tip, we find all of its known children. We then filter

680

the children by:

681

a) child is ignored if child in _interesting_ancestor_ids

682

b) child is ignored if gdfo(child) > _imported_gdfo

683

or (gdfo(child) == _imported_gdfo and child !=

684

_imported_mainline_id)

685

The reason for the extra check is because for the ancestry

686

left-to-be-searched, with tip at _imported_mainline_id, *only*

687

_imported_mainline_id is allowed to have gdfo = _imported_gdfo.

688

for each search tip, if there are no interesting children, then this

689

search tip is definitely interesting (there is no path for it to be

690

merged into a previous mainline entry.)

691

692

:return: still_unknown

693

still_unknown are the search tips that are still have children that

694

could be possibly merged.

695

"""

696

interesting = self._interesting_ancestor_ids

697

parent_child_res = self._cursor.execute(_add_n_params(

698

"SELECT parent, child FROM parent"

699

" WHERE parent IN (%s)",

700

len(unknown_search_tips)), unknown_search_tips).fetchall()

701

parent_to_children = {}

702

already_imported = set()

703

for parent, child in parent_child_res:

704

if (child in self._imported_dotted_revno

705

or child == self._imported_mainline_id):

706

# This child is already imported, so obviously the parent is,

707

# too.

708

self._stats['split child imported'] += 1

709

already_imported.add(parent)

710

already_imported.add(child)

711

parent_to_children.setdefault(parent, []).append(child)

712

self._search_tips.difference_update(already_imported)

713

possibly_merged_children = set(

714

[c for p, c in parent_child_res

715

if c not in interesting and p not in already_imported])

716

known_gdfo = self._known_gdfo

717

unknown_gdfos = [c for c in possibly_merged_children

718

if c not in known_gdfo]

719

# TODO: Is it more wasteful to join this table early, or is it better

720

# because we avoid having to pass N parameters back in?

721

# TODO: Would sorting the db ids help? They are the primary key for the

722

# table, so we could potentially fetch in a better order.

723

if unknown_gdfos:

724

res = self._cursor.execute(_add_n_params(

725

"SELECT db_id, gdfo FROM revision WHERE db_id IN (%s)",

726

len(unknown_gdfos)), unknown_gdfos)

727

known_gdfo.update(res)

728

min_gdfo = self._imported_gdfo

729

# Remove all of the children who have gdfo >= min. We already handled

730

# the == min case in the first loop.

731

possibly_merged_children.difference_update(

732

[c for c in possibly_merged_children if known_gdfo[c] >= min_gdfo])

733

still_unknown = []

734

for parent in unknown_search_tips:

735

if parent in already_imported:

736

self._stats['split parent imported'] += 1

737

continue

738

for c in parent_to_children[parent]:

739

if c in possibly_merged_children:

740

still_unknown.append(parent)

741

break

742

else: # All children could not be possibly merged

743

self._stats['split children interesting'] += 1

744

interesting.add(parent)

745

return still_unknown

746

747

def _step_mainline(self):

748

"""Move the mainline pointer by one, updating the data."""

749

self._stats['step mainline'] += 1

750

res = self._cursor.execute(

751

"SELECT merged_revision, revno, end_of_merge, merge_depth"

752

" FROM dotted_revno WHERE tip_revision = ? ORDER BY dist",

753

[self._imported_mainline_id]).fetchall()

754

stuple = static_tuple.StaticTuple.from_sequence

755

st = static_tuple.StaticTuple

756

dotted_info = [st(r[0], st(stuple(map(int, r[1].split('.'))),

757

r[2], r[3]))

758

for r in res]

759

self._stats['step mainline cache missed'] += 1

760

self._stats['step mainline added'] += len(dotted_info)

761

self._update_info_from_dotted_revno(dotted_info)

762

# TODO: We could remove search tips that show up as newly merged

763

# though that can wait until the next

764

# _split_search_tips_by_gdfo

765

# new_merged_ids = [r[0] for r in res]

766

res = self._cursor.execute("SELECT parent, gdfo"

767

" FROM parent, revision"

768

" WHERE parent = db_id"

769

" AND parent_idx = 0"

770

" AND child = ?",

771

[self._imported_mainline_id]).fetchone()

772

if res is None:

773

# Walked off the mainline...

774

# TODO: Make sure this stuff is tested

775

self._imported_mainline_id = None

776

self._imported_gdfo = 0

777

else:

778

self._imported_mainline_id, self._imported_gdfo = res

779

self._known_gdfo[self._imported_mainline_id] = self._imported_gdfo

780

781

def _step_search_tips(self):

782

"""Move the search tips to their parents."""

783

self._stats['step search tips'] += 1

784

res = _get_result_for_many(self._cursor,

785

"SELECT parent, gdfo FROM parent, revision"

786

" WHERE parent=db_id AND child IN (%s)",

787

list(self._search_tips))

788

# TODO: We could use this time to fill out _parent_map, rather than

789

# waiting until _push_node and duplicating a request to the

790

# parent table. It may be reasonable to wait on gdfo also...

791

792

# Filter out search tips that we've already searched via a different

793

# path. By construction, if we are stepping the search tips, we know

794

# that all previous search tips are either in

795

# self._imported_dotted_revno or in self._interesting_ancestor_ids.

796

# _imported_dotted_revno will be filtered in the first

797

# _split_search_tips_by_gdfo call, so we just filter out already

798

# interesting ones.

799

interesting = self._interesting_ancestor_ids

800

self._search_tips = set([r[0] for r in res if r[0] not in interesting])

801

# TODO: For search tips we will be removing, we don't need to join

802

# against revision since we should already have them. There may

803

# be other ways that we already know gdfo. It may be cheaper to

804

# check first.

805

self._stats['num_search_tips'] += len(self._search_tips)

806

self._known_gdfo.update(res)

807

808

def _ensure_lh_parent_info(self):

809

"""LH parents of interesting_ancestor_ids is either present or pending.

810

811

Either the data should be in _imported_dotted_revno, or the lh parent

812

should be in interesting_ancestor_ids (meaning we will number it).

813

"""

814

#XXX REMOVE: pmap = self._parent_map

815

missing_parent_ids = set()

816

for db_id in self._interesting_ancestor_ids:

817

parent_ids = self._get_parents(db_id)

818

if not parent_ids: # no parents, nothing to add

819

continue

820

lh_parent = parent_ids[0]

821

if lh_parent in self._interesting_ancestor_ids:

822

continue

823

if lh_parent in self._imported_dotted_revno:

824

continue

825

missing_parent_ids.add(lh_parent)

826

missing_parent_ids.difference_update(self._ghosts)

827

while missing_parent_ids:

828

self._stats['step mainline ensure LH'] += 1

829

self._step_mainline()

830

missing_parent_ids = missing_parent_ids.difference(

831

self._imported_dotted_revno)

832

833

def _find_interesting_ancestry(self):

834

self._find_needed_mainline()

835

self._get_initial_search_tips()

836

while self._search_tips:

837

# We don't know whether these search tips are known interesting, or

838

# known uninteresting

839

unknown = list(self._search_tips)

840

while unknown:

841

unknown = self._split_search_tips_by_gdfo(unknown)

842

if not unknown:

843

break

844

unknown = self._split_interesting_using_children(unknown)

845

if not unknown:

846

break

847

# The current search tips are the 'newest' possible tips right

848

# now. If we can't classify them as definitely being

849

# interesting, then we need to step the mainline until we can.

850

# This means that the current search tips have children that

851

# could be merged into an earlier mainline, walk the mainline

852

# to see if we can resolve that.

853

# Note that relying strictly on gdfo is a bit of a waste here,

854

# because you may have a rev with 10 children before it lands

855

# in mainline, but all 11 revs will be in the dotted_revno

856

# cache for that mainline.

857

self._stats['step mainline unknown'] += 1

858

self._step_mainline()

859

# All search_tips are known to either be interesting or

860

# uninteresting. Walk any search tips that remain.

861

self._step_search_tips()

862

# We're now sure we have all of the now-interesting revisions. To

863

# number them, we need their left-hand parents to be in

864

# _imported_dotted_revno

865

self._ensure_lh_parent_info()

866

867

def _update_info_from_dotted_revno(self, dotted_info):

868

"""Update info like 'child_seen' from the dotted_revno info."""

869

# TODO: We can move this iterator into a parameter, and have it

870

# continuously updated from _step_mainline()

871

self._imported_dotted_revno.update(dotted_info)

872

self._known_dotted.update([i[1][0] for i in dotted_info])

873

for db_id, (revno, eom, depth) in dotted_info:

874

if len(revno) > 1: # dotted revno, make sure branch count is right

875

base_revno = revno[0]

876

if (base_revno not in self._revno_to_branch_count

877

or revno[1] > self._revno_to_branch_count[base_revno]):

878

self._revno_to_branch_count[base_revno] = revno[1]

879

branch_key = revno[:2]

880

mini_revno = revno[2]

881

else:

882

# *mainline* branch

883

branch_key = 0

884

mini_revno = revno[0]

885

# We found a mainline branch, make sure it is marked as such

886

self._revno_to_branch_count.setdefault(0, 0)

887

if (branch_key not in self._branch_to_child_count

888

or mini_revno > self._branch_to_child_count[branch_key]):

889

self._branch_to_child_count[branch_key] = mini_revno

890

891

def _is_first_child(self, parent_id):

892

"""Is this the first child seen for the given parent?"""

893

if parent_id in self._seen_parents:

894

return False

895

# We haven't seen this while walking, but perhaps the already merged

896

# stuff has.

897

self._seen_parents.add(parent_id)

898

if parent_id not in self._imported_dotted_revno:

899

# Haven't seen this parent merged before, so we can't have seen

900

# children of it

901

return True

902

revno = self._imported_dotted_revno[parent_id][0]

903

if len(revno) > 1:

904

branch_key = revno[:2]

905

mini_revno = revno[2]

906

else:

907

branch_key = 0

908

mini_revno = revno[0]

909

if self._branch_to_child_count.get(branch_key, 0) > mini_revno:

910

# This revision shows up in the graph, but another revision in this

911

# branch shows up later, so this revision must have already been

912

# seen

913

return False

914

# If we got this far, it doesn't appear to have been seen.

915

return True

916

917

def _get_parents(self, db_id):

918

if db_id in self._parent_map:

919

parent_ids = self._parent_map[db_id]

920

else:

921

parent_res = self._cursor.execute(

922

"SELECT parent FROM parent WHERE child = ?"

923

" ORDER BY parent_idx", (db_id,)).fetchall()

924

parent_ids = static_tuple.StaticTuple.from_sequence(

925

[r[0] for r in parent_res])

926

self._parent_map[db_id] = parent_ids

927

return parent_ids

928

929

def _push_node(self, db_id, merge_depth):

930

# TODO: Check if db_id is a ghost (not allowed on the stack)

931

self._stats['pushed'] += 1

932

if db_id not in self._interesting_ancestor_ids:

933

# This is a parent that we really don't need to number

934

self._stats['pushed uninteresting'] += 1

935

return

936

parent_ids = self._get_parents(db_id)

937

if len(parent_ids) <= 0:

938

left_parent = None

939

# We are dealing with a 'new root' possibly because of a ghost,

940

# possibly because of merging a new ancestry.

941

# KnownGraph.merge_sort just always says True here, so stick with

942

# that

943

is_first = True

944

else:

945

left_parent = parent_ids[0]

946

if left_parent in self._ghosts:

947

left_parent = None

948

is_first = True

949

else:

950

is_first = self._is_first_child(left_parent)

951

# Note: we don't have to filter out self._ghosts here, as long as we do

952

# it in _push_node

953

pending_parents = static_tuple.StaticTuple.from_sequence(

954

[p for p in parent_ids[1:] if p not in self._ghosts])

955

# v- logically probably better as a tuple or object. We currently

956

# modify it in place, so we use a list

957

self._depth_first_stack.append(

958

_MergeSortNode(db_id, merge_depth, left_parent, pending_parents,

959

is_first))

960

961

def _step_to_latest_branch(self, base_revno):

962

"""Step the mainline until we've loaded the latest sub-branch.

963

964

This is used when we need to create a new child branch. We need to

965

ensure that we've loaded the most-recently-merged branch, so that we

966

can generate the correct branch counter.

967

968

For example, if you have a revision whose left-hand parent is 1.2.3,

969

you need to load mainline revisions until you find some revision like

970

(1.?.1). This will ensure that you have the most recent branch merged

971

to mainline that was branched from the revno=1 revision in mainline.

972

973

Note that if we find (1,3,1) before finding (1,2,1) that is ok. As long

974

as we have found the first revision of any sub-branch, we know that

975

we've found the most recent (since we walk backwards).

976

977

:param base_revno: The revision that this branch is based on. 0 means

978

that this is a new-root branch.

979

:return: None

980

"""

981

self._stats['step to latest'] += 1

982

step_count = 0

983

start_point = self._imported_mainline_id

984

found = None

985

while self._imported_mainline_id is not None:

986

if (base_revno,) in self._known_dotted:

987

# We have walked far enough to load the original revision,

988

# which means we've loaded all children.

989

self._stats['step to latest found base'] += 1

990

found = (base_revno,)

991

break

992

# Estimate what is the most recent branch, and see if we have read

993

# its first revision

994

branch_count = self._revno_to_branch_count.get(base_revno, 0)

995

root_of_branch_revno = (base_revno, branch_count, 1)

996

# Note: if branch_count == 0, that means we haven't seen any

997

# other branches for this revision.

998

if root_of_branch_revno in self._known_dotted:

999

found = root_of_branch_revno

1000

break

1001

self._stats['step mainline to-latest'] += 1

1002

if base_revno == 0:

1003

self._stats['step mainline to-latest NULL'] += 1

1004

self._step_mainline()

1005

step_count += 1

1006

1007

def _pop_node(self):

1008

"""Move the last node from the _depth_first_stack to _scheduled_stack.

1009

1010

This is the most left-hand node that we are able to find.

1011

"""

1012

node = self._depth_first_stack.pop()

1013

if node._left_parent is not None:

1014

parent_revno = self._imported_dotted_revno[node._left_parent][0]

1015

if node._is_first: # We simply number as parent + 1

1016

if len(parent_revno) == 1:

1017

mini_revno = parent_revno[0] + 1

1018

revno = (mini_revno,)

1019

# Not sure if we *have* to maintain it, but it does keep

1020

# our data-structures consistent

1021

if mini_revno > self._branch_to_child_count[0]:

1022

self._branch_to_child_count[0] = mini_revno

1023

else:

1024

revno = parent_revno[:2] + (parent_revno[2] + 1,)

1025

else:

1026

# we need a new branch number. To get this correct, we have to

1027

# make sure that the beginning of this branch has been loaded

1028

if len(parent_revno) > 1:

1029

# if parent_revno is a mainline, then

1030

# _ensure_lh_parent_info should have already loaded enough

1031

# data. So we only do this when the parent is a merged

1032

# revision.

1033

self._step_to_latest_branch(parent_revno[0])

1034

base_revno = parent_revno[0]

1035

branch_count = (

1036

self._revno_to_branch_count.get(base_revno, 0) + 1)

1037

self._revno_to_branch_count[base_revno] = branch_count

1038

revno = (base_revno, branch_count, 1)

1039

else:

1040

# We found a new root. There are 2 cases:

1041

# a) This is the very first revision in the branch. In which case

1042

# self._revno_to_branch_count won't have any counts for

1043

# 'revno' 0.

1044

# b) This is a ghost / the first revision in a branch that was

1045

# merged. We need to allocate a new branch number.

1046

# This distinction is pretty much the same as the 'is_first'

1047

# check for revs with a parent if you consider the NULL revision

1048

# to be revno 0.

1049

# We have to walk back far enough to be sure that we have the

1050

# most-recent merged new-root. This can be detected by finding

1051

# any new root's first revision. And, of course, we should find

1052

# the last one first while walking backwards.

1053

# Theory:

1054

# When you see (0,X,1) you've reached the point where the X

1055

# number was chosen. A hypothetical (0,X+1,1) revision would

1056

# only be numbered X+1 if it was merged after (0,X,1). Thus

1057

# the *first* (0,?,1) revision you find merged must be the

1058

# last.

1059

1060

self._step_to_latest_branch(0)

1061

branch_count = self._revno_to_branch_count.get(0, -1) + 1

1062

self._revno_to_branch_count[0] = branch_count

1063

if branch_count == 0: # This is the mainline

1064

revno = (1,)

1065

self._branch_to_child_count[0] = 1

1066

else:

1067

revno = (0, branch_count, 1)

1068

if not self._scheduled_stack:

1069

# For all but mainline revisions, we break on the end-of-merge. So

1070

# when we start new numbering, end_of_merge is True. For mainline

1071

# revisions, this is only true when we don't have a parent.

1072

end_of_merge = True

1073

if node._left_parent is not None and node.merge_depth == 0:

1074

end_of_merge = False

1075

else:

1076

prev_node = self._scheduled_stack[-1]

1077

if prev_node.merge_depth < node.merge_depth:

1078

end_of_merge = True

1079

elif (prev_node.merge_depth == node.merge_depth

1080

and prev_node.key not in self._parent_map[node.key]):

1081

# Next node is not a direct parent

1082

end_of_merge = True

1083

else:

1084

end_of_merge = False

1085

revno = static_tuple.StaticTuple.from_sequence(revno)

1086

node.revno = revno

1087

node.end_of_merge = end_of_merge

1088

self._imported_dotted_revno[node.key] = static_tuple.StaticTuple(

1089

revno, end_of_merge, node.merge_depth)

1090

self._known_dotted.add(revno)

1091

node._pending_parents = None

1092

self._scheduled_stack.append(node)

1093

1094

def _compute_merge_sort(self):

1095

self._depth_first_stack = []

1096

self._scheduled_stack = []

1097

self._seen_parents = set()

1098

if not self._mainline_db_ids:

1099

# Nothing to number

1100

return

1101

self._push_node(self._mainline_db_ids[0], 0)

1102

1103

while self._depth_first_stack:

1104

last = self._depth_first_stack[-1]

1105

if last._left_pending_parent is None and not last._pending_parents:

1106

# The parents have been processed, pop the node

1107

self._pop_node()

1108

continue

1109

while (last._left_pending_parent is not None

1110

or last._pending_parents):

1111

if last._left_pending_parent is not None:

1112

# Push on the left-hand-parent

1113

next_db_id = last._left_pending_parent

1114

last._left_pending_parent = None

1115

else:

1116

pending_parents = last._pending_parents

1117

next_db_id = pending_parents[-1]

1118

last._pending_parents = pending_parents[:-1]

1119

if next_db_id in self._imported_dotted_revno:

1120

continue

1121

if next_db_id == last._left_parent: #Is the left-parent?

1122

next_merge_depth = last.merge_depth

1123

else:

1124

next_merge_depth = last.merge_depth + 1

1125

self._push_node(next_db_id, next_merge_depth)

1126

# And switch to the outer loop

1127

break

1128

1129

def topo_order(self):

1130

self._find_interesting_ancestry()

1131

self._compute_merge_sort()

1132

return list(reversed(self._scheduled_stack))

1133

1134

1135

class Querier(object):

1136

"""Perform queries on an existing history db."""

1137

1138

def __init__(self, db_path, a_branch):

1139

self._db_path = db_path

1140

self._db_conn = None

1141

self._cursor = None

1142

self._importer_lock = None

1143

self._branch = a_branch

1144

self._branch_tip_rev_id = a_branch.last_revision()

1145

self._branch_tip_db_id = self._get_db_id(self._branch_tip_rev_id)

1146

self._tip_is_imported = False

1147

self._stats = defaultdict(lambda: 0)

1148

1149

def set_importer_lock(self, lock):

1150

"""Add a thread-lock for building and running an Importer.

1151

1152

The DB back-end is generally single-writer, so add a thread lock to

1153

avoid having two writers trying to access it at the same time.

1154

1155

This will be used as part of _import_tip. Note that it doesn't (yet?)

1156

support anything like timeout.

1157

"""

1158

self._importer_lock = lock

1159

1160

def _get_cursor(self):

1161

if self._cursor is not None:

1162

return self._cursor

1163

db_conn = dbapi2.connect(self._db_path)

1164

self._db_conn = db_conn

1165

self._cursor = self._db_conn.cursor()

1166

return self._cursor

1167

1168

def ensure_branch_tip(self):

1169

"""Ensure that the branch tip has been imported.

1170

1171

This will run Importer if it has not.

1172

"""

1173

if self._branch_tip_db_id is not None and self._tip_is_imported:

1174

return

1175

if self._branch_tip_db_id is None:

1176

# This revision has not been seen by the DB, so we know it isn't

1177

# imported

1178

self._import_tip()

1179

return

1180

if self._is_imported_db_id(self._branch_tip_db_id):

1181

# This revision was seen, and imported

1182

self._tip_is_imported = True

1183

return

1184

self._import_tip()

1185

1186

def _import_tip(self):

1187

if self._cursor is not None:

1188

self.close()

1189

if self._importer_lock is not None:

1190

self._importer_lock.acquire()

1191

try:

1192

t = time.time()

1193

importer = Importer(self._db_path, self._branch,

1194

tip_revision_id=self._branch_tip_rev_id,

1195

incremental=True)

1196

importer.do_import()

1197

tdelta = time.time() - t

1198

if 'history_db' in debug.debug_flags:

1199

trace.note('imported %d nodes on-the-fly in %.3fs'

1200

% (importer._stats.get('total_nodes_inserted', 0),

1201

tdelta))

1202

self._db_conn = importer._db_conn

1203

self._cursor = importer._cursor

1204

self._branch_tip_db_id = self._get_db_id(self._branch_tip_rev_id)

1205

self._tip_is_imported = True

1206

finally:

1207

if self._importer_lock is not None:

1208

self._importer_lock.release()

1209

1210

def _is_imported_db_id(self, tip_db_id):

1211

res = self._get_cursor().execute(

1212

"SELECT count(*) FROM dotted_revno"

1213

" WHERE tip_revision = ?"

1214

" AND tip_revision = merged_revision",

1215

(tip_db_id,)).fetchone()

1216

return res[0] > 0

1217

1218

def close(self):

1219

if self._db_conn is not None:

1220

self._db_conn.close()

1221

self._db_conn = None

1222

self._cursor = None

1223

1224

def _get_db_id(self, revision_id):

1225

try:

1226

db_res = self._get_cursor().execute(

1227

'SELECT db_id FROM revision'

1228

' WHERE revision_id = ?',

1229

[revision_id]).fetchone()

1230

except dbapi2.OperationalError:

1231

return None

1232

if db_res is None:

1233

return None

1234

return db_res[0]

1235

1236

def get_lh_parent_rev_id(self, revision_id):

1237

parent_res = self._get_cursor().execute("""

1238

SELECT p.revision_id

1239

FROM parent, revision as c, revision as p

1240

WHERE parent.child = c.db_id

1241

AND parent.parent = p.db_id

1242

AND c.revision_id = ?

1243

AND parent_idx = 0

1244

""", (revision_id,)).fetchone()

1245

self._stats['lh_parent_step'] += 1

1246

if parent_res is None:

1247

return None

1248

return parent_res[0]

1249

1250

def get_children(self, revision_id):

1251

"""Returns all the children the db knows about for this revision_id.

1252

1253

(we should probably try to filter it based on ancestry of

1254

self._branch_tip_rev_id...)

1255

"""

1256

# One option for people who care, is to just have them turn around a

1257

# request for get_dotted_revnos(), and things that aren't there are not

1258

# in the ancestry.

1259

cursor = self._get_cursor()

1260

res = cursor.execute("SELECT c.revision_id"

1261

" FROM revision p, parent, revision c"

1262

" WHERE child = c.db_id"

1263

" AND parent = p.db_id"

1264

" AND p.revision_id = ?",

1265

(revision_id,)).fetchall()

1266

return [r[0] for r in res]

1267

1268

def _get_lh_parent_db_id(self, revision_db_id):

1269

parent_res = self._get_cursor().execute("""

1270

SELECT parent.parent

1271

FROM parent

1272

WHERE parent.child = ?

1273

AND parent_idx = 0

1274

""", (revision_db_id,)).fetchone()

1275

self._stats['lh_parent_step'] += 1

1276

if parent_res is None:

1277

return None

1278

return parent_res[0]

1279

1280

def _get_range_key_and_tail(self, tip_db_id):

1281

"""Return the best range w/ head = tip_db_id or None."""

1282

range_res = self._get_cursor().execute(

1283

"SELECT pkey, tail"

1284

" FROM mainline_parent_range"

1285

" WHERE head = ?"

1286

" ORDER BY count DESC LIMIT 1",

1287

(tip_db_id,)).fetchone()

1288

if range_res is None:

1289

tail = self._get_lh_parent_db_id(tip_db_id)

1290

return None, tail

1291

return range_res

1292

1293

def get_dotted_revnos(self, revision_ids):

1294

"""Determine the dotted revno, using the range info, etc."""

1295

self.ensure_branch_tip()

1296

t = time.time()

1297

cursor = self._get_cursor()

1298

tip_db_id = self._branch_tip_db_id

1299

if tip_db_id is None:

1300

return {}

1301

db_ids = set()

1302

db_id_to_rev_id = {}

1303

for rev_id in revision_ids:

1304

db_id = self._get_db_id(rev_id)

1305

if db_id is None:

1306

import pdb; pdb.set_trace()

1307

db_ids.add(db_id)

1308

db_id_to_rev_id[db_id] = rev_id

1309

revnos = {}

1310

while tip_db_id is not None and db_ids:

1311

self._stats['num_steps'] += 1

1312

range_key, next_db_id = self._get_range_key_and_tail(tip_db_id)

1313

if range_key is None:

1314

revno_res = cursor.execute(_add_n_params(

1315

"SELECT merged_revision, revno FROM dotted_revno"

1316

" WHERE tip_revision = ?"

1317

" AND merged_revision IN (%s)",

1318

len(db_ids)),

1319

[tip_db_id] + list(db_ids)).fetchall()

1320

else:

1321

revno_res = cursor.execute(_add_n_params(

1322

"SELECT merged_revision, revno"

1323

" FROM dotted_revno, mainline_parent"

1324

" WHERE tip_revision = mainline_parent.revision"

1325

" AND mainline_parent.range = ?"

1326

" AND merged_revision IN (%s)",

1327

len(db_ids)),

1328

[range_key] + list(db_ids)).fetchall()

1329

tip_db_id = next_db_id

1330

for db_id, revno in revno_res:

1331

db_ids.discard(db_id)

1332

revnos[db_id_to_rev_id[db_id]] = tuple(map(int,

1333

revno.split('.')))

1334

self._stats['query_time'] += (time.time() - t)

1335

return revnos

1336

1337

def get_revision_ids(self, revnos):

1338

"""Map from a dotted-revno back into a revision_id."""

1339

self.ensure_branch_tip()

1340

t = time.time()

1341

tip_db_id = self._branch_tip_db_id

1342

# TODO: If tip_db_id is None, maybe we want to raise an exception here?

1343

# To indicate that the branch has not been imported yet

1344

revno_strs = set(['.'.join(map(str, revno)) for revno in revnos])

1345

revno_map = {}

1346

cursor = self._get_cursor()

1347

while tip_db_id is not None and revno_strs:

1348

self._stats['num_steps'] += 1

1349

range_key, next_db_id = self._get_range_key_and_tail(tip_db_id)

1350

if range_key is None:

1351

revision_res = cursor.execute(_add_n_params(

1352

"SELECT revision_id, revno"

1353

" FROM dotted_revno, revision"

1354

" WHERE merged_revision = revision.db_id"

1355

" AND tip_revision = ?"

1356

" AND revno IN (%s)", len(revno_strs)),

1357

[tip_db_id] + list(revno_strs)).fetchall()

1358

else:

1359

revision_res = cursor.execute(_add_n_params(

1360

"SELECT revision_id, revno"

1361

" FROM dotted_revno, mainline_parent, revision"

1362

" WHERE tip_revision = mainline_parent.revision"

1363

" AND merged_revision = revision.db_id"

1364

" AND mainline_parent.range = ?"

1365

" AND revno IN (%s)", len(revno_strs)),

1366

[range_key] + list(revno_strs)).fetchall()

1367

tip_db_id = next_db_id

1368

for revision_id, revno_str in revision_res:

1369

dotted = tuple(map(int, revno_str.split('.')))

1370

revno_strs.discard(revno_str)

1371

revno_map[dotted] = revision_id

1372

self._stats['query_time'] += (time.time() - t)

1373

return revno_map

1374

1375

def get_mainline_where_merged(self, revision_ids):

1376

"""Determine what mainline revisions merged the given revisions."""

1377

self.ensure_branch_tip()

1378

t = time.time()

1379

tip_db_id = self._branch_tip_db_id

1380

if tip_db_id is None:

1381

return {}

1382

cursor = self._get_cursor()

1383

db_ids = set()

1384

db_id_to_rev_id = {}

1385

for rev_id in revision_ids:

1386

db_id = self._get_db_id(rev_id)

1387

if db_id is None:

1388

import pdb; pdb.set_trace()

1389

db_ids.add(db_id)

1390

db_id_to_rev_id[db_id] = rev_id

1391

revision_to_mainline_map = {}

1392

while tip_db_id is not None and db_ids:

1393

self._stats['num_steps'] += 1

1394

range_key, next_db_id = self._get_range_key_and_tail(tip_db_id)

1395

if range_key is None:

1396

mainline_res = cursor.execute(_add_n_params(

1397

"SELECT revision_id, merged_revision"

1398

" FROM dotted_revno, revision"

1399

" WHERE tip_revision = ?"

1400

" AND tip_revision = revision.db_id"

1401

" AND merged_revision IN (%s)",

1402

len(db_ids)),

1403

[tip_db_id] + list(db_ids)).fetchall()

1404

else:

1405

mainline_res = cursor.execute(_add_n_params(

1406

"SELECT revision_id, merged_revision"

1407

" FROM dotted_revno, mainline_parent, revision"

1408

" WHERE tip_revision = mainline_parent.revision"

1409

" AND tip_revision = db_id"

1410

" AND mainline_parent.range = ?"

1411

" AND merged_revision IN (%s)",

1412

len(db_ids)),

1413

[range_key] + list(db_ids)).fetchall()

1414

tip_db_id = next_db_id

1415

for mainline_revision_id, merged_db_id in mainline_res:

1416

db_ids.discard(merged_db_id)

1417

revision_to_mainline_map[db_id_to_rev_id[merged_db_id]] = \

1418

mainline_revision_id

1419

self._stats['query_time'] += (time.time() - t)

1420

return revision_to_mainline_map

1421

1422

def _get_mainline_range_starting_at(self, head_db_id):

1423

"""Try to find a range at this tip.

1424

1425

If a range cannot be found, just find the next parent.

1426

:return: (range_or_None, next_db_id)

1427

"""

1428

cursor = self._get_cursor()

1429

range_key, next_db_id = self._get_range_key_and_tail(tip_db_id)

1430

if range_key is None:

1431

return None, next_db_id

1432

# TODO: Is ORDER BY dist ASC expensive? We know a priori that the list

1433

# is probably already in sorted order, but does sqlite know that?

1434

range_db_ids = cursor.execute(

1435

"SELECT revision FROM mainline_parent"

1436

" WHERE range = ? ORDER BY dist ASC",

1437

(range_key,)).fetchall()

1438

db_ids = [r[0] for r in range_db_ids]

1439

return db_ids, next_db_id

1440

1441

def walk_mainline(self):

1442

t = time.time()

1443

db_id = self._get_db_id(self._branch_tip_rev_id)

1444

all_ids = []

1445

while db_id is not None:

1446

self._stats['num_steps'] += 1

1447

next_range, next_db_id = self._get_mainline_range_starting_at(db_id)

1448

if next_range is None:

1449

# No range, so switch to using by-parent search

1450

all_ids.append(db_id)

1451

else:

1452

assert next_range[0] == db_id

1453

all_ids.extend(next_range)

1454

db_id = next_db_id

1455

self._stats['query_time'] += (time.time() - t)

1456

return all_ids

1457

1458

def walk_ancestry(self):

1459

"""Walk the whole ancestry.

1460

1461

Use the information from the dotted_revno table and the mainline_parent

1462

table to speed things up.

1463

"""

1464

db_id = self._get_db_id(self._branch_tip_rev_id)

1465

all_ancestors = set()

1466

t = time.time()

1467

cursor = self._get_cursor()

1468

while db_id is not None:

1469

self._stats['num_steps'] += 1

1470

range_key, next_db_id = self._get_range_key_and_tail(db_id)

1471

if range_key is None:

1472

merged_revs = cursor.execute(

1473

"SELECT merged_revision FROM dotted_revno"

1474

" WHERE tip_revision = ?",

1475

(db_id,)).fetchall()

1476

all_ancestors.update([r[0] for r in merged_revs])

1477

else:

1478

merged_revs = cursor.execute(

1479

"SELECT merged_revision FROM dotted_revno, mainline_parent"

1480

" WHERE tip_revision = mainline_parent.revision"

1481

" AND mainline_parent.range = ?",

1482

[range_key]).fetchall()

1483

all_ancestors.update([r[0] for r in merged_revs])

1484

db_id = next_db_id

1485

self._stats['query_time'] += (time.time() - t)

1486

return all_ancestors

1487

1488

def _find_tip_containing(self, tip_db_id, merged_db_id):

1489

"""Walk backwards until you find the tip that contains the given id."""

1490

cursor = self._get_cursor()

1491

while tip_db_id is not None:

1492

if tip_db_id == merged_db_id:

1493

# A tip obviously contains itself

1494

self._stats['step_find_tip_as_merged'] += 1

1495

return tip_db_id

1496

self._stats['num_steps'] += 1

1497

self._stats['step_find_tip_containing'] += 1

1498

range_key, next_db_id = self._get_range_key_and_tail(tip_db_id)

1499

if range_key is None:

1500

present_res = cursor.execute(

1501

"SELECT 1 FROM dotted_revno"

1502

" WHERE tip_revision = ?"

1503

" AND merged_revision = ?",

1504

[tip_db_id, merged_db_id]).fetchone()

1505

else:

1506

present_res = cursor.execute(

1507

"SELECT 1"

1508

" FROM dotted_revno, mainline_parent"

1509

" WHERE tip_revision = mainline_parent.revision"

1510

" AND mainline_parent.range = ?"

1511

" AND merged_revision = ?",

1512

[range_key, merged_db_id]).fetchone()

1513

if present_res is not None:

1514

# We found a tip that contains merged_db_id

1515

return tip_db_id

1516

tip_db_id = next_db_id

1517

return None

1518

1519

def _get_merge_sorted_range(self, tip_db_id, start_db_id, stop_db_id):

1520

"""Starting at the given tip, read all merge_sorted data until stop."""

1521

if start_db_id is None or start_db_id == tip_db_id:

1522

found_start = True

1523

else:

1524

found_start = False

1525

cursor = self._get_cursor()

1526

while tip_db_id is not None:

1527

self._stats['num_steps'] += 1

1528

self._stats['step_get_merge_sorted'] += 1

1529

range_key, next_db_id = self._get_range_key_and_tail(tip_db_id)

1530

if range_key is None:

1531

merged_res = cursor.execute(

1532

"SELECT db_id, revision_id, merge_depth, revno,"

1533

" end_of_merge"

1534

" FROM dotted_revno, revision"

1535

" WHERE tip_revision = ?"

1536

" AND db_id = merged_revision"

1537

" ORDER BY dist",

1538

(tip_db_id,)).fetchall()

1539

else:

1540

# NOTE: Adding the ORDER BY costs us 981ms - 844ms = 137ms when

1541

# doing 'bzr log -n0 -r -10..-1' on bzr.dev.

1542

# That seems like a lot. Extracting them without sorting

1543

# on them costs about the same amount. So the guess is

1544

# that adding the extra columns requires more I/O.

1545

# At the moment, SELECT order == INSERT order, so we don't

1546

# strictly need it. I don't know that we can trust that,

1547

# though.

1548

merged_res = cursor.execute(

1549

"SELECT db_id, revision_id, merge_depth, revno,"

1550

" end_of_merge"

1551

# " , mainline_parent.dist as mp_dist"

1552

# " , dotted_revno.dist as dr_dist"

1553

" FROM dotted_revno, revision, mainline_parent"

1554

" WHERE tip_revision = mainline_parent.revision"

1555

" AND mainline_parent.range = ?"

1556

" AND db_id = merged_revision",

1557

# " ORDER BY mainline_parent.dist, dotted_revno.dist",

1558

[range_key]).fetchall()

1559

if found_start:

1560

for db_id, r_id, depth, revno_str, eom in merged_res:

1561

if stop_db_id is not None and db_id == stop_db_id:

1562

return

1563

revno = tuple(map(int, revno_str.split('.')))

1564

yield r_id, depth, revno, eom

1565

else:

1566

for info in merged_res:

1567

if not found_start and info[0] == start_db_id:

1568

found_start = True

1569

if found_start:

1570

if stop_db_id is not None and info[0] == stop_db_id:

1571

return

1572

db_id, r_id, depth, revno_str, eom = info

1573

revno = tuple(map(int, revno_str.split('.')))

1574

yield r_id, depth, revno, eom

1575

tip_db_id = next_db_id

1576

1577

def iter_merge_sorted_revisions(self, start_revision_id=None,

1578

stop_revision_id=None):

1579

"""See Branch.iter_merge_sorted_revisions()

1580

1581

Note that start and stop differ from the Branch implementation, because

1582

stop is *always* exclusive. You can simulate the rest by careful

1583

selection of stop.

1584

"""

1585

self.ensure_branch_tip()

1586

t = time.time()

1587

tip_db_id = self._branch_tip_db_id

1588

if tip_db_id is None:

1589

return []

1590

if start_revision_id is not None:

1591

start_db_id = self._get_db_id(start_revision_id)

1592

else:

1593

start_db_id = tip_db_id

1594

stop_db_id = None

1595

if stop_revision_id is not None:

1596

stop_db_id = self._get_db_id(stop_revision_id)

1597

# Seek fast until we find start_db_id

1598

merge_sorted = []

1599

revnos = {}

1600

tip_db_id = self._find_tip_containing(tip_db_id, start_db_id)

1601

# Now that you have found the first tip containing the given start

1602

# revision, pull in data until you walk off the history, or you find

1603

# the stop revision

1604

merge_sorted = list(

1605

self._get_merge_sorted_range(tip_db_id, start_db_id, stop_db_id))

1606

self._stats['query_time'] += (time.time() - t)

1607

return merge_sorted

1608

1609

def heads(self, revision_ids):

1610

"""Compute Graph.heads() on the given data."""

1611

raise NotImplementedError(self.heads)