1
##############################################################################
3
# Copyright (c) 2003 Zope Corporation and Contributors.
6
# This software is subject to the provisions of the Zope Public License,
7
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
8
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
9
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
10
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
11
# FOR A PARTICULAR PURPOSE
13
##############################################################################
15
Utilities for working with BTrees (TreeSets, Buckets, and Sets) at a low
18
The primary function is check(btree), which performs value-based consistency
19
checks of a kind btree._check() does not perform. See the function docstring
22
display(btree) displays the internal structure of a BTree (TreeSet, etc) to
25
CAUTION: When a BTree node has only a single bucket child, it can be
26
impossible to get at the bucket from Python code (__getstate__() may squash
27
the bucket object out of existence, as a pickling storage optimization). In
28
such a case, the code here synthesizes a temporary bucket with the same keys
29
(and values, if the bucket is of a mapping type). This has no first-order
30
consequences, but can mislead if you pay close attention to reported object
31
addresses and/or object identity (the synthesized bucket has an address
32
that doesn't exist in the actual BTree).
35
from types import TupleType
37
from BTrees.OOBTree import OOBTree, OOBucket, OOSet, OOTreeSet
38
from BTrees.OIBTree import OIBTree, OIBucket, OISet, OITreeSet
39
from BTrees.IOBTree import IOBTree, IOBucket, IOSet, IOTreeSet
40
from BTrees.IIBTree import IIBTree, IIBucket, IISet, IITreeSet
41
from BTrees.IFBTree import IFBTree, IFBucket, IFSet, IFTreeSet
42
from BTrees.OLBTree import OLBTree, OLBucket, OLSet, OLTreeSet
43
from BTrees.LOBTree import LOBTree, LOBucket, LOSet, LOTreeSet
44
from BTrees.LLBTree import LLBTree, LLBucket, LLSet, LLTreeSet
45
from BTrees.LFBTree import LFBTree, LFBucket, LFSet, LFTreeSet
47
from ZODB.utils import positive_id, oid_repr
49
TYPE_UNKNOWN, TYPE_BTREE, TYPE_BUCKET = range(3)
53
'II', 'IO', 'OI', 'IF',
54
'LL', 'LO', 'OL', 'LF',
57
('BTree', (TYPE_BTREE, True)),
58
('Bucket', (TYPE_BUCKET, True)),
59
('TreeSet', (TYPE_BTREE, False)),
60
('Set', (TYPE_BUCKET, False)),
62
_type2kind[globals()[kv+name]] = kind
66
# TYPE_BTREE or TYPE_BUCKET, is_mapping
69
return _type2kind[type(obj)]
72
BTREE_EMPTY, BTREE_ONE, BTREE_NORMAL = range(3)
74
# If the BTree is empty, returns
78
# If the BTree has only one bucket, sometimes returns
80
# BTREE_ONE, bucket_state, None
84
# BTREE_NORMAL, list of keys, list of kids
86
# and the list of kids has one more entry than the list of keys.
88
# BTree.__getstate__() docs:
90
# For an empty BTree (self->len == 0), None.
92
# For a BTree with one child (self->len == 1), and that child is a bucket,
93
# and that bucket has a NULL oid, a one-tuple containing a one-tuple
94
# containing the bucket's state:
98
# child[0].__getstate__(),
102
# Else a two-tuple. The first element is a tuple interleaving the BTree's
103
# keys and direct children, of size 2*self->len - 1 (key[0] is unused and
104
# is not saved). The second element is the firstbucket:
107
# (child[0], key[1], child[1], key[2], child[2], ...,
108
# key[len-1], child[len-1]),
114
'II', 'IO', 'OI', 'IF',
115
'LL', 'LO', 'OL', 'LF',
117
_btree2bucket[globals()[kv+'BTree']] = globals()[kv+'Bucket']
118
_btree2bucket[globals()[kv+'TreeSet']] = globals()[kv+'Set']
120
def crack_btree(t, is_mapping):
121
state = t.__getstate__()
123
return BTREE_EMPTY, [], []
125
assert isinstance(state, TupleType)
128
assert isinstance(state, TupleType) and len(state) == 1
130
return BTREE_ONE, state, None
132
assert len(state) == 2
133
data, firstbucket = state
145
return BTREE_NORMAL, keys, kids
149
# keys, values # for a mapping; len(keys) == len(values) in this case
151
# keys, [] # for a set
153
# bucket.__getstate__() docs:
155
# For a set bucket (self->values is NULL), a one-tuple or two-tuple. The
156
# first element is a tuple of keys, of length self->len. The second element
157
# is the next bucket, present if and only if next is non-NULL:
160
# (keys[0], keys[1], ..., keys[len-1]),
161
# <self->next iff non-NULL>
164
# For a mapping bucket (self->values is not NULL), a one-tuple or two-tuple.
165
# The first element is a tuple interleaving keys and values, of length
166
# 2 * self->len. The second element is the next bucket, present iff next is
170
# (keys[0], values[0], keys[1], values[1], ...,
171
# keys[len-1], values[len-1]),
172
# <self->next iff non-NULL>
175
def crack_bucket(b, is_mapping):
176
state = b.__getstate__()
177
assert isinstance(state, TupleType)
178
assert 1 <= len(state) <= 2
195
def type_and_adr(obj):
196
if hasattr(obj, '_p_oid'):
197
oid = oid_repr(obj._p_oid)
200
return "%s (0x%x oid=%s)" % (type(obj).__name__, positive_id(obj), oid)
202
# Walker implements a depth-first search of a BTree (or TreeSet or Set or
203
# Bucket). Subclasses must implement the visit_btree() and visit_bucket()
204
# methods, and arrange to call the walk() method. walk() calls the
205
# visit_XYZ() methods once for each node in the tree, in depth-first
206
# left-to-right order.
209
def __init__(self, obj):
212
# obj is the BTree (BTree or TreeSet).
213
# path is a list of indices, from the root. For example, if a BTree node
214
# is child[5] of child[3] of the root BTree, [3, 5].
215
# parent is the parent BTree object, or None if this is the root BTree.
216
# is_mapping is True for a BTree and False for a TreeSet.
217
# keys is a list of the BTree's internal keys.
218
# kids is a list of the BTree's children.
219
# If the BTree is an empty root node, keys == kids == [].
220
# Else len(kids) == len(keys) + 1.
221
# lo and hi are slice bounds on the values the elements of keys *should*
222
# lie in (lo inclusive, hi exclusive). lo is None if there is no lower
223
# bound known, and hi is None if no upper bound is known.
225
def visit_btree(self, obj, path, parent, is_mapping,
227
raise NotImplementedError
229
# obj is the bucket (Bucket or Set).
230
# path is a list of indices, from the root. For example, if a bucket
231
# node is child[5] of child[3] of the root BTree, [3, 5].
232
# parent is the parent BTree object.
233
# is_mapping is True for a Bucket and False for a Set.
234
# keys is a list of the bucket's keys.
235
# values is a list of the bucket's values.
236
# If is_mapping is false, values == []. Else len(keys) == len(values).
237
# lo and hi are slice bounds on the values the elements of keys *should*
238
# lie in (lo inclusive, hi exclusive). lo is None if there is no lower
239
# bound known, and hi is None if no upper bound is known.
241
def visit_bucket(self, obj, path, parent, is_mapping,
242
keys, values, lo, hi):
243
raise NotImplementedError
248
stack = [(obj, path, None, None, None)]
250
obj, path, parent, lo, hi = stack.pop()
251
kind, is_mapping = classify(obj)
252
if kind is TYPE_BTREE:
253
bkind, keys, kids = crack_btree(obj, is_mapping)
254
if bkind is BTREE_NORMAL:
255
# push the kids, in reverse order (so they're popped off
256
# the stack in forward order)
258
for i in range(len(kids)-1, -1, -1):
259
newlo, newhi = lo, hi
264
stack.append((kids[i],
270
elif bkind is BTREE_EMPTY:
273
assert bkind is BTREE_ONE
274
# Yuck. There isn't a bucket object to pass on, as
275
# the bucket state is embedded directly in the BTree
276
# state. Synthesize a bucket.
277
assert kids is None # and "keys" is really the bucket
279
bucket = _btree2bucket[type(obj)]()
280
bucket.__setstate__(keys)
281
stack.append((bucket,
289
self.visit_btree(obj,
298
assert kind is TYPE_BUCKET
299
keys, values = crack_bucket(obj, is_mapping)
300
self.visit_bucket(obj,
310
class Checker(Walker):
311
def __init__(self, obj):
312
Walker.__init__(self, obj)
318
s = "Errors found in %s:" % type_and_adr(self.obj)
319
self.errors.insert(0, s)
320
s = "\n".join(self.errors)
321
raise AssertionError(s)
323
def visit_btree(self, obj, path, parent, is_mapping,
325
self.check_sorted(obj, path, keys, lo, hi)
327
def visit_bucket(self, obj, path, parent, is_mapping,
328
keys, values, lo, hi):
329
self.check_sorted(obj, path, keys, lo, hi)
331
def check_sorted(self, obj, path, keys, lo, hi):
334
if lo is not None and not lo <= x:
335
s = "key %r < lower bound %r at index %d" % (x, lo, i)
336
self.complain(s, obj, path)
337
if hi is not None and not x < hi:
338
s = "key %r >= upper bound %r at index %d" % (x, hi, i)
339
self.complain(s, obj, path)
340
if i < n-1 and not x < keys[i+1]:
341
s = "key %r at index %d >= key %r at index %d" % (
342
x, i, keys[i+1], i+1)
343
self.complain(s, obj, path)
346
def complain(self, msg, obj, path):
347
s = "%s, in %s, path from root %s" % (
350
".".join(map(str, path)))
351
self.errors.append(s)
353
class Printer(Walker):
354
def __init__(self, obj):
355
Walker.__init__(self, obj)
360
def visit_btree(self, obj, path, parent, is_mapping,
362
indent = " " * len(path)
363
print "%s%s %s with %d children" % (
365
".".join(map(str, path)),
371
print "%skey %d: %r" % (indent, i, keys[i])
373
def visit_bucket(self, obj, path, parent, is_mapping,
374
keys, values, lo, hi):
375
indent = " " * len(path)
376
print "%s%s %s with %d keys" % (
378
".".join(map(str, path)),
384
print "%skey %d: %r" % (indent, i, keys[i]),
386
print "value %r" % (values[i],)
389
"""Check internal value-based invariants in a BTree or TreeSet.
391
The btree._check() method checks internal C-level pointer consistency.
392
The check() function here checks value-based invariants: whether the
393
keys in leaf bucket and internal nodes are in strictly increasing order,
394
and whether they all lie in their expected range. The latter is a subtle
395
invariant that can't be checked locally -- it requires propagating
396
range info down from the root of the tree, and modifying it at each
397
level for each child.
399
Raises AssertionError if anything is wrong, with a string detail
400
explaining the problems. The entire tree is checked before
401
AssertionError is raised, and the string detail may be large (depending
402
on how much went wrong).
405
Checker(btree).check()
408
"Display the internal structure of a BTree, Bucket, TreeSet or Set."
409
Printer(btree).display()