4
* Copyright (C) 2008 10gen Inc.
6
* This program is free software: you can redistribute it and/or modify
7
* it under the terms of the GNU Affero General Public License, version 3,
8
* as published by the Free Software Foundation.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU Affero General Public License for more details.
15
* You should have received a copy of the GNU Affero General Public License
16
* along with this program. If not, see <http://www.gnu.org/licenses/>.
24
#include "clientcursor.h"
26
#include "dbhelpers.h"
31
#define VERIFYTHISLOC dassert( thisLoc.btree() == this );
33
KeyNode::KeyNode(const BucketBasics& bb, const _KeyNode &k) :
34
prevChildBucket(k.prevChildBucket),
35
recordLoc(k.recordLoc), key(bb.data+k.keyDataOfs())
38
const int KeyMax = BucketSize / 10;
40
extern int otherTraceLevel;
41
const int split_debug = 0;
42
const int insert_debug = 0;
44
/* BucketBasics --------------------------------------------------- */
46
inline void BucketBasics::modified(const DiskLoc& thisLoc) {
48
btreeStore->modified(thisLoc);
51
int BucketBasics::Size() const {
52
assert( _Size == BucketSize );
55
inline void BucketBasics::setNotPacked() {
58
inline void BucketBasics::setPacked() {
62
void BucketBasics::_shape(int level, stringstream& ss) {
63
for ( int i = 0; i < level; i++ ) ss << ' ';
65
for ( int i = 0; i < n; i++ )
66
if ( !k(i).prevChildBucket.isNull() )
67
k(i).prevChildBucket.btree()->_shape(level+1,ss);
68
if ( !nextChild.isNull() )
69
nextChild.btree()->_shape(level+1,ss);
75
void BucketBasics::dumpTree(DiskLoc thisLoc, const BSONObj &order) {
77
fullValidate(thisLoc, order);
81
int BucketBasics::fullValidate(const DiskLoc& thisLoc, const BSONObj &order) {
85
massert( 10281 , "assert is misdefined", f);
88
killCurrentOp.checkForInterrupt();
89
assertValid(order, true);
94
out() << thisLoc.toString() << ' ';
95
((BtreeBucket *) this)->dump();
101
for ( int i = 0; i < n; i++ ) {
104
if ( kn.isUsed() ) kc++;
105
if ( !kn.prevChildBucket.isNull() ) {
106
DiskLoc left = kn.prevChildBucket;
107
BtreeBucket *b = left.btree();
108
wassert( b->parent == thisLoc );
109
kc += b->fullValidate(kn.prevChildBucket, order);
112
if ( !nextChild.isNull() ) {
113
BtreeBucket *b = nextChild.btree();
114
wassert( b->parent == thisLoc );
115
kc += b->fullValidate(nextChild, order);
123
void BucketBasics::assertValid(const BSONObj &order, bool force) {
124
if ( !debug && !force )
126
wassert( n >= 0 && n < Size() );
127
wassert( emptySize >= 0 && emptySize < BucketSize );
128
wassert( topSize >= n && topSize <= BucketSize );
131
for ( int i = 0; i < n-1; i++ ) {
132
BSONObj k1 = keyNode(i).key;
133
BSONObj k2 = keyNode(i+1).key;
134
int z = k1.woCompare(k2, order); //OK
136
out() << "ERROR: btree key order corrupt. Keys:" << endl;
137
if ( ++nDumped < 5 ) {
138
for ( int j = 0; j < n; j++ ) {
139
out() << " " << keyNode(j).key.toString() << endl;
141
((BtreeBucket *) this)->dump();
147
if ( !(k(i).recordLoc < k(i+1).recordLoc) ) {
148
out() << "ERROR: btree key order corrupt (recordloc's wrong). Keys:" << endl;
149
out() << " k(" << i << "):" << keyNode(i).key.toString() << " RL:" << k(i).recordLoc.toString() << endl;
150
out() << " k(" << i+1 << "):" << keyNode(i+1).key.toString() << " RL:" << k(i+1).recordLoc.toString() << endl;
151
wassert( k(i).recordLoc < k(i+1).recordLoc );
159
BSONObj k1 = keyNode(0).key;
160
BSONObj k2 = keyNode(n-1).key;
161
int z = k1.woCompare(k2, order);
164
problem() << "btree keys out of order" << '\n';
166
((BtreeBucket *) this)->dump();
174
inline void BucketBasics::markUnused(int keypos) {
175
assert( keypos >= 0 && keypos < n );
176
k(keypos).setUnused();
179
inline int BucketBasics::totalDataSize() const {
180
return Size() - (data-(char*)this);
183
void BucketBasics::init() {
189
emptySize = totalDataSize();
195
inline void BucketBasics::_unalloc(int bytes) {
200
/* we allocate space from the end of the buffer for data.
201
the keynodes grow from the front.
203
inline int BucketBasics::_alloc(int bytes) {
206
int ofs = totalDataSize() - topSize;
211
void BucketBasics::_delKeyAtPos(int keypos) {
212
assert( keypos >= 0 && keypos <= n );
213
assert( childForPos(keypos).isNull() );
215
assert( n > 0 || nextChild.isNull() );
216
for ( int j = keypos; j < n; j++ )
218
emptySize += sizeof(_KeyNode);
222
/* pull rightmost key from the bucket. this version requires its right child to be null so it
223
does not bother returning that value.
225
void BucketBasics::popBack(DiskLoc& recLoc, BSONObj& key) {
226
massert( 10282 , "n==0 in btree popBack()", n > 0 );
227
assert( k(n-1).isUsed() ); // no unused skipping in this function at this point - btreebuilder doesn't require that
228
KeyNode kn = keyNode(n-1);
229
recLoc = kn.recordLoc;
231
int keysize = kn.key.objsize();
233
massert( 10283 , "rchild not null in btree popBack()", nextChild.isNull());
235
/* weirdly, we also put the rightmost down pointer in nextchild, even when bucket isn't full. */
236
nextChild = kn.prevChildBucket;
239
emptySize += sizeof(_KeyNode);
243
/* add a key. must be > all existing. be careful to set next ptr right. */
244
bool BucketBasics::_pushBack(const DiskLoc& recordLoc, BSONObj& key, const BSONObj &order, DiskLoc prevChild) {
245
int bytesNeeded = key.objsize() + sizeof(_KeyNode);
246
if ( bytesNeeded > emptySize )
248
assert( bytesNeeded <= emptySize );
249
assert( n == 0 || keyNode(n-1).key.woCompare(key, order) <= 0 );
250
emptySize -= sizeof(_KeyNode);
251
_KeyNode& kn = k(n++);
252
kn.prevChildBucket = prevChild;
253
kn.recordLoc = recordLoc;
254
kn.setKeyDataOfs( (short) _alloc(key.objsize()) );
255
char *p = dataAt(kn.keyDataOfs());
256
memcpy(p, key.objdata(), key.objsize());
259
/*void BucketBasics::pushBack(const DiskLoc& recordLoc, BSONObj& key, const BSONObj &order, DiskLoc prevChild, DiskLoc nextChild) {
260
pushBack(recordLoc, key, order, prevChild);
261
childForPos(n) = nextChild;
264
/* insert a key in a bucket with no complexity -- no splits required */
265
bool BucketBasics::basicInsert(const DiskLoc& thisLoc, int keypos, const DiskLoc& recordLoc, const BSONObj& key, const BSONObj &order) {
267
assert( keypos >= 0 && keypos <= n );
268
int bytesNeeded = key.objsize() + sizeof(_KeyNode);
269
if ( bytesNeeded > emptySize ) {
271
if ( bytesNeeded > emptySize )
274
for ( int j = n; j > keypos; j-- ) // make room
277
emptySize -= sizeof(_KeyNode);
278
_KeyNode& kn = k(keypos);
279
kn.prevChildBucket.Null();
280
kn.recordLoc = recordLoc;
281
kn.setKeyDataOfs((short) _alloc(key.objsize()) );
282
char *p = dataAt(kn.keyDataOfs());
283
memcpy(p, key.objdata(), key.objsize());
287
/* when we delete things we just leave empty space until the node is
288
full and then we repack it.
290
void BucketBasics::pack( const BSONObj &order ) {
291
if ( flags & Packed )
294
int tdz = totalDataSize();
295
char temp[BucketSize];
298
for ( int j = 0; j < n; j++ ) {
299
short ofsold = k(j).keyDataOfs();
300
int sz = keyNode(j).key.objsize();
303
memcpy(temp+ofs, dataAt(ofsold), sz);
304
k(j).setKeyDataOfsSavingUse( ofs );
306
int dataUsed = tdz - ofs;
307
memcpy(data + ofs, temp + ofs, dataUsed);
308
emptySize = tdz - dataUsed - n * sizeof(_KeyNode);
309
assert( emptySize >= 0 );
312
assertValid( order );
315
inline void BucketBasics::truncateTo(int N, const BSONObj &order) {
321
/* - BtreeBucket --------------------------------------------------- */
323
/* return largest key in the subtree. */
324
void BtreeBucket::findLargestKey(const DiskLoc& thisLoc, DiskLoc& largestLoc, int& largestKey) {
325
DiskLoc loc = thisLoc;
327
BtreeBucket *b = loc.btree();
328
if ( !b->nextChild.isNull() ) {
341
bool BtreeBucket::exists(const IndexDetails& idx, DiskLoc thisLoc, const BSONObj& key, BSONObj order) {
344
DiskLoc b = locate(idx, thisLoc, key, order, pos, found, minDiskLoc);
350
BtreeBucket *bucket = b.btree();
351
_KeyNode& kn = bucket->k(pos);
353
return bucket->keyAt(pos).woEqual(key);
354
b = bucket->advance(b, pos, 1, "BtreeBucket::exists");
359
string BtreeBucket::dupKeyError( const IndexDetails& idx , const BSONObj& key ){
361
ss << "E11000 duplicate key error";
362
ss << "index: " << idx.indexNamespace() << " ";
363
ss << "dup key: " << key;
367
/* Find a key withing this btree bucket.
369
When duplicate keys are allowed, we use the DiskLoc of the record as if it were part of the
370
key. That assures that even when there are many duplicates (e.g., 1 million) for a key,
371
our performance is still good.
373
assertIfDup: if the key exists (ignoring the recordLoc), uassert
375
pos: for existing keys k0...kn-1.
376
returns # it goes BEFORE. so key[pos-1] < key < key[pos]
377
returns n if it goes after the last existing key.
378
note result might be an Unused location!
381
bool BtreeBucket::find(const IndexDetails& idx, const BSONObj& key, DiskLoc recordLoc, const BSONObj &order, int& pos, bool assertIfDup) {
382
#if defined(_EXPERIMENT1)
384
char *z = (char *) this;
388
if( i >= BucketSize )
394
/* binary search for this key */
395
bool dupsChecked = false;
400
KeyNode M = keyNode(m);
401
int x = key.woCompare(M.key, order);
404
if( k(m).isUnused() ) {
405
// ok that key is there if unused. but we need to check that there aren't other
406
// entries for the key then. as it is very rare that we get here, we don't put any
407
// coding effort in here to make this particularly fast
410
if( idx.head.btree()->exists(idx, idx.head, key, order) )
411
uasserted( ASSERT_ID_DUPKEY , dupKeyError( idx , key ) );
415
uasserted( ASSERT_ID_DUPKEY , dupKeyError( idx , key ) );
418
// dup keys allowed. use recordLoc as if it is part of the key
419
DiskLoc unusedRL = M.recordLoc;
420
unusedRL.GETOFS() &= ~1; // so we can test equality without the used bit messing us up
421
x = recordLoc.compare(unusedRL);
423
if ( x < 0 ) // key < M.key
436
BSONObj keyatpos = keyNode(pos).key;
437
wassert( key.woCompare(keyatpos, order) <= 0 );
439
wassert( keyNode(pos-1).key.woCompare(key, order) <= 0 );
446
void BtreeBucket::delBucket(const DiskLoc& thisLoc, IndexDetails& id) {
447
ClientCursor::informAboutToDeleteBucket(thisLoc);
450
BtreeBucket *p = parent.btreemod();
451
if ( p->nextChild == thisLoc ) {
455
for ( int i = 0; i < p->n; i++ ) {
456
if ( p->k(i).prevChildBucket == thisLoc ) {
457
p->k(i).prevChildBucket.Null();
461
out() << "ERROR: can't find ref to deleted bucket.\n";
462
out() << "To delete:\n";
464
out() << "Parent:\n";
470
/* as a temporary defensive measure, we zap the whole bucket, AND don't truly delete
471
it (meaning it is ineligible for reuse).
473
memset(this, 0, Size());
479
massert( 10284 , "todo: use RecStoreInterface instead", false);
480
// TODO: this was broken anyway as deleteRecord does unindexRecord() call which assumes the data is a BSONObj,
483
// theDataFileMgr.deleteRecord(id.indexNamespace().c_str(), thisLoc.rec(), thisLoc);
487
/* note: may delete the entire bucket! this invalid upon return sometimes. */
488
void BtreeBucket::delKeyAtPos(const DiskLoc& thisLoc, IndexDetails& id, int p) {
491
DiskLoc left = childForPos(p);
494
if ( left.isNull() && nextChild.isNull() ) {
496
_delKeyAtPos(p); // we don't delete the top bucket ever
498
delBucket(thisLoc, id);
513
/* remove a key from the index */
514
bool BtreeBucket::unindex(const DiskLoc& thisLoc, IndexDetails& id, BSONObj& key, const DiskLoc& recordLoc ) {
515
if ( key.objsize() > KeyMax ) {
516
OCCASIONALLY problem() << "unindex: key too large to index, skipping " << id.indexNamespace() << /* ' ' << key.toString() << */ '\n';
522
DiskLoc loc = locate(id, thisLoc, key, id.keyPattern(), pos, found, recordLoc, 1);
524
loc.btree()->delKeyAtPos(loc, id, pos);
530
BtreeBucket* BtreeBucket::allocTemp() {
531
BtreeBucket *b = (BtreeBucket*) malloc(BucketSize);
536
inline void fix(const DiskLoc& thisLoc, const DiskLoc& child) {
537
if ( !child.isNull() ) {
539
out() << " " << child.toString() << ".parent=" << thisLoc.toString() << endl;
540
child.btreemod()->parent = thisLoc;
544
/* this sucks. maybe get rid of parent ptrs. */
545
void BtreeBucket::fixParentPtrs(const DiskLoc& thisLoc) {
547
fix(thisLoc, nextChild);
548
for ( int i = 0; i < n; i++ )
549
fix(thisLoc, k(i).prevChildBucket);
552
/* insert a key in this bucket, splitting if necessary.
553
keypos - where to insert the key i3n range 0..n. 0=make leftmost, n=make rightmost.
555
void BtreeBucket::insertHere(DiskLoc thisLoc, int keypos,
556
DiskLoc recordLoc, const BSONObj& key, const BSONObj& order,
557
DiskLoc lchild, DiskLoc rchild, IndexDetails& idx)
561
out() << " " << thisLoc.toString() << ".insertHere " << key.toString() << '/' << recordLoc.toString() << ' '
562
<< lchild.toString() << ' ' << rchild.toString() << " keypos:" << keypos << endl;
564
DiskLoc oldLoc = thisLoc;
566
if ( basicInsert(thisLoc, keypos, recordLoc, key, order) ) {
567
_KeyNode& kn = k(keypos);
568
if ( keypos+1 == n ) { // last key
569
if ( nextChild != lchild ) {
570
out() << "ERROR nextChild != lchild" << endl;
571
out() << " thisLoc: " << thisLoc.toString() << ' ' << idx.indexNamespace() << endl;
572
out() << " keyPos: " << keypos << " n:" << n << endl;
573
out() << " nextChild: " << nextChild.toString() << " lchild: " << lchild.toString() << endl;
574
out() << " recordLoc: " << recordLoc.toString() << " rchild: " << rchild.toString() << endl;
575
out() << " key: " << key.toString() << endl;
578
out() << "\n\nDUMPING FULL INDEX" << endl;
581
idx.head.btree()->fullValidate(idx.head);
585
kn.prevChildBucket = nextChild;
586
assert( kn.prevChildBucket == lchild );
588
if ( !rchild.isNull() )
589
rchild.btreemod()->parent = thisLoc;
592
k(keypos).prevChildBucket = lchild;
593
if ( k(keypos+1).prevChildBucket != lchild ) {
594
out() << "ERROR k(keypos+1).prevChildBucket != lchild" << endl;
595
out() << " thisLoc: " << thisLoc.toString() << ' ' << idx.indexNamespace() << endl;
596
out() << " keyPos: " << keypos << " n:" << n << endl;
597
out() << " k(keypos+1).pcb: " << k(keypos+1).prevChildBucket.toString() << " lchild: " << lchild.toString() << endl;
598
out() << " recordLoc: " << recordLoc.toString() << " rchild: " << rchild.toString() << endl;
599
out() << " key: " << key.toString() << endl;
602
out() << "\n\nDUMPING FULL INDEX" << endl;
605
idx.head.btree()->fullValidate(idx.head);
609
k(keypos+1).prevChildBucket = rchild;
610
if ( !rchild.isNull() )
611
rchild.btreemod()->parent = thisLoc;
616
/* ---------- split ---------------- */
619
out() << " " << thisLoc.toString() << ".split" << endl;
623
DiskLoc rLoc = addBucket(idx);
624
BtreeBucket *r = rLoc.btreemod();
626
out() << " mid:" << mid << ' ' << keyNode(mid).key.toString() << " n:" << n << endl;
627
for ( int i = mid+1; i < n; i++ ) {
628
KeyNode kn = keyNode(i);
629
r->pushBack(kn.recordLoc, kn.key, order, kn.prevChildBucket);
631
r->nextChild = nextChild;
632
r->assertValid( order );
635
out() << " new rLoc:" << rLoc.toString() << endl;
637
rLoc.btree()->fixParentPtrs(rLoc);
640
KeyNode middle = keyNode(mid);
641
nextChild = middle.prevChildBucket; // middle key gets promoted, its children will be thisLoc (l) and rLoc (r)
643
out() << " middle key:" << middle.key.toString() << endl;
646
// promote middle to a parent node
647
if ( parent.isNull() ) {
648
// make a new parent if we were the root
649
DiskLoc L = addBucket(idx);
650
BtreeBucket *p = L.btreemod();
651
p->pushBack(middle.recordLoc, middle.key, order, thisLoc);
653
p->assertValid( order );
654
parent = idx.head = L;
656
out() << " we were root, making new root:" << hex << parent.getOfs() << dec << endl;
657
rLoc.btreemod()->parent = parent;
660
/* set this before calling _insert - if it splits it will do fixParent() logic and change the value.
662
rLoc.btreemod()->parent = parent;
664
out() << " promoting middle key " << middle.key.toString() << endl;
665
parent.btree()->_insert(parent, middle.recordLoc, middle.key, order, /*dupsallowed*/true, thisLoc, rLoc, idx);
669
truncateTo(mid, order); // note this may trash middle.key. thus we had to promote it before finishing up here.
671
// add our new key, there is room now
674
if ( keypos <= mid ) {
676
out() << " keypos<mid, insertHere() the new key" << endl;
677
insertHere(thisLoc, keypos, recordLoc, key, order, lchild, rchild, idx);
679
int kp = keypos-mid-1;
681
rLoc.btree()->insertHere(rLoc, kp, recordLoc, key, order, lchild, rchild, idx);
686
out() << " split end " << hex << thisLoc.getOfs() << dec << endl;
689
/* start a new index off, empty */
690
DiskLoc BtreeBucket::addBucket(IndexDetails& id) {
691
DiskLoc loc = btreeStore->insert(id.indexNamespace().c_str(), 0, BucketSize, true);
692
BtreeBucket *b = loc.btreemod();
697
void BtreeBucket::renameIndexNamespace(const char *oldNs, const char *newNs) {
698
btreeStore->rename( oldNs, newNs );
701
DiskLoc BtreeBucket::getHead(const DiskLoc& thisLoc) {
703
while ( !p.btree()->isHead() )
704
p = p.btree()->parent;
708
DiskLoc BtreeBucket::advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller) {
709
if ( keyOfs < 0 || keyOfs >= n ) {
710
out() << "ASSERT failure BtreeBucket::advance, caller: " << caller << endl;
711
out() << " thisLoc: " << thisLoc.toString() << endl;
712
out() << " keyOfs: " << keyOfs << " n:" << n << " direction: " << direction << endl;
713
out() << bucketSummary() << endl;
716
int adj = direction < 0 ? 1 : 0;
717
int ko = keyOfs + direction;
718
DiskLoc nextDown = childForPos(ko+adj);
719
if ( !nextDown.isNull() ) {
721
keyOfs = direction>0 ? 0 : nextDown.btree()->n - 1;
722
DiskLoc loc = nextDown.btree()->childForPos(keyOfs + adj);
730
if ( ko < n && ko >= 0 ) {
735
// end of bucket. traverse back up.
736
DiskLoc childLoc = thisLoc;
737
DiskLoc ancestor = parent;
739
if ( ancestor.isNull() )
741
BtreeBucket *an = ancestor.btree();
742
for ( int i = 0; i < an->n; i++ ) {
743
if ( an->childForPos(i+adj) == childLoc ) {
748
assert( direction<0 || an->nextChild == childLoc );
749
// parent exhausted also, keep going up
751
ancestor = an->parent;
757
DiskLoc BtreeBucket::locate(const IndexDetails& idx, const DiskLoc& thisLoc, const BSONObj& key, const BSONObj &order, int& pos, bool& found, DiskLoc recordLoc, int direction) {
759
found = find(idx, key, recordLoc, order, p, /*assertIfDup*/ false);
765
DiskLoc child = childForPos(p);
767
if ( !child.isNull() ) {
768
DiskLoc l = child.btree()->locate(idx, child, key, order, pos, found, recordLoc, direction);
775
return --pos == -1 ? DiskLoc() /*theend*/ : thisLoc;
777
return pos == n ? DiskLoc() /*theend*/ : thisLoc;
780
/* @thisLoc disk location of *this
782
int BtreeBucket::_insert(DiskLoc thisLoc, DiskLoc recordLoc,
783
const BSONObj& key, const BSONObj &order, bool dupsAllowed,
784
DiskLoc lChild, DiskLoc rChild, IndexDetails& idx) {
785
if ( key.objsize() > KeyMax ) {
786
problem() << "ERROR: key too large len:" << key.objsize() << " max:" << KeyMax << ' ' << key.objsize() << ' ' << idx.indexNamespace() << endl;
789
assert( key.objsize() > 0 );
792
bool found = find(idx, key, recordLoc, order, pos, !dupsAllowed);
793
if ( insert_debug ) {
794
out() << " " << thisLoc.toString() << '.' << "_insert " <<
795
key.toString() << '/' << recordLoc.toString() <<
796
" l:" << lChild.toString() << " r:" << rChild.toString() << endl;
797
out() << " found:" << found << " pos:" << pos << " n:" << n << endl;
801
_KeyNode& kn = k(pos);
802
if ( kn.isUnused() ) {
803
log(4) << "btree _insert: reusing unused key" << endl;
804
massert( 10285 , "_insert: reuse key but lchild is not null", lChild.isNull());
805
massert( 10286 , "_insert: reuse key but rchild is not null", rChild.isNull());
810
out() << "_insert(): key already exists in index\n";
811
out() << " " << idx.indexNamespace().c_str() << " thisLoc:" << thisLoc.toString() << '\n';
812
out() << " " << key.toString() << '\n';
813
out() << " " << "recordLoc:" << recordLoc.toString() << " pos:" << pos << endl;
814
out() << " old l r: " << childForPos(pos).toString() << ' ' << childForPos(pos+1).toString() << endl;
815
out() << " new l r: " << lChild.toString() << ' ' << rChild.toString() << endl;
816
massert( 10287 , "btree: key+recloc already in index", false);
819
DEBUGGING out() << "TEMP: key: " << key.toString() << endl;
820
DiskLoc& child = childForPos(pos);
822
out() << " getChild(" << pos << "): " << child.toString() << endl;
823
if ( child.isNull() || !rChild.isNull() /* means an 'internal' insert */ ) {
824
insertHere(thisLoc, pos, recordLoc, key, order, lChild, rChild, idx);
828
return child.btree()->bt_insert(child, recordLoc, key, order, dupsAllowed, idx, /*toplevel*/false);
831
void BtreeBucket::dump() {
832
out() << "DUMP btreebucket n:" << n;
833
out() << " parent:" << hex << parent.getOfs() << dec;
834
for ( int i = 0; i < n; i++ ) {
836
KeyNode k = keyNode(i);
837
out() << '\t' << i << '\t' << k.key.toString() << "\tleft:" << hex <<
838
k.prevChildBucket.getOfs() << "\tRecLoc:" << k.recordLoc.toString() << dec;
839
if ( this->k(i).isUnused() )
842
out() << " right:" << hex << nextChild.getOfs() << dec << endl;
845
/* todo: meaning of return code unclear clean up */
846
int BtreeBucket::bt_insert(DiskLoc thisLoc, DiskLoc recordLoc,
847
const BSONObj& key, const BSONObj &order, bool dupsAllowed,
848
IndexDetails& idx, bool toplevel)
851
if ( key.objsize() > KeyMax ) {
852
problem() << "Btree::insert: key too large to index, skipping " << idx.indexNamespace().c_str() << ' ' << key.objsize() << ' ' << key.toString() << '\n';
857
int x = _insert(thisLoc, recordLoc, key, order, dupsAllowed, DiskLoc(), DiskLoc(), idx);
858
assertValid( order );
863
void BtreeBucket::shape(stringstream& ss) {
867
DiskLoc BtreeBucket::findSingle( const IndexDetails& indexdetails , const DiskLoc& thisLoc, const BSONObj& key ){
870
DiskLoc bucket = locate( indexdetails , indexdetails.head , key , BSONObj() , pos , found , minDiskLoc );
871
if ( bucket.isNull() )
874
BtreeBucket *b = bucket.btree();
876
_KeyNode& knraw = b->k(pos);
877
if ( knraw.isUsed() )
879
bucket = b->advance( bucket , pos , 1 , "findSingle" );
880
if ( bucket.isNull() )
884
KeyNode kn = b->keyNode( pos );
885
if ( key.woCompare( kn.key ) != 0 )
893
#include "dbhelpers.h"
897
void BtreeBucket::a_test(IndexDetails& id) {
898
BtreeBucket *b = id.head.btree();
900
// record locs for testing
906
BSONObj key = fromjson("{x:9}");
907
BSONObj order = fromjson("{}");
909
b->bt_insert(id.head, A, key, order, true, id);
911
b->bt_insert(id.head, A, key, order, true, id);
913
b->bt_insert(id.head, A, key, order, true, id);
915
b->bt_insert(id.head, A, key, order, true, id);
917
assert( b->k(0).isUsed() );
918
// b->k(0).setUnused();
923
b->dumpTree(id.head, order);
925
/* b->bt_insert(id.head, B, key, order, false, id);
928
b->dumpTree(id.head, order);
931
b->bt_insert(id.head, A, key, order, false, id);
933
b->dumpTree(id.head, order);
936
// this should assert. does it? (it might "accidentally" though, not asserting proves a problem, asserting proves nothing)
937
b->bt_insert(id.head, C, key, order, false, id);
939
b->dumpTree(id.head, order);
942
/* --- BtreeBuilder --- */
944
BtreeBuilder::BtreeBuilder(bool _dupsAllowed, IndexDetails& _idx) :
945
dupsAllowed(_dupsAllowed), idx(_idx), n(0)
947
first = cur = BtreeBucket::addBucket(idx);
949
order = idx.keyPattern();
953
void BtreeBuilder::newBucket() {
954
DiskLoc L = BtreeBucket::addBucket(idx);
960
void BtreeBuilder::addKey(BSONObj& key, DiskLoc loc) {
963
int cmp = keyLast.woCompare(key, order);
964
massert( 10288 , "bad key order in BtreeBuilder - server internal error", cmp <= 0 );
967
uasserted( ASSERT_ID_DUPKEY , BtreeBucket::dupKeyError( idx , keyLast ) );
973
if ( ! b->_pushBack(loc, key, order, DiskLoc()) ){
975
if ( key.objsize() > KeyMax ) {
976
problem() << "Btree::insert: key too large to index, skipping " << idx.indexNamespace().c_str() << ' ' << key.objsize() << ' ' << key.toString() << '\n';
981
b->pushBack(loc, key, order, DiskLoc());
987
void BtreeBuilder::buildNextLevel(DiskLoc loc) {
990
if( loc.btree()->tempNext().isNull() ) {
991
// only 1 bucket at this level. we are done.
997
DiskLoc upLoc = BtreeBucket::addBucket(idx);
998
DiskLoc upStart = upLoc;
999
BtreeBucket *up = upLoc.btreemod();
1002
while( !xloc.isNull() ) {
1003
BtreeBucket *x = xloc.btreemod();
1008
log() << "warning: empty bucket on BtreeBuild " << k.toString() << endl;
1010
if ( ! up->_pushBack(r, k, order, xloc) ){
1011
// current bucket full
1012
DiskLoc n = BtreeBucket::addBucket(idx);
1015
up = upLoc.btreemod();
1016
up->pushBack(r, k, order, xloc);
1019
xloc = x->tempNext(); /* get next in chain at current level */
1027
log(2) << "btree levels: " << levels << endl;
1030
/* when all addKeys are done, we then build the higher levels of the tree */
1031
void BtreeBuilder::commit() {
1032
buildNextLevel(first);
1036
BtreeBuilder::~BtreeBuilder() {
1038
log(2) << "Rolling back partially built index space" << endl;
1040
while( !x.isNull() ) {
1041
DiskLoc next = x.btree()->tempNext();
1042
btreeStore->deleteRecord(idx.indexNamespace().c_str(), x);
1045
assert( idx.head.isNull() );
1046
log(2) << "done rollback" << endl;