2
* See the file LICENSE for redistribution information.
4
* Copyright (c) 1996-2002
5
* Sleepycat Software. All rights reserved.
8
* Copyright (c) 1990, 1993, 1994
9
* Margo Seltzer. All rights reserved.
12
* Copyright (c) 1990, 1993, 1994
13
* The Regents of the University of California. All rights reserved.
15
* This code is derived from software contributed to Berkeley by
18
* Redistribution and use in source and binary forms, with or without
19
* modification, are permitted provided that the following conditions
21
* 1. Redistributions of source code must retain the above copyright
22
* notice, this list of conditions and the following disclaimer.
23
* 2. Redistributions in binary form must reproduce the above copyright
24
* notice, this list of conditions and the following disclaimer in the
25
* documentation and/or other materials provided with the distribution.
26
* 3. Neither the name of the University nor the names of its contributors
27
* may be used to endorse or promote products derived from this software
28
* without specific prior written permission.
30
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43
#include "db_config.h"
46
static const char revid[] = "$Id$";
53
* Page manipulation for hashing package.
56
#ifndef NO_SYSTEM_INCLUDES
57
#include <sys/types.h>
63
#include "dbinc/db_page.h"
64
#include "dbinc/db_shash.h"
65
#include "dbinc/hash.h"
66
#include "dbinc/lock.h"
68
static int __ham_c_delpg
69
__P((DBC *, db_pgno_t, db_pgno_t, u_int32_t, db_ham_mode, u_int32_t *));
72
* PUBLIC: int __ham_item __P((DBC *, db_lockmode_t, db_pgno_t *));
75
__ham_item(dbc, mode, pgnop)
86
hcp = (HASH_CURSOR *)dbc->internal;
88
if (F_ISSET(hcp, H_DELETED)) {
89
__db_err(dbp->dbenv, "Attempt to return a deleted item");
92
F_CLR(hcp, H_OK | H_NOMORE);
94
/* Check if we need to get a page for this cursor. */
95
if ((ret = __ham_get_cpage(dbc, mode)) != 0)
99
/* Check if we are looking for space in which to insert an item. */
100
if (hcp->seek_size && hcp->seek_found_page == PGNO_INVALID &&
101
hcp->seek_size < P_FREESPACE(dbp, hcp->page))
102
hcp->seek_found_page = hcp->pgno;
104
/* Check for off-page duplicates. */
105
if (hcp->indx < NUM_ENT(hcp->page) &&
106
HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) {
108
HOFFDUP_PGNO(H_PAIRDATA(dbp, hcp->page, hcp->indx)),
114
/* Check if we need to go on to the next page. */
115
if (F_ISSET(hcp, H_ISDUP))
117
* ISDUP is set, and offset is at the beginning of the datum.
118
* We need to grab the length of the datum, then set the datum
119
* pointer to be the beginning of the datum.
121
memcpy(&hcp->dup_len,
122
HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)) +
123
hcp->dup_off, sizeof(db_indx_t));
125
if (hcp->indx >= (db_indx_t)NUM_ENT(hcp->page)) {
126
/* Fetch next page. */
127
if (NEXT_PGNO(hcp->page) == PGNO_INVALID) {
128
F_SET(hcp, H_NOMORE);
129
return (DB_NOTFOUND);
131
next_pgno = NEXT_PGNO(hcp->page);
133
if ((ret = __ham_next_cpage(dbc, next_pgno, 0)) != 0)
143
* PUBLIC: int __ham_item_reset __P((DBC *));
146
__ham_item_reset(dbc)
156
hcp = (HASH_CURSOR *)dbc->internal;
159
if (hcp->page != NULL)
160
ret = mpf->put(mpf, hcp->page, 0);
162
__ham_item_init(dbc);
167
* PUBLIC: void __ham_item_init __P((DBC *));
175
hcp = (HASH_CURSOR *)dbc->internal;
177
* If this cursor still holds any locks, we must
178
* release them if we are not running with transactions.
180
(void)__TLPUT(dbc, hcp->lock);
183
* The following fields must *not* be initialized here
184
* because they may have meaning across inits.
185
* hlock, hdr, split_buf, stats
187
hcp->bucket = BUCKET_INVALID;
188
hcp->lbucket = BUCKET_INVALID;
189
LOCK_INIT(hcp->lock);
190
hcp->lock_mode = DB_LOCK_NG;
195
hcp->seek_found_page = PGNO_INVALID;
198
hcp->pgno = PGNO_INVALID;
199
hcp->indx = NDX_INVALID;
204
* Returns the last item in a bucket.
206
* PUBLIC: int __ham_item_last __P((DBC *, db_lockmode_t, db_pgno_t *));
209
__ham_item_last(dbc, mode, pgnop)
217
hcp = (HASH_CURSOR *)dbc->internal;
218
if ((ret = __ham_item_reset(dbc)) != 0)
221
hcp->bucket = hcp->hdr->max_bucket;
222
hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
224
return (__ham_item_prev(dbc, mode, pgnop));
228
* PUBLIC: int __ham_item_first __P((DBC *, db_lockmode_t, db_pgno_t *));
231
__ham_item_first(dbc, mode, pgnop)
239
hcp = (HASH_CURSOR *)dbc->internal;
240
if ((ret = __ham_item_reset(dbc)) != 0)
244
hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
245
return (__ham_item_next(dbc, mode, pgnop));
250
* Returns a pointer to key/data pair on a page. In the case of
251
* bigkeys, just returns the page number and index of the bigkey
254
* PUBLIC: int __ham_item_prev __P((DBC *, db_lockmode_t, db_pgno_t *));
257
__ham_item_prev(dbc, mode, pgnop)
267
hcp = (HASH_CURSOR *)dbc->internal;
271
* There are 5 cases for backing up in a hash file.
272
* Case 1: In the middle of a page, no duplicates, just dec the index.
273
* Case 2: In the middle of a duplicate set, back up one.
274
* Case 3: At the beginning of a duplicate set, get out of set and
275
* back up to next key.
276
* Case 4: At the beginning of a page; go to previous page.
277
* Case 5: At the beginning of a bucket; go to prev bucket.
279
F_CLR(hcp, H_OK | H_NOMORE | H_DELETED);
281
if ((ret = __ham_get_cpage(dbc, mode)) != 0)
285
* First handle the duplicates. Either you'll get the key here
286
* or you'll exit the duplicate set and drop into the code below
287
* to handle backing up through keys.
289
if (!F_ISSET(hcp, H_NEXT_NODUP) && F_ISSET(hcp, H_ISDUP)) {
290
if (HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) ==
293
HOFFDUP_PGNO(H_PAIRDATA(dbp, hcp->page, hcp->indx)),
299
/* Duplicates are on-page. */
300
if (hcp->dup_off != 0) {
301
memcpy(&hcp->dup_len, HKEYDATA_DATA(
302
H_PAIRDATA(dbp, hcp->page, hcp->indx))
303
+ hcp->dup_off - sizeof(db_indx_t),
306
DUP_SIZE(hcp->dup_len);
307
return (__ham_item(dbc, mode, pgnop));
312
* If we get here, we are not in a duplicate set, and just need
313
* to back up the cursor. There are still three cases:
314
* midpage, beginning of page, beginning of bucket.
317
if (F_ISSET(hcp, H_DUPONLY)) {
319
F_SET(hcp, H_NOMORE);
323
* We are no longer in a dup set; flag this so the dup code
324
* will reinitialize should we stumble upon another one.
328
if (hcp->indx == 0) { /* Beginning of page. */
329
hcp->pgno = PREV_PGNO(hcp->page);
330
if (hcp->pgno == PGNO_INVALID) {
331
/* Beginning of bucket. */
332
F_SET(hcp, H_NOMORE);
333
return (DB_NOTFOUND);
335
__ham_next_cpage(dbc, hcp->pgno, 0)) != 0)
338
hcp->indx = NUM_ENT(hcp->page);
342
* Either we've got the cursor set up to be decremented, or we
343
* have to find the end of a bucket.
345
if (hcp->indx == NDX_INVALID) {
346
DB_ASSERT(hcp->page != NULL);
348
hcp->indx = NUM_ENT(hcp->page);
349
for (next_pgno = NEXT_PGNO(hcp->page);
350
next_pgno != PGNO_INVALID;
351
next_pgno = NEXT_PGNO(hcp->page)) {
352
if ((ret = __ham_next_cpage(dbc, next_pgno, 0)) != 0)
354
hcp->indx = NUM_ENT(hcp->page);
357
if (hcp->indx == 0) {
358
/* Bucket was empty. */
359
F_SET(hcp, H_NOMORE);
360
return (DB_NOTFOUND);
366
return (__ham_item(dbc, mode, pgnop));
370
* Sets the cursor to the next key/data pair on a page.
372
* PUBLIC: int __ham_item_next __P((DBC *, db_lockmode_t, db_pgno_t *));
375
__ham_item_next(dbc, mode, pgnop)
383
hcp = (HASH_CURSOR *)dbc->internal;
385
if ((ret = __ham_get_cpage(dbc, mode)) != 0)
389
* Deleted on-page duplicates are a weird case. If we delete the last
390
* one, then our cursor is at the very end of a duplicate set and
391
* we actually need to go on to the next key.
393
if (F_ISSET(hcp, H_DELETED)) {
394
if (hcp->indx != NDX_INVALID &&
395
F_ISSET(hcp, H_ISDUP) &&
396
HPAGE_TYPE(dbc->dbp, hcp->page, H_DATAINDEX(hcp->indx))
397
== H_DUPLICATE && hcp->dup_tlen == hcp->dup_off) {
398
if (F_ISSET(hcp, H_DUPONLY)) {
400
F_SET(hcp, H_NOMORE);
406
} else if (!F_ISSET(hcp, H_ISDUP) && F_ISSET(hcp, H_DUPONLY)) {
408
F_SET(hcp, H_NOMORE);
410
} else if (F_ISSET(hcp, H_ISDUP) &&
411
F_ISSET(hcp, H_NEXT_NODUP)) {
415
F_CLR(hcp, H_DELETED);
416
} else if (hcp->indx == NDX_INVALID) {
419
} else if (F_ISSET(hcp, H_NEXT_NODUP)) {
422
} else if (F_ISSET(hcp, H_ISDUP) && hcp->dup_tlen != 0) {
423
if (hcp->dup_off + DUP_SIZE(hcp->dup_len) >=
424
hcp->dup_tlen && F_ISSET(hcp, H_DUPONLY)) {
426
F_SET(hcp, H_NOMORE);
429
hcp->dup_off += DUP_SIZE(hcp->dup_len);
430
if (hcp->dup_off >= hcp->dup_tlen) {
434
} else if (F_ISSET(hcp, H_DUPONLY)) {
436
F_SET(hcp, H_NOMORE);
443
return (__ham_item(dbc, mode, pgnop));
447
* PUBLIC: void __ham_putitem __P((DB *, PAGE *p, const DBT *, int));
449
* This is a little bit sleazy in that we're overloading the meaning
450
* of the H_OFFPAGE type here. When we recover deletes, we have the
451
* entire entry instead of having only the DBT, so we'll pass type
452
* H_OFFPAGE to mean, "copy the whole entry" as opposed to constructing
453
* an H_KEYDATA around it.
456
__ham_putitem(dbp, p, dbt, type)
468
/* Put the item element on the page. */
469
if (type == H_OFFPAGE) {
470
off = HOFFSET(p) - dbt->size;
471
HOFFSET(p) = inp[n] = off;
472
memcpy(P_ENTRY(dbp, p, n), dbt->data, dbt->size);
474
off = HOFFSET(p) - HKEYDATA_SIZE(dbt->size);
475
HOFFSET(p) = inp[n] = off;
476
PUT_HKEYDATA(P_ENTRY(dbp, p, n), dbt->data, dbt->size, type);
479
/* Adjust page info. */
484
* PUBLIC: void __ham_reputpair __P((DB *, PAGE *,
485
* PUBLIC: u_int32_t, const DBT *, const DBT *));
487
* This is a special case to restore a key/data pair to its original
488
* location during recovery. We are guaranteed that the pair fits
489
* on the page and is not the last pair on the page (because if it's
490
* the last pair, the normal insert works).
493
__ham_reputpair(dbp, p, ndx, key, data)
497
const DBT *key, *data;
499
db_indx_t i, *inp, movebytes, newbytes;
505
/* First shuffle the existing items up on the page. */
506
movebytes = (db_indx_t)(
507
(ndx == 0 ? psize : inp[H_DATAINDEX(ndx - 2)]) - HOFFSET(p));
508
newbytes = key->size + data->size;
509
from = (u_int8_t *)p + HOFFSET(p);
510
memmove(from - newbytes, from, movebytes);
513
* Adjust the indices and move them up 2 spaces. Note that we
514
* have to check the exit condition inside the loop just in case
515
* we are dealing with index 0 (db_indx_t's are unsigned).
517
for (i = NUM_ENT(p) - 1; ; i-- ) {
518
inp[i + 2] = inp[i] - newbytes;
519
if (i == H_KEYINDEX(ndx))
523
/* Put the key and data on the page. */
524
inp[H_KEYINDEX(ndx)] = (db_indx_t)(
525
(ndx == 0 ? psize : inp[H_DATAINDEX(ndx - 2)]) - key->size);
526
inp[H_DATAINDEX(ndx)] = inp[H_KEYINDEX(ndx)] - data->size;
527
memcpy(P_ENTRY(dbp, p, H_KEYINDEX(ndx)), key->data, key->size);
528
memcpy(P_ENTRY(dbp, p, H_DATAINDEX(ndx)), data->data, data->size);
530
/* Adjust page info. */
531
HOFFSET(p) -= newbytes;
536
* PUBLIC: int __ham_del_pair __P((DBC *, int));
539
__ham_del_pair(dbc, reclaim_page)
544
DBT data_dbt, key_dbt;
545
DB_LSN new_lsn, *n_lsn, tmp_lsn;
548
PAGE *n_pagep, *nn_pagep, *p, *p_pagep;
551
db_pgno_t chg_pgno, pgno, tmp_pgno;
557
hcp = (HASH_CURSOR *)dbc->internal;
558
n_pagep = p_pagep = nn_pagep = NULL;
561
if (hcp->page == NULL &&
562
(ret = mpf->get(mpf, &hcp->pgno, DB_MPOOL_CREATE, &hcp->page)) != 0)
567
* We optimize for the normal case which is when neither the key nor
568
* the data are large. In this case, we write a single log record
569
* and do the delete. If either is large, we'll call __big_delete
570
* to remove the big item and then update the page to remove the
571
* entry referring to the big item.
574
if (HPAGE_PTYPE(H_PAIRKEY(dbp, p, ndx)) == H_OFFPAGE) {
575
memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(dbp, p, H_KEYINDEX(ndx))),
577
ret = __db_doff(dbc, pgno);
581
switch (HPAGE_PTYPE(H_PAIRDATA(dbp, p, ndx))) {
584
HOFFPAGE_PGNO(P_ENTRY(dbp, p, H_DATAINDEX(ndx))),
586
ret = __db_doff(dbc, pgno);
591
* If we delete a pair that is/was a duplicate, then
592
* we had better clear the flag so that we update the
593
* cursor appropriately.
602
/* Now log the delete off this page. */
603
if (DBC_LOGGING(dbc)) {
604
key_dbt.data = P_ENTRY(dbp, p, H_KEYINDEX(ndx));
605
key_dbt.size = LEN_HITEM(dbp, p, dbp->pgsize, H_KEYINDEX(ndx));
606
data_dbt.data = P_ENTRY(dbp, p, H_DATAINDEX(ndx));
607
data_dbt.size = LEN_HITEM(dbp, p, dbp->pgsize, H_DATAINDEX(ndx));
609
if ((ret = __ham_insdel_log(dbp,
610
dbc->txn, &new_lsn, 0, DELPAIR, PGNO(p), (u_int32_t)ndx,
611
&LSN(p), &key_dbt, &data_dbt)) != 0)
614
LSN_NOT_LOGGED(new_lsn);
616
/* Move lsn onto page. */
620
__ham_dpair(dbp, p, ndx);
623
* Mark item deleted so that we don't try to return it, and
624
* so that we update the cursor correctly on the next call
627
F_SET(hcp, H_DELETED);
631
* Update cursors that are on the page where the delete happend.
633
if ((ret = __ham_c_update(dbc, 0, 0, 0)) != 0)
637
* If we are locking, we will not maintain this, because it is
641
* Perhaps we can retain incremental numbers and apply them later.
643
if (!STD_LOCKING(dbc)) {
645
if ((ret = __ham_dirty_meta(dbc)) != 0)
650
* If we need to reclaim the page, then check if the page is empty.
651
* There are two cases. If it's empty and it's not the first page
652
* in the bucket (i.e., the bucket page) then we can simply remove
653
* it. If it is the first chain in the bucket, then we need to copy
654
* the second page into it and remove the second page.
655
* If its the only page in the bucket we leave it alone.
659
(PREV_PGNO(p) == PGNO_INVALID && NEXT_PGNO(p) == PGNO_INVALID))
660
return (mpf->set(mpf, p, DB_MPOOL_DIRTY));
662
if (PREV_PGNO(p) == PGNO_INVALID) {
664
* First page in chain is empty and we know that there
665
* are more pages in the chain.
667
if ((ret = mpf->get(mpf, &NEXT_PGNO(p), 0, &n_pagep)) != 0)
670
if (NEXT_PGNO(n_pagep) != PGNO_INVALID && (ret =
671
mpf->get(mpf, &NEXT_PGNO(n_pagep), 0, &nn_pagep)) != 0)
674
if (DBC_LOGGING(dbc)) {
675
key_dbt.data = n_pagep;
676
key_dbt.size = dbp->pgsize;
677
if ((ret = __ham_copypage_log(dbp,
678
dbc->txn, &new_lsn, 0, PGNO(p),
679
&LSN(p), PGNO(n_pagep), &LSN(n_pagep),
681
nn_pagep == NULL ? NULL : &LSN(nn_pagep),
685
LSN_NOT_LOGGED(new_lsn);
687
/* Move lsn onto page. */
688
LSN(p) = new_lsn; /* Structure assignment. */
689
LSN(n_pagep) = new_lsn;
690
if (NEXT_PGNO(n_pagep) != PGNO_INVALID)
691
LSN(nn_pagep) = new_lsn;
693
if (nn_pagep != NULL) {
694
PREV_PGNO(nn_pagep) = PGNO(p);
696
mpf->put(mpf, nn_pagep, DB_MPOOL_DIRTY)) != 0) {
704
memcpy(p, n_pagep, dbp->pgsize);
707
PREV_PGNO(p) = PGNO_INVALID;
710
* Update cursors to reflect the fact that records
711
* on the second page have moved to the first page.
713
if ((ret = __ham_c_delpg(dbc, PGNO(n_pagep),
714
PGNO(p), 0, DB_HAM_DELFIRSTPG, &order)) != 0)
718
* Update the cursor to reflect its new position.
724
if ((ret = mpf->set(mpf, p, DB_MPOOL_DIRTY)) != 0)
726
if ((ret = __db_free(dbc, n_pagep)) != 0) {
731
if ((ret = mpf->get(mpf, &PREV_PGNO(p), 0, &p_pagep)) != 0)
734
if (NEXT_PGNO(p) != PGNO_INVALID) {
736
mpf->get(mpf, &NEXT_PGNO(p), 0, &n_pagep)) != 0)
738
n_lsn = &LSN(n_pagep);
744
NEXT_PGNO(p_pagep) = NEXT_PGNO(p);
746
PREV_PGNO(n_pagep) = PGNO(p_pagep);
748
if (DBC_LOGGING(dbc)) {
749
if ((ret = __ham_newpage_log(dbp, dbc->txn,
750
&new_lsn, 0, DELOVFL, PREV_PGNO(p), &LSN(p_pagep),
751
PGNO(p), &LSN(p), NEXT_PGNO(p), n_lsn)) != 0)
754
LSN_NOT_LOGGED(new_lsn);
756
/* Move lsn onto page. */
757
LSN(p_pagep) = new_lsn; /* Structure assignment. */
759
LSN(n_pagep) = new_lsn;
762
if (NEXT_PGNO(p) == PGNO_INVALID) {
764
* There is no next page; put the cursor on the
765
* previous page as if we'd deleted the last item
766
* on that page, with index after the last valid
769
* The deleted flag was set up above.
771
hcp->pgno = PGNO(p_pagep);
772
hcp->indx = NUM_ENT(p_pagep);
773
op = DB_HAM_DELLASTPG;
776
* There is a next page, so put the cursor at
777
* the beginning of it.
779
hcp->pgno = NEXT_PGNO(p);
781
op = DB_HAM_DELMIDPG;
785
* Since we are about to delete the cursor page and we have
786
* just moved the cursor, we need to make sure that the
787
* old page pointer isn't left hanging around in the cursor.
791
ret = __db_free(dbc, p);
793
mpf->put(mpf, p_pagep, DB_MPOOL_DIRTY)) != 0 && ret == 0)
795
if (n_pagep != NULL && (t_ret =
796
mpf->put(mpf, n_pagep, DB_MPOOL_DIRTY)) != 0 && ret == 0)
800
if ((ret = __ham_c_delpg(dbc,
801
chg_pgno, hcp->pgno, hcp->indx, op, &order)) != 0)
807
err: /* Clean up any pages. */
809
(void)mpf->put(mpf, n_pagep, 0);
810
if (nn_pagep != NULL)
811
(void)mpf->put(mpf, nn_pagep, 0);
813
(void)mpf->put(mpf, p_pagep, 0);
819
* Given the key data indicated by the cursor, replace part/all of it
820
* according to the fields in the dbt.
822
* PUBLIC: int __ham_replpair __P((DBC *, DBT *, u_int32_t));
825
__ham_replpair(dbc, dbt, make_dup)
831
DBT old_dbt, tdata, tmp;
835
int32_t change; /* XXX: Possible overflow. */
836
u_int32_t dup_flag, len, memsize;
837
int beyond_eor, is_big, ret, type;
838
u_int8_t *beg, *dest, *end, *hk, *src;
842
* Big item replacements are handled in generic code.
843
* Items that fit on the current page fall into 4 classes.
844
* 1. On-page element, same size
845
* 2. On-page element, new is bigger (fits)
846
* 3. On-page element, new is bigger (does not fit)
847
* 4. On-page element, old is bigger
848
* Numbers 1, 2, and 4 are essentially the same (and should
849
* be the common case). We handle case 3 as a delete and
854
hcp = (HASH_CURSOR *)dbc->internal;
857
* We need to compute the number of bytes that we are adding or
858
* removing from the entry. Normally, we can simply substract
859
* the number of bytes we are replacing (dbt->dlen) from the
860
* number of bytes we are inserting (dbt->size). However, if
861
* we are doing a partial put off the end of a record, then this
862
* formula doesn't work, because we are essentially adding
865
change = dbt->size - dbt->dlen;
867
hk = H_PAIRDATA(dbp, hcp->page, hcp->indx);
868
is_big = HPAGE_PTYPE(hk) == H_OFFPAGE;
871
memcpy(&len, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
873
len = LEN_HKEYDATA(dbp, hcp->page,
874
dbp->pgsize, H_DATAINDEX(hcp->indx));
876
beyond_eor = dbt->doff + dbt->dlen > len;
878
change += dbt->doff + dbt->dlen - len;
880
if (change > (int32_t)P_FREESPACE(dbp, hcp->page) ||
881
beyond_eor || is_big) {
883
* Case 3 -- two subcases.
884
* A. This is not really a partial operation, but an overwrite.
885
* Simple del and add works.
886
* B. This is a partial and we need to construct the data that
887
* we are really inserting (yuck).
888
* In both cases, we need to grab the key off the page (in
889
* some cases we could do this outside of this routine; for
890
* cleanliness we do it here. If you happen to be on a big
891
* key, this could be a performance hit).
893
memset(&tmp, 0, sizeof(tmp));
895
__db_ret(dbp, hcp->page, H_KEYINDEX(hcp->indx),
896
&tmp, &dbc->rkey->data, &dbc->rkey->ulen)) != 0)
899
/* Preserve duplicate info. */
900
dup_flag = F_ISSET(hcp, H_ISDUP);
901
if (dbt->doff == 0 && dbt->dlen == len) {
902
ret = __ham_del_pair(dbc, 0);
904
ret = __ham_add_el(dbc,
905
&tmp, dbt, dup_flag ? H_DUPLICATE : H_KEYDATA);
906
} else { /* Case B */
907
type = HPAGE_PTYPE(hk) != H_OFFPAGE ?
908
HPAGE_PTYPE(hk) : H_KEYDATA;
909
memset(&tdata, 0, sizeof(tdata));
912
if ((ret = __db_ret(dbp, hcp->page,
913
H_DATAINDEX(hcp->indx), &tdata, &memp, &memsize))
917
/* Now we can delete the item. */
918
if ((ret = __ham_del_pair(dbc, 0)) != 0) {
919
__os_free(dbenv, memp);
923
/* Now shift old data around to make room for new. */
925
if ((ret = __os_realloc(dbenv,
926
tdata.size + change, &tdata.data)) != 0)
929
memsize = tdata.size + change;
930
memset((u_int8_t *)tdata.data + tdata.size,
933
end = (u_int8_t *)tdata.data + tdata.size;
935
src = (u_int8_t *)tdata.data + dbt->doff + dbt->dlen;
936
if (src < end && tdata.size > dbt->doff + dbt->dlen) {
937
len = tdata.size - dbt->doff - dbt->dlen;
939
memmove(dest, src, len);
941
memcpy((u_int8_t *)tdata.data + dbt->doff,
942
dbt->data, dbt->size);
943
tdata.size += change;
945
/* Now add the pair. */
946
ret = __ham_add_el(dbc, &tmp, &tdata, type);
947
__os_free(dbenv, memp);
949
F_SET(hcp, dup_flag);
954
* Set up pointer into existing data. Do it before the log
955
* message so we can use it inside of the log setup.
957
beg = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx));
961
* If we are going to have to move bytes at all, figure out
962
* all the parameters here. Then log the call before moving
965
if (DBC_LOGGING(dbc)) {
967
old_dbt.size = dbt->dlen;
968
if ((ret = __ham_replace_log(dbp,
969
dbc->txn, &new_lsn, 0, PGNO(hcp->page),
970
(u_int32_t)H_DATAINDEX(hcp->indx), &LSN(hcp->page),
971
(u_int32_t)dbt->doff, &old_dbt, dbt, make_dup)) != 0)
975
LSN_NOT_LOGGED(new_lsn);
977
LSN(hcp->page) = new_lsn; /* Structure assignment. */
979
__ham_onpage_replace(dbp, hcp->page, (u_int32_t)H_DATAINDEX(hcp->indx),
980
(int32_t)dbt->doff, change, dbt);
986
* Replace data on a page with new data, possibly growing or shrinking what's
987
* there. This is called on two different occasions. On one (from replpair)
988
* we are interested in changing only the data. On the other (from recovery)
989
* we are replacing the entire data (header and all) with a new element. In
990
* the latter case, the off argument is negative.
991
* pagep: the page that we're changing
992
* ndx: page index of the element that is growing/shrinking.
993
* off: Offset at which we are beginning the replacement.
994
* change: the number of bytes (+ or -) that the element is growing/shrinking.
995
* dbt: the new data that gets written at beg.
997
* PUBLIC: void __ham_onpage_replace __P((DB *, PAGE *, u_int32_t,
998
* PUBLIC: int32_t, int32_t, DBT *));
1001
__ham_onpage_replace(dbp, pagep, ndx, off, change, dbt)
1012
u_int8_t *src, *dest;
1015
pgsize = dbp->pgsize;
1016
inp = P_INP(dbp, pagep);
1019
src = (u_int8_t *)(pagep) + HOFFSET(pagep);
1021
len = inp[ndx] - HOFFSET(pagep);
1022
else if ((u_int32_t)off >=
1023
LEN_HKEYDATA(dbp, pagep, pgsize, ndx)) {
1024
len = (int32_t)(HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx))
1025
+ LEN_HKEYDATA(dbp, pagep, pgsize, ndx) - src);
1029
(HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx)) + off) -
1031
dest = src - change;
1032
memmove(dest, src, len);
1034
memset(dest + len, 0, change);
1036
/* Now update the indices. */
1037
for (i = ndx; i < NUM_ENT(pagep); i++)
1039
HOFFSET(pagep) -= change;
1042
memcpy(HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx)) + off,
1043
dbt->data, dbt->size);
1045
memcpy(P_ENTRY(dbp, pagep, ndx), dbt->data, dbt->size);
1049
* PUBLIC: int __ham_split_page __P((DBC *, u_int32_t, u_int32_t));
1052
__ham_split_page(dbc, obucket, nbucket)
1054
u_int32_t obucket, nbucket;
1063
HASH_CURSOR *hcp, *cp;
1064
PAGE **pp, *old_pagep, *temp_pagep, *new_pagep;
1066
db_pgno_t bucket_pgno, npgno, next_pgno;
1067
u_int32_t big_len, len;
1068
int found, i, ret, t_ret;
1074
hcp = (HASH_CURSOR *)dbc->internal;
1075
temp_pagep = old_pagep = new_pagep = NULL;
1079
bucket_pgno = BUCKET_TO_PAGE(hcp, obucket);
1080
if ((ret = __db_lget(dbc,
1081
0, bucket_pgno, DB_LOCK_WRITE, 0, &block)) != 0)
1083
if ((ret = mpf->get(mpf,
1084
&bucket_pgno, DB_MPOOL_CREATE, &old_pagep)) != 0)
1087
/* Properly initialize the new bucket page. */
1088
npgno = BUCKET_TO_PAGE(hcp, nbucket);
1089
if ((ret = mpf->get(mpf, &npgno, DB_MPOOL_CREATE, &new_pagep)) != 0)
1092
dbp->pgsize, npgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
1094
temp_pagep = hcp->split_buf;
1095
memcpy(temp_pagep, old_pagep, dbp->pgsize);
1097
if (DBC_LOGGING(dbc)) {
1098
page_dbt.size = dbp->pgsize;
1099
page_dbt.data = old_pagep;
1100
if ((ret = __ham_splitdata_log(dbp,
1101
dbc->txn, &new_lsn, 0, SPLITOLD,
1102
PGNO(old_pagep), &page_dbt, &LSN(old_pagep))) != 0)
1105
LSN_NOT_LOGGED(new_lsn);
1107
LSN(old_pagep) = new_lsn; /* Structure assignment. */
1109
P_INIT(old_pagep, dbp->pgsize, PGNO(old_pagep), PGNO_INVALID,
1110
PGNO_INVALID, 0, P_HASH);
1115
while (temp_pagep != NULL) {
1116
if ((ret = __ham_get_clist(dbp,
1117
PGNO(temp_pagep), NDX_INVALID, &carray)) != 0)
1120
for (n = 0; n < (db_indx_t)NUM_ENT(temp_pagep); n += 2) {
1121
if ((ret = __db_ret(dbp, temp_pagep,
1122
H_KEYINDEX(n), &key, &big_buf, &big_len)) != 0)
1125
if (__ham_call_hash(dbc, key.data, key.size) == obucket)
1131
* Figure out how many bytes we need on the new
1132
* page to store the key/data pair.
1134
len = LEN_HITEM(dbp, temp_pagep, dbp->pgsize,
1136
LEN_HITEM(dbp, temp_pagep, dbp->pgsize,
1138
2 * sizeof(db_indx_t);
1140
if (P_FREESPACE(dbp, *pp) < len) {
1141
if (DBC_LOGGING(dbc)) {
1142
page_dbt.size = dbp->pgsize;
1143
page_dbt.data = *pp;
1144
if ((ret = __ham_splitdata_log(dbp,
1145
dbc->txn, &new_lsn, 0,
1146
SPLITNEW, PGNO(*pp), &page_dbt,
1150
LSN_NOT_LOGGED(new_lsn);
1153
__ham_add_ovflpage(dbc, *pp, 1, pp)) != 0)
1157
/* Check if we need to update a cursor. */
1158
if (carray != NULL) {
1160
for (i = 0; carray[i] != NULL; i++) {
1162
(HASH_CURSOR *)carray[i]->internal;
1163
if (cp->pgno == PGNO(temp_pagep) &&
1165
cp->pgno = PGNO(*pp);
1166
cp->indx = NUM_ENT(*pp);
1170
if (found && DBC_LOGGING(dbc) &&
1171
IS_SUBTRANSACTION(dbc->txn)) {
1173
__ham_chgpg_log(dbp,
1174
dbc->txn, &new_lsn, 0,
1175
DB_HAM_SPLIT, PGNO(temp_pagep),
1176
PGNO(*pp), n, NUM_ENT(*pp))) != 0)
1180
__ham_copy_item(dbp, temp_pagep, H_KEYINDEX(n), *pp);
1181
__ham_copy_item(dbp, temp_pagep, H_DATAINDEX(n), *pp);
1183
next_pgno = NEXT_PGNO(temp_pagep);
1185
/* Clear temp_page; if it's a link overflow page, free it. */
1186
if (PGNO(temp_pagep) != bucket_pgno && (ret =
1187
__db_free(dbc, temp_pagep)) != 0) {
1192
if (next_pgno == PGNO_INVALID)
1194
else if ((ret = mpf->get(
1195
mpf, &next_pgno, DB_MPOOL_CREATE, &temp_pagep)) != 0)
1198
if (temp_pagep != NULL) {
1199
if (DBC_LOGGING(dbc)) {
1200
page_dbt.size = dbp->pgsize;
1201
page_dbt.data = temp_pagep;
1202
if ((ret = __ham_splitdata_log(dbp,
1203
dbc->txn, &new_lsn, 0,
1204
SPLITOLD, PGNO(temp_pagep),
1205
&page_dbt, &LSN(temp_pagep))) != 0)
1208
LSN_NOT_LOGGED(new_lsn);
1209
LSN(temp_pagep) = new_lsn;
1212
if (carray != NULL) /* We never knew its size. */
1213
__os_free(dbenv, carray);
1216
if (big_buf != NULL)
1217
__os_free(dbenv, big_buf);
1220
* If the original bucket spanned multiple pages, then we've got
1221
* a pointer to a page that used to be on the bucket chain. It
1222
* should be deleted.
1224
if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno &&
1225
(ret = __db_free(dbc, temp_pagep)) != 0) {
1231
* Write new buckets out.
1233
if (DBC_LOGGING(dbc)) {
1234
page_dbt.size = dbp->pgsize;
1235
page_dbt.data = old_pagep;
1236
if ((ret = __ham_splitdata_log(dbp, dbc->txn,
1237
&new_lsn, 0, SPLITNEW, PGNO(old_pagep), &page_dbt,
1238
&LSN(old_pagep))) != 0)
1240
LSN(old_pagep) = new_lsn;
1242
page_dbt.data = new_pagep;
1243
if ((ret = __ham_splitdata_log(dbp, dbc->txn, &new_lsn, 0,
1244
SPLITNEW, PGNO(new_pagep), &page_dbt,
1245
&LSN(new_pagep))) != 0)
1247
LSN(new_pagep) = new_lsn;
1249
LSN_NOT_LOGGED(LSN(old_pagep));
1250
LSN_NOT_LOGGED(LSN(new_pagep));
1253
ret = mpf->put(mpf, old_pagep, DB_MPOOL_DIRTY);
1255
mpf->put(mpf, new_pagep, DB_MPOOL_DIRTY)) != 0 && ret == 0)
1259
err: if (old_pagep != NULL)
1260
(void)mpf->put(mpf, old_pagep, DB_MPOOL_DIRTY);
1261
if (new_pagep != NULL)
1262
(void)mpf->put(mpf, new_pagep, DB_MPOOL_DIRTY);
1263
if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno)
1264
(void)mpf->put(mpf, temp_pagep, DB_MPOOL_DIRTY);
1266
if (LOCK_ISSET(block))
1267
__TLPUT(dbc, block);
1268
if (carray != NULL) /* We never knew its size. */
1269
__os_free(dbenv, carray);
1274
* Add the given pair to the page. The page in question may already be
1275
* held (i.e. it was already gotten). If it is, then the page is passed
1276
* in via the pagep parameter. On return, pagep will contain the page
1277
* to which we just added something. This allows us to link overflow
1278
* pages and return the new page having correctly put the last page.
1280
* PUBLIC: int __ham_add_el __P((DBC *, const DBT *, const DBT *, int));
1283
__ham_add_el(dbc, key, val, type)
1285
const DBT *key, *val;
1288
const DBT *pkey, *pdata;
1290
DBT key_dbt, data_dbt;
1294
HOFFPAGE doff, koff;
1295
db_pgno_t next_pgno, pgno;
1296
u_int32_t data_size, key_size, pairsize, rectype;
1297
int do_expand, is_keybig, is_databig, ret;
1298
int key_type, data_type;
1302
hcp = (HASH_CURSOR *)dbc->internal;
1305
pgno = hcp->seek_found_page != PGNO_INVALID ?
1306
hcp->seek_found_page : hcp->pgno;
1307
if (hcp->page == NULL &&
1308
(ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &hcp->page)) != 0)
1311
key_size = HKEYDATA_PSIZE(key->size);
1312
data_size = HKEYDATA_PSIZE(val->size);
1313
is_keybig = ISBIG(hcp, key->size);
1314
is_databig = ISBIG(hcp, val->size);
1316
key_size = HOFFPAGE_PSIZE;
1318
data_size = HOFFPAGE_PSIZE;
1320
pairsize = key_size + data_size;
1322
/* Advance to first page in chain with room for item. */
1323
while (H_NUMPAIRS(hcp->page) && NEXT_PGNO(hcp->page) != PGNO_INVALID) {
1325
* This may not be the end of the chain, but the pair may fit
1326
* anyway. Check if it's a bigpair that fits or a regular
1329
if (P_FREESPACE(dbp, hcp->page) >= pairsize)
1331
next_pgno = NEXT_PGNO(hcp->page);
1332
if ((ret = __ham_next_cpage(dbc, next_pgno, 0)) != 0)
1337
* Check if we need to allocate a new page.
1339
if (P_FREESPACE(dbp, hcp->page) < pairsize) {
1341
if ((ret = __ham_add_ovflpage(dbc,
1342
(PAGE *)hcp->page, 1, (PAGE **)&hcp->page)) != 0)
1344
hcp->pgno = PGNO(hcp->page);
1350
hcp->indx = NUM_ENT(hcp->page);
1351
F_CLR(hcp, H_DELETED);
1353
koff.type = H_OFFPAGE;
1354
UMRW_SET(koff.unused[0]);
1355
UMRW_SET(koff.unused[1]);
1356
UMRW_SET(koff.unused[2]);
1357
if ((ret = __db_poff(dbc, key, &koff.pgno)) != 0)
1359
koff.tlen = key->size;
1360
key_dbt.data = &koff;
1361
key_dbt.size = sizeof(koff);
1363
key_type = H_OFFPAGE;
1366
key_type = H_KEYDATA;
1370
doff.type = H_OFFPAGE;
1371
UMRW_SET(doff.unused[0]);
1372
UMRW_SET(doff.unused[1]);
1373
UMRW_SET(doff.unused[2]);
1374
if ((ret = __db_poff(dbc, val, &doff.pgno)) != 0)
1376
doff.tlen = val->size;
1377
data_dbt.data = &doff;
1378
data_dbt.size = sizeof(doff);
1380
data_type = H_OFFPAGE;
1386
if (DBC_LOGGING(dbc)) {
1389
rectype |= PAIR_DATAMASK;
1391
rectype |= PAIR_KEYMASK;
1392
if (type == H_DUPLICATE)
1393
rectype |= PAIR_DUPMASK;
1395
if ((ret = __ham_insdel_log(dbp, dbc->txn, &new_lsn, 0,
1396
rectype, PGNO(hcp->page), (u_int32_t)NUM_ENT(hcp->page),
1397
&LSN(hcp->page), pkey, pdata)) != 0)
1400
LSN_NOT_LOGGED(new_lsn);
1402
/* Move lsn onto page. */
1403
LSN(hcp->page) = new_lsn; /* Structure assignment. */
1405
__ham_putitem(dbp, hcp->page, pkey, key_type);
1406
__ham_putitem(dbp, hcp->page, pdata, data_type);
1409
* For splits, we are going to update item_info's page number
1410
* field, so that we can easily return to the same page the
1411
* next time we come in here. For other operations, this shouldn't
1412
* matter, since odds are this is the last thing that happens before
1413
* we return to the user program.
1415
hcp->pgno = PGNO(hcp->page);
1419
* Maybe keep incremental numbers here.
1421
if (!STD_LOCKING(dbc)) {
1423
if ((ret = __ham_dirty_meta(dbc)) != 0)
1427
if (do_expand || (hcp->hdr->ffactor != 0 &&
1428
(u_int32_t)H_NUMPAIRS(hcp->page) > hcp->hdr->ffactor))
1429
F_SET(hcp, H_EXPAND);
1434
* Special __putitem call used in splitting -- copies one entry to
1435
* another. Works for all types of hash entries (H_OFFPAGE, H_KEYDATA,
1436
* H_DUPLICATE, H_OFFDUP). Since we log splits at a high level, we
1437
* do not need to do any logging here.
1439
* PUBLIC: void __ham_copy_item __P((DB *, PAGE *, u_int32_t, PAGE *));
1442
__ham_copy_item(dbp, src_page, src_ndx, dest_page)
1453
pgsize = dbp->pgsize;
1454
inp = P_INP(dbp, dest_page);
1456
* Copy the key and data entries onto this new page.
1458
src = P_ENTRY(dbp, src_page, src_ndx);
1460
/* Set up space on dest. */
1461
len = (u_int32_t)LEN_HITEM(dbp, src_page, pgsize, src_ndx);
1462
HOFFSET(dest_page) -= len;
1463
inp[NUM_ENT(dest_page)] = HOFFSET(dest_page);
1464
dest = P_ENTRY(dbp, dest_page, NUM_ENT(dest_page));
1465
NUM_ENT(dest_page)++;
1467
memcpy(dest, src, len);
1473
* pointer on success
1476
* PUBLIC: int __ham_add_ovflpage __P((DBC *, PAGE *, int, PAGE **));
1479
__ham_add_ovflpage(dbc, pagep, release, pp)
1494
if ((ret = __db_new(dbc, P_HASH, &new_pagep)) != 0)
1497
if (DBC_LOGGING(dbc)) {
1498
if ((ret = __ham_newpage_log(dbp, dbc->txn, &new_lsn, 0,
1499
PUTOVFL, PGNO(pagep), &LSN(pagep),
1500
PGNO(new_pagep), &LSN(new_pagep), PGNO_INVALID, NULL)) != 0)
1503
LSN_NOT_LOGGED(new_lsn);
1505
/* Move lsn onto page. */
1506
LSN(pagep) = LSN(new_pagep) = new_lsn;
1507
NEXT_PGNO(pagep) = PGNO(new_pagep);
1509
PREV_PGNO(new_pagep) = PGNO(pagep);
1512
ret = mpf->put(mpf, pagep, DB_MPOOL_DIRTY);
1519
* PUBLIC: int __ham_get_cpage __P((DBC *, db_lockmode_t));
1522
__ham_get_cpage(dbc, mode)
1534
hcp = (HASH_CURSOR *)dbc->internal;
1538
* There are four cases with respect to buckets and locks.
1539
* 1. If there is no lock held, then if we are locking, we should
1541
* 2. If there is a lock held, it's for the current bucket, and it's
1542
* for the right mode, we don't need to do anything.
1543
* 3. If there is a lock held for the current bucket but it's not
1544
* strong enough, we need to upgrade.
1545
* 4. If there is a lock, but it's for a different bucket, then we need
1546
* to release the existing lock and get a new lock.
1548
LOCK_INIT(tmp_lock);
1549
if (STD_LOCKING(dbc)) {
1550
if (hcp->lbucket != hcp->bucket && /* Case 4 */
1551
(ret = __TLPUT(dbc, hcp->lock)) != 0)
1554
if ((LOCK_ISSET(hcp->lock) &&
1555
(hcp->lock_mode == DB_LOCK_READ &&
1556
mode == DB_LOCK_WRITE))) {
1558
tmp_lock = hcp->lock;
1559
LOCK_INIT(hcp->lock);
1562
/* Acquire the lock. */
1563
if (!LOCK_ISSET(hcp->lock))
1564
/* Cases 1, 3, and 4. */
1565
if ((ret = __ham_lock_bucket(dbc, mode)) != 0)
1569
hcp->lock_mode = mode;
1570
hcp->lbucket = hcp->bucket;
1571
if (LOCK_ISSET(tmp_lock))
1572
/* Case 3: release the original lock. */
1574
dbp->dbenv->lock_put(dbp->dbenv, &tmp_lock);
1575
} else if (LOCK_ISSET(tmp_lock))
1576
hcp->lock = tmp_lock;
1579
if (ret == 0 && hcp->page == NULL) {
1580
if (hcp->pgno == PGNO_INVALID)
1581
hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
1582
if ((ret = mpf->get(mpf,
1583
&hcp->pgno, DB_MPOOL_CREATE, &hcp->page)) != 0)
1591
* Get a new page at the cursor, putting the last page if necessary.
1592
* If the flag is set to H_ISDUP, then we are talking about the
1593
* duplicate page, not the main page.
1595
* PUBLIC: int __ham_next_cpage __P((DBC *, db_pgno_t, int));
1598
__ham_next_cpage(dbc, pgno, dirty)
1611
hcp = (HASH_CURSOR *)dbc->internal;
1613
if (hcp->page != NULL &&
1614
(ret = mpf->put(mpf, hcp->page, dirty ? DB_MPOOL_DIRTY : 0)) != 0)
1618
if ((ret = mpf->get(mpf, &pgno, DB_MPOOL_CREATE, &p)) != 0)
1629
* __ham_lock_bucket --
1630
* Get the lock on a particular bucket.
1632
* PUBLIC: int __ham_lock_bucket __P((DBC *, db_lockmode_t));
1635
__ham_lock_bucket(dbc, mode)
1643
hcp = (HASH_CURSOR *)dbc->internal;
1644
gotmeta = hcp->hdr == NULL ? 1 : 0;
1646
if ((ret = __ham_get_meta(dbc)) != 0)
1648
pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
1650
if ((ret = __ham_release_meta(dbc)) != 0)
1653
ret = __db_lget(dbc, 0, pgno, mode, 0, &hcp->lock);
1655
hcp->lock_mode = mode;
1661
* Delete a pair on a page, paying no attention to what the pair
1662
* represents. The caller is responsible for freeing up duplicates
1663
* or offpage entries that might be referenced by this pair.
1665
* Recovery assumes that this may be called without the metadata
1668
* PUBLIC: void __ham_dpair __P((DB *, PAGE *, u_int32_t));
1671
__ham_dpair(dbp, p, indx)
1676
db_indx_t delta, n, *inp;
1677
u_int8_t *dest, *src;
1679
inp = P_INP(dbp, p);
1681
* Compute "delta", the amount we have to shift all of the
1682
* offsets. To find the delta, we just need to calculate
1683
* the size of the pair of elements we are removing.
1685
delta = H_PAIRSIZE(dbp, p, dbp->pgsize, indx);
1688
* The hard case: we want to remove something other than
1689
* the last item on the page. We need to shift data and
1692
if ((db_indx_t)indx != NUM_ENT(p) - 2) {
1694
* Move the data: src is the first occupied byte on
1695
* the page. (Length is delta.)
1697
src = (u_int8_t *)p + HOFFSET(p);
1700
* Destination is delta bytes beyond src. This might
1701
* be an overlapping copy, so we have to use memmove.
1704
memmove(dest, src, inp[H_DATAINDEX(indx)] - HOFFSET(p));
1707
/* Adjust page metadata. */
1708
HOFFSET(p) = HOFFSET(p) + delta;
1709
NUM_ENT(p) = NUM_ENT(p) - 2;
1711
/* Adjust the offsets. */
1712
for (n = (db_indx_t)indx; n < (db_indx_t)(NUM_ENT(p)); n++)
1713
inp[n] = inp[n + 2] + delta;
1720
* Adjust the cursors after we've emptied a page in a bucket, taking
1721
* care that when we move cursors pointing to deleted items, their
1722
* orders don't collide with the orders of cursors on the page we move
1723
* them to (since after this function is called, cursors with the same
1724
* index on the two pages will be otherwise indistinguishable--they'll
1725
* all have pgno new_pgno). There are three cases:
1727
* 1) The emptied page is the first page in the bucket. In this
1728
* case, we've copied all the items from the second page into the
1729
* first page, so the first page is new_pgno and the second page is
1730
* old_pgno. new_pgno is empty, but can have deleted cursors
1731
* pointing at indx 0, so we need to be careful of the orders
1732
* there. This is DB_HAM_DELFIRSTPG.
1734
* 2) The page is somewhere in the middle of a bucket. Our caller
1735
* can just delete such a page, so it's old_pgno. old_pgno is
1736
* empty, but may have deleted cursors pointing at indx 0, so we
1737
* need to be careful of indx 0 when we move those cursors to
1738
* new_pgno. This is DB_HAM_DELMIDPG.
1740
* 3) The page is the last in a bucket. Again the empty page is
1741
* old_pgno, and again it should only have cursors that are deleted
1742
* and at indx == 0. This time, though, there's no next page to
1743
* move them to, so we set them to indx == num_ent on the previous
1744
* page--and indx == num_ent is the index whose cursors we need to
1745
* be careful of. This is DB_HAM_DELLASTPG.
1748
__ham_c_delpg(dbc, old_pgno, new_pgno, num_ent, op, orderp)
1750
db_pgno_t old_pgno, new_pgno;
1765
/* Which is the worrisome index? */
1766
indx = (op == DB_HAM_DELLASTPG) ? num_ent : 0;
1771
my_txn = IS_SUBTRANSACTION(dbc->txn) ? dbc->txn : NULL;
1774
MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
1776
* Find the highest order of any cursor our movement
1780
for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
1781
ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
1782
ldbp = LIST_NEXT(ldbp, dblistlinks)) {
1783
MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
1784
for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL;
1785
cp = TAILQ_NEXT(cp, links)) {
1786
if (cp == dbc || cp->dbtype != DB_HASH)
1788
hcp = (HASH_CURSOR *)cp->internal;
1789
if (hcp->pgno == new_pgno) {
1790
if (hcp->indx == indx &&
1791
F_ISSET(hcp, H_DELETED) &&
1792
hcp->order >= order)
1793
order = hcp->order + 1;
1794
DB_ASSERT(op != DB_HAM_DELFIRSTPG ||
1795
hcp->indx == NDX_INVALID ||
1797
F_ISSET(hcp, H_DELETED)));
1800
MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
1803
for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
1804
ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
1805
ldbp = LIST_NEXT(ldbp, dblistlinks)) {
1806
MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
1807
for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL;
1808
cp = TAILQ_NEXT(cp, links)) {
1809
if (cp == dbc || cp->dbtype != DB_HASH)
1812
hcp = (HASH_CURSOR *)cp->internal;
1814
if (hcp->pgno == old_pgno) {
1816
case DB_HAM_DELFIRSTPG:
1818
* We're moving all items,
1819
* regardless of index.
1821
hcp->pgno = new_pgno;
1824
* But we have to be careful of
1827
if (hcp->indx == indx)
1828
hcp->order += order;
1830
case DB_HAM_DELMIDPG:
1831
hcp->pgno = new_pgno;
1832
DB_ASSERT(hcp->indx == 0 &&
1833
F_ISSET(hcp, H_DELETED));
1834
hcp->order += order;
1836
case DB_HAM_DELLASTPG:
1837
hcp->pgno = new_pgno;
1838
DB_ASSERT(hcp->indx == 0 &&
1839
F_ISSET(hcp, H_DELETED));
1841
hcp->order += order;
1845
return (__db_panic(dbenv, EINVAL));
1847
if (my_txn != NULL && cp->txn != my_txn)
1851
MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
1853
MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
1855
if (found != 0 && DBC_LOGGING(dbc)) {
1856
if ((ret = __ham_chgpg_log(dbp, my_txn, &lsn, 0, op,
1857
old_pgno, new_pgno, indx, order)) != 0)