2
* See the file LICENSE for redistribution information.
4
* Copyright (c) 1996-2001
5
* Sleepycat Software. All rights reserved.
8
* Copyright (c) 1995, 1996
9
* The President and Fellows of Harvard University. All rights reserved.
11
* This code is derived from software contributed to Berkeley by
14
* Redistribution and use in source and binary forms, with or without
15
* modification, are permitted provided that the following conditions
17
* 1. Redistributions of source code must retain the above copyright
18
* notice, this list of conditions and the following disclaimer.
19
* 2. Redistributions in binary form must reproduce the above copyright
20
* notice, this list of conditions and the following disclaimer in the
21
* documentation and/or other materials provided with the distribution.
22
* 3. Neither the name of the University nor the names of its contributors
23
* may be used to endorse or promote products derived from this software
24
* without specific prior written permission.
26
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39
#include "db_config.h"
42
static const char revid[] = "$Id: txn.c,v 11.97 2001/07/02 01:05:44 bostic Exp $";
45
#ifndef NO_SYSTEM_INCLUDES
46
#include <sys/types.h>
48
#if TIME_WITH_SYS_TIME
63
#include "db_server.h"
75
#include "rpc_client_ext.h"
79
* __txn_isvalid enumerated types. We cannot simply use the transaction
80
* statuses, because different statuses need to be handled differently
81
* depending on the caller.
90
static int __txn_begin __P((DB_TXN *, int));
91
static void __txn_end __P((DB_TXN *, int));
92
static int __txn_isvalid __P((const DB_TXN *, TXN_DETAIL **, txnop_t));
93
static int __txn_undo __P((DB_TXN *));
97
* This is a wrapper to the actual begin process. Normal txn_begin()
98
* allocates a DB_TXN structure for the caller, while txn_xa_begin() does
99
* not. Other than that, both call into the common __txn_begin() code.
101
* Internally, we use TXN_DETAIL structures, but the DB_TXN structure
102
* provides access to the transaction ID and the offset in the transaction
103
* region of the TXN_DETAIL structure.
105
* EXTERN: int txn_begin __P((DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t));
108
txn_begin(dbenv, parent, txnpp, flags)
110
DB_TXN *parent, **txnpp;
117
if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
118
return (__dbcl_txn_begin(dbenv, parent, txnpp, flags));
122
ENV_REQUIRES_CONFIG(dbenv, dbenv->tx_handle, "txn_begin", DB_INIT_TXN);
124
if ((ret = __db_fchk(dbenv,
126
DB_DIRTY_READ | DB_TXN_NOWAIT |
127
DB_TXN_NOSYNC | DB_TXN_SYNC)) != 0)
129
if ((ret = __db_fcchk(dbenv,
130
"txn_begin", flags, DB_TXN_NOSYNC, DB_TXN_SYNC)) != 0)
133
if ((ret = __os_calloc(dbenv, 1, sizeof(DB_TXN), &txn)) != 0)
136
txn->mgrp = dbenv->tx_handle;
137
txn->parent = parent;
138
TAILQ_INIT(&txn->kids);
139
txn->flags = TXN_MALLOC;
140
if (LF_ISSET(DB_DIRTY_READ))
141
F_SET(txn, TXN_DIRTY_READ);
142
if (LF_ISSET(DB_TXN_NOSYNC))
143
F_SET(txn, TXN_NOSYNC);
144
if (LF_ISSET(DB_TXN_SYNC))
145
F_SET(txn, TXN_SYNC);
146
if (LF_ISSET(DB_TXN_NOWAIT))
147
F_SET(txn, TXN_NOWAIT);
149
if ((ret = __txn_begin(txn, 0)) != 0) {
150
__os_free(dbenv, txn, sizeof(DB_TXN));
154
if (txn != NULL && parent != NULL)
155
TAILQ_INSERT_HEAD(&parent->kids, txn, klinks);
163
* XA version of txn_begin.
165
* PUBLIC: int __txn_xa_begin __P((DB_ENV *, DB_TXN *));
168
__txn_xa_begin(dbenv, txn)
174
memset(txn, 0, sizeof(DB_TXN));
176
txn->mgrp = dbenv->tx_handle;
178
return (__txn_begin(txn, 0));
182
* __txn_compensate_begin
183
* Begin a compensation transaction. This is a special interface
184
* that is used only for transactions that must be started to compensate
185
* for actions during an abort. Currently only used for allocations.
187
* PUBLIC: int __txn_compensate_begin __P((DB_ENV *, DB_TXN **txnp));
190
__txn_compensate_begin(dbenv, txnpp)
199
if ((ret = __os_calloc(dbenv, 1, sizeof(DB_TXN), &txn)) != 0)
202
txn->mgrp = dbenv->tx_handle;
205
F_SET(txn, TXN_COMPENSATE);
206
return (__txn_begin(txn, 1));
211
* Normal DB version of txn_begin.
214
__txn_begin(txn, internal)
221
DB_TXNREGION *region;
229
region = mgr->reginfo.primary;
232
* We do not have to write begin records (and if we do not, then we
233
* need never write records for read-only transactions). However,
234
* we do need to find the current LSN so that we can store it in the
235
* transaction structure, so we can know where to take checkpoints.
237
if (LOGGING_ON(dbenv) &&
238
(ret = log_put(dbenv, &begin_lsn, NULL, DB_CURLSN)) != 0)
241
R_LOCK(dbenv, &mgr->reginfo);
242
if (!F_ISSET(txn, TXN_COMPENSATE) && F_ISSET(region, TXN_IN_RECOVERY)) {
243
__db_err(dbenv, "operation not permitted during recovery.");
248
/* Make sure that we aren't still recovering prepared transactions. */
249
if (!internal && region->nrestores != 0) {
251
"txn_begin: recovery of prepared but not yet committed transactions is incomplete.");
256
/* Make sure that last_txnid is not going to wrap around. */
257
if (region->last_txnid == TXN_INVALID) {
259
"txn_begin: transaction ID wrapped. Exit the database environment\nand restart the application as if application failure had occurred");
264
/* Allocate a new transaction detail structure. */
266
__db_shalloc(mgr->reginfo.addr, sizeof(TXN_DETAIL), 0, &td)) != 0) {
268
"Unable to allocate memory for transaction detail");
272
/* Place transaction on active transaction list. */
273
SH_TAILQ_INSERT_HEAD(®ion->active_txn, td, links, __txn_detail);
275
id = ++region->last_txnid;
277
if (++region->nactive > region->maxnactive)
278
region->maxnactive = region->nactive;
281
td->begin_lsn = begin_lsn;
282
ZERO_LSN(td->last_lsn);
283
td->status = TXN_RUNNING;
284
if (txn->parent != NULL)
285
td->parent = txn->parent->off;
287
td->parent = INVALID_ROFF;
290
off = R_OFFSET(&mgr->reginfo, td);
291
R_UNLOCK(dbenv, &mgr->reginfo);
293
ZERO_LSN(txn->last_lsn);
298
* If this is a transaction family, we must link the child to the
299
* maximal grandparent in the lock table for deadlock detection.
301
if (txn->parent != NULL && LOCKING_ON(dbenv))
302
if ((ret = __lock_addfamilylocker(dbenv,
303
txn->parent->txnid, txn->txnid)) != 0)
306
if (F_ISSET(txn, TXN_MALLOC)) {
307
MUTEX_THREAD_LOCK(dbenv, mgr->mutexp);
308
TAILQ_INSERT_TAIL(&mgr->txn_chain, txn, links);
309
MUTEX_THREAD_UNLOCK(dbenv, mgr->mutexp);
314
err1: R_UNLOCK(dbenv, &mgr->reginfo);
321
* Commit a transaction.
323
* EXTERN: int txn_commit __P((DB_TXN *, u_int32_t));
326
txn_commit(txnp, flags)
334
dbenv = txnp->mgrp->dbenv;
337
if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
338
return (__dbcl_txn_commit(txnp, flags));
343
if ((ret = __txn_isvalid(txnp, NULL, TXN_OP_COMMIT)) != 0)
347
* We clear flags that are incorrect, ignoring any flag errors, and
348
* default to synchronous operations. By definition, transaction
349
* handles are dead when we return, and this error should never
350
* happen, but we don't want to fail in the field 'cause the app is
351
* specifying the wrong flag for some reason.
354
"txn_commit", flags, DB_TXN_NOSYNC | DB_TXN_SYNC) != 0)
356
if (__db_fcchk(dbenv,
357
"txn_commit", flags, DB_TXN_NOSYNC, DB_TXN_SYNC) != 0)
359
if (LF_ISSET(DB_TXN_NOSYNC)) {
360
F_CLR(txnp, TXN_SYNC);
361
F_SET(txnp, TXN_NOSYNC);
363
if (LF_ISSET(DB_TXN_SYNC)) {
364
F_CLR(txnp, TXN_NOSYNC);
365
F_SET(txnp, TXN_SYNC);
369
* Commit any unresolved children. If there's an error, abort any
370
* unresolved children and the parent.
372
while ((kid = TAILQ_FIRST(&txnp->kids)) != NULL)
373
if ((ret = txn_commit(kid, flags)) != 0) {
374
while ((kid = TAILQ_FIRST(&txnp->kids)) != NULL)
375
(void)txn_abort(kid);
380
* If there are any log records, write a log record and sync the log,
381
* else do no log writes. If the commit is for a child transaction,
382
* we do not need to commit the child synchronously since it may still
383
* abort (if its parent aborts), and otherwise its parent or ultimate
384
* ancestor will write synchronously.
386
if (LOGGING_ON(dbenv) && !IS_ZERO_LSN(txnp->last_lsn)) {
387
if (txnp->parent == NULL) {
388
if ((ret = __txn_regop_log(dbenv,
389
txnp, &txnp->last_lsn,
390
(F_ISSET(dbenv, DB_ENV_TXN_NOSYNC) &&
391
!F_ISSET(txnp, TXN_SYNC)) ||
392
F_ISSET(txnp, TXN_NOSYNC) ? 0 : DB_COMMIT,
393
TXN_COMMIT, (int32_t)time(NULL))) != 0) {
397
/* Log the commit in the parent! */
398
if ((ret = __txn_child_log(dbenv,
399
txnp->parent, &txnp->parent->last_lsn,
400
0, txnp->txnid, &txnp->last_lsn)) != 0) {
404
F_SET(txnp->parent, TXN_CHILDCOMMIT);
411
err: (void)txn_abort(txnp);
417
* Abort a transaction.
419
* EXTERN: int txn_abort __P((DB_TXN *));
431
dbenv = txnp->mgrp->dbenv;
434
if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
435
return (__dbcl_txn_abort(txnp));
440
if ((ret = __txn_isvalid(txnp, &td, TXN_OP_ABORT)) != 0)
443
/* Abort any unresolved children. */
444
while ((kid = TAILQ_FIRST(&txnp->kids)) != NULL)
445
if ((t_ret = txn_abort(kid)) != 0 && ret == 0)
448
request.op = DB_LOCK_UPGRADE_WRITE;
449
if (LOCKING_ON(dbenv) &&
450
(ret = lock_vec(dbenv, txnp->txnid, 0, &request, 1, NULL)) != 0)
451
(void)__db_panic(dbenv, ret);
452
if ((t_ret = __txn_undo(txnp)) != 0 && ret == 0)
456
* Normally, we do not need to log aborts. However, if we
457
* are a distributed transaction (i.e., we have a prepare),
458
* then we log the abort so we know that this transaction
459
* was actually completed. Even if the log fails; abort/undo
462
if (LOGGING_ON(dbenv) && td->status == TXN_PREPARED &&
463
(t_ret = __txn_regop_log(dbenv, txnp, &txnp->last_lsn,
464
(F_ISSET(dbenv, DB_ENV_TXN_NOSYNC) &&
465
!F_ISSET(txnp, TXN_SYNC)) ||
466
F_ISSET(txnp, TXN_NOSYNC) ? 0 : DB_FLUSH,
467
TXN_ABORT, (int32_t)time(NULL))) != 0 && ret == 0)
477
* Free the per-process resources associated with this txn handle.
479
* EXTERN: int txn_discard __P((DB_TXN *, u_int32_t flags));
482
txn_discard(txnp, flags)
491
dbenv = txnp->mgrp->dbenv;
494
#if defined(HAVE_RPC)
495
if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
496
return (__dbcl_txn_discard(txnp, flags));
503
if ((ret = __txn_isvalid(txnp, &td, TXN_OP_DISCARD)) != 0)
506
/* Should be no children. */
507
DB_ASSERT(TAILQ_FIRST(&txnp->kids) == NULL);
508
DB_ASSERT(F_ISSET(td, TXN_RESTORED));
510
/* Free the space. */
511
MUTEX_THREAD_LOCK(dbenv, txnp->mgrp->mutexp);
512
txnp->mgrp->n_discards++;
513
if (F_ISSET(txnp, TXN_MALLOC)) {
514
TAILQ_REMOVE(&txnp->mgrp->txn_chain, txnp, links);
517
MUTEX_THREAD_UNLOCK(dbenv, txnp->mgrp->mutexp);
519
__os_free(dbenv, freep, sizeof(*freep));
526
* Flush the log so a future commit is guaranteed to succeed.
528
* EXTERN: int txn_prepare __P((DB_TXN *, u_int8_t *));
531
txn_prepare(txnp, gid)
541
dbenv = txnp->mgrp->dbenv;
544
if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
545
return (__dbcl_txn_prepare(txnp, gid));
550
if ((ret = __txn_isvalid(txnp, &td, TXN_OP_PREPARE)) != 0)
553
if (txnp->parent != NULL) {
554
__db_err(dbenv, "Prepare disallowed on child transactions.");
558
/* Commit any unresolved children. */
559
while ((kid = TAILQ_FIRST(&txnp->kids)) != NULL)
560
if ((ret = txn_commit(kid, DB_TXN_NOSYNC)) != 0)
564
* In XA, the global transaction ID in the txn_detail structure is
565
* already set; in a non-XA environment, we must set it here. XA
566
* requires that the transaction be either ENDED or SUSPENDED when
567
* prepare is called, so we know that if the xa_status isn't in one
568
* of those states, then we are calling prepare directly and we need
569
* to fill in the td->xid.
572
if (LOGGING_ON(dbenv)) {
573
memset(&xid, 0, sizeof(xid));
574
if (td->xa_status != TXN_XA_ENDED &&
575
td->xa_status != TXN_XA_SUSPENDED)
576
/* Regular prepare; fill in the gid. */
577
memcpy(td->xid, gid, sizeof(td->xid));
579
xid.size = sizeof(td->xid);
582
if ((ret = __txn_xa_regop_log(dbenv, txnp, &txnp->last_lsn,
583
(F_ISSET(dbenv, DB_ENV_TXN_NOSYNC) &&
584
!F_ISSET(txnp, TXN_SYNC)) ||
585
F_ISSET(txnp, TXN_NOSYNC) ? 0 : DB_COMMIT, TXN_PREPARE,
586
&xid, td->format, td->gtrid, td->bqual,
587
&td->begin_lsn)) != 0) {
588
__db_err(dbenv, "txn_prepare: log_write failed %s",
594
MUTEX_THREAD_LOCK(dbenv, txnp->mgrp->mutexp);
595
td->status = TXN_PREPARED;
596
MUTEX_THREAD_UNLOCK(dbenv, txnp->mgrp->mutexp);
602
* Return the transaction ID.
604
* EXTERN: u_int32_t txn_id __P((DB_TXN *));
610
return (txnp->txnid);
615
* Return 0 if the txnp is reasonable, otherwise panic.
618
__txn_isvalid(txnp, tdp, op)
624
DB_TXNREGION *region;
628
region = mgrp->reginfo.primary;
630
/* Check for recovery. */
631
if (!F_ISSET(txnp, TXN_COMPENSATE) &&
632
F_ISSET(region, TXN_IN_RECOVERY)) {
633
__db_err(mgrp->dbenv,
634
"operation not permitted during recovery.");
638
/* Check for live cursors. */
639
if (txnp->cursors != 0) {
640
__db_err(mgrp->dbenv, "transaction has active cursors");
644
/* Check transaction's state. */
645
tp = (TXN_DETAIL *)R_ADDR(&mgrp->reginfo, txnp->off);
650
* Discard is a special case since we're just tossing the
651
* per-process space; there are a lot of problems with the
652
* transaction that we can tolerate.
654
if (op == TXN_OP_DISCARD) {
655
/* Transaction has already been reused. */
656
if (txnp->txnid != tp->txnid)
660
* We have the right transaction, we'd better be
661
* a restored transaction. However, if it's not,
662
* we won't panic the environment since it means
663
* that someone has a bad user-handle, not that the
664
* transaction is corrupt.
666
if (!F_ISSET(tp, TXN_RESTORED)) {
667
__db_err(mgrp->dbenv, "not a restored transaction");
672
* No matter what state the transaction is in, we can
673
* blow away this process' handle.
678
switch (tp->status) {
682
__db_err(mgrp->dbenv, "transaction already %s",
683
tp->status == TXN_COMMITTED ? "committed" : "aborted");
686
if (op == TXN_OP_PREPARE) {
687
__db_err(mgrp->dbenv, "transaction already prepared");
689
* Txn_prepare doesn't blow away the user handle, so
690
* in this case, give the user the opportunity to
703
* If there's a serious problem with the transaction, panic. TXN
704
* handles are dead by definition when we return, and if you use
705
* a cursor you forgot to close, we have no idea what will happen.
707
return (__db_panic(mgrp->dbenv, EINVAL));
712
* Internal transaction end routine.
715
__txn_end(txnp, is_commit)
722
DB_TXNREGION *region;
724
int do_closefiles, ret;
728
region = mgr->reginfo.primary;
731
/* Release the locks. */
732
request.op = txnp->parent == NULL ||
733
is_commit == 0 ? DB_LOCK_PUT_ALL : DB_LOCK_INHERIT;
736
* __txn_end cannot return an error, we MUST return success/failure
737
* from txn_commit or txn_abort, ignoring any internal errors. So,
738
* we panic if something goes wrong. We can't deadlock here because
739
* we're not acquiring any new locks, so DB_LOCK_DEADLOCK is just as
740
* fatal as any other error.
742
if (LOCKING_ON(dbenv) &&
743
(ret = lock_vec(dbenv, txnp->txnid, 0, &request, 1, NULL)) != 0)
744
(void)__db_panic(dbenv, ret);
746
/* End the transaction. */
747
R_LOCK(dbenv, &mgr->reginfo);
749
tp = (TXN_DETAIL *)R_ADDR(&mgr->reginfo, txnp->off);
750
SH_TAILQ_REMOVE(®ion->active_txn, tp, links, __txn_detail);
751
if (F_ISSET(tp, TXN_RESTORED)) {
753
do_closefiles = region->nrestores == 0;
755
__db_shalloc_free(mgr->reginfo.addr, tp);
763
R_UNLOCK(dbenv, &mgr->reginfo);
766
* The transaction cannot get more locks, remove its locker info,
769
if (LOCKING_ON(dbenv) && (ret =
770
__lock_freefamilylocker(dbenv->lk_handle, txnp->txnid)) != 0)
771
(void)__db_panic(dbenv, ret);
772
if (txnp->parent != NULL)
773
TAILQ_REMOVE(&txnp->parent->kids, txnp, klinks);
775
/* Free the space. */
776
if (F_ISSET(txnp, TXN_MALLOC)) {
777
MUTEX_THREAD_LOCK(dbenv, mgr->mutexp);
778
TAILQ_REMOVE(&mgr->txn_chain, txnp, links);
779
MUTEX_THREAD_UNLOCK(dbenv, mgr->mutexp);
781
__os_free(dbenv, txnp, sizeof(*txnp));
785
F_SET((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER);
786
__log_close_files(dbenv);
787
F_CLR((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER);
794
* Undo the transaction with id txnid. Returns 0 on success and
812
if (!LOGGING_ON(dbenv))
816
* This is the simplest way to code this, but if the mallocs during
817
* recovery turn out to be a performance issue, we can do the
818
* allocation here and use DB_DBT_USERMEM.
820
memset(&rdbt, 0, sizeof(rdbt));
821
threaded = F_ISSET(dbenv, DB_ENV_THREAD) ? 1 : 0;
823
F_SET(&rdbt, DB_DBT_MALLOC);
825
key_lsn = txnp->last_lsn;
827
/* Allocate a transaction list for children or aborted page creates. */
828
if ((ret = __db_txnlist_init(dbenv, &txnlist)) != 0)
831
if (F_ISSET(txnp, TXN_CHILDCOMMIT) &&
832
(ret = __db_txnlist_lsninit(dbenv,
833
txnlist, &txnp->last_lsn)) != 0)
836
for (ret = 0; ret == 0 && !IS_ZERO_LSN(key_lsn);) {
838
* The dispatch routine returns the lsn of the record
839
* before the current one in the key_lsn argument.
841
if ((ret = log_get(dbenv, &key_lsn, &rdbt, DB_SET)) == 0) {
842
ret = __db_dispatch(dbenv,
843
&rdbt, &key_lsn, DB_TXN_ABORT, txnlist);
844
if (threaded && rdbt.data != NULL) {
845
__os_free(dbenv, rdbt.data, rdbt.size);
848
if (F_ISSET(txnp, TXN_CHILDCOMMIT))
849
(void)__db_txnlist_lsnadd(dbenv,
850
txnlist, &key_lsn, 0);
852
if (ret == DB_SURPRISE_KID &&
853
(ret = __db_txnlist_lsninit(dbenv, txnlist, &key_lsn))
855
F_SET(txnp, TXN_CHILDCOMMIT);
857
__db_err(txnp->mgrp->dbenv,
858
"txn_abort: Log undo failed for LSN: %lu %lu: %s",
859
(u_long)key_lsn.file, (u_long)key_lsn.offset,
862
__db_txnlist_end(dbenv, txnlist);
867
if (txnlist != NULL) {
868
ret = __db_do_the_limbo(dbenv, txnlist);
869
__db_txnlist_end(dbenv, txnlist);
876
* Transaction checkpoint.
877
* If either kbytes or minutes is non-zero, then we only take the checkpoint
878
* if more than "minutes" minutes have passed since the last checkpoint or if
879
* more than "kbytes" of log data have been written since the last checkpoint.
880
* When taking a checkpoint, find the oldest active transaction and figure out
881
* its first LSN. This is the lowest LSN we can checkpoint, since any record
882
* written since that point may be involved in a transaction and may
883
* therefore need to be undone in the case of an abort.
885
* EXTERN: int txn_checkpoint __P((DB_ENV *, u_int32_t, u_int32_t, u_int32_t));
888
txn_checkpoint(dbenv, kbytes, minutes, flags)
890
u_int32_t kbytes, minutes, flags;
893
DB_LSN ckp_lsn, sync_lsn, last_ckp;
895
DB_TXNREGION *region;
898
time_t last_ckp_time, now;
899
u_int32_t bytes, mbytes;
903
if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
904
return (__dbcl_txn_checkpoint(dbenv, kbytes, minutes));
907
ENV_REQUIRES_CONFIG(dbenv,
908
dbenv->tx_handle, "txn_checkpoint", DB_INIT_TXN);
910
mgr = dbenv->tx_handle;
911
region = mgr->reginfo.primary;
912
dblp = dbenv->lg_handle;
913
lp = dblp->reginfo.primary;
916
* Check if we need to checkpoint.
920
if (LF_ISSET(DB_FORCE))
923
R_LOCK(dbenv, &dblp->reginfo);
924
mbytes = lp->stat.st_wc_mbytes;
926
* We add the current buffer offset so as to count bytes that
927
* have not yet been written, but are sitting in the log buffer.
929
bytes = lp->stat.st_wc_bytes + lp->b_off;
931
R_UNLOCK(dbenv, &dblp->reginfo);
933
/* Don't checkpoint a quiescent database. */
934
if (bytes == 0 && mbytes == 0)
937
if (kbytes != 0 && mbytes * 1024 + bytes / 1024 >= (u_int32_t)kbytes)
943
R_LOCK(dbenv, &mgr->reginfo);
944
last_ckp_time = region->time_ckp;
945
R_UNLOCK(dbenv, &mgr->reginfo);
947
if (now - last_ckp_time >= (time_t)(minutes * 60))
952
* If we checked time and data and didn't go to checkpoint,
955
if (minutes != 0 || kbytes != 0)
959
if (IS_ZERO_LSN(ckp_lsn)) {
960
R_LOCK(dbenv, &dblp->reginfo);
962
R_UNLOCK(dbenv, &dblp->reginfo);
966
* We have to find an LSN such that all transactions begun
967
* before that LSN are complete.
969
R_LOCK(dbenv, &mgr->reginfo);
971
if (IS_ZERO_LSN(region->pending_ckp)) {
973
SH_TAILQ_FIRST(®ion->active_txn, __txn_detail);
975
txnp = SH_TAILQ_NEXT(txnp, links, __txn_detail)) {
978
* Look through the active transactions for the
981
if (!IS_ZERO_LSN(txnp->begin_lsn) &&
982
log_compare(&txnp->begin_lsn, &ckp_lsn) < 0)
983
ckp_lsn = txnp->begin_lsn;
985
region->pending_ckp = ckp_lsn;
987
ckp_lsn = region->pending_ckp;
989
R_UNLOCK(dbenv, &mgr->reginfo);
992
* Try three times to sync the mpool buffers up to the specified LSN,
993
* sleeping 1, 2 and 4 seconds between attempts.
996
for (interval = 1;;) {
998
* memp_sync may change the lsn you pass it, so don't
999
* pass it the actual ckp_lsn, pass it a local instead.
1002
if ((ret = memp_sync(dbenv, &sync_lsn)) == 0)
1006
* ret == DB_INCOMPLETE means there are still buffers
1007
* to flush, the checkpoint is not complete.
1009
if (ret == DB_INCOMPLETE) {
1013
(void)__os_sleep(dbenv, interval, 0);
1017
"txn_checkpoint: failure in memp_sync %s",
1023
if (LOGGING_ON(dbenv)) {
1024
R_LOCK(dbenv, &mgr->reginfo);
1025
last_ckp = region->last_ckp;
1026
ZERO_LSN(region->pending_ckp);
1027
R_UNLOCK(dbenv, &mgr->reginfo);
1029
if ((ret = __txn_ckp_log(dbenv,
1030
NULL, &ckp_lsn, DB_CHECKPOINT, &ckp_lsn,
1031
&last_ckp, (int32_t)time(NULL))) != 0) {
1033
"txn_checkpoint: log failed at LSN [%ld %ld] %s",
1034
(long)ckp_lsn.file, (long)ckp_lsn.offset,
1039
R_LOCK(dbenv, &mgr->reginfo);
1040
region->last_ckp = ckp_lsn;
1041
(void)time(®ion->time_ckp);
1042
R_UNLOCK(dbenv, &mgr->reginfo);
1048
* __txn_activekids --
1049
* Return if this transaction has any active children.
1051
* PUBLIC: int __txn_activekids __P((DB_ENV *, u_int32_t, DB_TXN *));
1054
__txn_activekids(dbenv, rectype, txnp)
1060
* On a child commit, we know that there are children (i.e., the
1061
* committing child at the least). In that case, skip this check.
1063
if (rectype == DB_txn_child)
1066
if (TAILQ_FIRST(&txnp->kids) != NULL) {
1067
__db_err(dbenv, "Child transaction is active");
1074
* __txn_force_abort --
1075
* Force an abort record into the log if the commit record
1076
* failed to get to disk.
1077
* NOTE: This routine depends on the layout of HDR and of the
1078
* __txn_regop record in txn.src. We are passed the beginning
1079
* of the commit record in the log buffer and overwrite the
1080
* commit with an abort and recalculate the checksum.
1082
* PUBLIC: void __txn_force_abort __P((u_int8_t *));
1085
__txn_force_abort(buffer)
1088
u_int32_t cksum, offset, opcode;
1091
/* rectype txn_num lsn */
1092
offset = sizeof(u_int32_t) + sizeof(u_int32_t) + sizeof(DB_LSN);
1093
bp = buffer + sizeof(HDR) + offset;
1095
memcpy(bp, &opcode, sizeof(opcode));
1096
cksum = __ham_func4(NULL, buffer + sizeof(HDR),
1097
offset + sizeof(u_int32_t) + sizeof(int32_t));
1098
memcpy(buffer + SSZ(HDR, cksum), &cksum, sizeof(cksum));
1103
* Before we can close an environment, we need to check if we
1104
* were in the midst of taking care of restored transactions. If
1105
* so, then we need to close the files that we opened.
1107
* PUBLIC: void __txn_preclose __P((DB_ENV *));
1110
__txn_preclose(dbenv)
1114
DB_TXNREGION *region;
1117
mgr = (DB_TXNMGR *)dbenv->tx_handle;
1118
region = mgr->reginfo.primary;
1121
R_LOCK(dbenv, &mgr->reginfo);
1122
if (region != NULL &&
1123
region->nrestores <= mgr->n_discards && mgr->n_discards != 0)
1125
R_UNLOCK(dbenv, &mgr->reginfo);
1127
if (do_closefiles) {
1129
* Set the DBLOG_RECOVER flag while closing these
1130
* files so they do not create additional log records
1131
* that will confuse future recoveries.
1133
F_SET((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER);
1134
__log_close_files(dbenv);
1135
F_CLR((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER);