1
/*****************************************************************************
3
Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
5
This program is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free Software
7
Foundation; version 2 of the License.
9
This program is distributed in the hope that it will be useful, but WITHOUT
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
You should have received a copy of the GNU General Public License along with
14
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15
Place, Suite 330, Boston, MA 02111-1307 USA
17
*****************************************************************************/
19
/******************************************************
22
Created 2/6/1997 Heikki Tuuri
23
*******************************************************/
28
#include "row0vers.ic"
31
#include "dict0dict.h"
32
#include "dict0boot.h"
34
#include "mach0data.h"
39
#include "trx0purge.h"
45
#include "read0read.h"
46
#include "lock0lock.h"
48
/* NOTE(review): this chunk appears to be a truncated extraction --
original line numbers are interleaved as bare integers and many
statements (braces, declarations, returns, the version-walk loop) are
missing.  The comments below describe only the code that is visible;
confirm against the complete row0vers.c before relying on them. */
/*********************************************************************
49
Finds out if an active transaction has inserted or modified a secondary
50
index record. NOTE: the kernel mutex is temporarily released in this
54
row_vers_impl_x_locked_off_kernel(
55
/*==============================*/
56
/* out: NULL if committed, else the active
57
transaction; NOTE that the kernel mutex is
58
temporarily released! */
59
const rec_t*	rec,	/* in: record in a secondary index */
60
dict_index_t*	index,	/* in: the secondary index */
61
const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
63
dict_index_t*	clust_index;
71
dtuple_t*	entry	= NULL; /* assignment to eliminate compiler
79
ut_ad(mutex_own(&kernel_mutex));
80
#ifdef UNIV_SYNC_DEBUG
81
ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
82
#endif /* UNIV_SYNC_DEBUG */
84
mutex_exit(&kernel_mutex);
88
/* Search for the clustered index record: this is a time-consuming
89
operation: therefore we release the kernel mutex; also, the release
90
is required by the latching order convention. The latch on the
91
clustered index locks the top of the stack of versions. We also
92
reserve purge_latch to lock the bottom of the version stack. */
94
clust_rec = row_get_clust_rec(BTR_SEARCH_LEAF, rec, index,
97
/* In a rare case it is possible that no clust rec is found
98
for a secondary index record: if in row0umod.c
99
row_undo_mod_remove_clust_low() we have already removed the
100
clust rec, while purge is still cleaning and removing
101
secondary index records associated with earlier versions of
102
the clustered index record. In that case there cannot be
103
any implicit lock on the secondary index record, because
104
an active transaction which has modified the secondary index
105
record has also modified the clustered index record. And in
106
a rollback we always undo the modifications to secondary index
107
records before the clustered index record. */
109
mutex_enter(&kernel_mutex);
115
heap = mem_heap_create(1024);
116
clust_offsets = rec_get_offsets(clust_rec, clust_index, NULL,
117
ULINT_UNDEFINED, &heap);
118
trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets);
120
/* Lock the bottom of the version stack with the purge latch
(see the latching comment above). */
mtr_s_lock(&(purge_sys->latch), &mtr);
122
mutex_enter(&kernel_mutex);
125
if (!trx_is_active(trx_id)) {
126
/* The transaction that modified or inserted clust_rec is no
127
longer active: no implicit lock on rec */
131
if (!lock_check_trx_id_sanity(trx_id, clust_rec, clust_index,
132
clust_offsets, TRUE)) {
133
/* Corruption noticed: try to avoid a crash by returning */
137
comp = page_rec_is_comp(rec);
138
ut_ad(index->table == clust_index->table);
139
ut_ad(!!comp == dict_table_is_comp(index->table));
140
ut_ad(!comp == !page_rec_is_comp(clust_rec));
142
/* We look up if some earlier version, which was modified by the trx_id
143
transaction, of the clustered index record would require rec to be in
144
a different state (delete marked or unmarked, or have different field
145
values, or not existing). If there is such a version, then rec was
146
modified by the trx_id transaction, and it has an implicit x-lock on
147
rec. Note that if clust_rec itself would require rec to be in a
148
different state, then the trx_id transaction has not yet had time to
149
modify rec, and does not necessarily have an implicit x-lock on rec. */
151
rec_del = rec_get_deleted_flag(rec, comp);
160
trx_id_t	prev_trx_id;
162
mutex_exit(&kernel_mutex);
164
/* While we retrieve an earlier version of clust_rec, we
165
release the kernel mutex, because it may take time to access
166
the disk. After the release, we have to check if the trx_id
167
transaction is still active. We keep the semaphore in mtr on
168
the clust_rec page, so that no other transaction can update
169
it and get an implicit x-lock on rec. */
172
heap = mem_heap_create(1024);
173
err = trx_undo_prev_version_build(clust_rec, &mtr, version,
174
clust_index, clust_offsets,
175
heap, &prev_version);
176
mem_heap_free(heap2); /* free version and clust_offsets */
178
if (prev_version == NULL) {
179
mutex_enter(&kernel_mutex);
181
if (!trx_is_active(trx_id)) {
182
/* Transaction no longer active: no
188
/* If the transaction is still active,
189
clust_rec must be a fresh insert, because no
190
previous version was found. */
191
ut_ad(err == DB_SUCCESS);
193
/* It was a freshly inserted version: there is an
194
implicit x-lock on rec */
196
trx = trx_get_on_id(trx_id);
201
clust_offsets = rec_get_offsets(prev_version, clust_index,
202
NULL, ULINT_UNDEFINED, &heap);
204
vers_del = rec_get_deleted_flag(prev_version, comp);
205
prev_trx_id = row_get_rec_trx_id(prev_version, clust_index,
208
/* If the trx_id and prev_trx_id are different and if
209
the prev_version is marked deleted then the
210
prev_trx_id must have already committed for the trx_id
211
to be able to modify the row. Therefore, prev_trx_id
212
cannot hold any implicit lock. */
213
if (vers_del && 0 != ut_dulint_cmp(trx_id, prev_trx_id)) {
215
mutex_enter(&kernel_mutex);
219
/* The stack of versions is locked by mtr. Thus, it
220
is safe to fetch the prefixes for externally stored
222
row = row_build(ROW_COPY_POINTERS, clust_index, prev_version,
223
clust_offsets, NULL, &ext, heap);
224
entry = row_build_index_entry(row, ext, index, heap);
225
/* entry may be NULL if a record was inserted in place
226
of a deleted record, and the BLOB pointers of the new
227
record were not initialized yet. But in that case,
228
prev_version should be NULL. */
231
mutex_enter(&kernel_mutex);
233
if (!trx_is_active(trx_id)) {
234
/* Transaction no longer active: no implicit x-lock */
239
/* If we get here, we know that the trx_id transaction is
240
still active and it has modified prev_version. Let us check
241
if prev_version would require rec to be in a different
244
/* The previous version of clust_rec must be
245
accessible, because the transaction is still active
246
and clust_rec was not a fresh insert. */
247
ut_ad(err == DB_SUCCESS);
249
/* We check if entry and rec are identified in the alphabetical
251
if (0 == cmp_dtuple_rec(index->cmp_ctx, entry, rec, offsets)) {
252
/* The delete marks of rec and prev_version should be
253
equal for rec to be in the state required by
256
if (rec_del != vers_del) {
257
trx = trx_get_on_id(trx_id);
262
/* It is possible that the row was updated so that the
263
secondary index record remained the same in
264
alphabetical ordering, but the field values changed
265
still. For example, 'abc' -> 'ABC'. Check also that. */
267
dtuple_set_types_binary(
268
entry, dtuple_get_n_fields(entry));
270
if (0 != cmp_dtuple_rec(
271
index->cmp_ctx, entry, rec, offsets)) {
273
trx = trx_get_on_id(trx_id);
277
} else if (!rec_del) {
278
/* The delete mark should be set in rec for it to be
279
in the state required by prev_version */
281
trx = trx_get_on_id(trx_id);
286
if (0 != ut_dulint_cmp(trx_id, prev_trx_id)) {
287
/* The versions modified by the trx_id transaction end
288
to prev_version: no implicit x-lock */
293
/* Walk one step down the version stack; presumably this is the
tail of a loop whose header is missing from this extraction --
TODO confirm against the full source. */
version = prev_version;
303
/* NOTE(review): truncated extraction -- the return type, braces and
return statements of this function are missing; only the visible
statements are documented here. */
/*********************************************************************
304
Finds out if we must preserve a delete marked earlier version of a clustered
305
index record, because it is >= the purge view. */
308
row_vers_must_preserve_del_marked(
309
/*==============================*/
310
/* out: TRUE if earlier version should
312
trx_id_t	trx_id,	/* in: transaction id in the version */
313
mtr_t*	mtr)	/* in: mtr holding the latch on the
314
clustered index record; it will also
315
hold the latch on purge_view */
317
#ifdef UNIV_SYNC_DEBUG
318
ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
319
#endif /* UNIV_SYNC_DEBUG */
321
/* Take the purge latch inside the caller's mtr so it is held for
the rest of the mini-transaction. */
mtr_s_lock(&(purge_sys->latch), mtr);
323
if (trx_purge_update_undo_must_exist(trx_id)) {
325
/* A purge operation is not yet allowed to remove this
326
delete marked record */
334
/* NOTE(review): truncated extraction -- interleaved original line
numbers and missing statements (loop header, several condition lines,
returns).  Comments describe only the visible code; verify against the
complete row0vers.c. */
/*********************************************************************
335
Finds out if a version of the record, where the version >= the current
336
purge view, should have ientry as its secondary index entry. We check
337
if there is any not delete marked version of the record where the trx
338
id >= purge view, and the secondary index entry and ientry are identified in
339
the alphabetical ordering; exactly in this case we return TRUE. */
342
row_vers_old_has_index_entry(
343
/*=========================*/
344
/* out: TRUE if earlier version should have */
345
ibool		also_curr,/* in: TRUE if also rec is included in the
346
versions to search; otherwise only versions
347
prior to it are searched */
348
const rec_t*	rec,	/* in: record in the clustered index; the
349
caller must have a latch on the page */
350
mtr_t*	mtr,	/* in: mtr holding the latch on rec; it will
351
also hold the latch on purge_view */
352
dict_index_t*	index,	/* in: the secondary index */
353
const dtuple_t*	ientry)	/* in: the secondary index entry */
355
const rec_t*	version;
357
dict_index_t*	clust_index;
358
ulint*	clust_offsets;
362
const dtuple_t*	entry;
366
ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
367
|| mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
368
#ifdef UNIV_SYNC_DEBUG
369
ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
370
#endif /* UNIV_SYNC_DEBUG */
371
mtr_s_lock(&(purge_sys->latch), mtr);
373
clust_index = dict_table_get_first_index(index->table);
375
comp = page_rec_is_comp(rec);
376
ut_ad(!dict_table_is_comp(index->table) == !comp);
377
heap = mem_heap_create(1024);
378
clust_offsets = rec_get_offsets(rec, clust_index, NULL,
379
ULINT_UNDEFINED, &heap);
381
if (also_curr && !rec_get_deleted_flag(rec, comp)) {
384
/* The stack of versions is locked by mtr.
385
Thus, it is safe to fetch the prefixes for
386
externally stored columns. */
387
row = row_build(ROW_COPY_POINTERS, clust_index,
388
rec, clust_offsets, NULL, &ext, heap);
389
entry = row_build_index_entry(row, ext, index, heap);
391
/* If entry == NULL, the record contains unset BLOB
392
pointers. This must be a freshly inserted record. If
394
row_purge_remove_sec_if_poss_low(), the thread will
395
hold latches on the clustered index and the secondary
396
index. Because the insert works in three steps:
398
(1) insert the record to clustered index
399
(2) store the BLOBs and update BLOB pointers
400
(3) insert records to secondary indexes
402
the purge thread can safely ignore freshly inserted
403
records and delete the secondary index record. The
404
thread that inserted the new record will be inserting
405
the secondary index records. */
407
/* NOTE that we cannot do the comparison as binary
408
fields because the row is maybe being modified so that
409
the clustered index record has already been updated to
410
a different binary value in a char field, but the
411
collation identifies the old and new value anyway! */
413
&& !dtuple_coll_cmp(index->cmp_ctx, ientry, entry)) {
425
heap = mem_heap_create(1024);
426
err = trx_undo_prev_version_build(rec, mtr, version,
427
clust_index, clust_offsets,
428
heap, &prev_version);
429
mem_heap_free(heap2); /* free version and clust_offsets */
431
if (err != DB_SUCCESS || !prev_version) {
432
/* Versions end here */
439
clust_offsets = rec_get_offsets(prev_version, clust_index,
440
NULL, ULINT_UNDEFINED, &heap);
442
if (!rec_get_deleted_flag(prev_version, comp)) {
445
/* The stack of versions is locked by mtr.
446
Thus, it is safe to fetch the prefixes for
447
externally stored columns. */
448
row = row_build(ROW_COPY_POINTERS, clust_index,
449
prev_version, clust_offsets,
451
entry = row_build_index_entry(row, ext, index, heap);
453
/* If entry == NULL, the record contains unset
454
BLOB pointers. This must be a freshly
455
inserted record that we can safely ignore.
456
For the justification, see the comments after
457
the previous row_build_index_entry() call. */
459
/* NOTE that we cannot do the comparison as binary
460
fields because maybe the secondary index record has
461
already been updated to a different binary value in
462
a char field, but the collation identifies the old
463
and new value anyway! */
467
index->cmp_ctx, ientry, entry)) {
475
/* Step to the previous version; presumably the tail of the
version-walk loop whose header is not visible here. */
version = prev_version;
479
/* NOTE(review): truncated extraction -- loop structure, some argument
lists and return statements are missing.  Comments describe only the
visible code. */
/*********************************************************************
480
Constructs the version of a clustered index record which a consistent
481
read should see. We assume that the trx id stored in rec is such that
482
the consistent read should not see rec in its present version. */
485
row_vers_build_for_consistent_read(
486
/*===============================*/
487
/* out: DB_SUCCESS or DB_MISSING_HISTORY */
488
const rec_t*	rec,	/* in: record in a clustered index; the
489
caller must have a latch on the page; this
490
latch locks the top of the stack of versions
492
mtr_t*	mtr,	/* in: mtr holding the latch on rec */
493
dict_index_t*	index,	/* in: the clustered index */
494
ulint**	offsets,/* in/out: offsets returned by
495
rec_get_offsets(rec, index) */
496
read_view_t*	view,	/* in: the consistent read view */
497
mem_heap_t**	offset_heap,/* in/out: memory heap from which
498
the offsets are allocated */
499
mem_heap_t*	in_heap,/* in: memory heap from which the memory for
500
*old_vers is allocated; memory for possible
501
intermediate versions is allocated and freed
502
locally within the function */
503
rec_t**	old_vers)/* out, own: old version, or NULL if the
504
record does not exist in the view, that is,
505
it was freshly inserted afterwards */
507
const rec_t*	version;
510
mem_heap_t*	heap		= NULL;
514
ut_ad(dict_index_is_clust(index));
515
ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
516
|| mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
517
#ifdef UNIV_SYNC_DEBUG
518
ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
519
#endif /* UNIV_SYNC_DEBUG */
521
ut_ad(rec_offs_validate(rec, index, *offsets));
523
trx_id = row_get_rec_trx_id(rec, index, *offsets);
525
ut_ad(!read_view_sees_trx_id(view, trx_id));
527
/* Hold the purge latch while walking the version stack; it is
released at the bottom of this function. */
rw_lock_s_lock(&(purge_sys->latch));
531
mem_heap_t*	heap2	= heap;
532
trx_undo_rec_t* undo_rec;
535
heap = mem_heap_create(1024);
537
/* If we have high-granularity consistent read view and
538
creating transaction of the view is the same as trx_id in
539
the record we see this record only in the case when
540
undo_no of the record is < undo_no in the view. */
542
if (view->type == VIEW_HIGH_GRANULARITY
543
&& ut_dulint_cmp(view->creator_trx_id, trx_id) == 0) {
545
roll_ptr = row_get_rec_roll_ptr(version, index,
547
undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
548
undo_no = trx_undo_rec_get_undo_no(undo_rec);
549
mem_heap_empty(heap);
551
if (ut_dulint_cmp(view->undo_no, undo_no) > 0) {
552
/* The view already sees this version: we can
553
copy it to in_heap and return */
555
buf = mem_heap_alloc(in_heap,
556
rec_offs_size(*offsets));
557
*old_vers = rec_copy(buf, version, *offsets);
558
rec_offs_make_valid(*old_vers, index,
566
err = trx_undo_prev_version_build(rec, mtr, version, index,
570
mem_heap_free(heap2); /* free version */
573
if (err != DB_SUCCESS) {
577
if (prev_version == NULL) {
578
/* It was a freshly inserted version */
585
*offsets = rec_get_offsets(prev_version, index, *offsets,
586
ULINT_UNDEFINED, offset_heap);
588
trx_id = row_get_rec_trx_id(prev_version, index, *offsets);
590
if (read_view_sees_trx_id(view, trx_id)) {
592
/* The view already sees this version: we can copy
593
it to in_heap and return */
595
buf = mem_heap_alloc(in_heap, rec_offs_size(*offsets));
596
*old_vers = rec_copy(buf, prev_version, *offsets);
597
rec_offs_make_valid(*old_vers, index, *offsets);
603
version = prev_version;
607
rw_lock_s_unlock(&(purge_sys->latch));
612
/* NOTE(review): truncated extraction -- the version-walk loop header,
several condition lines and return statements are missing.  Comments
describe only the visible code. */
/*********************************************************************
613
Constructs the last committed version of a clustered index record,
614
which should be seen by a semi-consistent read. */
617
row_vers_build_for_semi_consistent_read(
618
/*====================================*/
619
/* out: DB_SUCCESS or DB_MISSING_HISTORY */
620
const rec_t*	rec,	/* in: record in a clustered index; the
621
caller must have a latch on the page; this
622
latch locks the top of the stack of versions
624
mtr_t*	mtr,	/* in: mtr holding the latch on rec */
625
dict_index_t*	index,	/* in: the clustered index */
626
ulint**	offsets,/* in/out: offsets returned by
627
rec_get_offsets(rec, index) */
628
mem_heap_t**	offset_heap,/* in/out: memory heap from which
629
the offsets are allocated */
630
mem_heap_t*	in_heap,/* in: memory heap from which the memory for
631
*old_vers is allocated; memory for possible
632
intermediate versions is allocated and freed
633
locally within the function */
634
const rec_t**	old_vers)/* out: rec, old version, or NULL if the
635
record does not exist in the view, that is,
636
it was freshly inserted afterwards */
638
const rec_t*	version;
639
mem_heap_t*	heap		= NULL;
642
trx_id_t	rec_trx_id	= ut_dulint_zero;
644
ut_ad(dict_index_is_clust(index));
645
ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
646
|| mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
647
#ifdef UNIV_SYNC_DEBUG
648
ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
649
#endif /* UNIV_SYNC_DEBUG */
651
ut_ad(rec_offs_validate(rec, index, *offsets));
653
rw_lock_s_lock(&(purge_sys->latch));
654
/* The S-latch on purge_sys prevents the purge view from
655
changing. Thus, if we have an uncommitted transaction at
656
this point, then purge cannot remove its undo log even if
657
the transaction could commit now. */
665
trx_id_t	version_trx_id;
667
version_trx_id = row_get_rec_trx_id(version, index, *offsets);
668
if (rec == version) {
669
rec_trx_id = version_trx_id;
672
/* trx_get_on_id() requires the kernel mutex; take it only for
the lookup. */
mutex_enter(&kernel_mutex);
673
version_trx = trx_get_on_id(version_trx_id);
674
mutex_exit(&kernel_mutex);
677
|| version_trx->conc_state == TRX_NOT_STARTED
678
|| version_trx->conc_state == TRX_COMMITTED_IN_MEMORY) {
680
/* We found a version that belongs to a
681
committed transaction: return it. */
683
if (rec == version) {
689
/* We assume that a rolled-back transaction stays in
690
TRX_ACTIVE state until all the changes have been
691
rolled back and the transaction is removed from
692
the global list of transactions. */
694
if (!ut_dulint_cmp(rec_trx_id, version_trx_id)) {
695
/* The transaction was committed while
696
we searched for earlier versions.
697
Return the current version as a
698
semi-consistent read. */
701
*offsets = rec_get_offsets(version,
707
buf = mem_heap_alloc(in_heap, rec_offs_size(*offsets));
708
*old_vers = rec_copy(buf, version, *offsets);
709
rec_offs_make_valid(*old_vers, index, *offsets);
716
heap = mem_heap_create(1024);
718
err = trx_undo_prev_version_build(rec, mtr, version, index,
722
mem_heap_free(heap2); /* free version */
725
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
729
if (prev_version == NULL) {
730
/* It was a freshly inserted version */
737
version = prev_version;
738
*offsets = rec_get_offsets(version, index, *offsets,
739
ULINT_UNDEFINED, offset_heap);
745
rw_lock_s_unlock(&(purge_sys->latch));