1
/******************************************************
4
(c) 1994-1996 Innobase Oy
6
Created 2/2/1994 Heikki Tuuri
7
*******************************************************/
11
#include "page0page.ic"
16
#include "lock0lock.h"
26
The index page consists of a page header which contains the page's
27
id and other information. On top of it are the index records
28
in a heap linked into a one way linear list according to alphabetic order.
30
Just below page end is an array of pointers which we call page directory,
31
to about every sixth record in the list. The pointers are placed in
32
the directory in the alphabetical order of the records pointed to,
33
enabling us to make binary search using the array. Each slot n:o I
34
in the directory points to a record, where a 4-bit field contains a count
35
of those records which are in the linear list between pointer I and
36
the pointer I - 1 in the directory, including the record
37
pointed to by pointer I and not including the record pointed to by I - 1.
38
We say that the record pointed to by slot I, or that slot I, owns
39
these records. The count is always kept in the range 4 to 8, with
40
the exception that it is 1 for the first slot, and 1--8 for the last slot.
42
An essentially binary search can be performed in the list of index
43
records, as we could do if we had a pointer to every record in the
44
page directory. The data structure is, however, more efficient when
45
we are doing inserts, because most inserts are just pushed on a heap.
46
Only every 8th insert requires a block move in the directory pointer
47
table, which itself is quite small. A record is deleted from the page
48
by just taking it off the linear list and updating the number of owned
49
records-field of the record which owns it, and updating the page directory,
50
if necessary. A special case is the one when the record owns itself.
51
Because the overhead of inserts is so small, we may also increase the
52
page size from the projected default of 8 kB to 64 kB without too
53
much loss of efficiency in inserts. A bigger page becomes relevant
54
when the disk transfer rate rises relative to seek and latency time.
55
On the present system, the page size is set so that the page transfer
56
time (3 ms) is 20 % of the disk random access time (15 ms).
58
When the page is split, merged, or becomes full but contains deleted
59
records, we have to reorganize the page.
61
Assuming a page size of 8 kB, a typical index page of a secondary
62
index contains 300 index entries, and the size of the page directory
63
is 50 x 4 bytes = 200 bytes. */
65
/*******************************************************************
66
Looks for the directory slot which owns the given record. */
69
page_dir_find_owner_slot(
70
/*=====================*/
71
/* out: the directory slot number */
72
const rec_t* rec) /* in: the physical record */
75
register uint16 rec_offs_bytes;
76
register const page_dir_slot_t* slot;
77
register const page_dir_slot_t* first_slot;
78
register const rec_t* r = rec;
80
ut_ad(page_rec_check(rec));
82
page = page_align(rec);
83
first_slot = page_dir_get_nth_slot(page, 0);
84
slot = page_dir_get_nth_slot(page, page_dir_get_n_slots(page) - 1);
86
if (page_is_comp(page)) {
87
while (rec_get_n_owned_new(r) == 0) {
88
r = rec_get_next_ptr_const(r, TRUE);
89
ut_ad(r >= page + PAGE_NEW_SUPREMUM);
90
ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
93
while (rec_get_n_owned_old(r) == 0) {
94
r = rec_get_next_ptr_const(r, FALSE);
95
ut_ad(r >= page + PAGE_OLD_SUPREMUM);
96
ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
100
rec_offs_bytes = mach_encode_2(r - page);
102
while (UNIV_LIKELY(*(uint16*) slot != rec_offs_bytes)) {
104
if (UNIV_UNLIKELY(slot == first_slot)) {
106
"InnoDB: Probable data corruption on"
108
"InnoDB: Original record ",
109
(ulong) page_get_page_no(page));
111
if (page_is_comp(page)) {
112
fputs("(compact record)", stderr);
114
rec_print_old(stderr, rec);
118
"InnoDB: on that page.\n"
119
"InnoDB: Cannot find the dir slot for record ",
121
if (page_is_comp(page)) {
122
fputs("(compact record)", stderr);
124
rec_print_old(stderr, page
125
+ mach_decode_2(rec_offs_bytes));
128
"InnoDB: on that page!\n", stderr);
130
buf_page_print(page, 0);
135
slot += PAGE_DIR_SLOT_SIZE;
138
return(((ulint) (first_slot - slot)) / PAGE_DIR_SLOT_SIZE);
141
/******************************************************************
142
Used to check the consistency of a directory slot. */
147
/* out: TRUE if succeed */
148
page_dir_slot_t* slot) /* in: slot */
156
page = page_align(slot);
158
n_slots = page_dir_get_n_slots(page);
160
ut_a(slot <= page_dir_get_nth_slot(page, 0));
161
ut_a(slot >= page_dir_get_nth_slot(page, n_slots - 1));
163
ut_a(page_rec_check(page_dir_slot_get_rec(slot)));
165
if (page_is_comp(page)) {
166
n_owned = rec_get_n_owned_new(page_dir_slot_get_rec(slot));
168
n_owned = rec_get_n_owned_old(page_dir_slot_get_rec(slot));
171
if (slot == page_dir_get_nth_slot(page, 0)) {
173
} else if (slot == page_dir_get_nth_slot(page, n_slots - 1)) {
175
ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
177
ut_a(n_owned >= PAGE_DIR_SLOT_MIN_N_OWNED);
178
ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
184
/*****************************************************************
185
Sets the max trx id field value. */
190
buf_block_t* block, /* in/out: page */
191
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
192
dulint trx_id) /* in: transaction id */
194
const ibool is_hashed = block->is_hashed;
195
page_t* page = buf_block_get_frame(block);
198
rw_lock_x_lock(&btr_search_latch);
201
/* It is not necessary to write this change to the redo log, as
202
during a database recovery we assume that the max trx id of every
203
page is the maximum trx id assigned before the crash. */
205
mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), trx_id);
206
if (UNIV_LIKELY_NULL(page_zip)) {
207
page_zip_write_header(page_zip,
208
page + (PAGE_HEADER + PAGE_MAX_TRX_ID),
213
rw_lock_x_unlock(&btr_search_latch);
217
/****************************************************************
218
Allocates a block of memory from the heap of an index page. */
223
/* out: pointer to start of allocated
224
buffer, or NULL if allocation fails */
225
page_t* page, /* in/out: index page */
226
page_zip_des_t* page_zip,/* in/out: compressed page with enough
227
space available for inserting the record,
229
ulint need, /* in: total number of bytes needed */
230
ulint* heap_no)/* out: this contains the heap number
231
of the allocated record
232
if allocation succeeds */
237
ut_ad(page && heap_no);
239
avl_space = page_get_max_insert_size(page, 1);
241
if (avl_space >= need) {
242
block = page_header_get_ptr(page, PAGE_HEAP_TOP);
244
page_header_set_ptr(page, page_zip, PAGE_HEAP_TOP,
246
*heap_no = page_dir_get_n_heap(page);
248
page_dir_set_n_heap(page, page_zip, 1 + *heap_no);
256
/**************************************************************
257
Writes a log record of page creation. */
260
page_create_write_log(
261
/*==================*/
262
buf_frame_t* frame, /* in: a buffer frame where the page is
264
mtr_t* mtr, /* in: mini-transaction handle */
265
ibool comp) /* in: TRUE=compact page format */
267
mlog_write_initial_log_record(frame, comp
268
? MLOG_COMP_PAGE_CREATE
269
: MLOG_PAGE_CREATE, mtr);
272
/***************************************************************
273
Parses a redo log record of creating a page. */
278
/* out: end of log record or NULL */
279
byte* ptr, /* in: buffer */
280
byte* end_ptr __attribute__((unused)), /* in: buffer end */
281
ulint comp, /* in: nonzero=compact page format */
282
buf_block_t* block, /* in: block or NULL */
283
mtr_t* mtr) /* in: mtr or NULL */
285
ut_ad(ptr && end_ptr);
287
/* The record is empty, except for the record initial part */
290
page_create(block, mtr, comp);
296
/**************************************************************
297
The index page creation function. */
302
/* out: pointer to the page */
303
buf_block_t* block, /* in: a buffer block where the
305
ulint comp) /* in: nonzero=compact page format */
307
page_dir_slot_t* slot;
319
#if PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA
320
# error "PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA"
322
#if PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA
323
# error "PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA"
326
/* The infimum and supremum records use a dummy index. */
327
if (UNIV_LIKELY(comp)) {
328
index = srv_sys->dummy_ind2;
330
index = srv_sys->dummy_ind1;
333
/* 1. INCREMENT MODIFY CLOCK */
334
buf_block_modify_clock_inc(block);
336
page = buf_block_get_frame(block);
338
fil_page_set_type(page, FIL_PAGE_INDEX);
340
heap = mem_heap_create(200);
342
/* 3. CREATE THE INFIMUM AND SUPREMUM RECORDS */
344
/* Create first a data tuple for infimum record */
345
tuple = dtuple_create(heap, 1);
346
dtuple_set_info_bits(tuple, REC_STATUS_INFIMUM);
347
field = dtuple_get_nth_field(tuple, 0);
349
dfield_set_data(field, "infimum", 8);
350
dtype_set(dfield_get_type(field),
351
DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, 8);
352
/* Set the corresponding physical record to its place in the page
355
heap_top = page + PAGE_DATA;
357
infimum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple, 0);
359
if (UNIV_LIKELY(comp)) {
360
ut_a(infimum_rec == page + PAGE_NEW_INFIMUM);
362
rec_set_n_owned_new(infimum_rec, NULL, 1);
363
rec_set_heap_no_new(infimum_rec, 0);
365
ut_a(infimum_rec == page + PAGE_OLD_INFIMUM);
367
rec_set_n_owned_old(infimum_rec, 1);
368
rec_set_heap_no_old(infimum_rec, 0);
371
offsets = rec_get_offsets(infimum_rec, index, NULL,
372
ULINT_UNDEFINED, &heap);
374
heap_top = rec_get_end(infimum_rec, offsets);
376
/* Create then a tuple for supremum */
378
tuple = dtuple_create(heap, 1);
379
dtuple_set_info_bits(tuple, REC_STATUS_SUPREMUM);
380
field = dtuple_get_nth_field(tuple, 0);
382
dfield_set_data(field, "supremum", comp ? 8 : 9);
383
dtype_set(dfield_get_type(field),
384
DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, comp ? 8 : 9);
386
supremum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple, 0);
388
if (UNIV_LIKELY(comp)) {
389
ut_a(supremum_rec == page + PAGE_NEW_SUPREMUM);
391
rec_set_n_owned_new(supremum_rec, NULL, 1);
392
rec_set_heap_no_new(supremum_rec, 1);
394
ut_a(supremum_rec == page + PAGE_OLD_SUPREMUM);
396
rec_set_n_owned_old(supremum_rec, 1);
397
rec_set_heap_no_old(supremum_rec, 1);
400
offsets = rec_get_offsets(supremum_rec, index, offsets,
401
ULINT_UNDEFINED, &heap);
402
heap_top = rec_get_end(supremum_rec, offsets);
404
ut_ad(heap_top == page
405
+ (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END));
409
/* 4. INITIALIZE THE PAGE */
411
page_header_set_field(page, NULL, PAGE_N_DIR_SLOTS, 2);
412
page_header_set_ptr(page, NULL, PAGE_HEAP_TOP, heap_top);
413
page_header_set_field(page, NULL, PAGE_N_HEAP, comp
414
? 0x8000 | PAGE_HEAP_NO_USER_LOW
415
: PAGE_HEAP_NO_USER_LOW);
416
page_header_set_ptr(page, NULL, PAGE_FREE, NULL);
417
page_header_set_field(page, NULL, PAGE_GARBAGE, 0);
418
page_header_set_ptr(page, NULL, PAGE_LAST_INSERT, NULL);
419
page_header_set_field(page, NULL, PAGE_DIRECTION, PAGE_NO_DIRECTION);
420
page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
421
page_header_set_field(page, NULL, PAGE_N_RECS, 0);
422
page_set_max_trx_id(block, NULL, ut_dulint_zero);
423
memset(heap_top, 0, UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START
424
- page_offset(heap_top));
426
/* 5. SET POINTERS IN RECORDS AND DIR SLOTS */
428
/* Set the slots to point to infimum and supremum. */
430
slot = page_dir_get_nth_slot(page, 0);
431
page_dir_slot_set_rec(slot, infimum_rec);
433
slot = page_dir_get_nth_slot(page, 1);
434
page_dir_slot_set_rec(slot, supremum_rec);
436
/* Set the next pointers in infimum and supremum */
438
if (UNIV_LIKELY(comp)) {
439
rec_set_next_offs_new(infimum_rec, PAGE_NEW_SUPREMUM);
440
rec_set_next_offs_new(supremum_rec, 0);
442
rec_set_next_offs_old(infimum_rec, PAGE_OLD_SUPREMUM);
443
rec_set_next_offs_old(supremum_rec, 0);
449
/**************************************************************
450
Create an uncompressed B-tree index page. */
455
/* out: pointer to the page */
456
buf_block_t* block, /* in: a buffer block where the
458
mtr_t* mtr, /* in: mini-transaction handle */
459
ulint comp) /* in: nonzero=compact page format */
461
page_create_write_log(buf_block_get_frame(block), mtr, comp);
462
return(page_create_low(block, comp));
465
/**************************************************************
466
Create a compressed B-tree index page. */
471
/* out: pointer to the page */
472
buf_block_t* block, /* in/out: a buffer frame where the
474
dict_index_t* index, /* in: the index of the page */
475
ulint level, /* in: the B-tree level of the page */
476
mtr_t* mtr) /* in: mini-transaction handle */
479
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
484
ut_ad(dict_table_is_comp(index->table));
486
page = page_create_low(block, TRUE);
487
mach_write_to_2(page + PAGE_HEADER + PAGE_LEVEL, level);
489
if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page, index, mtr))) {
490
/* The compression of a newly created page
491
should always succeed. */
498
/*****************************************************************
499
Differs from page_copy_rec_list_end, because this function does not
500
touch the lock table and max trx id on page or compress the page. */
503
page_copy_rec_list_end_no_locks(
504
/*============================*/
505
buf_block_t* new_block, /* in: index page to copy to */
506
buf_block_t* block, /* in: index page of rec */
507
rec_t* rec, /* in: record on page */
508
dict_index_t* index, /* in: record descriptor */
509
mtr_t* mtr) /* in: mtr */
511
page_t* new_page = buf_block_get_frame(new_block);
514
mem_heap_t* heap = NULL;
515
ulint offsets_[REC_OFFS_NORMAL_SIZE];
516
ulint* offsets = offsets_;
517
rec_offs_init(offsets_);
519
page_cur_position(rec, block, &cur1);
521
if (page_cur_is_before_first(&cur1)) {
523
page_cur_move_to_next(&cur1);
526
ut_a((ibool)!!page_is_comp(new_page)
527
== dict_table_is_comp(index->table));
528
ut_a(page_is_comp(new_page) == page_rec_is_comp(rec));
529
ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint)
530
(page_is_comp(new_page) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));
532
cur2 = page_get_infimum_rec(buf_block_get_frame(new_block));
534
/* Copy records from the original page to the new page */
536
while (!page_cur_is_after_last(&cur1)) {
537
rec_t* cur1_rec = page_cur_get_rec(&cur1);
539
offsets = rec_get_offsets(cur1_rec, index, offsets,
540
ULINT_UNDEFINED, &heap);
541
ins_rec = page_cur_insert_rec_low(cur2, index,
542
cur1_rec, offsets, mtr);
543
if (UNIV_UNLIKELY(!ins_rec)) {
544
/* Track an assertion failure reported on the mailing
545
list on June 18th, 2003 */
547
buf_page_print(new_page, 0);
548
buf_page_print(page_align(rec), 0);
549
ut_print_timestamp(stderr);
552
"InnoDB: rec offset %lu, cur1 offset %lu,"
553
" cur2 offset %lu\n",
554
(ulong) page_offset(rec),
555
(ulong) page_offset(page_cur_get_rec(&cur1)),
556
(ulong) page_offset(cur2));
560
page_cur_move_to_next(&cur1);
564
if (UNIV_LIKELY_NULL(heap)) {
569
/*****************************************************************
570
Copies records from page to new_page, from a given record onward,
571
including that record. Infimum and supremum records are not copied.
572
The records are copied to the start of the record list on new_page. */
575
page_copy_rec_list_end(
576
/*===================*/
577
/* out: pointer to the original
578
successor of the infimum record
579
on new_page, or NULL on zip overflow
580
(new_block will be decompressed) */
581
buf_block_t* new_block, /* in/out: index page to copy to */
582
buf_block_t* block, /* in: index page containing rec */
583
rec_t* rec, /* in: record on page */
584
dict_index_t* index, /* in: record descriptor */
585
mtr_t* mtr) /* in: mtr */
587
page_t* new_page = buf_block_get_frame(new_block);
588
page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block);
589
page_t* page = page_align(rec);
590
rec_t* ret = page_rec_get_next(
591
page_get_infimum_rec(new_page));
592
ulint log_mode = 0; /* remove warning */
594
#ifdef UNIV_ZIP_DEBUG
596
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
599
/* Strict page_zip_validate() may fail here.
600
Furthermore, btr_compress() may set FIL_PAGE_PREV to
601
FIL_NULL on new_page while leaving it intact on
602
new_page_zip. So, we cannot validate new_page_zip. */
603
ut_a(page_zip_validate_low(page_zip, page, TRUE));
605
#endif /* UNIV_ZIP_DEBUG */
606
ut_ad(buf_block_get_frame(block) == page);
607
ut_ad(page_is_leaf(page) == page_is_leaf(new_page));
608
ut_ad(page_is_comp(page) == page_is_comp(new_page));
609
/* Here, "ret" may be pointing to a user record or the
610
predefined supremum record. */
612
if (UNIV_LIKELY_NULL(new_page_zip)) {
613
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
616
if (page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW) {
617
page_copy_rec_list_end_to_created_page(new_page, rec,
620
page_copy_rec_list_end_no_locks(new_block, block, rec,
624
if (UNIV_LIKELY_NULL(new_page_zip)) {
625
mtr_set_log_mode(mtr, log_mode);
628
(!page_zip_compress(new_page_zip, new_page, index, mtr))) {
629
/* Before trying to reorganize the page,
630
store the number of preceding records on the page. */
632
= page_rec_get_n_recs_before(ret);
633
/* Before copying, "ret" was the successor of
634
the predefined infimum record. It must still
635
have at least one predecessor (the predefined
636
infimum record, or a freshly copied record
637
that is smaller than "ret"). */
641
(!page_zip_reorganize(new_block, index, mtr))) {
644
(!page_zip_decompress(new_page_zip,
648
ut_ad(page_validate(new_page, index));
651
/* The page was reorganized:
653
ret = new_page + PAGE_NEW_INFIMUM;
656
ret = rec_get_next_ptr(ret, TRUE);
662
/* Update the lock table, MAX_TRX_ID, and possible hash index */
664
lock_move_rec_list_end(new_block, block, rec);
666
page_update_max_trx_id(new_block, new_page_zip,
667
page_get_max_trx_id(page));
669
btr_search_move_or_delete_hash_entries(new_block, block, index);
674
/*****************************************************************
675
Copies records from page to new_page, up to the given record,
676
NOT including that record. Infimum and supremum records are not copied.
677
The records are copied to the end of the record list on new_page. */
680
page_copy_rec_list_start(
681
/*=====================*/
682
/* out: pointer to the original
683
predecessor of the supremum record
684
on new_page, or NULL on zip overflow
685
(new_block will be decompressed) */
686
buf_block_t* new_block, /* in/out: index page to copy to */
687
buf_block_t* block, /* in: index page containing rec */
688
rec_t* rec, /* in: record on page */
689
dict_index_t* index, /* in: record descriptor */
690
mtr_t* mtr) /* in: mtr */
692
page_t* new_page = buf_block_get_frame(new_block);
693
page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block);
696
ulint log_mode = 0 /* remove warning */;
697
mem_heap_t* heap = NULL;
699
= page_rec_get_prev(page_get_supremum_rec(new_page));
700
ulint offsets_[REC_OFFS_NORMAL_SIZE];
701
ulint* offsets = offsets_;
702
rec_offs_init(offsets_);
704
/* Here, "ret" may be pointing to a user record or the
705
predefined infimum record. */
707
if (page_rec_is_infimum(rec)) {
712
if (UNIV_LIKELY_NULL(new_page_zip)) {
713
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
716
page_cur_set_before_first(block, &cur1);
717
page_cur_move_to_next(&cur1);
721
/* Copy records from the original page to the new page */
723
while (page_cur_get_rec(&cur1) != rec) {
724
rec_t* cur1_rec = page_cur_get_rec(&cur1);
725
offsets = rec_get_offsets(cur1_rec, index, offsets,
726
ULINT_UNDEFINED, &heap);
727
cur2 = page_cur_insert_rec_low(cur2, index,
728
cur1_rec, offsets, mtr);
731
page_cur_move_to_next(&cur1);
734
if (UNIV_LIKELY_NULL(heap)) {
738
if (UNIV_LIKELY_NULL(new_page_zip)) {
739
mtr_set_log_mode(mtr, log_mode);
742
(!page_zip_compress(new_page_zip, new_page, index, mtr))) {
743
/* Before trying to reorganize the page,
744
store the number of preceding records on the page. */
746
= page_rec_get_n_recs_before(ret);
747
/* Before copying, "ret" was the predecessor
748
of the predefined supremum record. If it was
749
the predefined infimum record, then it would
750
still be the infimum. Thus, the assertion
751
ut_a(ret_pos > 0) would fail here. */
754
(!page_zip_reorganize(new_block, index, mtr))) {
757
(!page_zip_decompress(new_page_zip,
761
ut_ad(page_validate(new_page, index));
764
/* The page was reorganized:
766
ret = new_page + PAGE_NEW_INFIMUM;
769
ret = rec_get_next_ptr(ret, TRUE);
775
/* Update MAX_TRX_ID, the lock table, and possible hash index */
777
page_update_max_trx_id(new_block, new_page_zip,
778
page_get_max_trx_id(page_align(rec)));
780
lock_move_rec_list_start(new_block, block, rec, ret);
782
btr_search_move_or_delete_hash_entries(new_block, block, index);
787
/**************************************************************
788
Writes a log record of a record list end or start deletion. */
791
page_delete_rec_list_write_log(
792
/*===========================*/
793
rec_t* rec, /* in: record on page */
794
dict_index_t* index, /* in: record descriptor */
795
byte type, /* in: operation type:
796
MLOG_LIST_END_DELETE, ... */
797
mtr_t* mtr) /* in: mtr */
800
ut_ad(type == MLOG_LIST_END_DELETE
801
|| type == MLOG_LIST_START_DELETE
802
|| type == MLOG_COMP_LIST_END_DELETE
803
|| type == MLOG_COMP_LIST_START_DELETE);
805
log_ptr = mlog_open_and_write_index(mtr, rec, index, type, 2);
807
/* Write the parameter as a 2-byte ulint */
808
mach_write_to_2(log_ptr, page_offset(rec));
809
mlog_close(mtr, log_ptr + 2);
813
/**************************************************************
814
Parses a log record of a record list end or start deletion. */
817
page_parse_delete_rec_list(
818
/*=======================*/
819
/* out: end of log record or NULL */
820
byte type, /* in: MLOG_LIST_END_DELETE,
821
MLOG_LIST_START_DELETE,
822
MLOG_COMP_LIST_END_DELETE or
823
MLOG_COMP_LIST_START_DELETE */
824
byte* ptr, /* in: buffer */
825
byte* end_ptr,/* in: buffer end */
826
buf_block_t* block, /* in/out: buffer block or NULL */
827
dict_index_t* index, /* in: record descriptor */
828
mtr_t* mtr) /* in: mtr or NULL */
833
ut_ad(type == MLOG_LIST_END_DELETE
834
|| type == MLOG_LIST_START_DELETE
835
|| type == MLOG_COMP_LIST_END_DELETE
836
|| type == MLOG_COMP_LIST_START_DELETE);
838
/* Read the record offset as a 2-byte ulint */
840
if (end_ptr < ptr + 2) {
845
offset = mach_read_from_2(ptr);
853
page = buf_block_get_frame(block);
855
ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
857
if (type == MLOG_LIST_END_DELETE
858
|| type == MLOG_COMP_LIST_END_DELETE) {
859
page_delete_rec_list_end(page + offset, block, index,
860
ULINT_UNDEFINED, ULINT_UNDEFINED,
863
page_delete_rec_list_start(page + offset, block, index, mtr);
869
/*****************************************************************
870
Deletes records from a page from a given record onward, including that record.
871
The infimum and supremum records are not deleted. */
874
page_delete_rec_list_end(
875
/*=====================*/
876
rec_t* rec, /* in: pointer to record on page */
877
buf_block_t* block, /* in: buffer block of the page */
878
dict_index_t* index, /* in: record descriptor */
879
ulint n_recs, /* in: number of records to delete,
880
or ULINT_UNDEFINED if not known */
881
ulint size, /* in: the sum of the sizes of the
882
records in the end of the chain to
883
delete, or ULINT_UNDEFINED if not known */
884
mtr_t* mtr) /* in: mtr */
886
page_dir_slot_t*slot;
891
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
892
page_t* page = page_align(rec);
893
mem_heap_t* heap = NULL;
894
ulint offsets_[REC_OFFS_NORMAL_SIZE];
895
ulint* offsets = offsets_;
896
rec_offs_init(offsets_);
898
ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE);
899
ut_ad(!page_zip || page_rec_is_comp(rec));
900
#ifdef UNIV_ZIP_DEBUG
901
ut_a(!page_zip || page_zip_validate(page_zip, page));
902
#endif /* UNIV_ZIP_DEBUG */
904
if (page_rec_is_infimum(rec)) {
905
rec = page_rec_get_next(rec);
908
if (page_rec_is_supremum(rec)) {
913
/* Reset the last insert info in the page header and increment
914
the modify clock for the frame */
916
page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
918
/* The page gets invalid for optimistic searches: increment the
919
frame modify clock */
921
buf_block_modify_clock_inc(block);
923
page_delete_rec_list_write_log(rec, index, page_is_comp(page)
924
? MLOG_COMP_LIST_END_DELETE
925
: MLOG_LIST_END_DELETE, mtr);
927
if (UNIV_LIKELY_NULL(page_zip)) {
930
ut_a(page_is_comp(page));
931
/* Individual deletes are not logged */
933
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
937
page_cur_position(rec, block, &cur);
939
offsets = rec_get_offsets(rec, index, offsets,
940
ULINT_UNDEFINED, &heap);
941
rec = rec_get_next_ptr(rec, TRUE);
942
#ifdef UNIV_ZIP_DEBUG
943
ut_a(page_zip_validate(page_zip, page));
944
#endif /* UNIV_ZIP_DEBUG */
945
page_cur_delete_rec(&cur, index, offsets, mtr);
946
} while (page_offset(rec) != PAGE_NEW_SUPREMUM);
948
if (UNIV_LIKELY_NULL(heap)) {
952
/* Restore log mode */
954
mtr_set_log_mode(mtr, log_mode);
958
prev_rec = page_rec_get_prev(rec);
960
last_rec = page_rec_get_prev(page_get_supremum_rec(page));
962
if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED)) {
964
/* Calculate the sum of sizes and the number of records */
970
offsets = rec_get_offsets(rec2, index, offsets,
971
ULINT_UNDEFINED, &heap);
972
s = rec_offs_size(offsets);
973
ut_ad(rec2 - page + s - rec_offs_extra_size(offsets)
975
ut_ad(size + s < UNIV_PAGE_SIZE);
979
rec2 = page_rec_get_next(rec2);
980
} while (!page_rec_is_supremum(rec2));
982
if (UNIV_LIKELY_NULL(heap)) {
987
ut_ad(size < UNIV_PAGE_SIZE);
989
/* Update the page directory; there is no need to balance the number
990
of the records owned by the supremum record, as it is allowed to be
991
less than PAGE_DIR_SLOT_MIN_N_OWNED */
993
if (page_is_comp(page)) {
997
while (rec_get_n_owned_new(rec2) == 0) {
1000
rec2 = rec_get_next_ptr(rec2, TRUE);
1003
ut_ad(rec_get_n_owned_new(rec2) > count);
1005
n_owned = rec_get_n_owned_new(rec2) - count;
1006
slot_index = page_dir_find_owner_slot(rec2);
1007
slot = page_dir_get_nth_slot(page, slot_index);
1012
while (rec_get_n_owned_old(rec2) == 0) {
1015
rec2 = rec_get_next_ptr(rec2, FALSE);
1018
ut_ad(rec_get_n_owned_old(rec2) > count);
1020
n_owned = rec_get_n_owned_old(rec2) - count;
1021
slot_index = page_dir_find_owner_slot(rec2);
1022
slot = page_dir_get_nth_slot(page, slot_index);
1025
page_dir_slot_set_rec(slot, page_get_supremum_rec(page));
1026
page_dir_slot_set_n_owned(slot, NULL, n_owned);
1028
page_dir_set_n_slots(page, NULL, slot_index + 1);
1030
/* Remove the record chain segment from the record chain */
1031
page_rec_set_next(prev_rec, page_get_supremum_rec(page));
1033
/* Catenate the deleted chain segment to the page free list */
1035
page_rec_set_next(last_rec, page_header_get_ptr(page, PAGE_FREE));
1036
page_header_set_ptr(page, NULL, PAGE_FREE, rec);
1038
page_header_set_field(page, NULL, PAGE_GARBAGE, size
1039
+ page_header_get_field(page, PAGE_GARBAGE));
1041
page_header_set_field(page, NULL, PAGE_N_RECS,
1042
(ulint)(page_get_n_recs(page) - n_recs));
1045
/*****************************************************************
1046
Deletes records from page, up to the given record, NOT including
1047
that record. Infimum and supremum records are not deleted. */
1050
page_delete_rec_list_start(
1051
/*=======================*/
1052
rec_t* rec, /* in: record on page */
1053
buf_block_t* block, /* in: buffer block of the page */
1054
dict_index_t* index, /* in: record descriptor */
1055
mtr_t* mtr) /* in: mtr */
1059
ulint offsets_[REC_OFFS_NORMAL_SIZE];
1060
ulint* offsets = offsets_;
1061
mem_heap_t* heap = NULL;
1064
rec_offs_init(offsets_);
1066
ut_ad((ibool) !!page_rec_is_comp(rec)
1067
== dict_table_is_comp(index->table));
1068
#ifdef UNIV_ZIP_DEBUG
1070
page_zip_des_t* page_zip= buf_block_get_page_zip(block);
1071
page_t* page = buf_block_get_frame(block);
1073
/* page_zip_validate() would detect a min_rec_mark mismatch
1074
in btr_page_split_and_insert()
1075
between btr_attach_half_pages() and insert_page = ...
1076
when btr_page_get_split_rec_to_left() holds
1077
(direction == FSP_DOWN). */
1078
ut_a(!page_zip || page_zip_validate_low(page_zip, page, TRUE));
1080
#endif /* UNIV_ZIP_DEBUG */
1082
if (page_rec_is_infimum(rec)) {
1087
if (page_rec_is_comp(rec)) {
1088
type = MLOG_COMP_LIST_START_DELETE;
1090
type = MLOG_LIST_START_DELETE;
1093
page_delete_rec_list_write_log(rec, index, type, mtr);
1095
page_cur_set_before_first(block, &cur1);
1096
page_cur_move_to_next(&cur1);
1098
/* Individual deletes are not logged */
1100
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
1102
while (page_cur_get_rec(&cur1) != rec) {
1103
offsets = rec_get_offsets(page_cur_get_rec(&cur1), index,
1104
offsets, ULINT_UNDEFINED, &heap);
1105
page_cur_delete_rec(&cur1, index, offsets, mtr);
1108
if (UNIV_LIKELY_NULL(heap)) {
1109
mem_heap_free(heap);
1112
/* Restore log mode */
1114
mtr_set_log_mode(mtr, log_mode);
1117
/*****************************************************************
1118
Moves record list end to another page. Moved records include split_rec.
1122
page_move_rec_list_end(
1123
/*===================*/
1124
/* out: TRUE on success; FALSE on
1126
(new_block will be decompressed) */
1127
buf_block_t* new_block, /* in/out: index page where to move */
1128
buf_block_t* block, /* in: index page from where to move */
1129
rec_t* split_rec, /* in: first record to move */
1130
dict_index_t* index, /* in: record descriptor */
1131
mtr_t* mtr) /* in: mtr */
1133
page_t* new_page = buf_block_get_frame(new_block);
1134
ulint old_data_size;
1135
ulint new_data_size;
1139
old_data_size = page_get_data_size(new_page);
1140
old_n_recs = page_get_n_recs(new_page);
1141
#ifdef UNIV_ZIP_DEBUG
1143
page_zip_des_t* new_page_zip
1144
= buf_block_get_page_zip(new_block);
1145
page_zip_des_t* page_zip
1146
= buf_block_get_page_zip(block);
1147
ut_a(!new_page_zip == !page_zip);
1149
|| page_zip_validate(new_page_zip, new_page));
1151
|| page_zip_validate(page_zip, page_align(split_rec)));
1153
#endif /* UNIV_ZIP_DEBUG */
1155
if (UNIV_UNLIKELY(!page_copy_rec_list_end(new_block, block,
1156
split_rec, index, mtr))) {
1160
new_data_size = page_get_data_size(new_page);
1161
new_n_recs = page_get_n_recs(new_page);
1163
ut_ad(new_data_size >= old_data_size);
1165
page_delete_rec_list_end(split_rec, block, index,
1166
new_n_recs - old_n_recs,
1167
new_data_size - old_data_size, mtr);
1172
/* page_move_rec_list_start(): moves the records that precede split_rec on
block onto new_block. The surviving lines show a copy
(page_copy_rec_list_start) followed by a delete on the source page
(page_delete_rec_list_start).
NOTE(review): the return type, braces and return statements are on lines
missing from this listing. */
/*****************************************************************
1173
Moves record list start to another page. Moved records do not include
1177
page_move_rec_list_start(
1178
/*=====================*/
1179
/* out: TRUE on success; FALSE on
1180
compression failure */
1181
buf_block_t* new_block, /* in/out: index page where to move */
1182
buf_block_t* block, /* in/out: page containing split_rec */
1183
rec_t* split_rec, /* in: first record not to move */
1184
dict_index_t* index, /* in: record descriptor */
1185
mtr_t* mtr) /* in: mtr */
/* Copy first; a false result is the unlikely case and its handling is on
lines not visible here. */
1187
if (UNIV_UNLIKELY(!page_copy_rec_list_start(new_block, block,
1188
split_rec, index, mtr))) {
/* The copied records are then deleted from the source page. */
1192
page_delete_rec_list_start(split_rec, block, index, mtr);
1197
/* page_rec_write_index_page_no(): overwrites field i of rec with page_no.
NOTE(review): the declarations of data and len, and anything done with len,
are on lines missing from this listing. */
/***************************************************************************
1198
This is a low-level operation which is used in a database index creation
1199
to update the page number of a created B-tree to a data dictionary record. */
1202
page_rec_write_index_page_no(
1203
/*=========================*/
1204
rec_t* rec, /* in: record to update */
1205
ulint i, /* in: index of the field to update */
1206
ulint page_no,/* in: value to write */
1207
mtr_t* mtr) /* in: mtr */
/* Locate field i with the old-format field accessor; len is passed by
address and presumably receives the field length. */
1212
data = rec_get_nth_field_old(rec, i, &len);
/* The value is written as a 4-byte quantity (MLOG_4BYTES) through
mlog_write_ulint() with the caller's mtr. */
1216
mlog_write_ulint(data, page_no, MLOG_4BYTES, mtr);
1219
/* page_dir_delete_slot(): removes the single directory slot slot_no. The
records it owned are credited to slot slot_no + 1, the slots above are copied
one position down, the vacated last slot is zeroed and PAGE_N_DIR_SLOTS is
decremented. The banner below speaks of "n slots", but the visible code
handles exactly one slot per call.
NOTE(review): some declarations and the braces are on lines missing from this
listing, and the comment of step 1 has lost its closing line. */
/******************************************************************
1220
Used to delete n slots from the directory. This function updates
1221
also n_owned fields in the records, so that the first slot after
1222
the deleted ones inherits the records of the deleted slots. */
1225
page_dir_delete_slot(
1226
/*=================*/
1227
page_t* page, /* in/out: the index page */
1228
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
1229
ulint slot_no)/* in: slot to be deleted */
1231
page_dir_slot_t* slot;
/* A compressed-page descriptor may only accompany a compact-format page. */
1236
ut_ad(!page_zip || page_is_comp(page));
/* The last slot may not be deleted: a following slot must exist to take over
the owned records. */
1238
ut_ad(slot_no + 1 < page_dir_get_n_slots(page));
1240
n_slots = page_dir_get_n_slots(page);
1242
/* 1. Reset the n_owned fields of the slots to be
1244
slot = page_dir_get_nth_slot(page, slot_no);
1245
n_owned = page_dir_slot_get_n_owned(slot);
1246
page_dir_slot_set_n_owned(slot, page_zip, 0);
1248
/* 2. Update the n_owned value of the first non-deleted slot */
/* The new count is the sum of what both slots owned. */
1250
slot = page_dir_get_nth_slot(page, slot_no + 1);
1251
page_dir_slot_set_n_owned(slot, page_zip,
1252
n_owned + page_dir_slot_get_n_owned(slot));
1254
/* 3. Destroy the slot by copying slots */
/* Slot i is copied into slot i - 1 for every slot above slot_no. */
1255
for (i = slot_no + 1; i < n_slots; i++) {
1256
rec_t* rec = (rec_t*)
1257
page_dir_slot_get_rec(page_dir_get_nth_slot(page, i));
1258
page_dir_slot_set_rec(page_dir_get_nth_slot(page, i - 1), rec);
1261
/* 4. Zero out the last slot, which will be removed */
1262
mach_write_to_2(page_dir_get_nth_slot(page, n_slots - 1), 0);
1264
/* 5. Update the page header */
1265
page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots - 1);
1268
/* Adds one directory slot: the slot count in the page header is raised by
one and the slots above start are shifted with memmove(). The banner below
speaks of "n slots", but the visible code adds exactly one per call.
NOTE(review): the function-name line (page_dir_add_slot, judging by its
caller in page_dir_split_slot), the end of the banner comment, some
declarations and the braces are on lines missing from this listing. */
/******************************************************************
1269
Used to add n slots to the directory. Does not set the record pointers
1270
in the added slots or update n_owned values: this is the responsibility
1276
page_t* page, /* in/out: the index page */
1277
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
1278
ulint start) /* in: the slot above which the new slots
1281
page_dir_slot_t* slot;
1284
n_slots = page_dir_get_n_slots(page);
1286
ut_ad(start < n_slots - 1);
1288
/* Update the page header */
1289
page_dir_set_n_slots(page, page_zip, n_slots + 1);
/* n_slots still holds the old count, so slot n_slots is the newly added last
slot. The destination is that slot's address, the source lies one slot size
above it, and (n_slots - 1 - start) slots are moved. */
1292
slot = page_dir_get_nth_slot(page, n_slots);
1293
memmove(slot, slot + PAGE_DIR_SLOT_SIZE,
1294
(n_slots - 1 - start) * PAGE_DIR_SLOT_SIZE);
1297
/* page_dir_split_slot(): splits directory slot slot_no in two. A new slot is
inserted just below it and given the first n_owned / 2 records; the old slot
(renumbered slot_no + 1) keeps the remaining n_owned - n_owned / 2.
NOTE(review): some declarations and the braces are on lines missing from this
listing, and the comments of steps 2 and 4 have lost their closing lines. */
/********************************************************************
1298
Splits a directory slot which owns too many records. */
1301
page_dir_split_slot(
1302
/*================*/
1303
page_t* page, /* in/out: index page */
1304
page_zip_des_t* page_zip,/* in/out: compressed page whose
1305
uncompressed part will be written, or NULL */
1306
ulint slot_no)/* in: the directory slot */
1309
page_dir_slot_t* new_slot;
1310
page_dir_slot_t* prev_slot;
1311
page_dir_slot_t* slot;
/* A compressed-page descriptor may only accompany a compact-format page. */
1316
ut_ad(!page_zip || page_is_comp(page));
1319
slot = page_dir_get_nth_slot(page, slot_no);
1321
n_owned = page_dir_slot_get_n_owned(slot);
/* The caller is expected to split as soon as the slot owns exactly one
record more than the allowed maximum. */
1322
ut_ad(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED + 1);
1324
/* 1. We loop to find a record approximately in the middle of the
1325
records owned by the slot. */
/* The walk starts at the record of the previous slot, so after the loop rec
is n_owned / 2 records past it. */
1327
prev_slot = page_dir_get_nth_slot(page, slot_no - 1);
1328
rec = (rec_t*) page_dir_slot_get_rec(prev_slot);
1330
for (i = 0; i < n_owned / 2; i++) {
1331
rec = page_rec_get_next(rec);
/* The lower half must still satisfy the minimum owned count. */
1334
ut_ad(n_owned / 2 >= PAGE_DIR_SLOT_MIN_N_OWNED);
1336
/* 2. We add one directory slot immediately below the slot to be
1339
page_dir_add_slot(page, page_zip, slot_no - 1);
1341
/* The added slot is now number slot_no, and the old slot is
1342
now number slot_no + 1 */
1344
new_slot = page_dir_get_nth_slot(page, slot_no);
1345
slot = page_dir_get_nth_slot(page, slot_no + 1);
1347
/* 3. We store the appropriate values to the new slot. */
1349
page_dir_slot_set_rec(new_slot, rec);
1350
page_dir_slot_set_n_owned(new_slot, page_zip, n_owned / 2);
1352
/* 4. Finally, we update the number of records field of the
1355
page_dir_slot_set_n_owned(slot, page_zip, n_owned - (n_owned / 2));
1358
/* page_dir_balance_slot(): repairs a slot that owns one record fewer than
the minimum. If the upper neighbour owns more than the minimum, one record is
transferred from it; otherwise the slot is removed with
page_dir_delete_slot(), which credits its records to the upper neighbour.
NOTE(review): some declarations, the braces, the else lines and the early
return for the last slot are on lines missing from this listing. */
/*****************************************************************
1359
Tries to balance the given directory slot with too few records with the upper
1360
neighbor, so that there are at least the minimum number of records owned by
1361
the slot; this may result in the merging of two slots. */
1364
page_dir_balance_slot(
1365
/*==================*/
1366
page_t* page, /* in/out: index page */
1367
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
1368
ulint slot_no)/* in: the directory slot */
1370
page_dir_slot_t* slot;
1371
page_dir_slot_t* up_slot;
/* A compressed-page descriptor may only accompany a compact-format page. */
1378
ut_ad(!page_zip || page_is_comp(page));
1381
slot = page_dir_get_nth_slot(page, slot_no);
1383
/* The last directory slot cannot be balanced with the upper
1384
neighbor, as there is none. */
1386
if (UNIV_UNLIKELY(slot_no == page_dir_get_n_slots(page) - 1)) {
1391
up_slot = page_dir_get_nth_slot(page, slot_no + 1);
1393
n_owned = page_dir_slot_get_n_owned(slot);
1394
up_n_owned = page_dir_slot_get_n_owned(up_slot);
/* The function is only meant for a slot exactly one below the minimum. */
1396
ut_ad(n_owned == PAGE_DIR_SLOT_MIN_N_OWNED - 1);
1398
/* If the upper slot has the minimum value of n_owned, we will merge
1399
the two slots, therefore we assert: */
1400
ut_ad(2 * PAGE_DIR_SLOT_MIN_N_OWNED - 1 <= PAGE_DIR_SLOT_MAX_N_OWNED);
1402
if (up_n_owned > PAGE_DIR_SLOT_MIN_N_OWNED) {
1404
/* In this case we can just transfer one record owned
1405
by the upper slot to the property of the lower slot */
1406
old_rec = (rec_t*) page_dir_slot_get_rec(slot);
/* The record following the slot's current record becomes the new owner with
a count of n_owned + 1, and the old owner's count is cleared. Compact pages
use the _new accessors and old-format pages the _old ones; the else line
separating the two variants is not visible here. */
1408
if (page_is_comp(page)) {
1409
new_rec = rec_get_next_ptr(old_rec, TRUE);
1411
rec_set_n_owned_new(old_rec, page_zip, 0);
1412
rec_set_n_owned_new(new_rec, page_zip, n_owned + 1);
1414
new_rec = rec_get_next_ptr(old_rec, FALSE);
1416
rec_set_n_owned_old(old_rec, 0);
1417
rec_set_n_owned_old(new_rec, n_owned + 1);
/* Point the slot at its new owner record and take one record away from the
upper slot's count. */
1420
page_dir_slot_set_rec(slot, new_rec);
1422
page_dir_slot_set_n_owned(up_slot, page_zip, up_n_owned -1);
1424
/* In this case we may merge the two slots */
1425
page_dir_delete_slot(page, page_zip, slot_no);
1429
/* page_get_middle_rec(): finds the middle record by using the directory to
skip whole slots and then stepping record by record for the remainder.
NOTE(review): the return type, most declarations, the loop header of the
directory scan, the update of count and the return statement are on lines
missing from this listing. */
/****************************************************************
1430
Returns the middle record of the record list. If there are an even number
1431
of records in the list, returns the first record of the upper half-list. */
1434
page_get_middle_rec(
1435
/*================*/
1436
/* out: middle record */
1437
page_t* page) /* in: page */
1439
page_dir_slot_t* slot;
1446
/* This many records we must leave behind */
/* The total is the user record count plus PAGE_HEAP_NO_USER_LOW (presumably
covering the infimum and supremum records); half of it is the target. */
1447
middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2;
/* Per-slot step of the directory scan: the scan stops at the first slot
whose owned records would take the running count past middle. */
1453
slot = page_dir_get_nth_slot(page, i);
1454
n_owned = page_dir_slot_get_n_owned(slot);
1456
if (count + n_owned > middle) {
/* Continue from the record that follows the previous slot's record. */
1464
slot = page_dir_get_nth_slot(page, i - 1);
1465
rec = (rec_t*) page_dir_slot_get_rec(slot);
1466
rec = page_rec_get_next(rec);
1468
/* There are now count records behind rec */
1470
for (i = 0; i < middle - count; i++) {
1471
rec = page_rec_get_next(rec);
1477
/* page_rec_get_n_recs_before(): counts records by first advancing rec to the
next record with a nonzero n_owned, then summing n_owned over the directory
slots up to and including the slot that points to that record. The same
steps appear twice, once with the compact (_new) accessors and once with the
old-format (_old) ones.
NOTE(review): the return type, some declarations, the braces, the else line,
whatever is done inside the two while loops besides advancing rec, the loop
exits and the final adjustment and return of n are on lines missing from this
listing. */
/*******************************************************************
1478
Returns the number of records before the given record in chain.
1479
The number includes infimum and supremum records. */
1482
page_rec_get_n_recs_before(
1483
/*=======================*/
1484
/* out: number of records */
1485
const rec_t* rec) /* in: the physical record */
1487
const page_dir_slot_t* slot;
1488
const rec_t* slot_rec;
1493
ut_ad(page_rec_check(rec));
1495
page = page_align(rec);
1496
if (page_is_comp(page)) {
/* Advance to the next record that owns records (n_owned != 0). */
1497
while (rec_get_n_owned_new(rec) == 0) {
1499
rec = rec_get_next_ptr_const(rec, TRUE);
/* Add up the owned counts slot by slot until the owning slot is reached. */
1503
for (i = 0; ; i++) {
1504
slot = page_dir_get_nth_slot(page, i);
1505
slot_rec = page_dir_slot_get_rec(slot);
1507
n += rec_get_n_owned_new(slot_rec);
1509
if (rec == slot_rec) {
/* Old-format variant of the same two steps. */
1515
while (rec_get_n_owned_old(rec) == 0) {
1517
rec = rec_get_next_ptr_const(rec, FALSE);
1521
for (i = 0; ; i++) {
1522
slot = page_dir_get_nth_slot(page, i);
1523
slot_rec = page_dir_slot_get_rec(slot);
1525
n += rec_get_n_owned_old(slot_rec);
1527
if (rec == slot_rec) {
1541
/* Debugging aid: prints the record with rec_print_new() and then the fields
that only have meaning inside an index page (n_owned, heap_no and the offset
of the next record), using the accessors that match the record format.
Finally the record is checked with page_rec_check() and rec_validate().
NOTE(review): the function-name line (page_rec_print, judging by its caller
in the record-list printer), the return type, the braces, the else line and
the two fprintf(stderr, openers are on lines missing from this listing; the
surviving lines are kept as they are. */
/****************************************************************
1542
Prints record contents including the data relevant only in
1543
the index page context. */
1548
const rec_t* rec, /* in: physical record */
1549
const ulint* offsets)/* in: record descriptor */
/* The record and the offsets array must agree on compact vs. old format. */
1551
ut_a(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
1552
rec_print_new(stderr, rec, offsets);
1553
if (page_rec_is_comp(rec)) {
1555
" n_owned: %lu; heap_no: %lu; next rec: %lu\n",
1556
(ulong) rec_get_n_owned_new(rec),
1557
(ulong) rec_get_heap_no_new(rec),
1558
(ulong) rec_get_next_offs(rec, TRUE));
/* Old-format branch: the next-record offset has to be read with the
compact flag FALSE, like every other old-format access in this file. It was
passed as TRUE here, which reads the field with the compact-format rules and
prints a wrong "next rec" value for old-style records. */
1561
" n_owned: %lu; heap_no: %lu; next rec: %lu\n",
1562
(ulong) rec_get_n_owned_old(rec),
1563
(ulong) rec_get_heap_no_old(rec),
1564
(ulong) rec_get_next_offs(rec, FALSE));
1567
page_rec_check(rec);
1568
rec_validate(rec, offsets);
1571
/* Debugging aid: prints a summary line for the page directory, then the
first pr_n and the last pr_n slots (n_owned and record offset) with one
" ... " row in between, and finally the total record count.
NOTE(review): the function-name line (page_dir_print, judging by its caller),
the return type, some declarations, the braces and parts of the two fprintf
calls (format pieces and arguments) are on lines missing from this listing. */
/*******************************************************************
1572
This is used to print the contents of the directory for
1573
debugging purposes. */
1578
page_t* page, /* in: index page */
1579
ulint pr_n) /* in: print n first and n last entries */
1583
page_dir_slot_t* slot;
1585
n = page_dir_get_n_slots(page);
1587
fprintf(stderr, "--------------------------------\n"
1590
"Directory stack top at offs: %lu; number of slots: %lu\n",
1591
page, (ulong) page_offset(page_dir_get_nth_slot(page, n - 1)),
1593
for (i = 0; i < n; i++) {
1594
slot = page_dir_get_nth_slot(page, i);
/* One ellipsis row marks where the skipped middle part begins. */
1595
if ((i == pr_n) && (i < n - pr_n)) {
1596
fputs(" ... \n", stderr);
/* Only the first pr_n and the last pr_n slots are printed in full. */
1598
if ((i < pr_n) || (i >= n - pr_n)) {
1600
"Contents of slot: %lu: n_owned: %lu,"
1603
(ulong) page_dir_slot_get_n_owned(slot),
1605
page_offset(page_dir_slot_get_rec(slot)));
/* The total is the user record count plus PAGE_HEAP_NO_USER_LOW. */
1608
fprintf(stderr, "Total of %lu records\n"
1609
"--------------------------------\n",
1610
(ulong) (PAGE_HEAP_NO_USER_LOW + page_get_n_recs(page)));
1613
/* Debugging aid: walks the record list of the page with a page cursor and
prints records with page_rec_print(): a leading group, an optional " ... "
row, then a trailing group selected by count + pr_n >= n_recs.
NOTE(review): the function-name line (page_print_list, judging by its
caller), the return type, some declarations, the braces, the loop header of
the first pass, the updates of count and the fprintf(stderr, openers are on
lines missing from this listing. */
/*******************************************************************
1614
This is used to print the contents of the page record list for
1615
debugging purposes. */
1620
buf_block_t* block, /* in: index page */
1621
dict_index_t* index, /* in: dictionary index of the page */
1622
ulint pr_n) /* in: print n first and n last entries */
1624
page_t* page = block->frame;
/* offsets starts out on the stack array offsets_; rec_get_offsets() is also
given &heap, and a heap that ends up non-NULL is freed at the end. */
1628
mem_heap_t* heap = NULL;
1629
ulint offsets_[REC_OFFS_NORMAL_SIZE];
1630
ulint* offsets = offsets_;
1631
rec_offs_init(offsets_);
/* The page format must match the table format of the index. */
1633
ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table));
1636
"--------------------------------\n"
1637
"PAGE RECORD LIST\n"
1638
"Page address %p\n", page);
1640
n_recs = page_get_n_recs(page);
1642
page_cur_set_before_first(block, &cur);
/* First pass: print from the start of the list; the visible exits are
count == pr_n and reaching the position after the last record. */
1645
offsets = rec_get_offsets(cur.rec, index, offsets,
1646
ULINT_UNDEFINED, &heap);
1647
page_rec_print(cur.rec, offsets);
1649
if (count == pr_n) {
1652
if (page_cur_is_after_last(&cur)) {
1655
page_cur_move_to_next(&cur);
/* The ellipsis is printed only when records are actually skipped. */
1659
if (n_recs > 2 * pr_n) {
1660
fputs(" ... \n", stderr);
/* Second pass: keep advancing and print only the trailing records. */
1663
while (!page_cur_is_after_last(&cur)) {
1664
page_cur_move_to_next(&cur);
1666
if (count + pr_n >= n_recs) {
1667
offsets = rec_get_offsets(cur.rec, index, offsets,
1668
ULINT_UNDEFINED, &heap);
1669
page_rec_print(cur.rec, offsets);
1675
"Total of %lu records \n"
1676
"--------------------------------\n",
1677
(ulong) (count + 1));
1679
if (UNIV_LIKELY_NULL(heap)) {
1680
mem_heap_free(heap);
1684
/* Debugging aid: prints one formatted report of the page header fields: page
address, PAGE_N_RECS, the page format name, PAGE_N_DIR_SLOTS, PAGE_HEAP_TOP,
the n_heap value, PAGE_FREE, PAGE_GARBAGE, PAGE_LAST_INSERT, PAGE_DIRECTION
and PAGE_N_DIRECTION.
NOTE(review): the function-name line (page_header_print, judging by its
caller), the parameter list, the braces and the opener of the print call are
on lines missing from this listing; the output stream is presumably stderr as
in the sibling print functions. */
/*******************************************************************
1685
Prints the info in a page header. */
1693
"--------------------------------\n"
1694
"PAGE HEADER INFO\n"
1695
"Page address %p, n records %lu (%s)\n"
1696
"n dir slots %lu, heap top %lu\n"
1697
"Page n heap %lu, free %lu, garbage %lu\n"
1698
"Page last insert %lu, direction %lu, n direction %lu\n",
1699
page, (ulong) page_header_get_field(page, PAGE_N_RECS),
1700
page_is_comp(page) ? "compact format" : "original format",
1701
(ulong) page_header_get_field(page, PAGE_N_DIR_SLOTS),
1702
(ulong) page_header_get_field(page, PAGE_HEAP_TOP),
1703
(ulong) page_dir_get_n_heap(page),
1704
(ulong) page_header_get_field(page, PAGE_FREE),
1705
(ulong) page_header_get_field(page, PAGE_GARBAGE),
1706
(ulong) page_header_get_field(page, PAGE_LAST_INSERT),
1707
(ulong) page_header_get_field(page, PAGE_DIRECTION),
1708
(ulong) page_header_get_field(page, PAGE_N_DIRECTION));
1711
/* Debugging aid: prints the whole page by calling, in this order,
page_header_print(page), page_dir_print(page, dn) and
page_print_list(block, index, rn).
NOTE(review): the function-name line (presumably page_print), the return
type, the braces and the closing lines of the dn and rn parameter comments
are on lines missing from this listing. */
/*******************************************************************
1712
This is used to print the contents of the page for
1713
debugging purposes. */
1718
buf_block_t* block, /* in: index page */
1719
dict_index_t* index, /* in: dictionary index of the page */
1720
ulint dn, /* in: print dn first and last entries
1722
ulint rn) /* in: print rn first and last records
1725
page_t* page = block->frame;
1727
page_header_print(page);
1728
page_dir_print(page, dn);
1729
page_print_list(block, index, rn);
1732
/* Validates one record in its page context: after page_rec_check() and
rec_validate(), n_owned must not exceed PAGE_DIR_SLOT_MAX_N_OWNED and heap_no
must be below the page's n_heap value; a diagnostic is printed otherwise.
NOTE(review): the function-name line (page_rec_validate, judging by its
caller in the page validator), the return type, the declarations, the braces,
the else line, the fprintf(stderr, openers and the return statements are on
lines missing from this listing. */
/*******************************************************************
1733
The following is used to validate a record on a page. This function
1734
differs from rec_validate as it can also check the n_owned field and
1735
the heap_no field. */
1740
/* out: TRUE if ok */
1741
rec_t* rec, /* in: physical record */
1742
const ulint* offsets)/* in: array returned by rec_get_offsets() */
1748
page = page_align(rec);
/* The page and the offsets array must agree on compact vs. old format. */
1749
ut_a(!page_is_comp(page) == !rec_offs_comp(offsets));
1751
page_rec_check(rec);
1752
rec_validate(rec, offsets);
/* Fetch n_owned and heap_no with the accessors matching the format. */
1754
if (page_rec_is_comp(rec)) {
1755
n_owned = rec_get_n_owned_new(rec);
1756
heap_no = rec_get_heap_no_new(rec);
1758
n_owned = rec_get_n_owned_old(rec);
1759
heap_no = rec_get_heap_no_old(rec);
/* Upper bound on the owned count. */
1762
if (UNIV_UNLIKELY(!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED))) {
1764
"InnoDB: Dir slot of rec %lu, n owned too big %lu\n",
1765
(ulong) page_offset(rec), (ulong) n_owned);
/* The heap number must be smaller than the page's n_heap. */
1769
if (UNIV_UNLIKELY(!(heap_no < page_dir_get_n_heap(page)))) {
1771
"InnoDB: Heap no of rec %lu too big %lu %lu\n",
1772
(ulong) page_offset(rec), (ulong) heap_no,
1773
(ulong) page_dir_get_n_heap(page));
1780
/* Reads the 2-byte contents of the first directory slot and of another slot
(by the banner, the last one) and reports directory corruption, printing the
page with buf_page_print(), when they are not the infimum and supremum
offsets respectively.
NOTE(review): the function-name line (presumably page_check_dir), the return
type, some declarations, the braces, the second argument line of the
supremum_offs read, the fprintf(stderr, openers and anything done after
buf_page_print() are on lines missing from this listing. */
/*******************************************************************
1781
Checks that the first directory slot points to the infimum record and
1782
the last to the supremum. This function is intended to track if the
1783
bug fixed in 4.0.14 has caused corruption to users' databases. */
1788
const page_t* page) /* in: index page */
1792
ulint supremum_offs;
1794
n_slots = page_dir_get_n_slots(page);
1795
infimum_offs = mach_read_from_2(page_dir_get_nth_slot(page, 0));
1796
supremum_offs = mach_read_from_2(page_dir_get_nth_slot(page,
/* Slot 0 must hold the infimum offset. */
1799
if (UNIV_UNLIKELY(!page_rec_is_infimum_low(infimum_offs))) {
1802
"InnoDB: Page directory corruption:"
1803
" infimum not pointed to\n");
1804
buf_page_print(page, 0);
/* The other slot read above must hold the supremum offset. */
1807
if (UNIV_UNLIKELY(!page_rec_is_supremum_low(supremum_offs))) {
1810
"InnoDB: Page directory corruption:"
1811
" supremum not pointed to\n");
1812
buf_page_print(page, 0);
1816
/* page_simple_validate_old(): structural check of an old-format page that
needs no index definition. The surviving lines show these checks: a sane
directory slot count, no overlap of record heap and directory, a walk of the
record list against the directory (owned counts, slot pointers, next-record
offsets, a cap on the number of records), the supremum's owned count, the
slot and record totals, a walk of the free list, and the n_heap total.
NOTE(review): the return type, most declarations, the braces, the loop
headers, the updates of count / own_count / slot_no, several condition and
argument lines, the fprintf(stderr, openers and the exits taken after each
diagnostic are on lines missing from this listing. The comment that opens the
overlap check has also lost its closing line. */
/*******************************************************************
1817
This function checks the consistency of an index page when we do not
1818
know the index. This is also resilient so that this should never crash
1819
even if the page is total garbage. */
1822
page_simple_validate_old(
1823
/*=====================*/
1824
/* out: TRUE if ok */
1825
page_t* page) /* in: old-style index page */
1827
page_dir_slot_t* slot;
/* This variant is only valid for pages that are not in compact format. */
1836
ut_a(!page_is_comp(page));
1838
/* Check first that the record heap and the directory do not
1841
n_slots = page_dir_get_n_slots(page);
1843
if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) {
1845
"InnoDB: Nonsensical number %lu of page dir slots\n",
1851
rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
1853
if (UNIV_UNLIKELY(rec_heap_top
1854
> page_dir_get_nth_slot(page, n_slots - 1))) {
1857
"InnoDB: Record heap and dir overlap on a page,"
1858
" heap top %lu, dir %lu\n",
1859
(ulong) page_header_get_field(page, PAGE_HEAP_TOP),
1861
page_offset(page_dir_get_nth_slot(page, n_slots - 1)));
1866
/* Validate the record list in a loop checking also that it is
1867
consistent with the page record directory. */
1872
slot = page_dir_get_nth_slot(page, slot_no);
1874
rec = page_get_infimum_rec(page);
/* No record on the list may lie above the record heap top. */
1877
if (UNIV_UNLIKELY(rec > rec_heap_top)) {
1879
"InnoDB: Record %lu is above"
1880
" rec heap top %lu\n",
1881
(ulong)(rec - page),
1882
(ulong)(rec_heap_top - page));
1887
if (UNIV_UNLIKELY(rec_get_n_owned_old(rec))) {
1888
/* This is a record pointed to by a dir slot */
1889
if (UNIV_UNLIKELY(rec_get_n_owned_old(rec)
1893
"InnoDB: Wrong owned count %lu, %lu,"
1895
(ulong) rec_get_n_owned_old(rec),
1897
(ulong)(rec - page));
1903
(page_dir_slot_get_rec(slot) != rec)) {
1905
"InnoDB: Dir slot does not point"
1906
" to right rec %lu\n",
1907
(ulong)(rec - page));
1914
if (!page_rec_is_supremum(rec)) {
1916
slot = page_dir_get_nth_slot(page, slot_no);
1920
if (page_rec_is_supremum(rec)) {
1926
(rec_get_next_offs(rec, FALSE) < FIL_PAGE_DATA
1927
|| rec_get_next_offs(rec, FALSE) >= UNIV_PAGE_SIZE)) {
1929
"InnoDB: Next record offset"
1930
" nonsensical %lu for rec %lu\n",
1931
(ulong) rec_get_next_offs(rec, FALSE),
1932
(ulong) (rec - page));
/* A record count above UNIV_PAGE_SIZE is taken as a sign of a circular
list. */
1939
if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
1941
"InnoDB: Page record list appears"
1942
" to be circular %lu\n",
1947
rec = page_rec_get_next(rec);
/* After the walk the current record is expected to be the supremum, which
must own at least one record. */
1951
if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) {
1952
fprintf(stderr, "InnoDB: n owned is zero in a supremum rec\n");
/* The walk must have ended on the last directory slot. */
1957
if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
1958
fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
1959
(ulong) slot_no, (ulong) (n_slots - 1));
1963
if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
1964
+ PAGE_HEAP_NO_USER_LOW
1966
fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
1967
(ulong) page_header_get_field(page, PAGE_N_RECS)
1968
+ PAGE_HEAP_NO_USER_LOW,
1969
(ulong) (count + 1));
1974
/* Check then the free list */
1975
rec = page_header_get_ptr(page, PAGE_FREE);
/* Each free-list record must lie inside the page body and not above the
record heap top; the walk is capped the same way as the record list. */
1977
while (rec != NULL) {
1978
if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA
1979
|| rec >= page + UNIV_PAGE_SIZE)) {
1981
"InnoDB: Free list record has"
1982
" a nonsensical offset %lu\n",
1983
(ulong) (rec - page));
1988
if (UNIV_UNLIKELY(rec > rec_heap_top)) {
1990
"InnoDB: Free list record %lu"
1991
" is above rec heap top %lu\n",
1992
(ulong) (rec - page),
1993
(ulong) (rec_heap_top - page));
2000
if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
2002
"InnoDB: Page free list appears"
2003
" to be circular %lu\n",
2008
rec = page_rec_get_next(rec);
/* n_heap is compared with count + 1; count presumably carries over from the
record-list walk so that both lists are included - TODO confirm. */
2011
if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
2013
fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
2014
(ulong) page_dir_get_n_heap(page),
2015
(ulong) (count + 1));
2026
/* page_simple_validate_new(): structural check of a compact-format page that
needs no index definition. It mirrors page_simple_validate_old() with the
compact accessors (rec_get_n_owned_new, rec_get_next_offs(rec, TRUE)) and
with offsets printed through page_offset().
NOTE(review): the return type, most declarations, the braces, the loop
headers, the updates of count / own_count / slot_no, several condition and
argument lines, the fprintf(stderr, openers and the exits taken after each
diagnostic are on lines missing from this listing. The comment that opens the
overlap check has also lost its closing line. */
/*******************************************************************
2027
This function checks the consistency of an index page when we do not
2028
know the index. This is also resilient so that this should never crash
2029
even if the page is total garbage. */
2032
page_simple_validate_new(
2033
/*=====================*/
2034
/* out: TRUE if ok */
2035
page_t* page) /* in: new-style index page */
2037
page_dir_slot_t* slot;
/* This variant is only valid for compact-format pages. */
2046
ut_a(page_is_comp(page));
2048
/* Check first that the record heap and the directory do not
2051
n_slots = page_dir_get_n_slots(page);
2053
if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) {
2055
"InnoDB: Nonsensical number %lu"
2056
" of page dir slots\n", (ulong) n_slots);
2061
rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
2063
if (UNIV_UNLIKELY(rec_heap_top
2064
> page_dir_get_nth_slot(page, n_slots - 1))) {
2067
"InnoDB: Record heap and dir overlap on a page,"
2068
" heap top %lu, dir %lu\n",
2069
(ulong) page_header_get_field(page, PAGE_HEAP_TOP),
2071
page_offset(page_dir_get_nth_slot(page, n_slots - 1)));
2076
/* Validate the record list in a loop checking also that it is
2077
consistent with the page record directory. */
2082
slot = page_dir_get_nth_slot(page, slot_no);
2084
rec = page_get_infimum_rec(page);
/* No record on the list may lie above the record heap top. */
2087
if (UNIV_UNLIKELY(rec > rec_heap_top)) {
2089
"InnoDB: Record %lu is above rec"
2091
(ulong) page_offset(rec),
2092
(ulong) page_offset(rec_heap_top));
2097
if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
2098
/* This is a record pointed to by a dir slot */
2099
if (UNIV_UNLIKELY(rec_get_n_owned_new(rec)
2103
"InnoDB: Wrong owned count %lu, %lu,"
2105
(ulong) rec_get_n_owned_new(rec),
2107
(ulong) page_offset(rec));
2113
(page_dir_slot_get_rec(slot) != rec)) {
2115
"InnoDB: Dir slot does not point"
2116
" to right rec %lu\n",
2117
(ulong) page_offset(rec));
2124
if (!page_rec_is_supremum(rec)) {
2126
slot = page_dir_get_nth_slot(page, slot_no);
2130
if (page_rec_is_supremum(rec)) {
2136
(rec_get_next_offs(rec, TRUE) < FIL_PAGE_DATA
2137
|| rec_get_next_offs(rec, TRUE) >= UNIV_PAGE_SIZE)) {
2139
"InnoDB: Next record offset nonsensical %lu"
2141
(ulong) rec_get_next_offs(rec, TRUE),
2142
(ulong) page_offset(rec));
/* A record count above UNIV_PAGE_SIZE is taken as a sign of a circular
list. */
2149
if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
2151
"InnoDB: Page record list appears"
2152
" to be circular %lu\n",
2157
rec = page_rec_get_next(rec);
/* After the walk the current record is expected to be the supremum, which
must own at least one record. */
2161
if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) {
2162
fprintf(stderr, "InnoDB: n owned is zero"
2163
" in a supremum rec\n");
/* The walk must have ended on the last directory slot. */
2168
if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
2169
fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
2170
(ulong) slot_no, (ulong) (n_slots - 1));
2174
if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
2175
+ PAGE_HEAP_NO_USER_LOW
2177
fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
2178
(ulong) page_header_get_field(page, PAGE_N_RECS)
2179
+ PAGE_HEAP_NO_USER_LOW,
2180
(ulong) (count + 1));
2185
/* Check then the free list */
2186
rec = page_header_get_ptr(page, PAGE_FREE);
/* Each free-list record must lie inside the page body and not above the
record heap top; the walk is capped the same way as the record list. */
2188
while (rec != NULL) {
2189
if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA
2190
|| rec >= page + UNIV_PAGE_SIZE)) {
2192
"InnoDB: Free list record has"
2193
" a nonsensical offset %lu\n",
2194
(ulong) page_offset(rec));
2199
if (UNIV_UNLIKELY(rec > rec_heap_top)) {
2201
"InnoDB: Free list record %lu"
2202
" is above rec heap top %lu\n",
2203
(ulong) page_offset(rec),
2204
(ulong) page_offset(rec_heap_top));
2211
if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
2213
"InnoDB: Page free list appears"
2214
" to be circular %lu\n",
2219
rec = page_rec_get_next(rec);
/* n_heap is compared with count + 1; count presumably carries over from the
record-list walk so that both lists are included - TODO confirm. */
2222
if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
2224
fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
2225
(ulong) page_dir_get_n_heap(page),
2226
(ulong) (count + 1));
2237
/* Full consistency check of an index page using the index definition. The
surviving lines show, in order: a check that the page format matches the
table, the format-specific simple validation, a heap/directory overlap
check, a walk of the record list (node pointer flag, page_rec_validate(),
ascending order through cmp_rec_rec(), summed data size, byte overlap through
the scratch buffer buf, owned counts against the directory), the slot,
record and data size totals, a walk of the free list with the same overlap
test, the n_heap total, and a final report with buf_page_print() when ret is
FALSE.
NOTE(review): the function-name line (presumably page_validate), the return
type, many declarations, the braces, the loop headers, the lines that mark
bytes in buf, the updates of the counters, several condition and argument
lines, the goto / return statements and labels are on lines missing from this
listing. The comment that opens the overlap check has also lost its closing
line. */
/*******************************************************************
2238
This function checks the consistency of an index page. */
2243
/* out: TRUE if ok */
2244
page_t* page, /* in: index page */
2245
dict_index_t* index) /* in: data dictionary index containing
2246
the page record type definition */
2248
page_dir_slot_t*slot;
2253
ulint rec_own_count;
2257
rec_t* old_rec = NULL;
2262
ulint* offsets = NULL;
2263
ulint* old_offsets = NULL;
/* The page's compact-format flag has to agree with the table definition. */
2265
if (UNIV_UNLIKELY((ibool) !!page_is_comp(page)
2266
!= dict_table_is_comp(index->table))) {
2267
fputs("InnoDB: 'compact format' flag mismatch\n", stderr);
/* Run the index-independent check for the matching page format first. */
2270
if (page_is_comp(page)) {
2271
if (UNIV_UNLIKELY(!page_simple_validate_new(page))) {
2275
if (UNIV_UNLIKELY(!page_simple_validate_old(page))) {
/* One heap serves both the scratch buffer below and rec_get_offsets(). */
2280
heap = mem_heap_create(UNIV_PAGE_SIZE + 200);
2282
/* The following buffer is used to check that the
2283
records in the page record heap do not overlap */
2285
buf = mem_heap_zalloc(heap, UNIV_PAGE_SIZE);
2287
/* Check first that the record heap and the directory do not
2290
n_slots = page_dir_get_n_slots(page);
2292
if (UNIV_UNLIKELY(!(page_header_get_ptr(page, PAGE_HEAP_TOP)
2293
<= page_dir_get_nth_slot(page, n_slots - 1)))) {
2295
fputs("InnoDB: Record heap and dir overlap on a page ",
2297
dict_index_name_print(stderr, NULL, index);
2298
fprintf(stderr, ", %p, %p\n",
2299
page_header_get_ptr(page, PAGE_HEAP_TOP),
2300
page_dir_get_nth_slot(page, n_slots - 1));
2305
/* Validate the record list in a loop checking also that
2306
it is consistent with the directory. */
2311
slot = page_dir_get_nth_slot(page, slot_no);
2313
rec = page_get_infimum_rec(page);
2316
offsets = rec_get_offsets(rec, index, offsets,
2317
ULINT_UNDEFINED, &heap);
/* On compact pages a user record whose node pointer flag equals
page_is_leaf() is reported as a mismatch. */
2319
if (page_is_comp(page) && page_rec_is_user_rec(rec)
2320
&& UNIV_UNLIKELY(rec_get_node_ptr_flag(rec)
2321
== page_is_leaf(page))) {
2322
fputs("InnoDB: node_ptr flag mismatch\n", stderr);
2326
if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) {
2330
/* Check that the records are in the ascending order */
2331
if (UNIV_LIKELY(count >= PAGE_HEAP_NO_USER_LOW)
2332
&& !page_rec_is_supremum(rec)) {
2334
(1 != cmp_rec_rec(rec, old_rec,
2335
offsets, old_offsets, index))) {
2337
"InnoDB: Records in wrong order"
2339
(ulong) page_get_page_no(page));
2340
dict_index_name_print(stderr, NULL, index);
2341
fputs("\nInnoDB: previous record ", stderr);
2342
rec_print_new(stderr, old_rec, old_offsets);
2343
fputs("\nInnoDB: record ", stderr);
2344
rec_print_new(stderr, rec, offsets);
/* Only user records contribute to the summed data size. */
2351
if (page_rec_is_user_rec(rec)) {
2353
data_size += rec_offs_size(offsets);
/* Test every byte of the record against buf; a nonzero byte means another
record already covers it. The bytes are presumably marked afterwards on a
line not visible here. */
2356
offs = page_offset(rec_get_start(rec, offsets));
2358
for (i = rec_offs_size(offsets); i--; ) {
2359
if (UNIV_UNLIKELY(buf[offs + i])) {
2360
/* No other record may overlap this */
2362
fputs("InnoDB: Record overlaps another\n",
2370
if (page_is_comp(page)) {
2371
rec_own_count = rec_get_n_owned_new(rec);
2373
rec_own_count = rec_get_n_owned_old(rec);
2376
if (UNIV_UNLIKELY(rec_own_count)) {
2377
/* This is a record pointed to by a dir slot */
2378
if (UNIV_UNLIKELY(rec_own_count != own_count)) {
2380
"InnoDB: Wrong owned count %lu, %lu\n",
2381
(ulong) rec_own_count,
2386
if (page_dir_slot_get_rec(slot) != rec) {
2387
fputs("InnoDB: Dir slot does not"
2388
" point to right rec\n",
2393
page_dir_slot_check(slot);
2396
if (!page_rec_is_supremum(rec)) {
2398
slot = page_dir_get_nth_slot(page, slot_no);
2402
if (page_rec_is_supremum(rec)) {
2409
rec = page_rec_get_next(rec);
2411
/* set old_offsets to offsets; recycle offsets */
2413
ulint* offs = old_offsets;
2414
old_offsets = offsets;
/* The record reached at the end of the walk must own at least one record. */
2419
if (page_is_comp(page)) {
2420
if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) {
2424
} else if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) {
2426
fputs("InnoDB: n owned is zero\n", stderr);
2430
if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
2431
fprintf(stderr, "InnoDB: n slots wrong %lu %lu\n",
2432
(ulong) slot_no, (ulong) (n_slots - 1));
2436
if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
2437
+ PAGE_HEAP_NO_USER_LOW
2439
fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
2440
(ulong) page_header_get_field(page, PAGE_N_RECS)
2441
+ PAGE_HEAP_NO_USER_LOW,
2442
(ulong) (count + 1));
/* The data size summed over user records must equal what
page_get_data_size() reports. */
2446
if (UNIV_UNLIKELY(data_size != page_get_data_size(page))) {
2448
"InnoDB: Summed data size %lu, returned by func %lu\n",
2449
(ulong) data_size, (ulong) page_get_data_size(page));
2453
/* Check then the free list */
2454
rec = page_header_get_ptr(page, PAGE_FREE);
2456
while (rec != NULL) {
2457
offsets = rec_get_offsets(rec, index, offsets,
2458
ULINT_UNDEFINED, &heap);
2459
if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) {
/* Free-list records go through the same byte overlap test. */
2465
offs = page_offset(rec_get_start(rec, offsets));
2467
for (i = rec_offs_size(offsets); i--; ) {
2469
if (UNIV_UNLIKELY(buf[offs + i])) {
2470
fputs("InnoDB: Record overlaps another"
2471
" in free list\n", stderr);
2478
rec = page_rec_get_next(rec);
2481
if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
2482
fprintf(stderr, "InnoDB: N heap is wrong %lu %lu\n",
2483
(ulong) page_dir_get_n_heap(page),
2491
mem_heap_free(heap);
/* On failure the page number and index name are reported and the page is
printed. */
2493
if (UNIV_UNLIKELY(ret == FALSE)) {
2495
fprintf(stderr, "InnoDB: Apparent corruption in page %lu in ",
2496
(ulong) page_get_page_no(page));
2497
dict_index_name_print(stderr, NULL, index);
2499
buf_page_print(page, 0);
2505
/* page_find_rec_with_heap_no(): follows the record list from the infimum
(PAGE_NEW_INFIMUM on compact pages, PAGE_OLD_INFIMUM otherwise), comparing
each record's heap number with heap_no and stopping at the record whose heap
number is PAGE_HEAP_NO_SUPREMUM.
NOTE(review): the return type, the declaration of rec, the braces, the loop
headers, the else line and the return statements are on lines missing from
this listing; the end of the function is not visible either. */
/*******************************************************************
2506
Looks in the page record list for a record with the given heap number. */
2509
page_find_rec_with_heap_no(
2510
/*=======================*/
2511
/* out: record, NULL if not found */
2512
const page_t* page, /* in: index page */
2513
ulint heap_no)/* in: heap number */
/* Compact-format walk. */
2517
if (page_is_comp(page)) {
2518
rec = page + PAGE_NEW_INFIMUM;
2521
ulint rec_heap_no = rec_get_heap_no_new(rec);
2523
if (rec_heap_no == heap_no) {
2526
} else if (rec_heap_no == PAGE_HEAP_NO_SUPREMUM) {
2531
rec = page + rec_get_next_offs(rec, TRUE);
/* Old-format walk: same steps with the old-format accessors. */
2534
rec = page + PAGE_OLD_INFIMUM;
2537
ulint rec_heap_no = rec_get_heap_no_old(rec);
2539
if (rec_heap_no == heap_no) {
2542
} else if (rec_heap_no == PAGE_HEAP_NO_SUPREMUM) {
2547
rec = page + rec_get_next_offs(rec, FALSE);