1
/******************************************************
6
Created 7/19/1997 Heikki Tuuri
7
*******************************************************/
12
#include "ibuf0ibuf.ic"
25
#include "sync0sync.h"
26
#include "dict0boot.h"
28
#include "lock0lock.h"
32
/* PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM
34
If an OS thread performs any operation that brings in disk pages from
35
non-system tablespaces into the buffer pool, or creates such a page there,
36
then the operation may have as a side effect an insert buffer index tree
37
compression. Thus, the tree latch of the insert buffer tree may be acquired
38
in the x-mode, and also the file space latch of the system tablespace may
39
be acquired in the x-mode.
41
Also, an insert to an index in a non-system tablespace can have the same
42
effect. How do we know this cannot lead to a deadlock of OS threads? There
43
is a problem with the i\o-handler threads: they break the latching order
44
because they own x-latches to pages which are on a lower level than the
45
insert buffer tree latch, its page latches, and the tablespace latch an
46
insert buffer operation can reserve.
48
The solution is the following: We put into each tablespace an insert buffer
49
of its own. Let all the tree and page latches connected with the insert buffer
50
be later in the latching order than the fsp latch and fsp page latches.
51
Insert buffer pages must be such that the insert buffer is never invoked
52
when these pages area accessed as this would result in a recursion violating
53
the latching order. We let a special i/o-handler thread take care of i/o to
54
the insert buffer pages and the ibuf bitmap pages, as well as the fsp bitmap
55
pages and the first inode page, which contains the inode of the ibuf tree: let
56
us call all these ibuf pages. If the OS does not support asynchronous i/o,
57
then there is no special i/o thread, but to prevent deadlocks, we do not let a
58
read-ahead access both non-ibuf and ibuf pages.
60
Then an i/o-handler for the insert buffer never needs to access the insert
61
buffer tree and thus obeys the latching order. On the other hand, other
62
i/o-handlers for other tablespaces may require access to the insert buffer,
63
but because all kinds of latches they need to access there are later in the
64
latching order, no violation of the latching order occurs in this case,
67
A problem is how to grow and contract an insert buffer tree. As it is later
68
in the latching order than the fsp management, we have to reserve the fsp
69
latch first, before adding or removing pages from the insert buffer tree.
70
We let the insert buffer tree have its own file space management: a free
71
list of pages linked to the tree root. To prevent recursive using of the
72
insert buffer when adding pages to the tree, we must first load these pages
73
to memory, obtaining a latch on them, and only after that add them to the
74
free list of the insert buffer tree. More difficult is removing of pages
75
from the free list. If there is an excess of pages in the free list of the
76
ibuf tree, they might be needed if some thread reserves the fsp latch,
77
intending to allocate more file space. So we do the following: if a thread
78
reserves the fsp latch, we check the writer count field of the latch. If
79
this field has value 1, it means that the thread did not own the latch
80
before entering the fsp system, and the mtr of the thread contains no
81
modifications to the fsp pages. Now we are free to reserve the ibuf latch,
82
and check if there is an excess of pages in the free list. We can then, in a
83
separate mini-transaction, take them out of the free list and free them to
86
To avoid deadlocks in the ibuf system, we divide file pages into three levels:
89
(2) ibuf tree pages and the pages in the ibuf tree free list, and
90
(3) ibuf bitmap pages.
92
No OS thread is allowed to access higher level pages if it has latches to
93
lower level pages; even if the thread owns a B-tree latch it must not access
94
the B-tree non-leaf pages if it has latches on lower level pages. Read-ahead
95
is only allowed for level 1 and 2 pages. Dedicated i/o-handler threads handle
96
exclusively level 1 i/o. A dedicated i/o handler thread handles exclusively
97
level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e.,
98
it uses synchronous aio or the OS does not support aio, it can access any
99
pages, as long as it obeys the access order rules. */
101
/* Buffer pool size per the maximum insert buffer size */
102
#define IBUF_POOL_SIZE_PER_MAX_SIZE 2
104
/* The insert buffer control structure */
107
ulint ibuf_rnd = 986058871;
109
ulint ibuf_flush_count = 0;
111
/* Dimensions for the ibuf_count array */
112
#define IBUF_COUNT_N_SPACES 10
113
#define IBUF_COUNT_N_PAGES 10000
115
/* Buffered entry counts for file pages, used in debugging */
116
ulint* ibuf_counts[IBUF_COUNT_N_SPACES];
118
ibool ibuf_counts_inited = FALSE;
120
/* The start address for an insert buffer bitmap page bitmap */
121
#define IBUF_BITMAP PAGE_DATA
123
/* Offsets in bits for the bits describing a single page in the bitmap */
124
#define IBUF_BITMAP_FREE 0
125
#define IBUF_BITMAP_BUFFERED 2
126
#define IBUF_BITMAP_IBUF 3 /* TRUE if page is a part of the ibuf
127
tree, excluding the root page, or is
128
in the free list of the ibuf */
130
/* Number of bits describing a single page */
131
#define IBUF_BITS_PER_PAGE 4
133
/* The mutex used to block pessimistic inserts to ibuf trees */
134
mutex_t ibuf_pessimistic_insert_mutex;
136
/* The mutex protecting the insert buffer structs */
139
/* The mutex protecting the insert buffer bitmaps */
140
mutex_t ibuf_bitmap_mutex;
142
/* The area in pages from which contract looks for page numbers for merge */
143
#define IBUF_MERGE_AREA 8
145
/* Inside the merge area, pages which have at most 1 per this number less
146
buffered entries compared to maximum volume that can buffered for a single
147
page are merged along with the page whose buffer became full */
148
#define IBUF_MERGE_THRESHOLD 4
150
/* In ibuf_contract at most this number of pages is read to memory in one
151
batch, in order to merge the entries for them in the insert buffer */
152
#define IBUF_MAX_N_PAGES_MERGED IBUF_MERGE_AREA
154
/* If the combined size of the ibuf trees exceeds ibuf->max_size by this
155
many pages, we start to contract it in connection to inserts there, using
156
non-synchronous contract */
157
#define IBUF_CONTRACT_ON_INSERT_NON_SYNC 0
159
/* Same as above, but use synchronous contract */
160
#define IBUF_CONTRACT_ON_INSERT_SYNC 5
162
/* Same as above, but no insert is done, only contract is called */
163
#define IBUF_CONTRACT_DO_NOT_INSERT 10
165
/* TODO: how to cope with drop table if there are records in the insert
166
buffer for the indexes of the table? Is there actually any problem,
167
because ibuf merge is done to a page when it is read in, and it is
168
still physically like the index page even if the index would have been
169
dropped! So, there seems to be no problem. */
171
/**********************************************************************
172
Validates the ibuf data structures when the caller owns ibuf_mutex. */
175
ibuf_validate_low(void);
176
/*===================*/
177
/* out: TRUE if ok */
179
/**********************************************************************
180
Sets the flag in the current OS thread local storage denoting that it is
181
inside an insert buffer routine. */
189
ptr = thr_local_get_in_ibuf_field();
191
ut_ad(*ptr == FALSE);
196
/**********************************************************************
197
Sets the flag in the current OS thread local storage denoting that it is
198
exiting an insert buffer routine. */
206
ptr = thr_local_get_in_ibuf_field();
213
/**********************************************************************
214
Returns TRUE if the current OS thread is performing an insert buffer
220
/* out: TRUE if inside an insert buffer routine: for instance,
221
a read-ahead of non-ibuf pages is then forbidden */
223
return(*thr_local_get_in_ibuf_field());
226
/**********************************************************************
227
Gets the ibuf header page and x-latches it. */
230
ibuf_header_page_get(
231
/*=================*/
232
/* out: insert buffer header page */
233
ulint space, /* in: space id */
234
mtr_t* mtr) /* in: mtr */
238
ut_ad(!ibuf_inside());
240
page = buf_page_get(space, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr);
242
buf_page_dbg_add_level(page, SYNC_IBUF_HEADER);
247
/**********************************************************************
248
Gets the root page and x-latches it. */
253
/* out: insert buffer tree root page */
254
ibuf_data_t* data, /* in: ibuf data */
255
ulint space, /* in: space id */
256
mtr_t* mtr) /* in: mtr */
260
ut_ad(ibuf_inside());
262
mtr_x_lock(dict_tree_get_lock((data->index)->tree), mtr);
264
page = buf_page_get(space, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH,
266
buf_page_dbg_add_level(page, SYNC_TREE_NODE);
271
/**********************************************************************
272
Gets the ibuf count for a given page. */
277
/* out: number of entries in the insert buffer
278
currently buffered for this page */
279
ulint space, /* in: space id */
280
ulint page_no)/* in: page number */
282
ut_ad(space < IBUF_COUNT_N_SPACES);
283
ut_ad(page_no < IBUF_COUNT_N_PAGES);
285
if (!ibuf_counts_inited) {
290
return(*(ibuf_counts[space] + page_no));
293
/**********************************************************************
294
Sets the ibuf count for a given page. */
299
ulint space, /* in: space id */
300
ulint page_no,/* in: page number */
301
ulint val) /* in: value to set */
303
ut_ad(space < IBUF_COUNT_N_SPACES);
304
ut_ad(page_no < IBUF_COUNT_N_PAGES);
305
ut_ad(val < UNIV_PAGE_SIZE);
307
*(ibuf_counts[space] + page_no) = val;
310
/**********************************************************************
311
Creates the insert buffer data structure at a database startup and
312
initializes the data structures for the insert buffer of each tablespace. */
315
ibuf_init_at_db_start(void)
316
/*=======================*/
318
ibuf = mem_alloc(sizeof(ibuf_t));
320
/* Note that also a pessimistic delete can sometimes make a B-tree
321
grow in size, as the references on the upper levels of the tree can
324
ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
325
/ IBUF_POOL_SIZE_PER_MAX_SIZE;
326
ibuf->meter = IBUF_THRESHOLD + 1;
328
UT_LIST_INIT(ibuf->data_list);
332
#ifdef UNIV_IBUF_DEBUG
336
for (i = 0; i < IBUF_COUNT_N_SPACES; i++) {
338
ibuf_counts[i] = mem_alloc(sizeof(ulint)
339
* IBUF_COUNT_N_PAGES);
340
for (j = 0; j < IBUF_COUNT_N_PAGES; j++) {
341
ibuf_count_set(i, j, 0);
346
mutex_create(&ibuf_pessimistic_insert_mutex);
348
mutex_set_level(&ibuf_pessimistic_insert_mutex,
349
SYNC_IBUF_PESS_INSERT_MUTEX);
350
mutex_create(&ibuf_mutex);
352
mutex_set_level(&ibuf_mutex, SYNC_IBUF_MUTEX);
354
mutex_create(&ibuf_bitmap_mutex);
356
mutex_set_level(&ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX);
358
fil_ibuf_init_at_db_start();
360
ibuf_counts_inited = TRUE;
363
/**********************************************************************
364
Updates the size information in an ibuf data, assuming the segment size has
368
ibuf_data_sizes_update(
369
/*===================*/
370
ibuf_data_t* data, /* in: ibuf data struct */
371
page_t* root, /* in: ibuf tree root */
372
mtr_t* mtr) /* in: mtr */
376
ut_ad(mutex_own(&ibuf_mutex));
378
old_size = data->size;
380
data->free_list_len = flst_get_len(root + PAGE_HEADER
381
+ PAGE_BTR_IBUF_FREE_LIST, mtr);
383
data->height = 1 + btr_page_get_level(root, mtr);
385
data->size = data->seg_size - (1 + data->free_list_len);
386
/* the '1 +' is the ibuf header page */
387
ut_ad(data->size < data->seg_size);
389
if (page_get_n_recs(root) == 0) {
396
ut_ad(ibuf->size + data->size >= old_size);
398
ibuf->size = ibuf->size + data->size - old_size;
400
/* printf("ibuf size %lu, space ibuf size %lu\n", ibuf->size,
404
/**********************************************************************
405
Creates the insert buffer data struct for a single tablespace. Reads the
406
root page of the insert buffer tree in the tablespace. This function can
407
be called only after the dictionary system has been initialized, as this
408
creates also the insert buffer table and index for this tablespace. */
411
ibuf_data_init_for_space(
412
/*=====================*/
413
/* out, own: ibuf data struct, linked to the list
414
in ibuf control structure. */
415
ulint space) /* in: space id */
426
#ifdef UNIV_LOG_DEBUG
427
if (space % 2 == 1) {
429
printf("No ibuf op in replicate space\n");
434
data = mem_alloc(sizeof(ibuf_data_t));
440
mutex_enter(&ibuf_mutex);
442
mtr_x_lock(fil_space_get_latch(space), &mtr);
444
header_page = ibuf_header_page_get(space, &mtr);
446
fseg_n_reserved_pages(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
452
data->seg_size = n_used;
454
root = buf_page_get(space, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH,
456
buf_page_dbg_add_level(root, SYNC_TREE_NODE);
461
data->n_merged_recs = 0;
463
ibuf_data_sizes_update(data, root, &mtr);
465
mutex_exit(&ibuf_mutex);
471
sprintf(buf, "SYS_IBUF_TABLE_%lu", space);
473
table = dict_mem_table_create(buf, space, 2);
475
dict_mem_table_add_col(table, "PAGE_NO", DATA_BINARY, 0, 0, 0);
476
dict_mem_table_add_col(table, "TYPES", DATA_BINARY, 0, 0, 0);
478
table->id = ut_dulint_add(DICT_IBUF_ID_MIN, space);
480
dict_table_add_to_cache(table);
482
index = dict_mem_index_create(buf, "CLUST_IND", space,
483
DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF,
486
dict_mem_index_add_field(index, "PAGE_NO", 0);
487
dict_mem_index_add_field(index, "TYPES", 0);
489
index->page_no = FSP_IBUF_TREE_ROOT_PAGE_NO;
491
index->id = ut_dulint_add(DICT_IBUF_ID_MIN, space);
493
dict_index_add_to_cache(table, index);
495
data->index = dict_table_get_first_index(table);
497
mutex_enter(&ibuf_mutex);
499
UT_LIST_ADD_LAST(data_list, ibuf->data_list, data);
501
mutex_exit(&ibuf_mutex);
506
/*************************************************************************
507
Initializes an ibuf bitmap page. */
510
ibuf_bitmap_page_init(
511
/*==================*/
512
page_t* page, /* in: bitmap page */
513
mtr_t* mtr) /* in: mtr */
519
/* Write all zeros to the bitmap */
521
bit_offset = XDES_DESCRIBED_PER_PAGE * IBUF_BITS_PER_PAGE;
523
byte_offset = bit_offset / 8 + 1;
525
for (i = IBUF_BITMAP; i < IBUF_BITMAP + byte_offset; i++) {
527
*(page + i) = (byte)0;
530
mlog_write_initial_log_record(page, MLOG_IBUF_BITMAP_INIT, mtr);
533
/*************************************************************************
534
Parses a redo log record of an ibuf bitmap page init. */
537
ibuf_parse_bitmap_init(
538
/*===================*/
539
/* out: end of log record or NULL */
540
byte* ptr, /* in: buffer */
541
byte* end_ptr,/* in: buffer end */
542
page_t* page, /* in: page or NULL */
543
mtr_t* mtr) /* in: mtr or NULL */
545
ut_ad(ptr && end_ptr);
548
ibuf_bitmap_page_init(page, mtr);
554
/************************************************************************
555
Gets the desired bits for a given page from a bitmap page. */
558
ibuf_bitmap_page_get_bits(
559
/*======================*/
560
/* out: value of bits */
561
page_t* page, /* in: bitmap page */
562
ulint page_no,/* in: page whose bits to get */
563
ulint bit, /* in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */
564
mtr_t* mtr) /* in: mtr containing an x-latch to the bitmap page */
571
ut_ad(bit < IBUF_BITS_PER_PAGE);
572
ut_ad(IBUF_BITS_PER_PAGE % 2 == 0);
573
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
574
MTR_MEMO_PAGE_X_FIX));
576
bit_offset = (page_no % XDES_DESCRIBED_PER_PAGE) * IBUF_BITS_PER_PAGE
579
byte_offset = bit_offset / 8;
580
bit_offset = bit_offset % 8;
582
ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
584
map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
586
value = ut_bit_get_nth(map_byte, bit_offset);
588
if (bit == IBUF_BITMAP_FREE) {
589
ut_ad(bit_offset + 1 < 8);
591
value = value * 2 + ut_bit_get_nth(map_byte, bit_offset + 1);
597
/************************************************************************
598
Sets the desired bit for a given page in a bitmap page. */
601
ibuf_bitmap_page_set_bits(
602
/*======================*/
603
page_t* page, /* in: bitmap page */
604
ulint page_no,/* in: page whose bits to set */
605
ulint bit, /* in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */
606
ulint val, /* in: value to set */
607
mtr_t* mtr) /* in: mtr containing an x-latch to the bitmap page */
613
ut_ad(bit < IBUF_BITS_PER_PAGE);
614
ut_ad(IBUF_BITS_PER_PAGE % 2 == 0);
615
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
616
MTR_MEMO_PAGE_X_FIX));
617
#ifdef UNIV_IBUF_DEBUG
618
ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE)
619
|| (0 == ibuf_count_get(buf_frame_get_space_id(page), page_no)));
621
bit_offset = (page_no % XDES_DESCRIBED_PER_PAGE) * IBUF_BITS_PER_PAGE
624
byte_offset = bit_offset / 8;
625
bit_offset = bit_offset % 8;
627
ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
629
map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
631
if (bit == IBUF_BITMAP_FREE) {
632
ut_ad(bit_offset + 1 < 8);
635
map_byte = ut_bit_set_nth(map_byte, bit_offset, val / 2);
636
map_byte = ut_bit_set_nth(map_byte, bit_offset + 1, val % 2);
639
map_byte = ut_bit_set_nth(map_byte, bit_offset, val);
642
mlog_write_ulint(page + IBUF_BITMAP + byte_offset, map_byte,
646
/************************************************************************
647
Calculates the bitmap page number for a given page number. */
650
ibuf_bitmap_page_no_calc(
651
/*=====================*/
652
/* out: the bitmap page number where
653
the file page is mapped */
654
ulint page_no) /* in: tablespace page number */
656
return(FSP_IBUF_BITMAP_OFFSET
657
+ XDES_DESCRIBED_PER_PAGE
658
* (page_no / XDES_DESCRIBED_PER_PAGE));
661
/************************************************************************
662
Gets the ibuf bitmap page where the bits describing a given file page are
666
ibuf_bitmap_get_map_page(
667
/*=====================*/
668
/* out: bitmap page where the file page is mapped,
669
that is, the bitmap page containing the descriptor
670
bits for the file page; the bitmap page is
672
ulint space, /* in: space id of the file page */
673
ulint page_no,/* in: page number of the file page */
674
mtr_t* mtr) /* in: mtr */
678
page = buf_page_get(space, ibuf_bitmap_page_no_calc(page_no),
680
buf_page_dbg_add_level(page, SYNC_IBUF_BITMAP);
685
/****************************************************************************
686
Sets the free bits of the page in the ibuf bitmap. This is done in a separate
687
mini-transaction, hence this operation does not restrict further work to only
688
ibuf bitmap operations, which would result if the latch to the bitmap pag
692
ibuf_set_free_bits_low(
693
/*===================*/
694
ulint type, /* in: index type */
695
page_t* page, /* in: index page; free bit is reset if the index is
696
a non-clustered non-unique, and page level is 0 */
697
ulint val, /* in: value to set: < 4 */
698
mtr_t* mtr) /* in: mtr */
702
if (type & (DICT_CLUSTERED | DICT_UNIQUE)) {
707
if (btr_page_get_level_low(page) != 0) {
712
bitmap_page = ibuf_bitmap_get_map_page(buf_frame_get_space_id(page),
713
buf_frame_get_page_no(page), mtr);
714
#ifdef UNIV_IBUF_DEBUG
715
/* printf("Setting page no %lu free bits to %lu should be %lu\n",
716
buf_frame_get_page_no(page), val,
717
ibuf_index_page_calc_free(page)); */
719
ut_a(val <= ibuf_index_page_calc_free(page));
721
ibuf_bitmap_page_set_bits(bitmap_page, buf_frame_get_page_no(page),
722
IBUF_BITMAP_FREE, val, mtr);
726
/****************************************************************************
727
Sets the free bit of the page in the ibuf bitmap. This is done in a separate
728
mini-transaction, hence this operation does not restrict further work to only
729
ibuf bitmap operations, which would result if the latch to the bitmap page
735
ulint type, /* in: index type */
736
page_t* page, /* in: index page; free bit is reset if the index is
737
a non-clustered non-unique, and page level is 0 */
738
ulint val, /* in: value to set: < 4 */
739
ulint max_val)/* in: ULINT_UNDEFINED or a maximum value which
740
the bits must have before setting; this is for
746
if (type & (DICT_CLUSTERED | DICT_UNIQUE)) {
751
if (btr_page_get_level_low(page) != 0) {
758
bitmap_page = ibuf_bitmap_get_map_page(buf_frame_get_space_id(page),
759
buf_frame_get_page_no(page), &mtr);
761
if (max_val != ULINT_UNDEFINED) {
762
#ifdef UNIV_IBUF_DEBUG
765
old_val = ibuf_bitmap_page_get_bits(bitmap_page,
766
buf_frame_get_page_no(page),
767
IBUF_BITMAP_FREE, &mtr);
768
if (old_val != max_val) {
770
"Ibuf: page %lu old val %lu max val %lu\n",
771
buf_frame_get_page_no(page), old_val, max_val); */
774
ut_a(old_val <= max_val);
777
#ifdef UNIV_IBUF_DEBUG
778
/* printf("Setting page no %lu free bits to %lu should be %lu\n",
779
buf_frame_get_page_no(page), val,
780
ibuf_index_page_calc_free(page)); */
782
ut_a(val <= ibuf_index_page_calc_free(page));
784
ibuf_bitmap_page_set_bits(bitmap_page, buf_frame_get_page_no(page),
785
IBUF_BITMAP_FREE, val, &mtr);
789
/****************************************************************************
790
Resets the free bits of the page in the ibuf bitmap. This is done in a
791
separate mini-transaction, hence this operation does not restrict further
792
work to only ibuf bitmap operations, which would result if the latch to the
793
bitmap page were kept. */
796
ibuf_reset_free_bits_with_type(
797
/*===========================*/
798
ulint type, /* in: index type */
799
page_t* page) /* in: index page; free bits are set to 0 if the index
800
is non-clustered and non-unique and the page level is
803
ibuf_set_free_bits(type, page, 0, ULINT_UNDEFINED);
806
/****************************************************************************
807
Resets the free bits of the page in the ibuf bitmap. This is done in a
808
separate mini-transaction, hence this operation does not restrict further
809
work to solely ibuf bitmap operations, which would result if the latch to
810
the bitmap page were kept. */
813
ibuf_reset_free_bits(
814
/*=================*/
815
dict_index_t* index, /* in: index */
816
page_t* page) /* in: index page; free bits are set to 0 if
817
the index is non-clustered and non-unique and
818
the page level is 0 */
820
ibuf_set_free_bits(index->type, page, 0, ULINT_UNDEFINED);
823
/**************************************************************************
824
Updates the free bits for a page to reflect the present state. Does this
825
in the mtr given, which means that the latching order rules virtually prevent
826
any further operations for this OS thread until mtr is committed. */
829
ibuf_update_free_bits_low(
830
/*======================*/
831
dict_index_t* index, /* in: index */
832
page_t* page, /* in: index page */
833
ulint max_ins_size, /* in: value of maximum insert size
834
with reorganize before the latest
835
operation performed to the page */
836
mtr_t* mtr) /* in: mtr */
841
before = ibuf_index_page_calc_free_bits(max_ins_size);
843
after = ibuf_index_page_calc_free(page);
845
if (before != after) {
846
ibuf_set_free_bits_low(index->type, page, after, mtr);
850
/**************************************************************************
851
Updates the free bits for the two pages to reflect the present state. Does
852
this in the mtr given, which means that the latching order rules virtually
853
prevent any further operations until mtr is committed. */
856
ibuf_update_free_bits_for_two_pages_low(
857
/*====================================*/
858
dict_index_t* index, /* in: index */
859
page_t* page1, /* in: index page */
860
page_t* page2, /* in: index page */
861
mtr_t* mtr) /* in: mtr */
865
/* As we have to x-latch two random bitmap pages, we have to acquire
866
the bitmap mutex to prevent a deadlock with a similar operation
867
performed by another OS thread. */
869
mutex_enter(&ibuf_bitmap_mutex);
871
state = ibuf_index_page_calc_free(page1);
873
ibuf_set_free_bits_low(index->type, page1, state, mtr);
875
state = ibuf_index_page_calc_free(page2);
877
ibuf_set_free_bits_low(index->type, page2, state, mtr);
879
mutex_exit(&ibuf_bitmap_mutex);
882
/**************************************************************************
883
Returns TRUE if the page is one of the fixed address ibuf pages. */
886
ibuf_fixed_addr_page(
887
/*=================*/
888
/* out: TRUE if a fixed address ibuf i/o page */
889
ulint page_no)/* in: page number */
891
if ((ibuf_bitmap_page(page_no))
892
|| (page_no == IBUF_TREE_ROOT_PAGE_NO)) {
899
/***************************************************************************
900
Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
905
/* out: TRUE if level 2 or level 3 page */
906
ulint space, /* in: space id */
907
ulint page_no)/* in: page number */
913
if (recv_no_ibuf_operations) {
914
/* Recovery is running: no ibuf operations should be
920
if (ibuf_fixed_addr_page(page_no)) {
925
ut_ad(fil_space_get_type(space) == FIL_TABLESPACE);
929
bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
931
ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
938
/***************************************************************************
939
Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
944
/* out: TRUE if level 2 or level 3 page */
945
ulint space, /* in: space id */
946
ulint page_no,/* in: page number */
947
mtr_t* mtr) /* in: mtr which will contain an x-latch to the
948
bitmap page if the page is not one of the fixed
949
address ibuf pages */
954
#ifdef UNIV_LOG_DEBUG
955
if (space % 2 != 0) {
957
printf("No ibuf in a replicate space\n");
962
if (ibuf_fixed_addr_page(page_no)) {
967
bitmap_page = ibuf_bitmap_get_map_page(space, page_no, mtr);
969
ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
974
/************************************************************************
975
Returns the page number field of an ibuf record. */
978
ibuf_rec_get_page_no(
979
/*=================*/
980
/* out: page number */
981
rec_t* rec) /* in: ibuf record */
986
ut_ad(ibuf_inside());
987
ut_ad(rec_get_n_fields(rec) > 2);
989
field = rec_get_nth_field(rec, 0, &len);
993
return(mach_read_from_4(field));
996
/************************************************************************
997
Returns the space taken by a stored non-clustered index entry if converted to
1001
ibuf_rec_get_volume(
1002
/*================*/
1003
/* out: size of index record in bytes + an upper
1004
limit of the space taken in the page directory */
1005
rec_t* rec) /* in: ibuf record */
1012
ut_ad(ibuf_inside());
1013
ut_ad(rec_get_n_fields(rec) > 2);
1015
n_fields = rec_get_n_fields(rec) - 2;
1017
field = rec_get_nth_field(rec, 2, &len);
1019
data_size = rec_get_data_size(rec) - (field - rec);
1021
return(data_size + rec_get_converted_extra_size(data_size, n_fields)
1022
+ page_dir_calc_reserved_space(1));
1025
/*************************************************************************
1026
Builds the tuple to insert to an ibuf tree when we have an entry for a
1027
non-clustered index. */
1032
/* out, own: entry to insert into an ibuf
1033
index tree; NOTE that the original entry
1034
must be kept because we copy pointers to its
1036
dtuple_t* entry, /* in: entry for a non-clustered index */
1037
ulint page_no,/* in: index page number where entry should
1039
mem_heap_t* heap) /* in: heap into which to build */
1043
dfield_t* entry_field;
1049
/* We have to build a tuple whose first field is the page number,
1050
the second field contains the original type information for entry,
1051
and the rest of the fields are copied from entry. All fields
1052
in the tuple are of the type binary. */
1054
n_fields = dtuple_get_n_fields(entry);
1056
tuple = dtuple_create(heap, n_fields + 2);
1058
/* Store the page number in tuple */
1060
field = dtuple_get_nth_field(tuple, 0);
1062
buf = mem_heap_alloc(heap, 4);
1064
mach_write_to_4(buf, page_no);
1066
dfield_set_data(field, buf, 4);
1068
/* Store the type info in tuple */
1070
buf2 = mem_heap_alloc(heap, n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
1072
for (i = 0; i < n_fields; i++) {
1074
field = dtuple_get_nth_field(tuple, i + 2);
1076
entry_field = dtuple_get_nth_field(entry, i);
1078
dfield_copy(field, entry_field);
1080
dtype_store_for_order_and_null_size(
1081
buf2 + i * DATA_ORDER_NULL_TYPE_BUF_SIZE,
1082
dfield_get_type(entry_field));
1085
field = dtuple_get_nth_field(tuple, 1);
1087
dfield_set_data(field, buf2, n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
1089
/* Set the types in the new tuple binary */
1091
dtuple_set_types_binary(tuple, n_fields + 2);
1096
/*************************************************************************
1097
Builds the entry to insert into a non-clustered index when we have the
1098
corresponding record in an ibuf index. */
1101
ibuf_build_entry_from_ibuf_rec(
1102
/*===========================*/
1103
/* out, own: entry to insert to
1104
a non-clustered index; NOTE that
1105
as we copy pointers to fields in
1106
ibuf_rec, the caller must hold a
1107
latch to the ibuf_rec page as long
1108
as the entry is used! */
1109
rec_t* ibuf_rec, /* in: record in an insert buffer */
1110
mem_heap_t* heap) /* in: heap where built */
1120
n_fields = rec_get_n_fields(ibuf_rec) - 2;
1122
tuple = dtuple_create(heap, n_fields);
1124
types = rec_get_nth_field(ibuf_rec, 1, &len);
1126
ut_ad(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
1128
for (i = 0; i < n_fields; i++) {
1129
field = dtuple_get_nth_field(tuple, i);
1131
data = rec_get_nth_field(ibuf_rec, i + 2, &len);
1133
dfield_set_data(field, data, len);
1135
dtype_read_for_order_and_null_size(dfield_get_type(field),
1136
types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE);
1142
/*************************************************************************
1143
Builds a search tuple used to search buffered inserts for an index page. */
1146
ibuf_search_tuple_build(
1147
/*====================*/
1148
/* out, own: search tuple */
1149
ulint page_no,/* in: index page number */
1150
mem_heap_t* heap) /* in: heap into which to build */
1156
tuple = dtuple_create(heap, 1);
1158
/* Store the page number in tuple */
1160
field = dtuple_get_nth_field(tuple, 0);
1162
buf = mem_heap_alloc(heap, 4);
1164
mach_write_to_4(buf, page_no);
1166
dfield_set_data(field, buf, 4);
1168
dtuple_set_types_binary(tuple, 1);
1173
/*************************************************************************
1174
Checks if there are enough pages in the free list of the ibuf tree that we
1175
dare to start a pessimistic insert to the insert buffer. */
1178
ibuf_data_enough_free_for_insert(
1179
/*=============================*/
1180
/* out: TRUE if enough free pages in list */
1181
ibuf_data_t* data) /* in: ibuf data for the space */
1183
ut_ad(mutex_own(&ibuf_mutex));
1185
/* We want a big margin of free pages, because a B-tree can sometimes
1186
grow in size also if records are deleted from it, as the node pointers
1187
can change, and we must make sure that we are able to delete the
1188
inserts buffered for pages that we read to the buffer pool, without
1189
any risk of running out of free space in the insert buffer. */
1191
if (data->free_list_len >= data->size / 2 + 3 * data->height) {
1199
/*************************************************************************
1200
Checks if there are enough pages in the free list of the ibuf tree that we
1201
should remove them and free to the file space management. */
1204
ibuf_data_too_much_free(
1205
/*====================*/
1206
/* out: TRUE if enough free pages in list */
1207
ibuf_data_t* data) /* in: ibuf data for the space */
1209
ut_ad(mutex_own(&ibuf_mutex));
1211
if (data->free_list_len >= 3 + data->size / 2 + 3 * data->height) {
1219
/*************************************************************************
1220
Allocates a new page from the ibuf file segment and adds it to the free
1226
/* out: DB_SUCCESS, or DB_STRONG_FAIL
1228
ulint space, /* in: space id */
1229
ibuf_data_t* ibuf_data) /* in: ibuf data for the space */
1232
page_t* header_page;
1236
page_t* bitmap_page;
1240
/* Acquire the fsp latch before the ibuf header, obeying the latching
1242
mtr_x_lock(fil_space_get_latch(space), &mtr);
1244
header_page = ibuf_header_page_get(space, &mtr);
1246
/* Allocate a new page: NOTE that if the page has been a part of a
1247
non-clustered index which has subsequently been dropped, then the
1248
page may have buffered inserts in the insert buffer, and these
1249
should be deleted from there. These get deleted when the page
1250
allocation creates the page in buffer. Thus the call below may end
1251
up calling the insert buffer routines and, as we yet have no latches
1252
to insert buffer tree pages, these routines can run without a risk
1253
of a deadlock. This is the reason why we created a special ibuf
1254
header page apart from the ibuf tree. */
1256
page_no = fseg_alloc_free_page(header_page + IBUF_HEADER
1257
+ IBUF_TREE_SEG_HEADER, 0, FSP_UP,
1259
if (page_no == FIL_NULL) {
1262
return(DB_STRONG_FAIL);
1265
page = buf_page_get(space, page_no, RW_X_LATCH, &mtr);
1267
buf_page_dbg_add_level(page, SYNC_TREE_NODE_NEW);
1271
mutex_enter(&ibuf_mutex);
1273
root = ibuf_tree_root_get(ibuf_data, space, &mtr);
1275
/* Add the page to the free list and update the ibuf size data */
1277
flst_add_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
1278
page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
1280
ibuf_data->seg_size++;
1281
ibuf_data->free_list_len++;
1283
/* Set the bit indicating that this page is now an ibuf tree page
1286
bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
1288
ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
1292
mutex_exit(&ibuf_mutex);
1297
/*************************************************************************
1298
Removes a page from the free list and frees it to the fsp system. */
1301
ibuf_remove_free_page(
1302
/*==================*/
1303
ulint space, /* in: space id */
1304
ibuf_data_t* ibuf_data) /* in: ibuf data for the space */
1308
page_t* header_page;
1312
page_t* bitmap_page;
1316
/* Acquire the fsp latch before the ibuf header, obeying the latching
1318
mtr_x_lock(fil_space_get_latch(space), &mtr);
1320
header_page = ibuf_header_page_get(space, &mtr);
1322
/* Prevent pessimistic inserts to insert buffer trees for a while */
1323
mutex_enter(&ibuf_pessimistic_insert_mutex);
1327
mutex_enter(&ibuf_mutex);
1329
if (!ibuf_data_too_much_free(ibuf_data)) {
1331
mutex_exit(&ibuf_mutex);
1335
mutex_exit(&ibuf_pessimistic_insert_mutex);
1344
root = ibuf_tree_root_get(ibuf_data, space, &mtr2);
1346
page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
1350
/* NOTE that we must release the latch on the ibuf tree root
1351
because in fseg_free_page we access level 1 pages, and the root
1352
is a level 2 page. */
1355
mutex_exit(&ibuf_mutex);
1359
/* Since pessimistic inserts were prevented, we know that the
1360
page is still in the free list. NOTE that also deletes may take
1361
pages from the free list, but they take them from the start, and
1362
the free list was so long that they cannot have taken the last
1365
fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
1366
space, page_no, &mtr);
1369
mutex_enter(&ibuf_mutex);
1371
root = ibuf_tree_root_get(ibuf_data, space, &mtr);
1373
ut_ad(page_no == flst_get_last(root + PAGE_HEADER
1374
+ PAGE_BTR_IBUF_FREE_LIST, &mtr)
1377
page = buf_page_get(space, page_no, RW_X_LATCH, &mtr);
1379
buf_page_dbg_add_level(page, SYNC_TREE_NODE);
1381
/* Remove the page from the free list and update the ibuf size data */
1383
flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
1384
page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
1386
ibuf_data->seg_size--;
1387
ibuf_data->free_list_len--;
1389
mutex_exit(&ibuf_pessimistic_insert_mutex);
1391
/* Set the bit indicating that this page is no more an ibuf tree page
1394
bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
1396
ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
1400
mutex_exit(&ibuf_mutex);
1405
/***************************************************************************
1406
Frees excess pages from the ibuf free list. This function is called when an OS
1407
thread calls fsp services to allocate a new file segment, or a new page to a
1408
file segment, and the thread did not own the fsp latch before this call. */
1411
ibuf_free_excess_pages(
1412
/*===================*/
1413
ulint space) /* in: space id */
1415
ibuf_data_t* ibuf_data;
1418
ut_ad(rw_lock_own(fil_space_get_latch(space), RW_LOCK_EX));
1419
ut_ad(rw_lock_get_x_lock_count(fil_space_get_latch(space)) == 1);
1420
ut_ad(!ibuf_inside());
1422
/* NOTE: We require that the thread did not own the latch before,
1423
because then we know that we can obey the correct latching order
1426
ibuf_data = fil_space_get_ibuf_data(space);
1428
if (ibuf_data == NULL) {
1429
/* Not yet initialized */
1432
/*printf("Ibuf for space %lu not yet initialized\n", space); */
1438
/* Free at most a few pages at a time, so that we do not delay the
1439
requested service too much */
1441
for (i = 0; i < 4; i++) {
1443
mutex_enter(&ibuf_mutex);
1445
if (!ibuf_data_too_much_free(ibuf_data)) {
1447
mutex_exit(&ibuf_mutex);
1452
mutex_exit(&ibuf_mutex);
1454
ibuf_remove_free_page(space, ibuf_data);
1458
/*************************************************************************
1459
Reads page numbers from a leaf in an ibuf tree. */
1462
ibuf_get_merge_page_nos(
1463
/*====================*/
1464
/* out: a lower limit for the combined volume
1465
of records which will be merged */
1466
ibool contract,/* in: TRUE if this function is called to
1467
contract the tree, FALSE if this is called
1468
when a single page becomes full and we look
1469
if it pays to read also nearby pages */
1470
rec_t* first_rec,/* in: record from which we read down and
1471
up in the chain of records */
1472
ulint* page_nos,/* in/out: buffer for at least
1473
IBUF_MAX_N_PAGES_MERGED many page numbers;
1474
the page numbers are in an ascending order */
1475
ulint* n_stored)/* out: number of page numbers stored to
1476
page_nos in this function */
1479
ulint first_page_no;
1483
ulint volume_for_page;
1491
limit = ut_min(IBUF_MAX_N_PAGES_MERGED, buf_pool->curr_size / 4);
1493
page = buf_frame_align(first_rec);
1495
if (first_rec == page_get_supremum_rec(page)) {
1497
first_rec = page_rec_get_prev(first_rec);
1500
if (first_rec == page_get_infimum_rec(page)) {
1502
first_rec = page_rec_get_next(first_rec);
1505
if (first_rec == page_get_supremum_rec(page)) {
1511
first_page_no = ibuf_rec_get_page_no(first_rec);
1515
while ((rec != page_get_infimum_rec(page)) && (n_pages < limit)) {
1517
rec_page_no = ibuf_rec_get_page_no(rec);
1519
ut_ad(rec_page_no != 0);
1521
if (rec_page_no / IBUF_MERGE_AREA
1522
!= first_page_no / IBUF_MERGE_AREA) {
1527
if (rec_page_no != prev_page_no) {
1531
prev_page_no = rec_page_no;
1533
rec = page_rec_get_prev(rec);
1536
rec = page_rec_get_next(rec);
1540
volume_for_page = 0;
1542
while (*n_stored < limit) {
1543
if (rec == page_get_supremum_rec(page)) {
1546
rec_page_no = ibuf_rec_get_page_no(rec);
1547
ut_ad(rec_page_no > IBUF_TREE_ROOT_PAGE_NO);
1550
#ifdef UNIV_IBUF_DEBUG
1551
ut_a(*n_stored < IBUF_MAX_N_PAGES_MERGED);
1553
if (rec_page_no != prev_page_no) {
1554
if ((prev_page_no == first_page_no)
1556
|| (volume_for_page >
1557
((IBUF_MERGE_THRESHOLD - 1)
1558
* 4 * UNIV_PAGE_SIZE
1559
/ IBUF_PAGE_SIZE_PER_FREE_SPACE)
1560
/ IBUF_MERGE_THRESHOLD)) {
1562
page_nos[*n_stored] = prev_page_no;
1566
sum_volumes += volume_for_page;
1569
if (rec_page_no / IBUF_MERGE_AREA
1570
!= first_page_no / IBUF_MERGE_AREA) {
1575
volume_for_page = 0;
1578
if (rec_page_no == 1) {
1579
/* Supremum record */
1584
rec_volume = ibuf_rec_get_volume(rec);
1586
volume_for_page += rec_volume;
1588
prev_page_no = rec_page_no;
1590
rec = page_rec_get_next(rec);
1593
#ifdef UNIV_IBUF_DEBUG
1594
ut_a(*n_stored <= IBUF_MAX_N_PAGES_MERGED);
1596
/* printf("Ibuf merge batch %lu pages %lu volume\n", *n_stored,
1598
return(sum_volumes);
1601
/*************************************************************************
1602
Contracts insert buffer trees by reading pages to the buffer pool. */
1607
/* out: a lower limit for the combined size in bytes
1608
of entries which will be merged from ibuf trees to the
1609
pages read, 0 if ibuf is empty */
1610
ibool sync) /* in: TRUE if the caller wants to wait for the
1611
issued read with the highest tablespace address
1618
ibool all_trees_empty;
1619
ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
1624
ut_ad(!ibuf_inside());
1626
mutex_enter(&ibuf_mutex);
1628
ut_ad(ibuf_validate_low());
1630
/* Choose an ibuf tree at random */
1631
ibuf_rnd += 865558671;
1633
rnd_pos = ibuf_rnd % ibuf->size;
1635
all_trees_empty = TRUE;
1637
data = UT_LIST_GET_FIRST(ibuf->data_list);
1641
all_trees_empty = FALSE;
1643
if (rnd_pos < data->size) {
1648
rnd_pos -= data->size;
1651
data = UT_LIST_GET_NEXT(data_list, data);
1654
if (all_trees_empty) {
1655
mutex_exit(&ibuf_mutex);
1660
data = UT_LIST_GET_FIRST(ibuf->data_list);
1666
space = (data->index)->space;
1672
/* Open a cursor to a randomly chosen leaf of the tree, at a random
1673
position within the leaf */
1675
btr_pcur_open_at_rnd_pos(data->index, BTR_SEARCH_LEAF, &pcur, &mtr);
1678
&& 0 == page_get_n_recs(btr_pcur_get_page(&pcur))) {
1680
/* This tree is empty */
1687
btr_pcur_close(&pcur);
1689
mutex_exit(&ibuf_mutex);
1694
mutex_exit(&ibuf_mutex);
1696
sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur),
1697
page_nos, &n_stored);
1699
#ifdef UNIV_IBUF_DEBUG
1700
/* printf("Ibuf contract sync %lu pages %lu volume %lu\n", sync,
1701
n_stored, sum_sizes); */
1706
btr_pcur_close(&pcur);
1708
buf_read_ibuf_merge_pages(sync, space, page_nos, n_stored);
1710
return(sum_sizes + 1);
1713
/*************************************************************************
1714
Contract insert buffer trees after insert if they are too big. */
1717
ibuf_contract_after_insert(
1718
/*=======================*/
1719
ulint entry_size) /* in: size of a record which was inserted
1720
into an ibuf tree */
1726
mutex_enter(&ibuf_mutex);
1728
if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
1729
mutex_exit(&ibuf_mutex);
1736
if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_ON_INSERT_SYNC) {
1741
mutex_exit(&ibuf_mutex);
1743
/* Contract at least entry_size many bytes */
1747
while ((size > 0) && (sum_sizes < entry_size)) {
1749
size = ibuf_contract(sync);
1754
/*************************************************************************
1755
Gets an upper limit for the combined size of entries buffered in the insert
1756
buffer for a given page. */
1759
ibuf_get_volume_buffered(
1760
/*=====================*/
1761
/* out: upper limit for the volume of
1762
buffered inserts for the index page, in bytes;
1763
we may also return UNIV_PAGE_SIZE, if the
1764
entries for the index page span on several
1765
pages in the insert buffer */
1766
btr_pcur_t* pcur, /* in: pcur positioned at a place in an
1767
insert buffer tree where we would insert an
1768
entry for the index page whose number is
1769
page_no, latch mode has to be BTR_MODIFY_PREV
1770
or BTR_MODIFY_TREE */
1771
ulint space, /* in: space id */
1772
ulint page_no,/* in: page number of an index page */
1773
mtr_t* mtr) /* in: mtr */
1783
ut_ad((pcur->latch_mode == BTR_MODIFY_PREV)
1784
|| (pcur->latch_mode == BTR_MODIFY_TREE));
1786
/* Count the volume of records earlier in the alphabetical order than
1791
rec = btr_pcur_get_rec(pcur);
1793
page = buf_frame_align(rec);
1795
if (rec == page_get_supremum_rec(page)) {
1796
rec = page_rec_get_prev(rec);
1800
if (rec == page_get_infimum_rec(page)) {
1805
if (page_no != ibuf_rec_get_page_no(rec)) {
1810
volume += ibuf_rec_get_volume(rec);
1812
rec = page_rec_get_prev(rec);
1815
/* Look at the previous page */
1817
prev_page_no = btr_page_get_prev(page, mtr);
1819
if (prev_page_no == FIL_NULL) {
1824
prev_page = buf_page_get(space, prev_page_no, RW_X_LATCH, mtr);
1826
buf_page_dbg_add_level(prev_page, SYNC_TREE_NODE);
1828
rec = page_get_supremum_rec(prev_page);
1829
rec = page_rec_get_prev(rec);
1832
if (rec == page_get_infimum_rec(prev_page)) {
1834
/* We cannot go to yet a previous page, because we
1835
do not have the x-latch on it, and cannot acquire one
1836
because of the latching order: we have to give up */
1838
return(UNIV_PAGE_SIZE);
1841
if (page_no != ibuf_rec_get_page_no(rec)) {
1846
volume += ibuf_rec_get_volume(rec);
1848
rec = page_rec_get_prev(rec);
1852
rec = btr_pcur_get_rec(pcur);
1854
if (rec != page_get_supremum_rec(page)) {
1855
rec = page_rec_get_next(rec);
1859
if (rec == page_get_supremum_rec(page)) {
1864
if (page_no != ibuf_rec_get_page_no(rec)) {
1869
volume += ibuf_rec_get_volume(rec);
1871
rec = page_rec_get_next(rec);
1874
/* Look at the next page */
1876
next_page_no = btr_page_get_next(page, mtr);
1878
if (next_page_no == FIL_NULL) {
1883
next_page = buf_page_get(space, next_page_no, RW_X_LATCH, mtr);
1885
buf_page_dbg_add_level(next_page, SYNC_TREE_NODE);
1887
rec = page_get_infimum_rec(next_page);
1888
rec = page_rec_get_next(rec);
1891
if (rec == page_get_supremum_rec(next_page)) {
1895
return(UNIV_PAGE_SIZE);
1898
if (page_no != ibuf_rec_get_page_no(rec)) {
1903
volume += ibuf_rec_get_volume(rec);
1905
rec = page_rec_get_next(rec);
1909
/*************************************************************************
1910
Makes an index insert to the insert buffer, instead of directly to the disk
1911
page, if this is possible. */
1916
/* out: DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */
1917
ulint mode, /* in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
1918
dtuple_t* entry, /* in: index entry to insert */
1919
dict_index_t* index, /* in: index where to insert; must not be
1920
unique or clustered */
1921
ulint space, /* in: space id where to insert */
1922
ulint page_no,/* in: page number where to insert */
1923
que_thr_t* thr) /* in: query thread */
1930
dtuple_t* ibuf_entry;
1934
ibool old_bit_value;
1935
page_t* bitmap_page;
1936
ibuf_data_t* ibuf_data;
1937
dict_index_t* ibuf_index;
1941
ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
1945
ut_ad(!(index->type & (DICT_UNIQUE | DICT_CLUSTERED)));
1946
ut_ad(dtuple_check_typed(entry));
1950
ibuf_data = fil_space_get_ibuf_data(space);
1952
ibuf_index = ibuf_data->index;
1954
mutex_enter(&ibuf_mutex);
1956
if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) {
1957
/* Insert buffer is now too big, contract it but do not try
1960
mutex_exit(&ibuf_mutex);
1962
#ifdef UNIV_IBUF_DEBUG
1963
printf("Ibuf too big\n");
1965
/* Use synchronous contract (== TRUE) */
1966
ibuf_contract(TRUE);
1968
return(DB_STRONG_FAIL);
1971
mutex_exit(&ibuf_mutex);
1973
if (mode == BTR_MODIFY_TREE) {
1974
mutex_enter(&ibuf_pessimistic_insert_mutex);
1978
mutex_enter(&ibuf_mutex);
1980
while (!ibuf_data_enough_free_for_insert(ibuf_data)) {
1982
mutex_exit(&ibuf_mutex);
1986
mutex_exit(&ibuf_pessimistic_insert_mutex);
1988
err = ibuf_add_free_page(space, ibuf_data);
1990
if (err == DB_STRONG_FAIL) {
1995
mutex_enter(&ibuf_pessimistic_insert_mutex);
1999
mutex_enter(&ibuf_mutex);
2005
entry_size = rec_get_converted_size(entry);
2007
heap = mem_heap_create(512);
2009
/* Build the entry which contains the space id and the page number as
2010
the first fields and the type information for other fields, and which
2011
will be inserted to the insert buffer. */
2013
ibuf_entry = ibuf_entry_build(entry, page_no, heap);
2015
/* Open a cursor to the insert buffer tree to calculate if we can add
2016
the new entry to it without exceeding the free space limit for the
2021
btr_pcur_open(ibuf_index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
2023
/* Find out the volume of already buffered inserts for the same index
2025
buffered = ibuf_get_volume_buffered(&pcur, space, page_no, &mtr);
2027
#ifdef UNIV_IBUF_DEBUG
2028
ut_a((buffered == 0) || ibuf_count_get(space, page_no));
2030
mtr_start(&bitmap_mtr);
2032
bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &bitmap_mtr);
2034
/* We check if the index page is suitable for buffered entries */
2036
if (buf_page_peek(space, page_no)
2037
|| lock_rec_expl_exist_on_page(space, page_no)) {
2039
err = DB_STRONG_FAIL;
2041
mtr_commit(&bitmap_mtr);
2046
bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no,
2047
IBUF_BITMAP_FREE, &bitmap_mtr);
2049
if (buffered + entry_size + page_dir_calc_reserved_space(1)
2050
> ibuf_index_page_calc_free_from_bits(bits)) {
2052
mtr_commit(&bitmap_mtr);
2054
/* It may not fit */
2055
err = DB_STRONG_FAIL;
2059
ibuf_get_merge_page_nos(FALSE, btr_pcur_get_rec(&pcur),
2060
page_nos, &n_stored);
2064
/* Set the bitmap bit denoting that the insert buffer contains
2065
buffered entries for this index page, if the bit is not set yet */
2067
old_bit_value = ibuf_bitmap_page_get_bits(bitmap_page, page_no,
2068
IBUF_BITMAP_BUFFERED, &bitmap_mtr);
2069
if (!old_bit_value) {
2070
ibuf_bitmap_page_set_bits(bitmap_page, page_no,
2071
IBUF_BITMAP_BUFFERED, TRUE, &bitmap_mtr);
2074
mtr_commit(&bitmap_mtr);
2076
cursor = btr_pcur_get_btr_cur(&pcur);
2078
if (mode == BTR_MODIFY_PREV) {
2079
err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor,
2080
ibuf_entry, &ins_rec, thr,
2082
if (err == DB_SUCCESS) {
2083
/* Update the page max trx id field */
2084
page_update_max_trx_id(buf_frame_align(ins_rec),
2085
thr_get_trx(thr)->id);
2088
ut_ad(mode == BTR_MODIFY_TREE);
2090
/* We acquire an x-latch to the root page before the insert,
2091
because a pessimistic insert releases the tree x-latch,
2092
which would cause the x-latching of the root after that to
2093
break the latching order. */
2095
root = ibuf_tree_root_get(ibuf_data, space, &mtr);
2097
err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
2098
| BTR_NO_UNDO_LOG_FLAG,
2100
ibuf_entry, &ins_rec, thr,
2102
if (err == DB_SUCCESS) {
2103
/* Update the page max trx id field */
2104
page_update_max_trx_id(buf_frame_align(ins_rec),
2105
thr_get_trx(thr)->id);
2108
ibuf_data_sizes_update(ibuf_data, root, &mtr);
2112
#ifdef UNIV_IBUF_DEBUG
2113
if (err == DB_SUCCESS) {
2114
ibuf_count_set(space, page_no,
2115
ibuf_count_get(space, page_no) + 1);
2118
if (mode == BTR_MODIFY_TREE) {
2119
ut_ad(ibuf_validate_low());
2121
mutex_exit(&ibuf_mutex);
2122
mutex_exit(&ibuf_pessimistic_insert_mutex);
2126
btr_pcur_close(&pcur);
2129
mem_heap_free(heap);
2131
mutex_enter(&ibuf_mutex);
2133
if (err == DB_SUCCESS) {
2134
ibuf_data->empty = FALSE;
2135
ibuf_data->n_inserts++;
2138
mutex_exit(&ibuf_mutex);
2140
if ((mode == BTR_MODIFY_TREE) && (err == DB_SUCCESS)) {
2141
ibuf_contract_after_insert(entry_size);
2145
#ifdef UNIV_IBUF_DEBUG
2146
ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED);
2148
buf_read_ibuf_merge_pages(FALSE, space, page_nos, n_stored);
2154
/*************************************************************************
2155
Makes an index insert to the insert buffer, instead of directly to the disk
2156
page, if this is possible. Does not do insert if the index is clustered
2162
/* out: TRUE if success */
2163
dtuple_t* entry, /* in: index entry to insert */
2164
dict_index_t* index, /* in: index where to insert */
2165
ulint space, /* in: space id where to insert */
2166
ulint page_no,/* in: page number where to insert */
2167
que_thr_t* thr) /* in: query thread */
2171
ut_ad(dtuple_check_typed(entry));
2173
if (index->type & DICT_CLUSTERED || index->type & DICT_UNIQUE) {
2178
if (rec_get_converted_size(entry)
2179
>= page_get_free_space_of_empty() / 2) {
2183
err = ibuf_insert_low(BTR_MODIFY_PREV, entry, index, space, page_no,
2185
if (err == DB_FAIL) {
2186
err = ibuf_insert_low(BTR_MODIFY_TREE, entry, index, space,
2190
if (err == DB_SUCCESS) {
2191
#ifdef UNIV_IBUF_DEBUG
2192
/* printf("Ibuf insert for page no %lu of index %s\n", page_no,
2198
ut_a(err == DB_STRONG_FAIL);
2204
/************************************************************************
2205
During merge, inserts to an index page a secondary index entry extracted
2206
from the insert buffer. */
2209
ibuf_insert_to_index_page(
2210
/*======================*/
2211
dtuple_t* entry, /* in: buffered entry to insert */
2212
page_t* page, /* in: index page where the buffered entry
2214
mtr_t* mtr) /* in: mtr */
2216
page_cur_t page_cur;
2219
page_t* bitmap_page;
2222
ut_ad(ibuf_inside());
2223
ut_ad(dtuple_check_typed(entry));
2225
low_match = page_cur_search(page, entry, PAGE_CUR_LE, &page_cur);
2227
if (low_match == dtuple_get_n_fields(entry)) {
2228
rec = page_cur_get_rec(&page_cur);
2230
ut_ad(rec_get_deleted_flag(rec));
2232
btr_cur_del_unmark_for_ibuf(rec, mtr);
2234
rec = page_cur_tuple_insert(&page_cur, entry, mtr);
2237
/* If the record did not fit, reorganize */
2239
btr_page_reorganize(page, mtr);
2241
page_cur_search(page, entry, PAGE_CUR_LE, &page_cur);
2243
/* This time the record must fit */
2244
if (!page_cur_tuple_insert(&page_cur, entry, mtr)) {
2246
"Ibuf insert fails; page free %lu, dtuple size %lu\n",
2247
page_get_max_insert_size(page, 1),
2248
rec_get_converted_size(entry));
2250
bitmap_page = ibuf_bitmap_get_map_page(
2251
buf_frame_get_space_id(page),
2252
buf_frame_get_page_no(page),
2255
old_bits = ibuf_bitmap_page_get_bits(
2257
buf_frame_get_page_no(page),
2258
IBUF_BITMAP_FREE, mtr);
2260
printf("Bitmap bits %lu\n", old_bits);
2268
/*************************************************************************
2269
Deletes from ibuf the record on which pcur is positioned. If we have to
2270
resort to a pessimistic delete, this function commits mtr and closes
2276
/* out: TRUE if mtr was committed and pcur
2277
closed in this operation */
2278
ulint space, /* in: space id */
2279
ulint page_no,/* in: index page number where the record
2281
btr_pcur_t* pcur, /* in: pcur positioned on the record to
2282
delete, having latch mode BTR_MODIFY_LEAF */
2283
mtr_t* mtr) /* in: mtr */
2286
ibuf_data_t* ibuf_data;
2290
ut_ad(ibuf_inside());
2292
success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);
2295
#ifdef UNIV_IBUF_DEBUG
2296
ibuf_count_set(space, page_no,
2297
ibuf_count_get(space, page_no) - 1);
2302
/* We have to resort to a pessimistic delete from ibuf */
2303
btr_pcur_store_position(pcur, mtr);
2305
btr_pcur_commit_specify_mtr(pcur, mtr);
2307
ibuf_data = fil_space_get_ibuf_data(space);
2309
mutex_enter(&ibuf_mutex);
2313
ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
2315
root = ibuf_tree_root_get(ibuf_data, space, mtr);
2317
btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur),
2319
ut_a(err == DB_SUCCESS);
2321
#ifdef UNIV_IBUF_DEBUG
2322
ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1);
2324
ibuf_data_sizes_update(ibuf_data, root, mtr);
2326
ut_ad(ibuf_validate_low());
2328
btr_pcur_commit_specify_mtr(pcur, mtr);
2330
btr_pcur_close(pcur);
2332
mutex_exit(&ibuf_mutex);
2337
/*************************************************************************
2338
When an index page is read from a disk to the buffer pool, this function
2339
inserts to the page the possible index entries buffered in the insert buffer.
2340
The entries are deleted from the insert buffer. If the page is not read, but
2341
created in the buffer pool, this function deletes its buffered entries from
2342
the insert buffer; there can exist entries for such a page if the page
2343
belonged to an index which subsequently was dropped. */
2346
ibuf_merge_or_delete_for_page(
2347
/*==========================*/
2348
page_t* page, /* in: if page has been read from disk, pointer to
2349
the page x-latched, else NULL */
2350
ulint space, /* in: space id of the index page */
2351
ulint page_no)/* in: page number of the index page */
2356
dtuple_t* search_tuple;
2360
page_t* bitmap_page;
2361
ibuf_data_t* ibuf_data;
2370
/* TODO: get MySQL type info to use in ibuf_insert_to_index_page */
2372
#ifdef UNIV_LOG_DEBUG
2373
if (space % 2 != 0) {
2375
printf("No ibuf operation in a replicate space\n");
2380
if (ibuf_fixed_addr_page(page_no) || fsp_descr_page(page_no)
2381
|| trx_sys_hdr_page(space, page_no)) {
2387
bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
2389
if (!ibuf_bitmap_page_get_bits(bitmap_page, page_no,
2390
IBUF_BITMAP_BUFFERED, &mtr)) {
2391
/* No inserts buffered for this page */
2400
ibuf_data = fil_space_get_ibuf_data(space);
2404
heap = mem_heap_create(512);
2406
search_tuple = ibuf_search_tuple_build(page_no, heap);
2409
/* Move the ownership of the x-latch on the page to this OS
2410
thread, so that we can acquire a second x-latch on it. This
2411
is needed for the insert operations to the index page to pass
2412
the debug checks. */
2414
block = buf_block_align(page);
2415
rw_lock_x_lock_move_ownership(&(block->lock));
2424
success = buf_page_get_known_nowait(RW_X_LATCH, page,
2426
#ifdef UNIV_SYNC_DEBUG
2433
buf_page_dbg_add_level(page, SYNC_TREE_NODE);
2436
/* Position pcur in the insert buffer at the first entry for this
2438
btr_pcur_open_on_user_rec(ibuf_data->index, search_tuple, PAGE_CUR_GE,
2439
BTR_MODIFY_LEAF, &pcur, &mtr);
2441
if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
2442
ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
2448
ut_ad(btr_pcur_is_on_user_rec(&pcur, &mtr));
2450
ibuf_rec = btr_pcur_get_rec(&pcur);
2452
/* Check if the entry is for this index page */
2453
if (ibuf_rec_get_page_no(ibuf_rec) != page_no) {
2456
page_header_reset_last_insert(page, &mtr);
2463
/* Now we have at pcur a record which should be
2464
inserted to the index page; NOTE that the call below
2465
copies pointers to fields in ibuf_rec, and we must
2466
keep the latch to the ibuf_rec page until the
2467
insertion is finished! */
2469
max_trx_id = page_get_max_trx_id(
2470
buf_frame_align(ibuf_rec));
2472
page_update_max_trx_id(page, max_trx_id);
2474
entry = ibuf_build_entry_from_ibuf_rec(ibuf_rec, heap);
2475
#ifdef UNIV_IBUF_DEBUG
2476
volume += rec_get_converted_size(entry)
2477
+ page_dir_calc_reserved_space(1);
2479
ut_a(volume <= 4 * UNIV_PAGE_SIZE
2480
/ IBUF_PAGE_SIZE_PER_FREE_SPACE);
2482
ibuf_insert_to_index_page(entry, page, &mtr);
2487
/* Delete the record from ibuf */
2488
closed = ibuf_delete_rec(space, page_no, &pcur, &mtr);
2491
/* Deletion was pessimistic and mtr was committed:
2492
we start from the beginning again */
2497
if (btr_pcur_is_after_last_on_page(&pcur, &mtr)) {
2506
#ifdef UNIV_IBUF_DEBUG
2507
if (ibuf_count_get(space, page_no) > 0) {
2509
/* btr_print_tree(ibuf_data->index->tree, 100);
2513
bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
2515
ibuf_bitmap_page_set_bits(bitmap_page, page_no,
2516
IBUF_BITMAP_BUFFERED, FALSE, &mtr);
2518
old_bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no,
2519
IBUF_BITMAP_FREE, &mtr);
2520
new_bits = ibuf_index_page_calc_free(page);
2522
#ifdef UNIV_IBUF_DEBUG
2523
/* printf("Old bits %lu new bits %lu max size %lu\n", old_bits,
2525
page_get_max_insert_size_after_reorganize(page, 1)); */
2527
if (old_bits != new_bits) {
2529
ibuf_bitmap_page_set_bits(bitmap_page, page_no,
2535
ibuf_data->n_merges++;
2536
ibuf_data->n_merged_recs += n_inserts;
2538
#ifdef UNIV_IBUF_DEBUG
2539
/* printf("Ibuf merge %lu records volume %lu to page no %lu\n",
2540
n_inserts, volume, page_no); */
2543
btr_pcur_close(&pcur);
2545
mem_heap_free(heap);
2548
#ifdef UNIV_IBUF_DEBUG
2549
ut_a(ibuf_count_get(space, page_no) == 0);
2553
/**********************************************************************
2554
Validates the ibuf data structures when the caller owns ibuf_mutex. */
2557
ibuf_validate_low(void)
2558
/*===================*/
2559
/* out: TRUE if ok */
2564
ut_ad(mutex_own(&ibuf_mutex));
2568
data = UT_LIST_GET_FIRST(ibuf->data_list);
2571
sum_sizes += data->size;
2573
data = UT_LIST_GET_NEXT(data_list, data);
2576
ut_a(sum_sizes == ibuf->size);
2581
/**********************************************************************
2582
Prints info of ibuf. */
2589
#ifdef UNIV_IBUF_DEBUG
2592
mutex_enter(&ibuf_mutex);
2594
printf("Ibuf size %lu max size %lu\n", ibuf->size, ibuf->max_size);
2596
data = UT_LIST_GET_FIRST(ibuf->data_list);
2600
"Ibuf for space %lu: size %lu, free list len %lu, seg size %lu,\n",
2601
data->space, data->size, data->free_list_len, data->seg_size);
2602
printf("%lu inserts, %lu merged recs, %lu merges\n",
2603
data->n_inserts, data->n_merged_recs, data->n_merges);
2604
#ifdef UNIV_IBUF_DEBUG
2605
for (i = 0; i < IBUF_COUNT_N_PAGES; i++) {
2606
if (ibuf_count_get(data->space, i) > 0) {
2608
printf("Ibuf count for page %lu is %lu\n",
2609
i, ibuf_count_get(data->space, i));
2613
data = UT_LIST_GET_NEXT(data_list, data);
2616
mutex_exit(&ibuf_mutex);