/************************************************************************
The index tree adaptive search

Created 2/17/1996 Heikki Tuuri
*************************************************************************/
#include "page0page.h"
ulint	btr_search_this_is_zero = 0;	/* A dummy variable to fool the
					compiler */
#ifdef UNIV_SEARCH_PERF_STAT
ulint	btr_search_n_succ	= 0;
ulint	btr_search_n_hash_fail	= 0;
#endif /* UNIV_SEARCH_PERF_STAT */
byte	btr_sea_pad1[64];	/* padding to prevent other memory update
				hotspots from residing on the same memory
				cache line as btr_search_latch */
/* The latch protecting the adaptive search system: this latch protects the
(1) positions of records on those pages where a hash index has been built.
NOTE: It does not protect values of non-ordering fields within a record from
being updated in-place! We can use fact (1) to perform unique searches to
indexed records. */
rw_lock_t*	btr_search_latch_temp;	/* We will allocate the latch from
					dynamic memory to get it to the
					same DRAM page as other hotspot
					semaphores */
byte	btr_sea_pad2[64];	/* padding to prevent other memory update
				hotspots from residing on the same memory
				cache line */
btr_search_sys_t*	btr_search_sys;
/* If the number of searches on the page that could have used a hash index
exceeds the number of records on the page divided by this parameter, a hash
index is built on the page, assuming the global limit below has also been
reached */
#define BTR_SEARCH_PAGE_BUILD_LIMIT	16
/* The global limit for consecutive potentially successful hash searches,
before hash index building is started */
#define BTR_SEARCH_BUILD_LIMIT		100
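
/* To see how the two limits combine: the build heuristic in
btr_search_update_block_hash_info() below fires roughly as

	if (block->n_hash_helps > page_get_n_recs(block->frame)
				  / BTR_SEARCH_PAGE_BUILD_LIMIT
	    && info->n_hash_potential >= BTR_SEARCH_BUILD_LIMIT) {
		recommend building a hash index on the page;
	}

that is, the index as a whole must first accumulate
BTR_SEARCH_BUILD_LIMIT consecutive promising searches, and the
individual page must then collect more than
n_recs / BTR_SEARCH_PAGE_BUILD_LIMIT of them. */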
/************************************************************************
Builds a hash index on a page with the given parameters. If the page already
has a hash index with different parameters, the old hash index is removed.
If index is non-NULL, this function checks if n_fields and n_bytes are
sensible values, and does not build a hash index if not. */
static
void
btr_search_build_page_hash_index(
/*=============================*/
	dict_index_t*	index,	/* in: index for which to build, or NULL if
				not known */
	page_t*		page,	/* in: index page, s- or x-latched */
	ulint		n_fields,/* in: hash this many full fields */
	ulint		n_bytes,/* in: hash this many bytes from the next
				field */
	ibool		left_side);/* in: hash for searches from left side? */
/*********************************************************************
This function should be called before reserving any btr search mutex, if
the intended operation might add nodes to the search system hash table.
Because of the latching order, once we have reserved the btr search system
latch, we cannot allocate a free frame from the buffer pool. Checks that
there is a free buffer frame allocated for the hash table heap in the btr
search system. If not, allocates a free frame for the heap. This check makes
it probable that, when we have reserved the btr search system latch and need
to allocate a new node for the hash table, the allocation will succeed.
However, the check does not guarantee success. */
static
void
btr_search_check_free_space_in_heap(void)
/*=====================================*/
{
	hash_table_t*	table;
	mem_heap_t*	heap;
	buf_frame_t*	frame;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	table = btr_search_sys->hash_index;

	heap = table->heap;

	/* Note that we peek the value of heap->free_block without reserving
	the latch: this is ok, because we will not guarantee that there will
	be enough free space in the hash table. */

	if (heap->free_block == NULL) {
		frame = buf_frame_alloc();

		rw_lock_x_lock(&btr_search_latch);

		if (heap->free_block == NULL) {
			heap->free_block = frame;
		} else {
			buf_frame_free(frame);
		}

		rw_lock_x_unlock(&btr_search_latch);
	}
}
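
/* Typical usage, sketched from the callers later in this file: the
check is made while holding no btr search latch, just before the caller
x-latches btr_search_latch to insert hash nodes:

	btr_search_check_free_space_in_heap();
	rw_lock_x_lock(&btr_search_latch);
	... ha_insert_for_fold(...) ...
	rw_lock_x_unlock(&btr_search_latch);
*/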
/*********************************************************************
Creates and initializes the adaptive search system at a database start. */

void
btr_search_sys_create(
/*==================*/
	ulint	hash_size)	/* in: hash index hash table size */
{
	/* We allocate the search latch from dynamic memory:
	see above at the global variable definition */

	btr_search_latch_temp = mem_alloc(sizeof(rw_lock_t));

	rw_lock_create(&btr_search_latch, SYNC_SEARCH_SYS);

	btr_search_sys = mem_alloc(sizeof(btr_search_sys_t));

	btr_search_sys->hash_index = ha_create(TRUE, hash_size, 0, 0);
}
/*********************************************************************
Creates and initializes a search info struct. */

btr_search_t*
btr_search_info_create(
/*===================*/
				/* out, own: search info struct */
	mem_heap_t*	heap)	/* in: heap where created */
{
	btr_search_t*	info;

	info = mem_heap_alloc(heap, sizeof(btr_search_t));

#ifdef UNIV_DEBUG
	info->magic_n = BTR_SEARCH_MAGIC_N;
#endif /* UNIV_DEBUG */

	info->ref_count = 0;
	info->root_guess = NULL;

	info->hash_analysis = 0;
	info->n_hash_potential = 0;

	info->last_hash_succ = FALSE;

#ifdef UNIV_SEARCH_PERF_STAT
	info->n_hash_succ = 0;
	info->n_hash_fail = 0;
	info->n_patt_succ = 0;
	info->n_searches = 0;
#endif /* UNIV_SEARCH_PERF_STAT */

	/* Set some sensible values */
	info->n_fields = 1;
	info->n_bytes = 0;

	info->left_side = TRUE;

	return(info);
}
/*********************************************************************
Returns the value of ref_count. The value is protected by
btr_search_latch. */

ulint
btr_search_info_get_ref_count(
/*==========================*/
				/* out: ref_count value. */
	btr_search_t*	info)	/* in: search info. */
{
	ulint	ret;

	ut_ad(info);

#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	rw_lock_s_lock(&btr_search_latch);
	ret = info->ref_count;
	rw_lock_s_unlock(&btr_search_latch);

	return(ret);
}
/*************************************************************************
Updates the search info of an index about hash successes. NOTE that info
is NOT protected by any semaphore, to save CPU time! Do not assume its fields
are consistent. */
static
void
btr_search_info_update_hash(
/*========================*/
	btr_search_t*	info,	/* in/out: search info */
	btr_cur_t*	cursor)	/* in: cursor which was just positioned */
{
	dict_index_t*	index;
	ulint		n_unique;
	int		cmp;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	index = cursor->index;

	if (index->type & DICT_IBUF) {
		/* So many deletes are performed on an insert buffer tree
		that we do not consider a hash index useful on it: */

		return;
	}

	n_unique = dict_index_get_n_unique_in_tree(index);

	if (info->n_hash_potential == 0) {

		goto set_new_recomm;
	}

	/* Test if the search would have succeeded using the recommended
	hash prefix */

	if (info->n_fields >= n_unique && cursor->up_match >= n_unique) {
increment_potential:
		info->n_hash_potential++;

		return;
	}

	cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
			  cursor->low_match, cursor->low_bytes);

	if (info->left_side ? cmp <= 0 : cmp > 0) {

		goto set_new_recomm;
	}

	cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
			  cursor->up_match, cursor->up_bytes);

	if (info->left_side ? cmp <= 0 : cmp > 0) {

		goto increment_potential;
	}

	return;

set_new_recomm:
	/* We have to set a new recommendation; skip the hash analysis
	for a while to avoid unnecessary CPU time usage when there is no
	chance for success */

	info->hash_analysis = 0;

	cmp = ut_pair_cmp(cursor->up_match, cursor->up_bytes,
			  cursor->low_match, cursor->low_bytes);
	if (cmp == 0) {
		info->n_hash_potential = 0;

		/* For extra safety, we set some sensible values here */

		info->n_fields = 1;
		info->n_bytes = 0;
		info->left_side = TRUE;

	} else if (cmp > 0) {
		info->n_hash_potential = 1;

		if (cursor->up_match >= n_unique) {
			info->n_fields = n_unique;
			info->n_bytes = 0;
		} else if (cursor->low_match < cursor->up_match) {
			info->n_fields = cursor->low_match + 1;
			info->n_bytes = 0;
		} else {
			info->n_fields = cursor->low_match;
			info->n_bytes = cursor->low_bytes + 1;
		}

		info->left_side = TRUE;
	} else {
		info->n_hash_potential = 1;

		if (cursor->low_match >= n_unique) {
			info->n_fields = n_unique;
			info->n_bytes = 0;
		} else if (cursor->low_match > cursor->up_match) {
			info->n_fields = cursor->up_match + 1;
			info->n_bytes = 0;
		} else {
			info->n_fields = cursor->up_match;
			info->n_bytes = cursor->up_bytes + 1;
		}

		info->left_side = FALSE;
	}
}
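
/* A worked example of the recommendation logic above, with hypothetical
values: suppose a search tuple matches the record before the cursor
position on 1 field (low_match) and the record after it on 2 fields
(up_match). Then cmp > 0, and the recommendation becomes
n_fields = low_match + 1 = 2, n_bytes = 0, left_side = TRUE: a 2-field
hash prefix would have been enough to point a left-side search at this
position. */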
/*************************************************************************
Updates the block search info on hash successes. NOTE that info and
block->n_hash_helps, n_fields, n_bytes, side are NOT protected by any
semaphore, to save CPU time! Do not assume the fields are consistent. */
static
ibool
btr_search_update_block_hash_info(
/*==============================*/
				/* out: TRUE if building a (new) hash index on
				the block is recommended */
	btr_search_t*	info,	/* in: search info */
	buf_block_t*	block,	/* in: buffer block */
	btr_cur_t*	cursor __attribute__((unused)))
				/* in: cursor */
{
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
	ut_ad(rw_lock_own(&((buf_block_t*) block)->lock, RW_LOCK_SHARED)
	      || rw_lock_own(&((buf_block_t*) block)->lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(cursor);

	info->last_hash_succ = FALSE;

	ut_a(block->magic_n == BUF_BLOCK_MAGIC_N);
	ut_ad(info->magic_n == BTR_SEARCH_MAGIC_N);

	if ((block->n_hash_helps > 0)
	    && (info->n_hash_potential > 0)
	    && (block->n_fields == info->n_fields)
	    && (block->n_bytes == info->n_bytes)
	    && (block->left_side == info->left_side)) {

		if ((block->is_hashed)
		    && (block->curr_n_fields == info->n_fields)
		    && (block->curr_n_bytes == info->n_bytes)
		    && (block->curr_left_side == info->left_side)) {

			/* The search would presumably have succeeded using
			the hash index */

			info->last_hash_succ = TRUE;
		}

		block->n_hash_helps++;
	} else {
		block->n_hash_helps = 1;
		block->n_fields = info->n_fields;
		block->n_bytes = info->n_bytes;
		block->left_side = info->left_side;
	}

#ifdef UNIV_DEBUG
	if (cursor->index->table->does_not_fit_in_memory) {
		block->n_hash_helps = 0;
	}
#endif /* UNIV_DEBUG */

	if ((block->n_hash_helps > page_get_n_recs(block->frame)
	     / BTR_SEARCH_PAGE_BUILD_LIMIT)
	    && (info->n_hash_potential >= BTR_SEARCH_BUILD_LIMIT)) {

		if ((!block->is_hashed)
		    || (block->n_hash_helps
			> 2 * page_get_n_recs(block->frame))
		    || (block->n_fields != block->curr_n_fields)
		    || (block->n_bytes != block->curr_n_bytes)
		    || (block->left_side != block->curr_left_side)) {

			/* Build a new hash index on the page */

			return(TRUE);
		}
	}

	return(FALSE);
}
/*************************************************************************
Updates a hash node reference when it has been unsuccessfully used in a
search which could have succeeded with the used hash parameters. This can
happen because when building a hash index for a page, we do not check
what happens at page boundaries, and therefore there can be misleading
hash nodes. Also, collisions in the fold value can lead to misleading
references. This function lazily fixes these imperfections in the hash
index. */
static
void
btr_search_update_hash_ref(
/*=======================*/
	btr_search_t*	info,	/* in: search info */
	buf_block_t*	block,	/* in: buffer block where cursor positioned */
	btr_cur_t*	cursor)	/* in: cursor */
{
	ulint	fold;
	rec_t*	rec;
	dulint	index_id;

	ut_ad(cursor->flag == BTR_CUR_HASH_FAIL);
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
	      || rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(buf_block_align(btr_cur_get_rec(cursor)) == block);
	ut_a(!block->is_hashed || block->index == cursor->index);

	if ((block->is_hashed)
	    && (info->n_hash_potential > 0)
	    && (block->curr_n_fields == info->n_fields)
	    && (block->curr_n_bytes == info->n_bytes)
	    && (block->curr_left_side == info->left_side)) {
		mem_heap_t*	heap		= NULL;
		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
		*offsets_ = (sizeof offsets_) / sizeof *offsets_;

		rec = btr_cur_get_rec(cursor);

		if (!page_rec_is_user_rec(rec)) {

			return;
		}

		index_id = cursor->index->id;
		fold = rec_fold(rec,
				rec_get_offsets(rec, cursor->index, offsets_,
						ULINT_UNDEFINED, &heap),
				block->curr_n_fields,
				block->curr_n_bytes, index_id);
		if (UNIV_LIKELY_NULL(heap)) {
			mem_heap_free(heap);
		}
#ifdef UNIV_SYNC_DEBUG
		ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

		ha_insert_for_fold(btr_search_sys->hash_index, fold, rec);
	}
}
/*************************************************************************
Updates the search info. */

void
btr_search_info_update_slow(
/*========================*/
	btr_search_t*	info,	/* in/out: search info */
	btr_cur_t*	cursor)	/* in: cursor which was just positioned */
{
	buf_block_t*	block;
	ibool		build_index;
	ulint*		params;
	ulint*		params2;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	block = buf_block_align(btr_cur_get_rec(cursor));

	/* NOTE that the following two function calls do NOT protect
	info or block->n_fields etc. with any semaphore, to save CPU time!
	We cannot assume the fields are consistent when we return from
	those functions! */

	btr_search_info_update_hash(info, cursor);

	build_index = btr_search_update_block_hash_info(info, block, cursor);

	if (build_index || (cursor->flag == BTR_CUR_HASH_FAIL)) {

		btr_search_check_free_space_in_heap();
	}

	if (cursor->flag == BTR_CUR_HASH_FAIL) {
		/* Update the hash node reference, if appropriate */

#ifdef UNIV_SEARCH_PERF_STAT
		btr_search_n_hash_fail++;
#endif /* UNIV_SEARCH_PERF_STAT */

		rw_lock_x_lock(&btr_search_latch);

		btr_search_update_hash_ref(info, block, cursor);

		rw_lock_x_unlock(&btr_search_latch);
	}

	if (build_index) {
		/* Note that since we did not protect block->n_fields etc.
		with any semaphore, the values can be inconsistent. We have
		to check inside the function call that they make sense. We
		also malloc an array and store the values there to make sure
		the compiler does not let the function call parameters change
		inside the called function. It might be that the compiler
		would optimize the call just to pass pointers to block. */

		params = mem_alloc(3 * sizeof(ulint));
		params[0] = block->n_fields;
		params[1] = block->n_bytes;
		params[2] = block->left_side;

		/* Make sure the compiler cannot deduce the values and do
		optimizations */

		params2 = params + btr_search_this_is_zero;

		btr_search_build_page_hash_index(cursor->index,
						 block->frame,
						 params2[0],
						 params2[1],
						 params2[2]);
		mem_free(params);
	}
}
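
/* The btr_search_this_is_zero trick above, as a minimal stand-alone
sketch (variable names other than btr_search_this_is_zero are
hypothetical):

	extern ulint	btr_search_this_is_zero;	always 0
	ulint		vals[3];
	ulint*		p = vals + btr_search_this_is_zero;

Because the compiler cannot prove that the global is zero, it cannot
replace p[0], p[1], p[2] with fresh reads of the unprotected, possibly
changing block->n_fields etc.; the values copied into the array are
what actually get passed. */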
/**********************************************************************
Checks if a guessed position for a tree cursor is right. Note that if
mode is PAGE_CUR_LE, which is used in inserts, and the function returns
TRUE, then cursor->up_match and cursor->low_match both have sensible values. */
static
ibool
btr_search_check_guess(
/*===================*/
				/* out: TRUE if success */
	btr_cur_t*	cursor,	/* in: guessed cursor position */
	ibool		can_only_compare_to_cursor_rec,
				/* in: if we do not have a latch on the page
				of cursor, but only a latch on
				btr_search_latch, then ONLY the columns
				of the record UNDER the cursor are
				protected, not the next or previous record
				in the chain: we cannot look at the next or
				previous record to check our guess! */
	dtuple_t*	tuple,	/* in: data tuple */
	ulint		mode,	/* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
				or PAGE_CUR_GE */
	mtr_t*		mtr)	/* in: mtr */
{
	rec_t*		rec;
	ulint		n_unique;
	ulint		match;
	ulint		bytes;
	int		cmp;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	ibool		success		= FALSE;
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;

	n_unique = dict_index_get_n_unique_in_tree(cursor->index);

	rec = btr_cur_get_rec(cursor);

	ut_ad(page_rec_is_user_rec(rec));

	match = 0;
	bytes = 0;

	offsets = rec_get_offsets(rec, cursor->index, offsets,
				  n_unique, &heap);
	cmp = page_cmp_dtuple_rec_with_match(tuple, rec,
					     offsets, &match, &bytes);

	if (mode == PAGE_CUR_GE) {
		if (cmp == 1) {
			goto exit_func;
		}

		cursor->up_match = match;

		if (match >= n_unique) {
			success = TRUE;
			goto exit_func;
		}
	} else if (mode == PAGE_CUR_LE) {
		if (cmp == -1) {
			goto exit_func;
		}

		cursor->low_match = match;

	} else if (mode == PAGE_CUR_G) {
		if (cmp != -1) {
			goto exit_func;
		}
	} else if (mode == PAGE_CUR_L) {
		if (cmp != 1) {
			goto exit_func;
		}
	}

	if (can_only_compare_to_cursor_rec) {
		/* Since we could not determine if our guess is right just by
		looking at the record under the cursor, return FALSE */
		goto exit_func;
	}

	match = 0;
	bytes = 0;

	if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)) {
		rec_t*	prev_rec;

		ut_ad(!page_rec_is_infimum(rec));

		prev_rec = page_rec_get_prev(rec);

		if (page_rec_is_infimum(prev_rec)) {
			success = btr_page_get_prev(
				buf_frame_align(prev_rec), mtr) == FIL_NULL;

			goto exit_func;
		}

		offsets = rec_get_offsets(prev_rec, cursor->index, offsets,
					  n_unique, &heap);
		cmp = page_cmp_dtuple_rec_with_match(tuple, prev_rec,
						     offsets, &match, &bytes);
		if (mode == PAGE_CUR_GE) {
			success = cmp == 1;
		} else {
			success = cmp != -1;
		}

		goto exit_func;
	} else {
		rec_t*	next_rec;

		ut_ad(!page_rec_is_supremum(rec));

		next_rec = page_rec_get_next(rec);

		if (page_rec_is_supremum(next_rec)) {
			if (btr_page_get_next(
				    buf_frame_align(next_rec), mtr)
			    == FIL_NULL) {

				cursor->up_match = 0;
				success = TRUE;
			}

			goto exit_func;
		}

		offsets = rec_get_offsets(next_rec, cursor->index, offsets,
					  n_unique, &heap);
		cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec,
						     offsets, &match, &bytes);
		if (mode == PAGE_CUR_LE) {
			success = cmp == -1;
			cursor->up_match = match;
		} else {
			success = cmp != 1;
		}
	}
exit_func:
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
	return(success);
}
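
/* Example of the check above, with hypothetical data: for
mode == PAGE_CUR_GE and a search tuple (5), a guessed position on a
record (5) is accepted outright only if the matched prefix covers
n_unique fields; otherwise the previous record must also compare less
than the tuple. This is why can_only_compare_to_cursor_rec forces a
FALSE return: without a page latch the neighboring records cannot be
read safely. */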
/**********************************************************************
Tries to guess the right search position based on the hash search info
of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
and the function returns TRUE, then cursor->up_match and cursor->low_match
both have sensible values. */

ibool
btr_search_guess_on_hash(
/*=====================*/
					/* out: TRUE if succeeded */
	dict_index_t*	index,		/* in: index */
	btr_search_t*	info,		/* in: index search info */
	dtuple_t*	tuple,		/* in: logical record */
	ulint		mode,		/* in: PAGE_CUR_L, ... */
	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ...;
					NOTE that only if has_search_latch
					is 0, we will have a latch set on
					the cursor page, otherwise we assume
					the caller uses his search latch
					to protect the record! */
	btr_cur_t*	cursor,		/* out: tree cursor */
	ulint		has_search_latch,/* in: latch mode the caller
					currently has on btr_search_latch:
					RW_S_LATCH, RW_X_LATCH, or 0 */
	mtr_t*		mtr)		/* in: mtr */
{
	buf_block_t*	block;
	rec_t*		rec;
	page_t*		page;
	ulint		fold;
	ulint		tuple_n_fields;
	dulint		index_id;
	ibool		can_only_compare_to_cursor_rec = TRUE;
#ifdef notdefined
	btr_cur_t	cursor2;
	btr_pcur_t	pcur;
#endif
	ut_ad(index && info && tuple && cursor && mtr);
	ut_ad((latch_mode == BTR_SEARCH_LEAF)
	      || (latch_mode == BTR_MODIFY_LEAF));

	/* Note that, for efficiency, the struct info may not be protected by
	any latch here! */

	if (UNIV_UNLIKELY(info->n_hash_potential == 0)) {

		return(FALSE);
	}

	cursor->n_fields = info->n_fields;
	cursor->n_bytes = info->n_bytes;

	tuple_n_fields = dtuple_get_n_fields(tuple);

	if (UNIV_UNLIKELY(tuple_n_fields < cursor->n_fields)) {

		return(FALSE);
	}

	if (UNIV_UNLIKELY(tuple_n_fields == cursor->n_fields)
	    && (cursor->n_bytes > 0)) {

		return(FALSE);
	}

	index_id = index->id;

#ifdef UNIV_SEARCH_PERF_STAT
	info->n_hash_succ++;
#endif /* UNIV_SEARCH_PERF_STAT */
	fold = dtuple_fold(tuple, cursor->n_fields, cursor->n_bytes, index_id);

	cursor->fold = fold;
	cursor->flag = BTR_CUR_HASH;

	if (UNIV_LIKELY(!has_search_latch)) {
		rw_lock_s_lock(&btr_search_latch);
	}

	ut_ad(btr_search_latch.writer != RW_LOCK_EX);
	ut_ad(btr_search_latch.reader_count > 0);

	rec = ha_search_and_get_data(btr_search_sys->hash_index, fold);

	if (UNIV_UNLIKELY(!rec)) {

		goto failure_unlock;
	}

	page = buf_frame_align(rec);

	if (UNIV_LIKELY(!has_search_latch)) {

		if (UNIV_UNLIKELY(
			    !buf_page_get_known_nowait(latch_mode, page,
						       BUF_MAKE_YOUNG,
						       __FILE__, __LINE__,
						       mtr))) {
			goto failure_unlock;
		}

		rw_lock_s_unlock(&btr_search_latch);
		can_only_compare_to_cursor_rec = FALSE;

#ifdef UNIV_SYNC_DEBUG
		buf_page_dbg_add_level(page, SYNC_TREE_NODE_FROM_HASH);
#endif /* UNIV_SYNC_DEBUG */
	}

	block = buf_block_align(page);

	if (UNIV_UNLIKELY(block->state == BUF_BLOCK_REMOVE_HASH)) {
		if (UNIV_LIKELY(!has_search_latch)) {

			btr_leaf_page_release(page, latch_mode, mtr);
		}

		goto failure;
	}

	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
	ut_ad(page_rec_is_user_rec(rec));

	btr_cur_position(index, rec, cursor);

	/* Check the validity of the guess within the page */

	/* If we only have the latch on btr_search_latch, not on the
	page, it only protects the columns of the record the cursor
	is positioned on. We cannot look at the next of the previous
	record to determine if our guess for the cursor position is
	right. */
	if (UNIV_EXPECT(
		    ut_dulint_cmp(index_id, btr_page_get_index_id(page)), 0)
	    || !btr_search_check_guess(cursor,
				       can_only_compare_to_cursor_rec,
				       tuple, mode, mtr)) {
		if (UNIV_LIKELY(!has_search_latch)) {
			btr_leaf_page_release(page, latch_mode, mtr);
		}

		goto failure;
	}

	if (UNIV_LIKELY(info->n_hash_potential < BTR_SEARCH_BUILD_LIMIT + 5)) {

		info->n_hash_potential++;
	}

#ifdef notdefined
	/* These lines of code can be used in a debug version to check
	the correctness of the searched cursor position: */

	info->last_hash_succ = FALSE;

	/* Currently, does not work if the following fails: */
	ut_ad(!has_search_latch);

	btr_leaf_page_release(page, latch_mode, mtr);

	btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
				    &cursor2, 0, mtr);
	if (mode == PAGE_CUR_GE
	    && page_rec_is_supremum(btr_cur_get_rec(&cursor2))) {

		/* If mode is PAGE_CUR_GE, then the binary search
		in the index tree may actually take us to the supremum
		of the previous page */

		info->last_hash_succ = FALSE;

		btr_pcur_open_on_user_rec(index, tuple, mode, latch_mode,
					  &pcur, mtr);
		ut_ad(btr_pcur_get_rec(&pcur) == btr_cur_get_rec(cursor));
	} else {
		ut_ad(btr_cur_get_rec(&cursor2) == btr_cur_get_rec(cursor));
	}

	/* NOTE that it is theoretically possible that the above assertions
	fail if the page of the cursor gets removed from the buffer pool
	meanwhile! Thus it might not be a bug. */
#endif
	info->last_hash_succ = TRUE;

#ifdef UNIV_SEARCH_PERF_STAT
	btr_search_n_succ++;
#endif /* UNIV_SEARCH_PERF_STAT */
	if (UNIV_LIKELY(!has_search_latch)
	    && buf_block_peek_if_too_old(block)) {

		buf_page_make_young(page);
	}

	/* Increment the page get statistics though we did not really
	fix the page: for user info only */

	buf_pool->n_page_gets++;

	return(TRUE);

	/*-------------------------------------------*/
failure_unlock:
	if (UNIV_LIKELY(!has_search_latch)) {
		rw_lock_s_unlock(&btr_search_latch);
	}
failure:
	cursor->flag = BTR_CUR_HASH_FAIL;

#ifdef UNIV_SEARCH_PERF_STAT
	info->n_hash_fail++;

	if (info->n_hash_succ > 0) {
		info->n_hash_succ--;
	}
#endif /* UNIV_SEARCH_PERF_STAT */
	info->last_hash_succ = FALSE;

	return(FALSE);
}
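
/* Caller sketch (hedged; this mirrors how btr_cur_search_to_nth_level()
uses the function): the optimistic hash lookup is tried first, and only
on failure does the caller descend the B-tree from the root:

	if (btr_search_guess_on_hash(index, info, tuple, mode,
				     latch_mode, cursor,
				     has_search_latch, mtr)) {
		the hash shortcut succeeded: cursor is positioned
	} else {
		perform the normal search from the root
	}
*/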
/************************************************************************
Drops a page hash index. */

void
btr_search_drop_page_hash_index(
/*============================*/
	page_t*	page)	/* in: index page, s- or x-latched, or an index page
			for which we know that block->buf_fix_count == 0 */
{
	hash_table_t*	table;
	buf_block_t*	block;
	ulint		n_fields;
	ulint		n_bytes;
	rec_t*		rec;
	ulint		fold;
	ulint		prev_fold;
	dulint		index_id;
	ulint		n_cached;
	ulint		n_recs;
	ulint*		folds;
	ulint		i;
	mem_heap_t*	heap;
	dict_index_t*	index;
	ulint*		offsets;

retry:
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	rw_lock_s_lock(&btr_search_latch);

	block = buf_block_align(page);

	if (UNIV_LIKELY(!block->is_hashed)) {

		rw_lock_s_unlock(&btr_search_latch);

		return;
	}

	table = btr_search_sys->hash_index;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
	      || rw_lock_own(&(block->lock), RW_LOCK_EX)
	      || (block->buf_fix_count == 0));
#endif /* UNIV_SYNC_DEBUG */

	n_fields = block->curr_n_fields;
	n_bytes = block->curr_n_bytes;
	index = block->index;

	/* NOTE: The fields of block must not be accessed after
	releasing btr_search_latch, as the index page might only
	be s-latched! */

	rw_lock_s_unlock(&btr_search_latch);

	ut_a(n_fields + n_bytes > 0);

	n_recs = page_get_n_recs(page);

	/* Calculate and cache fold values into an array for fast deletion
	from the hash index */

	folds = mem_alloc(n_recs * sizeof(ulint));

	n_cached = 0;

	rec = page_get_infimum_rec(page);
	rec = page_rec_get_next(rec);

	index_id = btr_page_get_index_id(page);

	ut_a(0 == ut_dulint_cmp(index_id, index->id));

	prev_fold = 0;

	heap = NULL;
	offsets = NULL;

	while (!page_rec_is_supremum(rec)) {
		offsets = rec_get_offsets(rec, index, offsets,
					  n_fields + (n_bytes > 0), &heap);
		ut_a(rec_offs_n_fields(offsets) == n_fields + (n_bytes > 0));
		fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);

		if (fold == prev_fold && prev_fold != 0) {

			goto next_rec;
		}

		/* Remove all hash nodes pointing to this page from the
		hash chain */

		folds[n_cached] = fold;
		n_cached++;
next_rec:
		rec = page_rec_get_next(rec);
		prev_fold = fold;
	}

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	rw_lock_x_lock(&btr_search_latch);

	if (UNIV_UNLIKELY(!block->is_hashed)) {
		/* Someone else has meanwhile dropped the hash index */

		goto cleanup;
	}

	ut_a(block->index == index);

	if (UNIV_UNLIKELY(block->curr_n_fields != n_fields)
	    || UNIV_UNLIKELY(block->curr_n_bytes != n_bytes)) {

		/* Someone else has meanwhile built a new hash index on the
		page, with different parameters */

		rw_lock_x_unlock(&btr_search_latch);

		mem_free(folds);
		goto retry;
	}

	for (i = 0; i < n_cached; i++) {

		ha_remove_all_nodes_to_page(table, folds[i], page);
	}

	ut_a(index->search_info->ref_count > 0);
	index->search_info->ref_count--;

	block->is_hashed = FALSE;
	block->index = NULL;
cleanup:
	if (UNIV_UNLIKELY(block->n_pointers)) {
		/* Corruption */
		ut_print_timestamp(stderr);
		fprintf(stderr,
			"  InnoDB: Corruption of adaptive hash index."
			" After dropping\n"
			"InnoDB: the hash index to a page of %s,"
			" still %lu hash nodes remain.\n",
			index->name, (ulong) block->n_pointers);
		rw_lock_x_unlock(&btr_search_latch);

		btr_search_validate();
	} else {
		rw_lock_x_unlock(&btr_search_latch);
	}

	mem_free(folds);
}
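
/* Note on the pattern above: the fold values are computed without
holding btr_search_latch (only the page latch, or buf_fix_count == 0,
protects the records), and the x-latch is taken only for the actual
hash deletions. The re-checks of block->is_hashed, curr_n_fields and
curr_n_bytes after acquiring the x-latch, with a retry on mismatch,
are what make this optimism safe. */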
/************************************************************************
Drops a page hash index when a page is freed from a fseg to the file system.
Drops possible hash index if the page happens to be in the buffer pool. */

void
btr_search_drop_page_hash_when_freed(
/*=================================*/
	ulint	space,		/* in: space id */
	ulint	page_no)	/* in: page number */
{
	ibool	is_hashed;
	page_t*	page;
	mtr_t	mtr;

	is_hashed = buf_page_peek_if_search_hashed(space, page_no);

	if (!is_hashed) {

		return;
	}

	mtr_start(&mtr);

	/* We assume that if the caller has a latch on the page, then the
	caller has already dropped the hash index for the page, and we never
	get here. Therefore we can acquire the s-latch to the page without
	having to fear a deadlock. */

	page = buf_page_get_gen(space, page_no, RW_S_LATCH, NULL,
				BUF_GET_IF_IN_POOL, __FILE__, __LINE__,
				&mtr);
	/* Because the buffer pool mutex was released by
	buf_page_peek_if_search_hashed(), it is possible that the
	block was removed from the buffer pool by another thread
	before buf_page_get_gen() got a chance to acquire the buffer
	pool mutex again. Thus, we must check for a NULL return. */

	if (UNIV_LIKELY(page != NULL)) {

#ifdef UNIV_SYNC_DEBUG
		buf_page_dbg_add_level(page, SYNC_TREE_NODE_FROM_HASH);
#endif /* UNIV_SYNC_DEBUG */

		btr_search_drop_page_hash_index(page);
	}

	mtr_commit(&mtr);
}
/************************************************************************
Builds a hash index on a page with the given parameters. If the page already
has a hash index with different parameters, the old hash index is removed.
If index is non-NULL, this function checks if n_fields and n_bytes are
sensible values, and does not build a hash index if not. */
static
void
btr_search_build_page_hash_index(
/*=============================*/
	dict_index_t*	index,	/* in: index for which to build */
	page_t*		page,	/* in: index page, s- or x-latched */
	ulint		n_fields,/* in: hash this many full fields */
	ulint		n_bytes,/* in: hash this many bytes from the next
				field */
	ibool		left_side)/* in: hash for searches from left side? */
{
	hash_table_t*	table;
	buf_block_t*	block;
	rec_t*		rec;
	rec_t*		next_rec;
	ulint		fold;
	ulint		next_fold;
	dulint		index_id;
	ulint		n_cached;
	ulint		n_recs;
	ulint*		folds;
	rec_t**		recs;
	ulint		i;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;

	ut_ad(index);

	block = buf_block_align(page);
	table = btr_search_sys->hash_index;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
	      || rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	rw_lock_s_lock(&btr_search_latch);

	if (block->is_hashed && ((block->curr_n_fields != n_fields)
				 || (block->curr_n_bytes != n_bytes)
				 || (block->curr_left_side != left_side))) {

		rw_lock_s_unlock(&btr_search_latch);

		btr_search_drop_page_hash_index(page);
	} else {
		rw_lock_s_unlock(&btr_search_latch);
	}

	n_recs = page_get_n_recs(page);

	if (n_recs == 0) {

		return;
	}

	/* Check that the values for hash index build are sensible */

	if (n_fields + n_bytes == 0) {

		return;
	}

	if (dict_index_get_n_unique_in_tree(index) < n_fields
	    || (dict_index_get_n_unique_in_tree(index) == n_fields
		&& n_bytes > 0)) {
		return;
	}

	/* Calculate and cache fold values and corresponding records into
	an array for fast insertion to the hash index */

	folds = mem_alloc(n_recs * sizeof(ulint));
	recs = mem_alloc(n_recs * sizeof(rec_t*));

	n_cached = 0;

	index_id = btr_page_get_index_id(page);

	rec = page_get_infimum_rec(page);
	rec = page_rec_get_next(rec);

	offsets = rec_get_offsets(rec, index, offsets,
				  n_fields + (n_bytes > 0), &heap);

	if (!page_rec_is_supremum(rec)) {
		ut_a(n_fields <= rec_offs_n_fields(offsets));

		if (n_bytes > 0) {
			ut_a(n_fields < rec_offs_n_fields(offsets));
		}
	}

	fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);

	if (left_side) {

		folds[n_cached] = fold;
		recs[n_cached] = rec;
		n_cached++;
	}

	for (;;) {
		next_rec = page_rec_get_next(rec);

		if (page_rec_is_supremum(next_rec)) {

			if (!left_side) {

				folds[n_cached] = fold;
				recs[n_cached] = rec;
				n_cached++;
			}

			break;
		}

		offsets = rec_get_offsets(next_rec, index, offsets,
					  n_fields + (n_bytes > 0), &heap);
		next_fold = rec_fold(next_rec, offsets, n_fields,
				     n_bytes, index_id);

		if (fold != next_fold) {
			/* Insert an entry into the hash index */

			if (left_side) {

				folds[n_cached] = next_fold;
				recs[n_cached] = next_rec;
				n_cached++;
			} else {
				folds[n_cached] = fold;
				recs[n_cached] = rec;
				n_cached++;
			}
		}

		rec = next_rec;
		fold = next_fold;
	}

	btr_search_check_free_space_in_heap();

	rw_lock_x_lock(&btr_search_latch);

	if (block->is_hashed && ((block->curr_n_fields != n_fields)
				 || (block->curr_n_bytes != n_bytes)
				 || (block->curr_left_side != left_side))) {
		goto exit_func;
	}

	/* This counter is decremented every time we drop page
	hash index entries and is incremented here. Since we can
	rebuild hash index for a page that is already hashed, we
	have to take care not to increment the counter in that
	case. */
	if (!block->is_hashed) {
		index->search_info->ref_count++;
	}

	block->is_hashed = TRUE;
	block->n_hash_helps = 0;

	block->curr_n_fields = n_fields;
	block->curr_n_bytes = n_bytes;
	block->curr_left_side = left_side;
	block->index = index;

	for (i = 0; i < n_cached; i++) {

		ha_insert_for_fold(table, folds[i], recs[i]);
	}

exit_func:
	rw_lock_x_unlock(&btr_search_latch);

	mem_free(folds);
	mem_free(recs);
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
}
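
/* On left_side above: within a group of consecutive records sharing one
fold value, only one record is entered into the hash. If left_side is
TRUE, it is the first record of each group (detected when the fold
changes, or the very first user record on the page); if FALSE, it is
the last record of each group (detected just before the fold changes,
or at the supremum). This matches the direction from which left- and
right-side searches approach a group of equal keys. */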
/************************************************************************
Moves or deletes hash entries for moved records. If new_page is already hashed,
then the hash index for page, if any, is dropped. If new_page is not hashed,
and page is hashed, then a new hash index is built to new_page with the same
parameters as page (this often happens when a page is split). */

void
btr_search_move_or_delete_hash_entries(
/*===================================*/
	page_t*		new_page,	/* in: records are copied
					to this page */
	page_t*		page,		/* in: index page from which
					records were copied, and the
					copied records will be deleted
					from this page */
	dict_index_t*	index)		/* in: record descriptor */
{
	buf_block_t*	block;
	buf_block_t*	new_block;
	ulint		n_fields;
	ulint		n_bytes;
	ibool		left_side;

	block = buf_block_align(page);
	new_block = buf_block_align(new_page);
	ut_a(page_is_comp(page) == page_is_comp(new_page));

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
	ut_ad(rw_lock_own(&(new_block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_a(!new_block->is_hashed || new_block->index == index);
	ut_a(!block->is_hashed || block->index == index);

	rw_lock_s_lock(&btr_search_latch);

	if (new_block->is_hashed) {

		rw_lock_s_unlock(&btr_search_latch);

		btr_search_drop_page_hash_index(page);

		return;
	}

	if (block->is_hashed) {

		n_fields = block->curr_n_fields;
		n_bytes = block->curr_n_bytes;
		left_side = block->curr_left_side;

		new_block->n_fields = block->curr_n_fields;
		new_block->n_bytes = block->curr_n_bytes;
		new_block->left_side = left_side;

		rw_lock_s_unlock(&btr_search_latch);

		ut_a(n_fields + n_bytes > 0);

		btr_search_build_page_hash_index(index, new_page, n_fields,
						 n_bytes, left_side);
#if 1 /* TODO: safe to remove? */
		ut_a(n_fields == block->curr_n_fields);
		ut_a(n_bytes == block->curr_n_bytes);
		ut_a(left_side == block->curr_left_side);
#endif
		return;
	}

	rw_lock_s_unlock(&btr_search_latch);
}
/************************************************************************
Updates the page hash index when a single record is deleted from a page. */

void
btr_search_update_hash_on_delete(
/*=============================*/
	btr_cur_t*	cursor)	/* in: cursor which was positioned on the
				record to delete using btr_cur_search_...,
				the record is not yet deleted */
{
	hash_table_t*	table;
	buf_block_t*	block;
	rec_t*		rec;
	ulint		fold;
	dulint		index_id;
	ibool		found;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	mem_heap_t*	heap		= NULL;
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;

	rec = btr_cur_get_rec(cursor);

	block = buf_block_align(rec);

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	if (!block->is_hashed) {

		return;
	}

	ut_a(block->index == cursor->index);
	ut_a(block->curr_n_fields + block->curr_n_bytes > 0);

	table = btr_search_sys->hash_index;

	index_id = cursor->index->id;
	fold = rec_fold(rec, rec_get_offsets(rec, cursor->index, offsets_,
					     ULINT_UNDEFINED, &heap),
			block->curr_n_fields, block->curr_n_bytes, index_id);
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
	rw_lock_x_lock(&btr_search_latch);

	found = ha_search_and_delete_if_found(table, fold, rec);

	rw_lock_x_unlock(&btr_search_latch);
}
/************************************************************************
Updates the page hash index when a single record is inserted on a page. */

void
btr_search_update_hash_node_on_insert(
/*==================================*/
	btr_cur_t*	cursor)	/* in: cursor which was positioned to the
				place to insert using btr_cur_search_...,
				and the new record has been inserted next
				to the cursor */
{
	hash_table_t*	table;
	buf_block_t*	block;
	rec_t*		rec;

	rec = btr_cur_get_rec(cursor);

	block = buf_block_align(rec);

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	if (!block->is_hashed) {

		return;
	}

	ut_a(block->index == cursor->index);

	rw_lock_x_lock(&btr_search_latch);

	if ((cursor->flag == BTR_CUR_HASH)
	    && (cursor->n_fields == block->curr_n_fields)
	    && (cursor->n_bytes == block->curr_n_bytes)
	    && !block->curr_left_side) {

		table = btr_search_sys->hash_index;

		ha_search_and_update_if_found(table, cursor->fold, rec,
					      page_rec_get_next(rec));

		rw_lock_x_unlock(&btr_search_latch);
	} else {
		rw_lock_x_unlock(&btr_search_latch);

		btr_search_update_hash_on_insert(cursor);
	}
}
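
/* Why the in-place update above is enough: when the cursor found its
position via the hash (cursor->flag == BTR_CUR_HASH) with the currently
hashed prefix and hashing is from the right side, the new record simply
takes over from the cursor record as the last member of its equal-fold
group, so the existing hash node can be repointed with
ha_search_and_update_if_found() instead of deleting and re-inserting
nodes. */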
/************************************************************************
Updates the page hash index when a single record is inserted on a page. */

void
btr_search_update_hash_on_insert(
/*=============================*/
	btr_cur_t*	cursor)	/* in: cursor which was positioned to the
				place to insert using btr_cur_search_...,
				and the new record has been inserted next
				to the cursor */
{
	hash_table_t*	table;
	buf_block_t*	block;
	rec_t*		rec;
	rec_t*		ins_rec;
	rec_t*		next_rec;
	dulint		index_id;
	ulint		fold;
	ulint		ins_fold;
	ulint		next_fold = 0; /* remove warning (??? bug ???) */
	ulint		n_fields;
	ulint		n_bytes;
	ibool		left_side;
	ibool		locked		= FALSE;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;

	table = btr_search_sys->hash_index;

	btr_search_check_free_space_in_heap();

	rec = btr_cur_get_rec(cursor);

	block = buf_block_align(rec);

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	if (!block->is_hashed) {

		return;
	}

	ut_a(block->index == cursor->index);

	index_id = cursor->index->id;

	n_fields = block->curr_n_fields;
	n_bytes = block->curr_n_bytes;
	left_side = block->curr_left_side;

	ins_rec = page_rec_get_next(rec);
	next_rec = page_rec_get_next(ins_rec);

	offsets = rec_get_offsets(ins_rec, cursor->index, offsets,
				  ULINT_UNDEFINED, &heap);
	ins_fold = rec_fold(ins_rec, offsets, n_fields, n_bytes, index_id);

	if (!page_rec_is_supremum(next_rec)) {
		offsets = rec_get_offsets(next_rec, cursor->index, offsets,
					  n_fields + (n_bytes > 0), &heap);
		next_fold = rec_fold(next_rec, offsets, n_fields,
				     n_bytes, index_id);
	}

	if (!page_rec_is_infimum(rec)) {
		offsets = rec_get_offsets(rec, cursor->index, offsets,
					  n_fields + (n_bytes > 0), &heap);
		fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);
	} else {
		if (left_side) {

			rw_lock_x_lock(&btr_search_latch);

			locked = TRUE;

			ha_insert_for_fold(table, ins_fold, ins_rec);
		}

		goto check_next_rec;
	}

	if (fold != ins_fold) {

		if (!locked) {

			rw_lock_x_lock(&btr_search_latch);

			locked = TRUE;
		}

		if (!left_side) {
			ha_insert_for_fold(table, fold, rec);
		} else {
			ha_insert_for_fold(table, ins_fold, ins_rec);
		}
	}

check_next_rec:
	if (page_rec_is_supremum(next_rec)) {

		if (!left_side) {

			if (!locked) {
				rw_lock_x_lock(&btr_search_latch);

				locked = TRUE;
			}

			ha_insert_for_fold(table, ins_fold, ins_rec);
		}

		goto function_exit;
	}

	if (ins_fold != next_fold) {

		if (!locked) {

			rw_lock_x_lock(&btr_search_latch);

			locked = TRUE;
		}

		if (!left_side) {

			ha_insert_for_fold(table, ins_fold, ins_rec);
			/*
			fputs("Hash insert for ", stderr);
			dict_index_name_print(stderr, cursor->index);
			fprintf(stderr, " fold %lu\n", ins_fold);
			*/
		} else {
			ha_insert_for_fold(table, next_fold, next_rec);
		}
	}

function_exit:
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
	if (locked) {
		rw_lock_x_unlock(&btr_search_latch);
	}
}
/************************************************************************
Validates the search system. */

ibool
btr_search_validate(void)
/*=====================*/
				/* out: TRUE if ok */
{
	buf_block_t*	block;
	page_t*		page;
	ha_node_t*	node;
	ulint		n_page_dumps	= 0;
	ibool		ok		= TRUE;
	ulint		i;
	ulint		cell_count;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;

	/* How many cells to check before temporarily releasing
	btr_search_latch. */
	ulint		chunk_size = 10000;

	*offsets_ = (sizeof offsets_) / sizeof *offsets_;

	rw_lock_x_lock(&btr_search_latch);

	cell_count = hash_get_n_cells(btr_search_sys->hash_index);

	for (i = 0; i < cell_count; i++) {
		/* We release btr_search_latch every once in a while to
		give other queries a chance to run. */
		if ((i != 0) && ((i % chunk_size) == 0)) {
			rw_lock_x_unlock(&btr_search_latch);
			os_thread_yield();
			rw_lock_x_lock(&btr_search_latch);
		}

		node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;

		while (node != NULL) {
			block = buf_block_align(node->data);
			page = buf_frame_align(node->data);
			offsets = rec_get_offsets((rec_t*) node->data,
						  block->index, offsets,
						  block->curr_n_fields
						  + (block->curr_n_bytes > 0),
						  &heap);

			if (!block->is_hashed || node->fold
			    != rec_fold((rec_t*)(node->data),
					offsets,
					block->curr_n_fields,
					block->curr_n_bytes,
					btr_page_get_index_id(page))) {
				ok = FALSE;
				ut_print_timestamp(stderr);

				fprintf(stderr,
					"  InnoDB: Error in an adaptive hash"
					" index pointer to page %lu\n"
					"InnoDB: ptr mem address %p"
					" index id %lu %lu,"
					" node fold %lu, rec fold %lu\n",
					(ulong) buf_frame_get_page_no(page),
					node->data,
					(ulong) ut_dulint_get_high(
						btr_page_get_index_id(page)),
					(ulong) ut_dulint_get_low(
						btr_page_get_index_id(page)),
					(ulong) node->fold,
					(ulong) rec_fold((rec_t*)(node->data),
							 offsets,
							 block->curr_n_fields,
							 block->curr_n_bytes,
							 btr_page_get_index_id(
								 page)));

				fputs("InnoDB: Record ", stderr);
				rec_print_new(stderr, (rec_t*)node->data,
					      offsets);
				fprintf(stderr, "\nInnoDB: on that page."
					" Page mem address %p, is hashed %lu,"
					" n fields %lu, n bytes %lu\n"
					"InnoDB: side %lu\n",
					(void*) page, (ulong) block->is_hashed,
					(ulong) block->curr_n_fields,
					(ulong) block->curr_n_bytes,
					(ulong) block->curr_left_side);

				if (n_page_dumps < 20) {
					buf_page_print(page);
					n_page_dumps++;
				}
			}

			node = node->next;
		}
	}

	for (i = 0; i < cell_count; i += chunk_size) {
		ulint end_index = ut_min(i + chunk_size - 1, cell_count - 1);

		/* We release btr_search_latch every once in a while to
		give other queries a chance to run. */
		if (i != 0) {
			rw_lock_x_unlock(&btr_search_latch);
			os_thread_yield();
			rw_lock_x_lock(&btr_search_latch);
		}

		if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
			ok = FALSE;
		}
	}

	rw_lock_x_unlock(&btr_search_latch);
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	return(ok);
}