48
50
ulint btr_cur_n_non_sea_old = 0;
49
51
ulint btr_cur_n_sea_old = 0;
54
uint btr_cur_limit_optimistic_insert_debug = 0;
55
#endif /* UNIV_DEBUG */
51
57
/* In the optimistic insert, if the insert does not fit, but this much space
52
58
can be released by page reorganize, then it is reorganized */
66
72
/*--------------------------------------*/
67
73
#define BTR_BLOB_HDR_SIZE 8
75
/* Estimated table level stats from sampled value. */
76
#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, ext_size, not_empty) \
77
((value * (ib_longlong) index->stat_n_leaf_pages \
78
+ BTR_KEY_VAL_ESTIMATE_N_PAGES - 1 + ext_size \
80
/ (BTR_KEY_VAL_ESTIMATE_N_PAGES + ext_size))
82
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
83
/* A BLOB field reference full of zero, for use in assertions and tests.
84
Initially, BLOB field references are set to zero, in
85
dtuple_convert_big_rec(). */
86
const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE];
87
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
69
89
/***********************************************************************
70
90
Marks all extern fields in a record as owned by the record. This function
71
91
should be called if the delete mark of a record is removed: a not delete
306
326
ut_ad(dict_index_check_search_tuple(index, tuple));
307
327
ut_ad(!(index->type & DICT_IBUF) || ibuf_inside());
308
328
ut_ad(dtuple_check_typed(tuple));
329
ut_ad(index->page != FIL_NULL);
331
UNIV_MEM_INVALID(&cursor->up_match, sizeof cursor->up_match);
332
UNIV_MEM_INVALID(&cursor->up_bytes, sizeof cursor->up_bytes);
333
UNIV_MEM_INVALID(&cursor->low_match, sizeof cursor->low_match);
334
UNIV_MEM_INVALID(&cursor->low_bytes, sizeof cursor->low_bytes);
310
335
#ifdef UNIV_DEBUG
311
336
cursor->up_match = ULINT_UNDEFINED;
312
337
cursor->low_match = ULINT_UNDEFINED;
1002
1027
goto calculate_sizes_again;
1030
LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page),
1005
1033
/* If there have been many consecutive inserts, and we are on the leaf
1006
1034
level, check if we have to split the page to reserve enough free space
1007
1035
for future updates of records. */
1014
1042
&& (0 == level)
1015
1043
&& (btr_page_get_split_rec_to_right(cursor, &dummy_rec)
1016
1044
|| btr_page_get_split_rec_to_left(cursor, &dummy_rec))) {
1047
#endif /* UNIV_DEBUG */
1018
1048
if (big_rec_vec) {
1019
1049
dtuple_convert_back_big_rec(index, entry, big_rec_vec);
1337
/* The code below assumes index is a clustered index: change index to
1338
the clustered index if we are updating a secondary index record (or we
1339
could as well skip writing the sys col values to the log in this case
1340
because they are not needed for a secondary index record update) */
1342
index = dict_table_get_first_index(index->table);
1366
/* For secondary indexes, we could skip writing the dummy system fields
1367
to the redo log but we have to change redo log parsing of
1368
MLOG_REC_UPDATE_IN_PLACE/MLOG_COMP_REC_UPDATE_IN_PLACE or we have to add
1369
new redo log record. For now, just write dummy sys fields to the redo
1370
log if we are updating a secondary index record.
1344
1373
mach_write_to_1(log_ptr, flags);
1347
log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr,
1376
if (index->type & DICT_CLUSTERED) {
1377
log_ptr = row_upd_write_sys_vals_to_log(
1378
index, trx, roll_ptr, log_ptr, mtr);
1380
/* Dummy system fields for a secondary index */
1381
/* TRX_ID Position */
1382
log_ptr += mach_write_compressed(log_ptr, 0);
1384
trx_write_roll_ptr(log_ptr, ut_dulint_zero);
1385
log_ptr += DATA_ROLL_PTR_LEN;
1387
log_ptr += mach_dulint_write_compressed(log_ptr,
1349
1391
mach_write_to_2(log_ptr, page_offset(rec));
1579
1620
heap = mem_heap_create(1024);
1580
1621
offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
1622
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
1623
ut_a(!rec_offs_any_null_extern(rec, offsets)
1624
|| thr_get_trx(thr) == trx_roll_crash_recv_trx);
1625
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
1582
1627
#ifdef UNIV_DEBUG
1583
1628
if (btr_cur_print_record_ops && thr) {
1685
1730
page_cur_move_to_prev(page_cursor);
1687
trx = thr_get_trx(thr);
1689
1732
if (!(flags & BTR_KEEP_SYS_FLAG)) {
1690
1733
row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
1692
1735
row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
1736
thr_get_trx(thr)->id);
1696
1739
rec = btr_cur_insert_if_possible(cursor, new_entry, &reorganized, mtr);
2358
2401
/***************************************************************
2359
Sets a secondary index record delete mark to FALSE. This function is only
2402
Sets a secondary index record delete mark. This function is only
2360
2403
used by the insert buffer insert merge mechanism. */
2363
btr_cur_del_unmark_for_ibuf(
2364
/*========================*/
2406
btr_cur_set_deleted_flag_for_ibuf(
2407
/*==============================*/
2365
2408
rec_t* rec, /* in: record to delete unmark */
2409
ibool val, /* in: value to set */
2366
2410
mtr_t* mtr) /* in: mtr */
2368
2412
/* We do not need to reserve btr_search_latch, as the page has just
2369
2413
been read to the buffer pool and there cannot be a hash index to it. */
2371
rec_set_deleted_flag(rec, page_is_comp(buf_frame_align(rec)), FALSE);
2415
rec_set_deleted_flag(rec, page_is_comp(buf_frame_align(rec)), val);
2373
btr_cur_del_mark_set_sec_rec_log(rec, FALSE, mtr);
2417
btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
2376
2420
/*==================== B-TREE RECORD REMOVE =========================*/
2837
2883
/***********************************************************************
2884
Record the number of non_null key values in a given index for
2885
each n-column prefix of the index where n < dict_index_get_n_unique(index).
2886
The estimates are eventually stored in the array:
2887
index->stat_n_non_null_key_vals. */
2890
btr_record_not_null_field_in_rec(
2891
/*=============================*/
2892
ulint n_unique, /* in: dict_index_get_n_unique(index),
2893
number of columns uniquely determine
2895
const ulint* offsets, /* in: rec_get_offsets(rec, index),
2896
its size could be for all fields or
2897
that of "n_unique" */
2898
ib_longlong* n_not_null) /* in/out: array to record number of
2899
not null rows for n-column prefix */
2903
ut_ad(rec_offs_n_fields(offsets) >= n_unique);
2905
if (n_not_null == NULL) {
2909
for (i = 0; i < n_unique; i++) {
2910
if (rec_offs_nth_sql_null(offsets, i)) {
2918
/***********************************************************************
2838
2919
Estimates the number of different key values in a given index, for
2839
2920
each n-column prefix of the index where n <= dict_index_get_n_unique(index).
2840
The estimates are stored in the array index->stat_n_diff_key_vals. */
2921
The estimates are stored in the array index->stat_n_diff_key_vals.
2922
If innodb_stats_method is "nulls_ignored", we also record the number of
2923
non-null values for each prefix and store the estimates in
2924
array index->stat_n_non_null_key_vals. */
2843
2927
btr_estimate_number_of_different_key_vals(
2860
2946
mem_heap_t* heap = NULL;
2861
ulint offsets_rec_[REC_OFFS_NORMAL_SIZE];
2862
ulint offsets_next_rec_[REC_OFFS_NORMAL_SIZE];
2863
ulint* offsets_rec = offsets_rec_;
2864
ulint* offsets_next_rec= offsets_next_rec_;
2865
*offsets_rec_ = (sizeof offsets_rec_) / sizeof *offsets_rec_;
2867
= (sizeof offsets_next_rec_) / sizeof *offsets_next_rec_;
2947
ulint* offsets_rec = NULL;
2948
ulint* offsets_next_rec = NULL;
2869
2950
n_cols = dict_index_get_n_unique(index);
2871
n_diff = mem_alloc((n_cols + 1) * sizeof(ib_longlong));
2873
memset(n_diff, 0, (n_cols + 1) * sizeof(ib_longlong));
2952
heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null)
2954
+ dict_index_get_n_fields(index)
2955
* (sizeof *offsets_rec
2956
+ sizeof *offsets_next_rec));
2958
n_diff = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_longlong));
2962
/* Check srv_innodb_stats_method setting, and decide whether we
2963
need to record non-null value and also decide if NULL is
2964
considered equal (by setting stats_null_not_equal value) */
2965
switch (srv_innodb_stats_method) {
2966
case SRV_STATS_NULLS_IGNORED:
2967
n_not_null = mem_heap_zalloc(heap, (n_cols + 1)
2968
* sizeof *n_not_null);
2971
case SRV_STATS_NULLS_UNEQUAL:
2972
/* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL
2973
case, we will treat NULLs as unequal value */
2974
stats_null_not_equal = TRUE;
2977
case SRV_STATS_NULLS_EQUAL:
2978
stats_null_not_equal = FALSE;
2875
2985
/* We sample some pages in the index to get an estimate */
2877
2987
for (i = 0; i < BTR_KEY_VAL_ESTIMATE_N_PAGES; i++) {
2879
2988
mtr_start(&mtr);
2881
2990
btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);
2889
2998
page = btr_cur_get_page(&cursor);
2891
supremum = page_get_supremum_rec(page);
2892
3000
rec = page_rec_get_next(page_get_infimum_rec(page));
2894
if (rec != supremum) {
3002
if (!page_rec_is_supremum(rec)) {
2895
3003
not_empty_flag = 1;
2896
3004
offsets_rec = rec_get_offsets(rec, index, offsets_rec,
2897
3005
ULINT_UNDEFINED, &heap);
3008
btr_record_not_null_field_in_rec(
3009
n_cols, offsets_rec, n_not_null);
2900
while (rec != supremum) {
3013
while (!page_rec_is_supremum(rec)) {
2901
3014
rec_t* next_rec = page_rec_get_next(rec);
2902
if (next_rec == supremum) {
3015
if (page_rec_is_supremum(next_rec)) {
3016
total_external_size +=
3017
btr_rec_get_externally_stored_len(
2907
3023
matched_bytes = 0;
2908
3024
offsets_next_rec = rec_get_offsets(next_rec, index,
2909
3025
offsets_next_rec,
2912
3029
cmp_rec_rec_with_match(rec, next_rec,
2913
3030
offsets_rec, offsets_next_rec,
2914
index, &matched_fields,
3031
index, stats_null_not_equal,
2915
3033
&matched_bytes);
2917
3035
for (j = matched_fields + 1; j <= n_cols; j++) {
2971
3090
included in index->stat_n_leaf_pages) */
2973
3092
for (j = 0; j <= n_cols; j++) {
2974
index->stat_n_diff_key_vals[j]
2976
* (ib_longlong)index->stat_n_leaf_pages
2977
+ BTR_KEY_VAL_ESTIMATE_N_PAGES - 1
2978
+ total_external_size
2980
/ (BTR_KEY_VAL_ESTIMATE_N_PAGES
2981
+ total_external_size));
3093
index->stat_n_diff_key_vals[j] = BTR_TABLE_STATS_FROM_SAMPLE(
3094
n_diff[j], index, total_external_size, not_empty_flag);
2983
3096
/* If the tree is small, smaller than
2984
3097
10 * BTR_KEY_VAL_ESTIMATE_N_PAGES + total_external_size, then
2999
3112
index->stat_n_diff_key_vals[j] += add_on;
3003
if (UNIV_LIKELY_NULL(heap)) {
3004
mem_heap_free(heap);
3114
/* Update the stat_n_non_null_key_vals[] with our
3115
sampled result. stat_n_non_null_key_vals[] is created
3116
and initialized to zero in dict_index_add_to_cache(),
3117
along with stat_n_diff_key_vals[] array */
3118
if (n_not_null != NULL && (j < n_cols)) {
3119
index->stat_n_non_null_key_vals[j] =
3120
BTR_TABLE_STATS_FROM_SAMPLE(
3121
n_not_null[j], index,
3122
total_external_size, not_empty_flag);
3126
mem_heap_free(heap);
3008
3129
/*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/
3461
3584
extern_len -= store_len;
3463
rec_page = buf_page_get(space_id,
3464
buf_frame_get_page_no(data),
3586
#ifdef UNIV_SYNC_DEBUG
3588
#endif /* UNIV_SYNC_DEBUG */
3589
buf_page_get(space_id,
3590
buf_frame_get_page_no(data),
3466
3592
#ifdef UNIV_SYNC_DEBUG
3467
3593
buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
3468
3594
#endif /* UNIV_SYNC_DEBUG */
3557
3684
mtr_start(&mtr);
3559
rec_page = buf_page_get(buf_frame_get_space_id(data),
3560
buf_frame_get_page_no(data),
3686
#ifdef UNIV_SYNC_DEBUG
3688
#endif /* UNIV_SYNC_DEBUG */
3689
buf_page_get(buf_frame_get_space_id(data),
3690
buf_frame_get_page_no(data),
3562
3692
#ifdef UNIV_SYNC_DEBUG
3563
3693
buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
3564
3694
#endif /* UNIV_SYNC_DEBUG */