~ubuntu-branches/debian/squeeze/mysql-5.1/squeeze

« back to all changes in this revision

Viewing changes to storage/innobase/btr/btr0cur.c

  • Committer: Package Import Robot
  • Author(s): Moritz Muehlenhoff
  • Date: 2014-01-14 10:40:30 UTC
  • mfrom: (1.1.5)
  • Revision ID: package-import@ubuntu.com-20140114104030-44alii0hx3x3g41y
Tags: 5.1.73-1
* New upstream release
  http://dev.mysql.com/doc/relnotes/mysql/5.1/en/news-5-1-73.html
* Update patches
* Disable flaky test rpl.rpl_innodb_bug28430 breaking the build. It's  marked
  as experimental by upstream and the internet is full of reports about it's
  unrelialibity

Show diffs side-by-side

added added

removed removed

Lines of Context:
31
31
#include "btr0sea.h"
32
32
#include "row0upd.h"
33
33
#include "trx0rec.h"
 
34
#include "trx0undo.h"
 
35
#include "trx0roll.h" /* trx_roll_crash_recv_trx */
34
36
#include "que0que.h"
35
37
#include "row0row.h"
36
38
#include "srv0srv.h"
48
50
ulint   btr_cur_n_non_sea_old   = 0;
49
51
ulint   btr_cur_n_sea_old       = 0;
50
52
 
 
53
#ifdef UNIV_DEBUG
 
54
uint    btr_cur_limit_optimistic_insert_debug = 0;
 
55
#endif /* UNIV_DEBUG */
 
56
 
51
57
/* In the optimistic insert, if the insert does not fit, but this much space
52
58
can be released by page reorganize, then it is reorganized */
53
59
 
66
72
/*--------------------------------------*/
67
73
#define BTR_BLOB_HDR_SIZE               8
68
74
 
 
75
/* Estimated table level stats from sampled value. */
 
76
#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, ext_size, not_empty)  \
 
77
        ((value * (ib_longlong) index->stat_n_leaf_pages                \
 
78
          + BTR_KEY_VAL_ESTIMATE_N_PAGES - 1 + ext_size                 \
 
79
          + not_empty)                                                  \
 
80
         / (BTR_KEY_VAL_ESTIMATE_N_PAGES + ext_size))
 
81
 
 
82
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 
83
/* A BLOB field reference full of zero, for use in assertions and tests.
 
84
Initially, BLOB field references are set to zero, in
 
85
dtuple_convert_big_rec(). */
 
86
const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE];
 
87
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
 
88
 
69
89
/***********************************************************************
70
90
Marks all extern fields in a record as owned by the record. This function
71
91
should be called if the delete mark of a record is removed: a not delete
306
326
        ut_ad(dict_index_check_search_tuple(index, tuple));
307
327
        ut_ad(!(index->type & DICT_IBUF) || ibuf_inside());
308
328
        ut_ad(dtuple_check_typed(tuple));
 
329
        ut_ad(index->page != FIL_NULL);
309
330
 
 
331
        UNIV_MEM_INVALID(&cursor->up_match, sizeof cursor->up_match);
 
332
        UNIV_MEM_INVALID(&cursor->up_bytes, sizeof cursor->up_bytes);
 
333
        UNIV_MEM_INVALID(&cursor->low_match, sizeof cursor->low_match);
 
334
        UNIV_MEM_INVALID(&cursor->low_bytes, sizeof cursor->low_bytes);
310
335
#ifdef UNIV_DEBUG
311
336
        cursor->up_match = ULINT_UNDEFINED;
312
337
        cursor->low_match = ULINT_UNDEFINED;
1002
1027
                goto calculate_sizes_again;
1003
1028
        }
1004
1029
 
 
1030
        LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page),
 
1031
                                      goto fail);
 
1032
 
1005
1033
        /* If there have been many consecutive inserts, and we are on the leaf
1006
1034
        level, check if we have to split the page to reserve enough free space
1007
1035
        for future updates of records. */
1014
1042
            && (0 == level)
1015
1043
            && (btr_page_get_split_rec_to_right(cursor, &dummy_rec)
1016
1044
                || btr_page_get_split_rec_to_left(cursor, &dummy_rec))) {
1017
 
 
 
1045
#ifdef UNIV_DEBUG
 
1046
fail:
 
1047
#endif /* UNIV_DEBUG */
1018
1048
                if (big_rec_vec) {
1019
1049
                        dtuple_convert_back_big_rec(index, entry, big_rec_vec);
1020
1050
                }
1306
1336
 
1307
1337
/***************************************************************
1308
1338
Writes a redo log record of updating a record in-place. */
1309
 
UNIV_INLINE
1310
1339
void
1311
1340
btr_cur_update_in_place_log(
1312
1341
/*========================*/
1334
1363
                return;
1335
1364
        }
1336
1365
 
1337
 
        /* The code below assumes index is a clustered index: change index to
1338
 
        the clustered index if we are updating a secondary index record (or we
1339
 
        could as well skip writing the sys col values to the log in this case
1340
 
        because they are not needed for a secondary index record update) */
1341
 
 
1342
 
        index = dict_table_get_first_index(index->table);
 
1366
        /* For secondary indexes, we could skip writing the dummy system fields
 
1367
        to the redo log but we have to change redo log parsing of
 
1368
        MLOG_REC_UPDATE_IN_PLACE/MLOG_COMP_REC_UPDATE_IN_PLACE or we have to add
 
1369
        new redo log record. For now, just write dummy sys fields to the redo
 
1370
        log if we are updating a secondary index record.
 
1371
        */
1343
1372
 
1344
1373
        mach_write_to_1(log_ptr, flags);
1345
1374
        log_ptr++;
1346
1375
 
1347
 
        log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr,
1348
 
                                                mtr);
 
1376
        if (index->type & DICT_CLUSTERED) {
 
1377
                log_ptr = row_upd_write_sys_vals_to_log(
 
1378
                                index, trx, roll_ptr, log_ptr, mtr);
 
1379
        } else {
 
1380
                /* Dummy system fields for a secondary index */
 
1381
                /* TRX_ID Position */
 
1382
                log_ptr += mach_write_compressed(log_ptr, 0);
 
1383
                /* ROLL_PTR */
 
1384
                trx_write_roll_ptr(log_ptr, ut_dulint_zero);
 
1385
                log_ptr += DATA_ROLL_PTR_LEN;
 
1386
                /* TRX_ID */
 
1387
                log_ptr += mach_dulint_write_compressed(log_ptr,
 
1388
                                                        ut_dulint_zero);
 
1389
        }
 
1390
 
1349
1391
        mach_write_to_2(log_ptr, page_offset(rec));
1350
1392
        log_ptr += 2;
1351
1393
 
1565
1607
        ulint           old_rec_size;
1566
1608
        dtuple_t*       new_entry;
1567
1609
        dulint          roll_ptr;
1568
 
        trx_t*          trx;
1569
1610
        mem_heap_t*     heap;
1570
1611
        ibool           reorganized     = FALSE;
1571
1612
        ulint           i;
1578
1619
 
1579
1620
        heap = mem_heap_create(1024);
1580
1621
        offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
 
1622
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 
1623
        ut_a(!rec_offs_any_null_extern(rec, offsets)
 
1624
             || thr_get_trx(thr) == trx_roll_crash_recv_trx);
 
1625
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
1581
1626
 
1582
1627
#ifdef UNIV_DEBUG
1583
1628
        if (btr_cur_print_record_ops && thr) {
1684
1729
 
1685
1730
        page_cur_move_to_prev(page_cursor);
1686
1731
 
1687
 
        trx = thr_get_trx(thr);
1688
 
 
1689
1732
        if (!(flags & BTR_KEEP_SYS_FLAG)) {
1690
1733
                row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
1691
1734
                                              roll_ptr);
1692
1735
                row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
1693
 
                                              trx->id);
 
1736
                                              thr_get_trx(thr)->id);
1694
1737
        }
1695
1738
 
1696
1739
        rec = btr_cur_insert_if_possible(cursor, new_entry, &reorganized, mtr);
2356
2399
}
2357
2400
 
2358
2401
/***************************************************************
2359
 
Sets a secondary index record delete mark to FALSE. This function is only
 
2402
Sets a secondary index record delete mark. This function is only
2360
2403
used by the insert buffer insert merge mechanism. */
2361
2404
 
2362
2405
void
2363
 
btr_cur_del_unmark_for_ibuf(
2364
 
/*========================*/
 
2406
btr_cur_set_deleted_flag_for_ibuf(
 
2407
/*==============================*/
2365
2408
        rec_t*          rec,    /* in: record to delete unmark */
 
2409
        ibool           val,    /* in: value to set */
2366
2410
        mtr_t*          mtr)    /* in: mtr */
2367
2411
{
2368
2412
        /* We do not need to reserve btr_search_latch, as the page has just
2369
2413
        been read to the buffer pool and there cannot be a hash index to it. */
2370
2414
 
2371
 
        rec_set_deleted_flag(rec, page_is_comp(buf_frame_align(rec)), FALSE);
 
2415
        rec_set_deleted_flag(rec, page_is_comp(buf_frame_align(rec)), val);
2372
2416
 
2373
 
        btr_cur_del_mark_set_sec_rec_log(rec, FALSE, mtr);
 
2417
        btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
2374
2418
}
2375
2419
 
2376
2420
/*==================== B-TREE RECORD REMOVE =========================*/
2771
2815
                                n_rows = n_rows * 2;
2772
2816
                        }
2773
2817
 
 
2818
                        DBUG_EXECUTE_IF("bug14007649", return(n_rows););
 
2819
 
2774
2820
                        /* Do not estimate the number of rows in the range
2775
2821
                        to over 1 / 2 of the estimated rows in the whole
2776
2822
                        table */
2835
2881
}
2836
2882
 
2837
2883
/***********************************************************************
 
2884
Record the number of non_null key values in a given index for
 
2885
each n-column prefix of the index where n < dict_index_get_n_unique(index).
 
2886
The estimates are eventually stored in the array:
 
2887
index->stat_n_non_null_key_vals. */
 
2888
static
 
2889
void
 
2890
btr_record_not_null_field_in_rec(
 
2891
/*=============================*/
 
2892
        ulint           n_unique,       /* in: dict_index_get_n_unique(index),
 
2893
                                        number of columns uniquely determine
 
2894
                                        an index entry */
 
2895
        const ulint*    offsets,        /* in: rec_get_offsets(rec, index),
 
2896
                                        its size could be for all fields or
 
2897
                                        that of "n_unique" */
 
2898
        ib_longlong*    n_not_null)     /* in/out: array to record number of
 
2899
                                        not null rows for n-column prefix */
 
2900
{
 
2901
        ulint   i;
 
2902
 
 
2903
        ut_ad(rec_offs_n_fields(offsets) >= n_unique);
 
2904
 
 
2905
        if (n_not_null == NULL) {
 
2906
                return;
 
2907
        }
 
2908
 
 
2909
        for (i = 0; i < n_unique; i++) {
 
2910
                if (rec_offs_nth_sql_null(offsets, i)) {
 
2911
                        break;
 
2912
                }
 
2913
 
 
2914
                n_not_null[i]++;
 
2915
        }
 
2916
}
 
2917
 
 
2918
/***********************************************************************
2838
2919
Estimates the number of different key values in a given index, for
2839
2920
each n-column prefix of the index where n <= dict_index_get_n_unique(index).
2840
 
The estimates are stored in the array index->stat_n_diff_key_vals. */
 
2921
The estimates are stored in the array index->stat_n_diff_key_vals.
 
2922
If innodb_stats_method is "nulls_ignored", we also record the number of
 
2923
non-null values for each prefix and store the estimates in
 
2924
array index->stat_n_non_null_key_vals. */
2841
2925
 
2842
2926
void
2843
2927
btr_estimate_number_of_different_key_vals(
2851
2935
        ulint           matched_fields;
2852
2936
        ulint           matched_bytes;
2853
2937
        ib_longlong*    n_diff;
 
2938
        ib_longlong*    n_not_null;
 
2939
        ibool           stats_null_not_equal;
2854
2940
        ulint           not_empty_flag  = 0;
2855
2941
        ulint           total_external_size = 0;
2856
2942
        ulint           i;
2858
2944
        ulint           add_on;
2859
2945
        mtr_t           mtr;
2860
2946
        mem_heap_t*     heap            = NULL;
2861
 
        ulint           offsets_rec_[REC_OFFS_NORMAL_SIZE];
2862
 
        ulint           offsets_next_rec_[REC_OFFS_NORMAL_SIZE];
2863
 
        ulint*          offsets_rec     = offsets_rec_;
2864
 
        ulint*          offsets_next_rec= offsets_next_rec_;
2865
 
        *offsets_rec_ = (sizeof offsets_rec_) / sizeof *offsets_rec_;
2866
 
        *offsets_next_rec_
2867
 
                = (sizeof offsets_next_rec_) / sizeof *offsets_next_rec_;
 
2947
        ulint*          offsets_rec     = NULL;
 
2948
        ulint*          offsets_next_rec = NULL;
2868
2949
 
2869
2950
        n_cols = dict_index_get_n_unique(index);
2870
2951
 
2871
 
        n_diff = mem_alloc((n_cols + 1) * sizeof(ib_longlong));
2872
 
 
2873
 
        memset(n_diff, 0, (n_cols + 1) * sizeof(ib_longlong));
 
2952
        heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null)
 
2953
                               * (n_cols + 1)
 
2954
                               + dict_index_get_n_fields(index)
 
2955
                               * (sizeof *offsets_rec
 
2956
                                  + sizeof *offsets_next_rec));
 
2957
 
 
2958
        n_diff = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_longlong));
 
2959
 
 
2960
        n_not_null = NULL;
 
2961
 
 
2962
        /* Check srv_innodb_stats_method setting, and decide whether we
 
2963
        need to record non-null value and also decide if NULL is
 
2964
        considered equal (by setting stats_null_not_equal value) */
 
2965
        switch (srv_innodb_stats_method) {
 
2966
        case SRV_STATS_NULLS_IGNORED:
 
2967
                n_not_null = mem_heap_zalloc(heap, (n_cols + 1)
 
2968
                                             * sizeof *n_not_null);
 
2969
                /* fall through */
 
2970
 
 
2971
        case SRV_STATS_NULLS_UNEQUAL:
 
2972
                /* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL
 
2973
                case, we will treat NULLs as unequal value */
 
2974
                stats_null_not_equal = TRUE;
 
2975
                break;
 
2976
 
 
2977
        case SRV_STATS_NULLS_EQUAL:
 
2978
                stats_null_not_equal = FALSE;
 
2979
                break;
 
2980
 
 
2981
        default:
 
2982
                ut_error;
 
2983
        }
2874
2984
 
2875
2985
        /* We sample some pages in the index to get an estimate */
2876
2986
 
2877
2987
        for (i = 0; i < BTR_KEY_VAL_ESTIMATE_N_PAGES; i++) {
2878
 
                rec_t*  supremum;
2879
2988
                mtr_start(&mtr);
2880
2989
 
2881
2990
                btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);
2888
2997
 
2889
2998
                page = btr_cur_get_page(&cursor);
2890
2999
 
2891
 
                supremum = page_get_supremum_rec(page);
2892
3000
                rec = page_rec_get_next(page_get_infimum_rec(page));
2893
3001
 
2894
 
                if (rec != supremum) {
 
3002
                if (!page_rec_is_supremum(rec)) {
2895
3003
                        not_empty_flag = 1;
2896
3004
                        offsets_rec = rec_get_offsets(rec, index, offsets_rec,
2897
3005
                                                      ULINT_UNDEFINED, &heap);
 
3006
 
 
3007
                        if (n_not_null) {
 
3008
                                btr_record_not_null_field_in_rec(
 
3009
                                        n_cols, offsets_rec, n_not_null);
 
3010
                        }
2898
3011
                }
2899
3012
 
2900
 
                while (rec != supremum) {
 
3013
                while (!page_rec_is_supremum(rec)) {
2901
3014
                        rec_t*  next_rec = page_rec_get_next(rec);
2902
 
                        if (next_rec == supremum) {
 
3015
                        if (page_rec_is_supremum(next_rec)) {
 
3016
                                total_external_size +=
 
3017
                                        btr_rec_get_externally_stored_len(
 
3018
                                                rec, offsets_rec);
2903
3019
                                break;
2904
3020
                        }
2905
3021
 
2907
3023
                        matched_bytes = 0;
2908
3024
                        offsets_next_rec = rec_get_offsets(next_rec, index,
2909
3025
                                                           offsets_next_rec,
2910
 
                                                           n_cols, &heap);
 
3026
                                                           ULINT_UNDEFINED,
 
3027
                                                           &heap);
2911
3028
 
2912
3029
                        cmp_rec_rec_with_match(rec, next_rec,
2913
3030
                                               offsets_rec, offsets_next_rec,
2914
 
                                               index, &matched_fields,
 
3031
                                               index, stats_null_not_equal,
 
3032
                                               &matched_fields,
2915
3033
                                               &matched_bytes);
2916
3034
 
2917
3035
                        for (j = matched_fields + 1; j <= n_cols; j++) {
2921
3039
                                n_diff[j]++;
2922
3040
                        }
2923
3041
 
 
3042
                        if (n_not_null) {
 
3043
                                btr_record_not_null_field_in_rec(
 
3044
                                        n_cols, offsets_next_rec, n_not_null);
 
3045
                        }
 
3046
 
2924
3047
                        total_external_size
2925
3048
                                += btr_rec_get_externally_stored_len(
2926
3049
                                        rec, offsets_rec);
2955
3078
                        }
2956
3079
                }
2957
3080
 
2958
 
                offsets_rec = rec_get_offsets(rec, index, offsets_rec,
2959
 
                                              ULINT_UNDEFINED, &heap);
2960
 
                total_external_size += btr_rec_get_externally_stored_len(
2961
 
                        rec, offsets_rec);
2962
3081
                mtr_commit(&mtr);
2963
3082
        }
2964
3083
 
2971
3090
        included in index->stat_n_leaf_pages) */
2972
3091
 
2973
3092
        for (j = 0; j <= n_cols; j++) {
2974
 
                index->stat_n_diff_key_vals[j]
2975
 
                        = ((n_diff[j]
2976
 
                            * (ib_longlong)index->stat_n_leaf_pages
2977
 
                            + BTR_KEY_VAL_ESTIMATE_N_PAGES - 1
2978
 
                            + total_external_size
2979
 
                            + not_empty_flag)
2980
 
                           / (BTR_KEY_VAL_ESTIMATE_N_PAGES
2981
 
                              + total_external_size));
 
3093
                index->stat_n_diff_key_vals[j] = BTR_TABLE_STATS_FROM_SAMPLE(
 
3094
                        n_diff[j], index, total_external_size, not_empty_flag);
2982
3095
 
2983
3096
                /* If the tree is small, smaller than
2984
3097
                10 * BTR_KEY_VAL_ESTIMATE_N_PAGES + total_external_size, then
2997
3110
                }
2998
3111
 
2999
3112
                index->stat_n_diff_key_vals[j] += add_on;
3000
 
        }
3001
 
 
3002
 
        mem_free(n_diff);
3003
 
        if (UNIV_LIKELY_NULL(heap)) {
3004
 
                mem_heap_free(heap);
3005
 
        }
 
3113
 
 
3114
                /* Update the stat_n_non_null_key_vals[] with our
 
3115
                sampled result. stat_n_non_null_key_vals[] is created
 
3116
                and initialized to zero in dict_index_add_to_cache(),
 
3117
                along with stat_n_diff_key_vals[] array */
 
3118
                if (n_not_null != NULL && (j < n_cols)) {
 
3119
                        index->stat_n_non_null_key_vals[j] =
 
3120
                                 BTR_TABLE_STATS_FROM_SAMPLE(
 
3121
                                        n_not_null[j], index,
 
3122
                                        total_external_size, not_empty_flag);
 
3123
                }
 
3124
        }
 
3125
 
 
3126
        mem_heap_free(heap);
3006
3127
}
3007
3128
 
3008
3129
/*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/
3365
3486
        page_t* page;
3366
3487
        ulint   space_id;
3367
3488
        page_t* prev_page;
 
3489
#ifdef UNIV_SYNC_DEBUG
3368
3490
        page_t* rec_page;
 
3491
#endif /* UNIV_SYNC_DEBUG */
3369
3492
        ulint   prev_page_no;
3370
3493
        ulint   hint_page_no;
3371
3494
        ulint   i;
3460
3583
 
3461
3584
                        extern_len -= store_len;
3462
3585
 
3463
 
                        rec_page = buf_page_get(space_id,
3464
 
                                                buf_frame_get_page_no(data),
3465
 
                                                RW_X_LATCH, &mtr);
 
3586
#ifdef UNIV_SYNC_DEBUG
 
3587
                        rec_page =
 
3588
#endif /* UNIV_SYNC_DEBUG */
 
3589
                        buf_page_get(space_id,
 
3590
                                     buf_frame_get_page_no(data),
 
3591
                                     RW_X_LATCH, &mtr);
3466
3592
#ifdef UNIV_SYNC_DEBUG
3467
3593
                        buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
3468
3594
#endif /* UNIV_SYNC_DEBUG */
3536
3662
                                        X-latch to the index tree */
3537
3663
{
3538
3664
        page_t* page;
 
3665
#ifdef UNIV_SYNC_DEBUG
3539
3666
        page_t* rec_page;
 
3667
#endif /* UNIV_SYNC_DEBUG */
3540
3668
        ulint   space_id;
3541
3669
        ulint   page_no;
3542
 
        ulint   offset;
3543
3670
        ulint   extern_len;
3544
3671
        ulint   next_page_no;
3545
3672
        ulint   part_len;
3556
3683
        for (;;) {
3557
3684
                mtr_start(&mtr);
3558
3685
 
3559
 
                rec_page = buf_page_get(buf_frame_get_space_id(data),
3560
 
                                        buf_frame_get_page_no(data),
3561
 
                                        RW_X_LATCH, &mtr);
 
3686
#ifdef UNIV_SYNC_DEBUG
 
3687
                rec_page =
 
3688
#endif /* UNIV_SYNC_DEBUG */
 
3689
                buf_page_get(buf_frame_get_space_id(data),
 
3690
                             buf_frame_get_page_no(data),
 
3691
                             RW_X_LATCH, &mtr);
3562
3692
#ifdef UNIV_SYNC_DEBUG
3563
3693
                buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
3564
3694
#endif /* UNIV_SYNC_DEBUG */
3568
3698
                page_no = mach_read_from_4(data + local_len
3569
3699
                                           + BTR_EXTERN_PAGE_NO);
3570
3700
 
3571
 
                offset = mach_read_from_4(data + local_len
3572
 
                                          + BTR_EXTERN_OFFSET);
3573
3701
                extern_len = mach_read_from_4(data + local_len
3574
3702
                                              + BTR_EXTERN_LEN + 4);
3575
3703