~ubuntu-branches/ubuntu/maverick/mysql-5.1/maverick-proposed

« back to all changes in this revision

Viewing changes to storage/innodb_plugin/btr/btr0cur.c

  • Committer: Package Import Robot
  • Author(s): Marc Deslauriers
  • Date: 2012-02-22 14:16:05 UTC
  • mto: This revision was merged to the branch mainline in revision 20.
  • Revision ID: package-import@ubuntu.com-20120222141605-nxlu9yzc6attylc2
Tags: upstream-5.1.61
Import upstream version 5.1.61

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
/*****************************************************************************
2
2
 
3
 
Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
 
3
Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
4
4
Copyright (c) 2008, Google Inc.
5
5
 
6
6
Portions of this file contain modifications contributed and copyrighted by
100
100
/*--------------------------------------*/
101
101
#define BTR_BLOB_HDR_SIZE               8       /*!< Size of a BLOB
102
102
                                                part header, in bytes */
 
103
 
 
104
/** Estimated table level stats from sampled value.
 
105
@param value            sampled stats
 
106
@param index            index being sampled
 
107
@param sample           number of sampled rows
 
108
@param ext_size         external stored data size
 
109
@param not_empty        table not empty
 
110
@return estimated table wide stats from sampled value */
 
111
#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty)\
 
112
        (((value) * (ib_int64_t) index->stat_n_leaf_pages               \
 
113
          + (sample) - 1 + (ext_size) + (not_empty)) / ((sample) + (ext_size)))
 
114
 
103
115
/* @} */
104
116
#endif /* !UNIV_HOTBACKUP */
105
117
 
174
186
ulint
175
187
btr_rec_get_externally_stored_len(
176
188
/*==============================*/
177
 
        rec_t*          rec,    /*!< in: record */
 
189
        const rec_t*    rec,    /*!< in: record */
178
190
        const ulint*    offsets);/*!< in: array returned by rec_get_offsets() */
179
191
#endif /* !UNIV_HOTBACKUP */
180
192
 
226
238
        case BTR_SEARCH_LEAF:
227
239
        case BTR_MODIFY_LEAF:
228
240
                mode = latch_mode == BTR_SEARCH_LEAF ? RW_S_LATCH : RW_X_LATCH;
229
 
                get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
 
241
                get_block = btr_block_get(
 
242
                        space, zip_size, page_no, mode, cursor->index, mtr);
230
243
#ifdef UNIV_BTR_DEBUG
231
244
                ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
232
245
#endif /* UNIV_BTR_DEBUG */
237
250
                left_page_no = btr_page_get_prev(page, mtr);
238
251
 
239
252
                if (left_page_no != FIL_NULL) {
240
 
                        get_block = btr_block_get(space, zip_size,
241
 
                                                  left_page_no,
242
 
                                                  RW_X_LATCH, mtr);
 
253
                        get_block = btr_block_get(
 
254
                                space, zip_size, left_page_no,
 
255
                                RW_X_LATCH, cursor->index, mtr);
243
256
#ifdef UNIV_BTR_DEBUG
244
257
                        ut_a(page_is_comp(get_block->frame)
245
258
                             == page_is_comp(page));
249
262
                        get_block->check_index_page_at_flush = TRUE;
250
263
                }
251
264
 
252
 
                get_block = btr_block_get(space, zip_size, page_no,
253
 
                                          RW_X_LATCH, mtr);
 
265
                get_block = btr_block_get(
 
266
                        space, zip_size, page_no,
 
267
                        RW_X_LATCH, cursor->index, mtr);
254
268
#ifdef UNIV_BTR_DEBUG
255
269
                ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
256
270
#endif /* UNIV_BTR_DEBUG */
259
273
                right_page_no = btr_page_get_next(page, mtr);
260
274
 
261
275
                if (right_page_no != FIL_NULL) {
262
 
                        get_block = btr_block_get(space, zip_size,
263
 
                                                  right_page_no,
264
 
                                                  RW_X_LATCH, mtr);
 
276
                        get_block = btr_block_get(
 
277
                                space, zip_size, right_page_no,
 
278
                                RW_X_LATCH, cursor->index, mtr);
265
279
#ifdef UNIV_BTR_DEBUG
266
280
                        ut_a(page_is_comp(get_block->frame)
267
281
                             == page_is_comp(page));
280
294
                left_page_no = btr_page_get_prev(page, mtr);
281
295
 
282
296
                if (left_page_no != FIL_NULL) {
283
 
                        get_block = btr_block_get(space, zip_size,
284
 
                                                  left_page_no, mode, mtr);
 
297
                        get_block = btr_block_get(
 
298
                                space, zip_size,
 
299
                                left_page_no, mode, cursor->index, mtr);
285
300
                        cursor->left_block = get_block;
286
301
#ifdef UNIV_BTR_DEBUG
287
302
                        ut_a(page_is_comp(get_block->frame)
292
307
                        get_block->check_index_page_at_flush = TRUE;
293
308
                }
294
309
 
295
 
                get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
 
310
                get_block = btr_block_get(
 
311
                        space, zip_size, page_no, mode, cursor->index, mtr);
296
312
#ifdef UNIV_BTR_DEBUG
297
313
                ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
298
314
#endif /* UNIV_BTR_DEBUG */
560
576
                        ut_a(!page_zip || page_zip_validate(page_zip, page));
561
577
#endif /* UNIV_ZIP_DEBUG */
562
578
 
563
 
                        buf_block_dbg_add_level(block, SYNC_TREE_NODE);
 
579
                        buf_block_dbg_add_level(
 
580
                                block, dict_index_is_ibuf(index)
 
581
                                ? SYNC_IBUF_TREE_NODE : SYNC_TREE_NODE);
564
582
                }
565
583
 
566
584
                ut_ad(0 == ut_dulint_cmp(index->id,
618
636
 
619
637
                        if (level > 0) {
620
638
                                /* x-latch the page */
621
 
                                page = btr_page_get(space, zip_size,
622
 
                                                    page_no, RW_X_LATCH, mtr);
 
639
                                page = btr_page_get(space, zip_size, page_no,
 
640
                                                    RW_X_LATCH, index, mtr);
623
641
                                ut_a((ibool)!!page_is_comp(page)
624
642
                                     == dict_table_is_comp(index->table));
625
643
                        }
953
971
                                not zero, the parameters index and thr
954
972
                                should be specified */
955
973
        btr_cur_t*      cursor, /*!< in: cursor on page after which to insert */
956
 
        const dtuple_t* entry,  /*!< in: entry to insert */
 
974
        dtuple_t*       entry,  /*!< in/out: entry to insert */
957
975
        que_thr_t*      thr,    /*!< in: query thread or NULL */
958
976
        mtr_t*          mtr,    /*!< in/out: mini-transaction */
959
977
        ibool*          inherit)/*!< out: TRUE if the inserted new record maybe
1626
1644
See if there is enough place in the page modification log to log
1627
1645
an update-in-place.
1628
1646
@return TRUE if enough place */
1629
 
static
 
1647
UNIV_INTERN
1630
1648
ibool
1631
1649
btr_cur_update_alloc_zip(
1632
1650
/*=====================*/
1709
1727
        roll_ptr_t      roll_ptr        = ut_dulint_zero;
1710
1728
        trx_t*          trx;
1711
1729
        ulint           was_delete_marked;
 
1730
        ibool           is_hashed;
1712
1731
        mem_heap_t*     heap            = NULL;
1713
1732
        ulint           offsets_[REC_OFFS_NORMAL_SIZE];
1714
1733
        ulint*          offsets         = offsets_;
1750
1769
                return(err);
1751
1770
        }
1752
1771
 
1753
 
        if (block->is_hashed) {
 
1772
        if (!(flags & BTR_KEEP_SYS_FLAG)) {
 
1773
                row_upd_rec_sys_fields(rec, NULL,
 
1774
                                       index, offsets, trx, roll_ptr);
 
1775
        }
 
1776
 
 
1777
        was_delete_marked = rec_get_deleted_flag(
 
1778
                rec, page_is_comp(buf_block_get_frame(block)));
 
1779
 
 
1780
        is_hashed = (block->index != NULL);
 
1781
 
 
1782
        if (is_hashed) {
 
1783
                /* TO DO: Can we skip this if none of the fields
 
1784
                index->search_info->curr_n_fields
 
1785
                are being updated? */
 
1786
 
1754
1787
                /* The function row_upd_changes_ord_field_binary works only
1755
1788
                if the update vector was built for a clustered index, we must
1756
1789
                NOT call it if index is secondary */
1757
1790
 
1758
1791
                if (!dict_index_is_clust(index)
1759
 
                    || row_upd_changes_ord_field_binary(NULL, index, update)) {
 
1792
                    || row_upd_changes_ord_field_binary(index, update, thr,
 
1793
                                                        NULL, NULL)) {
1760
1794
 
1761
1795
                        /* Remove possible hash index pointer to this record */
1762
1796
                        btr_search_update_hash_on_delete(cursor);
1765
1799
                rw_lock_x_lock(&btr_search_latch);
1766
1800
        }
1767
1801
 
1768
 
        if (!(flags & BTR_KEEP_SYS_FLAG)) {
1769
 
                row_upd_rec_sys_fields(rec, NULL,
1770
 
                                       index, offsets, trx, roll_ptr);
1771
 
        }
1772
 
 
1773
 
        was_delete_marked = rec_get_deleted_flag(
1774
 
                rec, page_is_comp(buf_block_get_frame(block)));
1775
 
 
1776
1802
        row_upd_rec_in_place(rec, index, offsets, update, page_zip);
1777
1803
 
1778
 
        if (block->is_hashed) {
 
1804
        if (is_hashed) {
1779
1805
                rw_lock_x_unlock(&btr_search_latch);
1780
1806
        }
1781
1807
 
1836
1862
        page_t*         page;
1837
1863
        page_zip_des_t* page_zip;
1838
1864
        rec_t*          rec;
1839
 
        rec_t*          orig_rec;
1840
1865
        ulint           max_size;
1841
1866
        ulint           new_rec_size;
1842
1867
        ulint           old_rec_size;
1843
1868
        dtuple_t*       new_entry;
1844
1869
        roll_ptr_t      roll_ptr;
1845
 
        trx_t*          trx;
1846
1870
        mem_heap_t*     heap;
1847
1871
        ulint           i;
1848
1872
        ulint           n_ext;
1850
1874
 
1851
1875
        block = btr_cur_get_block(cursor);
1852
1876
        page = buf_block_get_frame(block);
1853
 
        orig_rec = rec = btr_cur_get_rec(cursor);
 
1877
        rec = btr_cur_get_rec(cursor);
1854
1878
        index = cursor->index;
1855
1879
        ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
1856
1880
        ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
1859
1883
 
1860
1884
        heap = mem_heap_create(1024);
1861
1885
        offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
 
1886
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 
1887
        ut_a(!rec_offs_any_null_extern(rec, offsets)
 
1888
             || trx_is_recv(thr_get_trx(thr)));
 
1889
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
1862
1890
 
1863
1891
#ifdef UNIV_DEBUG
1864
1892
        if (btr_cur_print_record_ops && thr) {
1981
2009
 
1982
2010
        page_cur_move_to_prev(page_cursor);
1983
2011
 
1984
 
        trx = thr_get_trx(thr);
1985
 
 
1986
2012
        if (!(flags & BTR_KEEP_SYS_FLAG)) {
1987
2013
                row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
1988
2014
                                              roll_ptr);
1989
2015
                row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
1990
 
                                              trx->id);
 
2016
                                              thr_get_trx(thr)->id);
1991
2017
        }
1992
2018
 
1993
2019
        /* There are no externally stored columns in new_entry */
2474
2500
 
2475
2501
                /* We do not need to reserve btr_search_latch, as the page
2476
2502
                is only being recovered, and there cannot be a hash index to
2477
 
                it. */
 
2503
                it. Besides, these fields are being updated in place
 
2504
                and the adaptive hash index does not depend on them. */
2478
2505
 
2479
2506
                btr_rec_set_deleted_flag(rec, page_zip, val);
2480
2507
 
2509
2536
btr_cur_del_mark_set_clust_rec(
2510
2537
/*===========================*/
2511
2538
        ulint           flags,  /*!< in: undo logging and locking flags */
2512
 
        btr_cur_t*      cursor, /*!< in: cursor */
 
2539
        buf_block_t*    block,  /*!< in/out: buffer block of the record */
 
2540
        rec_t*          rec,    /*!< in/out: record */
 
2541
        dict_index_t*   index,  /*!< in: clustered index of the record */
 
2542
        const ulint*    offsets,/*!< in: rec_get_offsets(rec) */
2513
2543
        ibool           val,    /*!< in: value to set */
2514
2544
        que_thr_t*      thr,    /*!< in: query thread */
2515
2545
        mtr_t*          mtr)    /*!< in: mtr */
2516
2546
{
2517
 
        dict_index_t*   index;
2518
 
        buf_block_t*    block;
2519
2547
        roll_ptr_t      roll_ptr;
2520
2548
        ulint           err;
2521
 
        rec_t*          rec;
2522
2549
        page_zip_des_t* page_zip;
2523
2550
        trx_t*          trx;
2524
 
        mem_heap_t*     heap            = NULL;
2525
 
        ulint           offsets_[REC_OFFS_NORMAL_SIZE];
2526
 
        ulint*          offsets         = offsets_;
2527
 
        rec_offs_init(offsets_);
2528
2551
 
2529
 
        rec = btr_cur_get_rec(cursor);
2530
 
        index = cursor->index;
 
2552
        ut_ad(dict_index_is_clust(index));
 
2553
        ut_ad(rec_offs_validate(rec, index, offsets));
2531
2554
        ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
2532
 
        offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
 
2555
        ut_ad(buf_block_get_frame(block) == page_align(rec));
 
2556
        ut_ad(page_is_leaf(page_align(rec)));
2533
2557
 
2534
2558
#ifdef UNIV_DEBUG
2535
2559
        if (btr_cur_print_record_ops && thr) {
2541
2565
        ut_ad(dict_index_is_clust(index));
2542
2566
        ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
2543
2567
 
2544
 
        err = lock_clust_rec_modify_check_and_lock(flags,
2545
 
                                                   btr_cur_get_block(cursor),
 
2568
        err = lock_clust_rec_modify_check_and_lock(flags, block,
2546
2569
                                                   rec, index, offsets, thr);
2547
2570
 
2548
2571
        if (err != DB_SUCCESS) {
2549
2572
 
2550
 
                goto func_exit;
 
2573
                return(err);
2551
2574
        }
2552
2575
 
2553
2576
        err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
2555
2578
                                            &roll_ptr);
2556
2579
        if (err != DB_SUCCESS) {
2557
2580
 
2558
 
                goto func_exit;
2559
 
        }
2560
 
 
2561
 
        block = btr_cur_get_block(cursor);
2562
 
 
2563
 
        if (block->is_hashed) {
2564
 
                rw_lock_x_lock(&btr_search_latch);
2565
 
        }
 
2581
                return(err);
 
2582
        }
 
2583
 
 
2584
        /* The btr_search_latch is not needed here, because
 
2585
        the adaptive hash index does not depend on the delete-mark
 
2586
        and the delete-mark is being updated in place. */
2566
2587
 
2567
2588
        page_zip = buf_block_get_page_zip(block);
2568
2589
 
 
2590
        btr_blob_dbg_set_deleted_flag(rec, index, offsets, val);
2569
2591
        btr_rec_set_deleted_flag(rec, page_zip, val);
2570
2592
 
2571
2593
        trx = thr_get_trx(thr);
2575
2597
                                       index, offsets, trx, roll_ptr);
2576
2598
        }
2577
2599
 
2578
 
        if (block->is_hashed) {
2579
 
                rw_lock_x_unlock(&btr_search_latch);
2580
 
        }
2581
 
 
2582
2600
        btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx,
2583
2601
                                           roll_ptr, mtr);
2584
2602
 
2585
 
func_exit:
2586
 
        if (UNIV_LIKELY_NULL(heap)) {
2587
 
                mem_heap_free(heap);
2588
 
        }
2589
2603
        return(err);
2590
2604
}
2591
2605
 
2658
2672
 
2659
2673
                /* We do not need to reserve btr_search_latch, as the page
2660
2674
                is only being recovered, and there cannot be a hash index to
2661
 
                it. */
 
2675
                it. Besides, the delete-mark flag is being updated in place
 
2676
                and the adaptive hash index does not depend on it. */
2662
2677
 
2663
2678
                btr_rec_set_deleted_flag(rec, page_zip, val);
2664
2679
        }
2706
2721
        ut_ad(!!page_rec_is_comp(rec)
2707
2722
              == dict_table_is_comp(cursor->index->table));
2708
2723
 
2709
 
        if (block->is_hashed) {
2710
 
                rw_lock_x_lock(&btr_search_latch);
2711
 
        }
2712
 
 
 
2724
        /* We do not need to reserve btr_search_latch, as the
 
2725
        delete-mark flag is being updated in place and the adaptive
 
2726
        hash index does not depend on it. */
2713
2727
        btr_rec_set_deleted_flag(rec, buf_block_get_page_zip(block), val);
2714
2728
 
2715
 
        if (block->is_hashed) {
2716
 
                rw_lock_x_unlock(&btr_search_latch);
2717
 
        }
2718
 
 
2719
2729
        btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
2720
2730
 
2721
2731
        return(DB_SUCCESS);
2735
2745
                                        uncompressed */
2736
2746
        mtr_t*          mtr)            /*!< in: mtr */
2737
2747
{
2738
 
        /* We do not need to reserve btr_search_latch, as the page has just
2739
 
        been read to the buffer pool and there cannot be a hash index to it. */
 
2748
        /* We do not need to reserve btr_search_latch, as the page
 
2749
        has just been read to the buffer pool and there cannot be
 
2750
        a hash index to it.  Besides, the delete-mark flag is being
 
2751
        updated in place and the adaptive hash index does not depend
 
2752
        on it. */
2740
2753
 
2741
2754
        btr_rec_set_deleted_flag(rec, page_zip, FALSE);
2742
2755
 
3211
3224
}
3212
3225
 
3213
3226
/*******************************************************************//**
 
3227
Record the number of non_null key values in a given index for
 
3228
each n-column prefix of the index where n < dict_index_get_n_unique(index).
 
3229
The estimates are eventually stored in the array:
 
3230
index->stat_n_non_null_key_vals. */
 
3231
static
 
3232
void
 
3233
btr_record_not_null_field_in_rec(
 
3234
/*=============================*/
 
3235
        ulint           n_unique,       /*!< in: dict_index_get_n_unique(index),
 
3236
                                        number of columns uniquely determine
 
3237
                                        an index entry */
 
3238
        const ulint*    offsets,        /*!< in: rec_get_offsets(rec, index),
 
3239
                                        its size could be for all fields or
 
3240
                                        that of "n_unique" */
 
3241
        ib_int64_t*     n_not_null)     /*!< in/out: array to record number of
 
3242
                                        not null rows for n-column prefix */
 
3243
{
 
3244
        ulint   i;
 
3245
 
 
3246
        ut_ad(rec_offs_n_fields(offsets) >= n_unique);
 
3247
 
 
3248
        if (n_not_null == NULL) {
 
3249
                return;
 
3250
        }
 
3251
 
 
3252
        for (i = 0; i < n_unique; i++) {
 
3253
                if (rec_offs_nth_sql_null(offsets, i)) {
 
3254
                        break;
 
3255
                }
 
3256
 
 
3257
                n_not_null[i]++;
 
3258
        }
 
3259
}
 
3260
 
 
3261
/*******************************************************************//**
3214
3262
Estimates the number of different key values in a given index, for
3215
3263
each n-column prefix of the index where n <= dict_index_get_n_unique(index).
3216
 
The estimates are stored in the array index->stat_n_diff_key_vals. */
 
3264
The estimates are stored in the array index->stat_n_diff_key_vals.
 
3265
If innodb_stats_method is "nulls_ignored", we also record the number of
 
3266
non-null values for each prefix and store the estimates in
 
3267
array index->stat_n_non_null_key_vals. */
3217
3268
UNIV_INTERN
3218
3269
void
3219
3270
btr_estimate_number_of_different_key_vals(
3227
3278
        ulint           matched_fields;
3228
3279
        ulint           matched_bytes;
3229
3280
        ib_int64_t*     n_diff;
 
3281
        ib_int64_t*     n_not_null;
 
3282
        ibool           stats_null_not_equal;
3230
3283
        ullint          n_sample_pages; /* number of pages to sample */
3231
3284
        ulint           not_empty_flag  = 0;
3232
3285
        ulint           total_external_size = 0;
3235
3288
        ullint          add_on;
3236
3289
        mtr_t           mtr;
3237
3290
        mem_heap_t*     heap            = NULL;
3238
 
        ulint           offsets_rec_[REC_OFFS_NORMAL_SIZE];
3239
 
        ulint           offsets_next_rec_[REC_OFFS_NORMAL_SIZE];
3240
 
        ulint*          offsets_rec     = offsets_rec_;
3241
 
        ulint*          offsets_next_rec= offsets_next_rec_;
3242
 
        rec_offs_init(offsets_rec_);
3243
 
        rec_offs_init(offsets_next_rec_);
 
3291
        ulint*          offsets_rec     = NULL;
 
3292
        ulint*          offsets_next_rec = NULL;
3244
3293
 
3245
3294
        n_cols = dict_index_get_n_unique(index);
3246
3295
 
3247
 
        n_diff = mem_zalloc((n_cols + 1) * sizeof(ib_int64_t));
 
3296
        heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null)
 
3297
                               * (n_cols + 1)
 
3298
                               + dict_index_get_n_fields(index)
 
3299
                               * (sizeof *offsets_rec
 
3300
                                  + sizeof *offsets_next_rec));
 
3301
 
 
3302
        n_diff = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t));
 
3303
 
 
3304
        n_not_null = NULL;
 
3305
 
 
3306
        /* Check srv_innodb_stats_method setting, and decide whether we
 
3307
        need to record non-null value and also decide if NULL is
 
3308
        considered equal (by setting stats_null_not_equal value) */
 
3309
        switch (srv_innodb_stats_method) {
 
3310
        case SRV_STATS_NULLS_IGNORED:
 
3311
                n_not_null = mem_heap_zalloc(heap, (n_cols + 1)
 
3312
                                             * sizeof *n_not_null);
 
3313
                /* fall through */
 
3314
 
 
3315
        case SRV_STATS_NULLS_UNEQUAL:
 
3316
                /* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL
 
3317
                case, we will treat NULLs as unequal value */
 
3318
                stats_null_not_equal = TRUE;
 
3319
                break;
 
3320
 
 
3321
        case SRV_STATS_NULLS_EQUAL:
 
3322
                stats_null_not_equal = FALSE;
 
3323
                break;
 
3324
 
 
3325
        default:
 
3326
                ut_error;
 
3327
        }
3248
3328
 
3249
3329
        /* It makes no sense to test more pages than are contained
3250
3330
        in the index, thus we lower the number if it is too high */
3261
3341
        /* We sample some pages in the index to get an estimate */
3262
3342
 
3263
3343
        for (i = 0; i < n_sample_pages; i++) {
3264
 
                rec_t*  supremum;
3265
3344
                mtr_start(&mtr);
3266
3345
 
3267
3346
                btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);
3274
3353
 
3275
3354
                page = btr_cur_get_page(&cursor);
3276
3355
 
3277
 
                supremum = page_get_supremum_rec(page);
3278
3356
                rec = page_rec_get_next(page_get_infimum_rec(page));
3279
3357
 
3280
 
                if (rec != supremum) {
 
3358
                if (!page_rec_is_supremum(rec)) {
3281
3359
                        not_empty_flag = 1;
3282
3360
                        offsets_rec = rec_get_offsets(rec, index, offsets_rec,
3283
3361
                                                      ULINT_UNDEFINED, &heap);
 
3362
 
 
3363
                        if (n_not_null) {
 
3364
                                btr_record_not_null_field_in_rec(
 
3365
                                        n_cols, offsets_rec, n_not_null);
 
3366
                        }
3284
3367
                }
3285
3368
 
3286
 
                while (rec != supremum) {
 
3369
                while (!page_rec_is_supremum(rec)) {
3287
3370
                        rec_t*  next_rec = page_rec_get_next(rec);
3288
 
                        if (next_rec == supremum) {
 
3371
                        if (page_rec_is_supremum(next_rec)) {
 
3372
                                total_external_size +=
 
3373
                                        btr_rec_get_externally_stored_len(
 
3374
                                                rec, offsets_rec);
3289
3375
                                break;
3290
3376
                        }
3291
3377
 
3293
3379
                        matched_bytes = 0;
3294
3380
                        offsets_next_rec = rec_get_offsets(next_rec, index,
3295
3381
                                                           offsets_next_rec,
3296
 
                                                           n_cols, &heap);
 
3382
                                                           ULINT_UNDEFINED,
 
3383
                                                           &heap);
3297
3384
 
3298
3385
                        cmp_rec_rec_with_match(rec, next_rec,
3299
3386
                                               offsets_rec, offsets_next_rec,
3300
 
                                               index, &matched_fields,
 
3387
                                               index, stats_null_not_equal,
 
3388
                                               &matched_fields,
3301
3389
                                               &matched_bytes);
3302
3390
 
3303
3391
                        for (j = matched_fields + 1; j <= n_cols; j++) {
3307
3395
                                n_diff[j]++;
3308
3396
                        }
3309
3397
 
 
3398
                        if (n_not_null) {
 
3399
                                btr_record_not_null_field_in_rec(
 
3400
                                        n_cols, offsets_next_rec, n_not_null);
 
3401
                        }
 
3402
 
3310
3403
                        total_external_size
3311
3404
                                += btr_rec_get_externally_stored_len(
3312
3405
                                        rec, offsets_rec);
3341
3434
                        }
3342
3435
                }
3343
3436
 
3344
 
                offsets_rec = rec_get_offsets(rec, index, offsets_rec,
3345
 
                                              ULINT_UNDEFINED, &heap);
3346
 
                total_external_size += btr_rec_get_externally_stored_len(
3347
 
                        rec, offsets_rec);
3348
3437
                mtr_commit(&mtr);
3349
3438
        }
3350
3439
 
3356
3445
        also the pages used for external storage of fields (those pages are
3357
3446
        included in index->stat_n_leaf_pages) */
3358
3447
 
3359
 
        dict_index_stat_mutex_enter(index);
3360
 
 
3361
3448
        for (j = 0; j <= n_cols; j++) {
3362
3449
                index->stat_n_diff_key_vals[j]
3363
 
                        = ((n_diff[j]
3364
 
                            * (ib_int64_t)index->stat_n_leaf_pages
3365
 
                            + n_sample_pages - 1
3366
 
                            + total_external_size
3367
 
                            + not_empty_flag)
3368
 
                           / (n_sample_pages
3369
 
                              + total_external_size));
 
3450
                        = BTR_TABLE_STATS_FROM_SAMPLE(
 
3451
                                n_diff[j], index, n_sample_pages,
 
3452
                                total_external_size, not_empty_flag); 
3370
3453
 
3371
3454
                /* If the tree is small, smaller than
3372
3455
                10 * n_sample_pages + total_external_size, then
3385
3468
                }
3386
3469
 
3387
3470
                index->stat_n_diff_key_vals[j] += add_on;
3388
 
        }
3389
 
 
3390
 
        dict_index_stat_mutex_exit(index);
3391
 
 
3392
 
        mem_free(n_diff);
3393
 
        if (UNIV_LIKELY_NULL(heap)) {
3394
 
                mem_heap_free(heap);
3395
 
        }
 
3471
 
 
3472
                /* Update the stat_n_non_null_key_vals[] with our
 
3473
                sampled result. stat_n_non_null_key_vals[] is created
 
3474
                and initialized to zero in dict_index_add_to_cache(),
 
3475
                along with stat_n_diff_key_vals[] array */
 
3476
                if (n_not_null != NULL && (j < n_cols)) {
 
3477
                        index->stat_n_non_null_key_vals[j] =
 
3478
                                 BTR_TABLE_STATS_FROM_SAMPLE(
 
3479
                                        n_not_null[j], index, n_sample_pages,
 
3480
                                        total_external_size, not_empty_flag);
 
3481
                }
 
3482
        }
 
3483
 
 
3484
        mem_heap_free(heap);
3396
3485
}
3397
3486
 
3398
3487
/*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/
3399
3488
 
3400
3489
/***********************************************************//**
 
3490
Gets the offset of the pointer to the externally stored part of a field.
 
3491
@return offset of the pointer to the externally stored part */
 
3492
static
 
3493
ulint
 
3494
btr_rec_get_field_ref_offs(
 
3495
/*=======================*/
 
3496
        const ulint*    offsets,/*!< in: array returned by rec_get_offsets() */
 
3497
        ulint           n)      /*!< in: index of the external field */
 
3498
{
 
3499
        ulint   field_ref_offs;
 
3500
        ulint   local_len;
 
3501
 
 
3502
        ut_a(rec_offs_nth_extern(offsets, n));
 
3503
        field_ref_offs = rec_get_nth_field_offs(offsets, n, &local_len);
 
3504
        ut_a(local_len != UNIV_SQL_NULL);
 
3505
        ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
 
3506
 
 
3507
        return(field_ref_offs + local_len - BTR_EXTERN_FIELD_REF_SIZE);
 
3508
}
 
3509
 
 
3510
/** Gets a pointer to the externally stored part of a field.
 
3511
@param rec      record
 
3512
@param offsets  rec_get_offsets(rec)
 
3513
@param n        index of the externally stored field
 
3514
@return pointer to the externally stored part */
 
3515
#define btr_rec_get_field_ref(rec, offsets, n)                  \
 
3516
        ((rec) + btr_rec_get_field_ref_offs(offsets, n))
 
3517
 
 
3518
/***********************************************************//**
3401
3519
Gets the externally stored size of a record, in units of a database page.
3402
3520
@return externally stored part, in units of a database page */
3403
3521
static
3404
3522
ulint
3405
3523
btr_rec_get_externally_stored_len(
3406
3524
/*==============================*/
3407
 
        rec_t*          rec,    /*!< in: record */
 
3525
        const rec_t*    rec,    /*!< in: record */
3408
3526
        const ulint*    offsets)/*!< in: array returned by rec_get_offsets() */
3409
3527
{
3410
3528
        ulint   n_fields;
3411
 
        byte*   data;
3412
 
        ulint   local_len;
3413
 
        ulint   extern_len;
3414
3529
        ulint   total_extern_len = 0;
3415
3530
        ulint   i;
3416
3531
 
3417
3532
        ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
 
3533
 
 
3534
        if (!rec_offs_any_extern(offsets)) {
 
3535
                return(0);
 
3536
        }
 
3537
 
3418
3538
        n_fields = rec_offs_n_fields(offsets);
3419
3539
 
3420
3540
        for (i = 0; i < n_fields; i++) {
3421
3541
                if (rec_offs_nth_extern(offsets, i)) {
3422
3542
 
3423
 
                        data = rec_get_nth_field(rec, offsets, i, &local_len);
3424
 
 
3425
 
                        local_len -= BTR_EXTERN_FIELD_REF_SIZE;
3426
 
 
3427
 
                        extern_len = mach_read_from_4(data + local_len
3428
 
                                                      + BTR_EXTERN_LEN + 4);
 
3543
                        ulint   extern_len = mach_read_from_4(
 
3544
                                btr_rec_get_field_ref(rec, offsets, i)
 
3545
                                + BTR_EXTERN_LEN + 4);
3429
3546
 
3430
3547
                        total_extern_len += ut_calc_align(extern_len,
3431
3548
                                                          UNIV_PAGE_SIZE);
3455
3572
        ulint   byte_val;
3456
3573
 
3457
3574
        data = rec_get_nth_field(rec, offsets, i, &local_len);
3458
 
 
 
3575
        ut_ad(rec_offs_nth_extern(offsets, i));
3459
3576
        ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
3460
3577
 
3461
3578
        local_len -= BTR_EXTERN_FIELD_REF_SIZE;
3465
3582
        if (val) {
3466
3583
                byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG);
3467
3584
        } else {
 
3585
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 
3586
                ut_a(!(byte_val & BTR_EXTERN_OWNER_FLAG));
 
3587
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
3468
3588
                byte_val = byte_val | BTR_EXTERN_OWNER_FLAG;
3469
3589
        }
3470
3590
 
3478
3598
        } else {
3479
3599
                mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
3480
3600
        }
 
3601
 
 
3602
        btr_blob_dbg_owner(rec, index, offsets, i, val);
3481
3603
}
3482
3604
 
3483
3605
/*******************************************************************//**
3484
 
Marks not updated extern fields as not-owned by this record. The ownership
3485
 
is transferred to the updated record which is inserted elsewhere in the
 
3606
Marks non-updated off-page fields as disowned by this record. The ownership
 
3607
must be transferred to the updated record which is inserted elsewhere in the
3486
3608
index tree. In purge only the owner of an externally stored field is allowed
3487
3609
to free the field. */
3488
3610
UNIV_INTERN
3489
3611
void
3490
 
btr_cur_mark_extern_inherited_fields(
3491
 
/*=================================*/
 
3612
btr_cur_disown_inherited_fields(
 
3613
/*============================*/
3492
3614
        page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
3493
3615
                                part will be updated, or NULL */
3494
3616
        rec_t*          rec,    /*!< in/out: record in a clustered index */
3495
3617
        dict_index_t*   index,  /*!< in: index of the page */
3496
3618
        const ulint*    offsets,/*!< in: array returned by rec_get_offsets() */
3497
3619
        const upd_t*    update, /*!< in: update vector */
3498
 
        mtr_t*          mtr)    /*!< in: mtr, or NULL if not logged */
 
3620
        mtr_t*          mtr)    /*!< in/out: mini-transaction */
3499
3621
{
3500
 
        ulint   n;
3501
 
        ulint   j;
3502
3622
        ulint   i;
3503
3623
 
3504
 
        ut_ad(rec_offs_validate(rec, NULL, offsets));
 
3624
        ut_ad(rec_offs_validate(rec, index, offsets));
3505
3625
        ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
3506
 
 
3507
 
        if (!rec_offs_any_extern(offsets)) {
3508
 
 
3509
 
                return;
3510
 
        }
3511
 
 
3512
 
        n = rec_offs_n_fields(offsets);
3513
 
 
3514
 
        for (i = 0; i < n; i++) {
3515
 
                if (rec_offs_nth_extern(offsets, i)) {
3516
 
 
3517
 
                        /* Check it is not in updated fields */
3518
 
 
3519
 
                        if (update) {
3520
 
                                for (j = 0; j < upd_get_n_fields(update);
3521
 
                                     j++) {
3522
 
                                        if (upd_get_nth_field(update, j)
3523
 
                                            ->field_no == i) {
3524
 
 
3525
 
                                                goto updated;
3526
 
                                        }
3527
 
                                }
3528
 
                        }
3529
 
 
 
3626
        ut_ad(rec_offs_any_extern(offsets));
 
3627
        ut_ad(mtr);
 
3628
 
 
3629
        for (i = 0; i < rec_offs_n_fields(offsets); i++) {
 
3630
                if (rec_offs_nth_extern(offsets, i)
 
3631
                    && !upd_get_field_by_field_no(update, i)) {
3530
3632
                        btr_cur_set_ownership_of_extern_field(
3531
3633
                                page_zip, rec, index, offsets, i, FALSE, mtr);
3532
 
updated:
3533
 
                        ;
3534
 
                }
3535
 
        }
3536
 
}
3537
 
 
3538
 
/*******************************************************************//**
3539
 
The complement of the previous function: in an update entry may inherit
3540
 
some externally stored fields from a record. We must mark them as inherited
3541
 
in entry, so that they are not freed in a rollback. */
3542
 
UNIV_INTERN
3543
 
void
3544
 
btr_cur_mark_dtuple_inherited_extern(
3545
 
/*=================================*/
3546
 
        dtuple_t*       entry,          /*!< in/out: updated entry to be
3547
 
                                        inserted to clustered index */
3548
 
        const upd_t*    update)         /*!< in: update vector */
3549
 
{
3550
 
        ulint           i;
3551
 
 
3552
 
        for (i = 0; i < dtuple_get_n_fields(entry); i++) {
3553
 
 
3554
 
                dfield_t*       dfield = dtuple_get_nth_field(entry, i);
3555
 
                byte*           data;
3556
 
                ulint           len;
3557
 
                ulint           j;
3558
 
 
3559
 
                if (!dfield_is_ext(dfield)) {
3560
 
                        continue;
3561
 
                }
3562
 
 
3563
 
                /* Check if it is in updated fields */
3564
 
 
3565
 
                for (j = 0; j < upd_get_n_fields(update); j++) {
3566
 
                        if (upd_get_nth_field(update, j)->field_no == i) {
3567
 
 
3568
 
                                goto is_updated;
3569
 
                        }
3570
 
                }
3571
 
 
3572
 
                data = dfield_get_data(dfield);
3573
 
                len = dfield_get_len(dfield);
3574
 
                data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
3575
 
                        |= BTR_EXTERN_INHERITED_FLAG;
3576
 
 
3577
 
is_updated:
3578
 
                ;
 
3634
                }
3579
3635
        }
3580
3636
}
3581
3637
 
3615
3671
}
3616
3672
 
3617
3673
/*******************************************************************//**
3618
 
Marks all extern fields in a dtuple as owned by the record. */
3619
 
UNIV_INTERN
3620
 
void
3621
 
btr_cur_unmark_dtuple_extern_fields(
3622
 
/*================================*/
3623
 
        dtuple_t*       entry)          /*!< in/out: clustered index entry */
3624
 
{
3625
 
        ulint   i;
3626
 
 
3627
 
        for (i = 0; i < dtuple_get_n_fields(entry); i++) {
3628
 
                dfield_t* dfield = dtuple_get_nth_field(entry, i);
3629
 
 
3630
 
                if (dfield_is_ext(dfield)) {
3631
 
                        byte*   data = dfield_get_data(dfield);
3632
 
                        ulint   len = dfield_get_len(dfield);
3633
 
 
3634
 
                        data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
3635
 
                                &= ~BTR_EXTERN_OWNER_FLAG;
3636
 
                }
3637
 
        }
3638
 
}
3639
 
 
3640
 
/*******************************************************************//**
3641
3674
Flags the data tuple fields that are marked as extern storage in the
3642
3675
update vector.  We use this function to remember which fields we must
3643
3676
mark as extern storage in a record inserted for an update.
3770
3803
            && buf_block_get_space(block) == space
3771
3804
            && buf_block_get_page_no(block) == page_no) {
3772
3805
 
3773
 
                if (buf_LRU_free_block(&block->page, all, NULL)
3774
 
                    != BUF_LRU_FREED
 
3806
                if (!buf_LRU_free_block(&block->page, all)
3775
3807
                    && all && block->page.zip.data) {
3776
3808
                        /* Attempt to deallocate the uncompressed page
3777
3809
                        if the whole block cannot be deallocated. */
3778
3810
 
3779
 
                        buf_LRU_free_block(&block->page, FALSE, NULL);
 
3811
                        buf_LRU_free_block(&block->page, FALSE);
3780
3812
                }
3781
3813
        }
3782
3814
 
3789
3821
them in rec.  The extern flags in rec will have to be set beforehand.
3790
3822
The fields are stored on pages allocated from leaf node
3791
3823
file segment of the index tree.
3792
 
@return DB_SUCCESS or error */
 
3824
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
3793
3825
UNIV_INTERN
3794
3826
ulint
3795
 
btr_store_big_rec_extern_fields(
3796
 
/*============================*/
 
3827
btr_store_big_rec_extern_fields_func(
 
3828
/*=================================*/
3797
3829
        dict_index_t*   index,          /*!< in: index of rec; the index tree
3798
3830
                                        MUST be X-latched */
3799
3831
        buf_block_t*    rec_block,      /*!< in/out: block containing rec */
3802
3834
                                        the "external storage" flags in offsets
3803
3835
                                        will not correspond to rec when
3804
3836
                                        this function returns */
3805
 
        big_rec_t*      big_rec_vec,    /*!< in: vector containing fields
 
3837
#ifdef UNIV_DEBUG
 
3838
        mtr_t*          local_mtr,      /*!< in: mtr containing the
 
3839
                                        latch to rec and to the tree */
 
3840
#endif /* UNIV_DEBUG */
 
3841
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 
3842
        ibool           update_in_place,/*!< in: TRUE if the record is updated
 
3843
                                        in place (not delete+insert) */
 
3844
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
 
3845
        const big_rec_t*big_rec_vec)    /*!< in: vector containing fields
3806
3846
                                        to be stored externally */
3807
 
        mtr_t*          local_mtr __attribute__((unused))) /*!< in: mtr
3808
 
                                        containing the latch to rec and to the
3809
 
                                        tree */
 
3847
 
3810
3848
{
3811
3849
        ulint   rec_page_no;
3812
3850
        byte*   field_ref;
3824
3862
        z_stream c_stream;
3825
3863
 
3826
3864
        ut_ad(rec_offs_validate(rec, index, offsets));
 
3865
        ut_ad(rec_offs_any_extern(offsets));
3827
3866
        ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
3828
3867
                                MTR_MEMO_X_LOCK));
3829
3868
        ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
3855
3894
                ut_a(err == Z_OK);
3856
3895
        }
3857
3896
 
 
3897
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 
3898
        /* All pointers to externally stored columns in the record
 
3899
        must either be zero or they must be pointers to inherited
 
3900
        columns, owned by this record or an earlier record version. */
 
3901
        for (i = 0; i < rec_offs_n_fields(offsets); i++) {
 
3902
                if (!rec_offs_nth_extern(offsets, i)) {
 
3903
                        continue;
 
3904
                }
 
3905
                field_ref = btr_rec_get_field_ref(rec, offsets, i);
 
3906
 
 
3907
                ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG));
 
3908
                /* Either this must be an update in place,
 
3909
                or the BLOB must be inherited, or the BLOB pointer
 
3910
                must be zero (will be written in this function). */
 
3911
                ut_a(update_in_place
 
3912
                     || (field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_INHERITED_FLAG)
 
3913
                     || !memcmp(field_ref, field_ref_zero,
 
3914
                                BTR_EXTERN_FIELD_REF_SIZE));
 
3915
        }
 
3916
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
3858
3917
        /* We have to create a file segment to the tablespace
3859
3918
        for each field and put the pointer to the field in rec */
3860
3919
 
3861
3920
        for (i = 0; i < big_rec_vec->n_fields; i++) {
3862
 
                ut_ad(rec_offs_nth_extern(offsets,
3863
 
                                          big_rec_vec->fields[i].field_no));
3864
 
                {
3865
 
                        ulint   local_len;
3866
 
                        field_ref = rec_get_nth_field(
3867
 
                                rec, offsets, big_rec_vec->fields[i].field_no,
3868
 
                                &local_len);
3869
 
                        ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
3870
 
                        local_len -= BTR_EXTERN_FIELD_REF_SIZE;
3871
 
                        field_ref += local_len;
3872
 
                }
 
3921
                field_ref = btr_rec_get_field_ref(
 
3922
                        rec, offsets, big_rec_vec->fields[i].field_no);
 
3923
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 
3924
                /* A zero BLOB pointer should have been initially inserted. */
 
3925
                ut_a(!memcmp(field_ref, field_ref_zero,
 
3926
                             BTR_EXTERN_FIELD_REF_SIZE));
 
3927
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
3873
3928
                extern_len = big_rec_vec->fields[i].len;
3874
3929
                UNIV_MEM_ASSERT_RW(big_rec_vec->fields[i].data,
3875
3930
                                   extern_len);
4044
4099
                                }
4045
4100
 
4046
4101
                                if (prev_page_no == FIL_NULL) {
 
4102
                                        btr_blob_dbg_add_blob(
 
4103
                                                rec, big_rec_vec->fields[i]
 
4104
                                                .field_no, page_no, index,
 
4105
                                                "store");
 
4106
 
4047
4107
                                        mach_write_to_4(field_ref
4048
4108
                                                        + BTR_EXTERN_SPACE_ID,
4049
4109
                                                        space_id);
4119
4179
                                                 MLOG_4BYTES, &mtr);
4120
4180
 
4121
4181
                                if (prev_page_no == FIL_NULL) {
 
4182
                                        btr_blob_dbg_add_blob(
 
4183
                                                rec, big_rec_vec->fields[i]
 
4184
                                                .field_no, page_no, index,
 
4185
                                                "store");
 
4186
 
4122
4187
                                        mlog_write_ulint(field_ref
4123
4188
                                                         + BTR_EXTERN_SPACE_ID,
4124
4189
                                                         space_id,
4151
4216
                mem_heap_free(heap);
4152
4217
        }
4153
4218
 
 
4219
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 
4220
        /* All pointers to externally stored columns in the record
 
4221
        must be valid. */
 
4222
        for (i = 0; i < rec_offs_n_fields(offsets); i++) {
 
4223
                if (!rec_offs_nth_extern(offsets, i)) {
 
4224
                        continue;
 
4225
                }
 
4226
 
 
4227
                field_ref = btr_rec_get_field_ref(rec, offsets, i);
 
4228
 
 
4229
                /* The pointer must not be zero. */
 
4230
                ut_a(0 != memcmp(field_ref, field_ref_zero,
 
4231
                                 BTR_EXTERN_FIELD_REF_SIZE));
 
4232
                /* The column must not be disowned by this record. */
 
4233
                ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG));
 
4234
        }
 
4235
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
4154
4236
        return(DB_SUCCESS);
4155
4237
}
4156
4238
 
4173
4255
        if (UNIV_UNLIKELY(type != FIL_PAGE_TYPE_BLOB)) {
4174
4256
                ulint   flags = fil_space_get_flags(space_id);
4175
4257
 
 
4258
#ifndef UNIV_DEBUG /* Improve debug test coverage */
4176
4259
                if (UNIV_LIKELY
4177
4260
                    ((flags & DICT_TF_FORMAT_MASK) == DICT_TF_FORMAT_51)) {
4178
4261
                        /* Old versions of InnoDB did not initialize
4181
4264
                        a BLOB page that is in Antelope format.*/
4182
4265
                        return;
4183
4266
                }
 
4267
#endif /* !UNIV_DEBUG */
4184
4268
 
4185
4269
                ut_print_timestamp(stderr);
4186
4270
                fprintf(stderr,
4230
4314
        ulint           page_no;
4231
4315
        ulint           next_page_no;
4232
4316
        mtr_t           mtr;
4233
 
#ifdef UNIV_DEBUG
 
4317
 
4234
4318
        ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
4235
4319
                                MTR_MEMO_X_LOCK));
4236
4320
        ut_ad(mtr_memo_contains_page(local_mtr, field_ref,
4237
4321
                                     MTR_MEMO_PAGE_X_FIX));
4238
4322
        ut_ad(!rec || rec_offs_validate(rec, index, offsets));
4239
 
 
4240
 
        if (rec) {
4241
 
                ulint   local_len;
4242
 
                const byte*     f = rec_get_nth_field(rec, offsets,
4243
 
                                                      i, &local_len);
4244
 
                ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
4245
 
                local_len -= BTR_EXTERN_FIELD_REF_SIZE;
4246
 
                f += local_len;
4247
 
                ut_ad(f == field_ref);
4248
 
        }
4249
 
#endif /* UNIV_DEBUG */
 
4323
        ut_ad(!rec || field_ref == btr_rec_get_field_ref(rec, offsets, i));
4250
4324
 
4251
4325
        if (UNIV_UNLIKELY(!memcmp(field_ref, field_ref_zero,
4252
4326
                                  BTR_EXTERN_FIELD_REF_SIZE))) {
4278
4352
                rec_zip_size = 0;
4279
4353
        }
4280
4354
 
 
4355
#ifdef UNIV_BLOB_DEBUG
 
4356
        if (!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG)
 
4357
            && !((field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_INHERITED_FLAG)
 
4358
                 && (rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY))) {
 
4359
                /* This off-page column will be freed.
 
4360
                Check that no references remain. */
 
4361
 
 
4362
                btr_blob_dbg_t  b;
 
4363
 
 
4364
                b.blob_page_no = mach_read_from_4(
 
4365
                        field_ref + BTR_EXTERN_PAGE_NO);
 
4366
 
 
4367
                if (rec) {
 
4368
                        /* Remove the reference from the record to the
 
4369
                        BLOB. If the BLOB were not freed, the
 
4370
                        reference would be removed when the record is
 
4371
                        removed. Freeing the BLOB will overwrite the
 
4372
                        BTR_EXTERN_PAGE_NO in the field_ref of the
 
4373
                        record with FIL_NULL, which would make the
 
4374
                        btr_blob_dbg information inconsistent with the
 
4375
                        record. */
 
4376
                        b.ref_page_no = page_get_page_no(page_align(rec));
 
4377
                        b.ref_heap_no = page_rec_get_heap_no(rec);
 
4378
                        b.ref_field_no = i;
 
4379
                        btr_blob_dbg_rbt_delete(index, &b, "free");
 
4380
                }
 
4381
 
 
4382
                btr_blob_dbg_assert_empty(index, b.blob_page_no);
 
4383
        }
 
4384
#endif /* UNIV_BLOB_DEBUG */
 
4385
 
4281
4386
        for (;;) {
 
4387
#ifdef UNIV_SYNC_DEBUG
4282
4388
                buf_block_t*    rec_block;
 
4389
#endif /* UNIV_SYNC_DEBUG */
4283
4390
                buf_block_t*    ext_block;
4284
4391
 
4285
4392
                mtr_start(&mtr);
4286
4393
 
4287
 
                rec_block = buf_page_get(page_get_space_id(
 
4394
#ifdef UNIV_SYNC_DEBUG
 
4395
                rec_block =
 
4396
#endif /* UNIV_SYNC_DEBUG */
 
4397
                        buf_page_get(page_get_space_id(
4288
4398
                                                 page_align(field_ref)),
4289
4399
                                         rec_zip_size,
4290
4400
                                         page_get_page_no(
4406
4516
 
4407
4517
        for (i = 0; i < n_fields; i++) {
4408
4518
                if (rec_offs_nth_extern(offsets, i)) {
4409
 
                        ulint   len;
4410
 
                        byte*   data
4411
 
                                = rec_get_nth_field(rec, offsets, i, &len);
4412
 
                        ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
4413
 
 
4414
4519
                        btr_free_externally_stored_field(
4415
 
                                index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
 
4520
                                index, btr_rec_get_field_ref(rec, offsets, i),
4416
4521
                                rec, offsets, page_zip, i, rb_ctx, mtr);
4417
4522
                }
4418
4523
        }
4524
4629
 
4525
4630
/*******************************************************************//**
4526
4631
Copies the prefix of a compressed BLOB.  The clustered index record
4527
 
that points to this BLOB must be protected by a lock or a page latch. */
 
4632
that points to this BLOB must be protected by a lock or a page latch.
 
4633
@return number of bytes written to buf */
4528
4634
static
4529
 
void
 
4635
ulint
4530
4636
btr_copy_zblob_prefix(
4531
4637
/*==================*/
4532
 
        z_stream*       d_stream,/*!< in/out: the decompressing stream */
 
4638
        byte*           buf,    /*!< out: the externally stored part of
 
4639
                                the field, or a prefix of it */
 
4640
        ulint           len,    /*!< in: length of buf, in bytes */
4533
4641
        ulint           zip_size,/*!< in: compressed BLOB page size */
4534
4642
        ulint           space_id,/*!< in: space id of the BLOB pages */
4535
4643
        ulint           page_no,/*!< in: page number of the first BLOB page */
4536
4644
        ulint           offset) /*!< in: offset on the first BLOB page */
4537
4645
{
4538
 
        ulint   page_type = FIL_PAGE_TYPE_ZBLOB;
 
4646
        ulint           page_type = FIL_PAGE_TYPE_ZBLOB;
 
4647
        mem_heap_t*     heap;
 
4648
        int             err;
 
4649
        z_stream        d_stream;
 
4650
 
 
4651
        d_stream.next_out = buf;
 
4652
        d_stream.avail_out = len;
 
4653
        d_stream.next_in = Z_NULL;
 
4654
        d_stream.avail_in = 0;
 
4655
 
 
4656
        /* Zlib inflate needs 32 kilobytes for the default
 
4657
        window size, plus a few kilobytes for small objects. */
 
4658
        heap = mem_heap_create(40000);
 
4659
        page_zip_set_alloc(&d_stream, heap);
4539
4660
 
4540
4661
        ut_ad(ut_is_2pow(zip_size));
4541
4662
        ut_ad(zip_size >= PAGE_ZIP_MIN_SIZE);
4542
4663
        ut_ad(zip_size <= UNIV_PAGE_SIZE);
4543
4664
        ut_ad(space_id);
4544
4665
 
 
4666
        err = inflateInit(&d_stream);
 
4667
        ut_a(err == Z_OK);
 
4668
 
4545
4669
        for (;;) {
4546
4670
                buf_page_t*     bpage;
4547
 
                int             err;
4548
4671
                ulint           next_page_no;
4549
4672
 
4550
4673
                /* There is no latch on bpage directly.  Instead,
4560
4683
                                " compressed BLOB"
4561
4684
                                " page %lu space %lu\n",
4562
4685
                                (ulong) page_no, (ulong) space_id);
4563
 
                        return;
 
4686
                        goto func_exit;
4564
4687
                }
4565
4688
 
4566
4689
                if (UNIV_UNLIKELY
4586
4709
                        offset += 4;
4587
4710
                }
4588
4711
 
4589
 
                d_stream->next_in = bpage->zip.data + offset;
4590
 
                d_stream->avail_in = zip_size - offset;
 
4712
                d_stream.next_in = bpage->zip.data + offset;
 
4713
                d_stream.avail_in = zip_size - offset;
4591
4714
 
4592
 
                err = inflate(d_stream, Z_NO_FLUSH);
 
4715
                err = inflate(&d_stream, Z_NO_FLUSH);
4593
4716
                switch (err) {
4594
4717
                case Z_OK:
4595
 
                        if (!d_stream->avail_out) {
 
4718
                        if (!d_stream.avail_out) {
4596
4719
                                goto end_of_blob;
4597
4720
                        }
4598
4721
                        break;
4609
4732
                                " compressed BLOB"
4610
4733
                                " page %lu space %lu returned %d (%s)\n",
4611
4734
                                (ulong) page_no, (ulong) space_id,
4612
 
                                err, d_stream->msg);
 
4735
                                err, d_stream.msg);
4613
4736
                case Z_BUF_ERROR:
4614
4737
                        goto end_of_blob;
4615
4738
                }
4616
4739
 
4617
4740
                if (next_page_no == FIL_NULL) {
4618
 
                        if (!d_stream->avail_in) {
 
4741
                        if (!d_stream.avail_in) {
4619
4742
                                ut_print_timestamp(stderr);
4620
4743
                                fprintf(stderr,
4621
4744
                                        "  InnoDB: unexpected end of"
4624
4747
                                        (ulong) page_no,
4625
4748
                                        (ulong) space_id);
4626
4749
                        } else {
4627
 
                                err = inflate(d_stream, Z_FINISH);
 
4750
                                err = inflate(&d_stream, Z_FINISH);
4628
4751
                                switch (err) {
4629
4752
                                case Z_STREAM_END:
4630
4753
                                case Z_BUF_ERROR:
4636
4759
 
4637
4760
end_of_blob:
4638
4761
                        buf_page_release_zip(bpage);
4639
 
                        return;
 
4762
                        goto func_exit;
4640
4763
                }
4641
4764
 
4642
4765
                buf_page_release_zip(bpage);
4648
4771
                offset = FIL_PAGE_NEXT;
4649
4772
                page_type = FIL_PAGE_TYPE_ZBLOB2;
4650
4773
        }
 
4774
 
 
4775
func_exit:
 
4776
        inflateEnd(&d_stream);
 
4777
        mem_heap_free(heap);
 
4778
        UNIV_MEM_ASSERT_RW(buf, d_stream.total_out);
 
4779
        return(d_stream.total_out);
4651
4780
}
4652
4781
 
4653
4782
/*******************************************************************//**
4673
4802
        }
4674
4803
 
4675
4804
        if (UNIV_UNLIKELY(zip_size)) {
4676
 
                int             err;
4677
 
                z_stream        d_stream;
4678
 
                mem_heap_t*     heap;
4679
 
 
4680
 
                /* Zlib inflate needs 32 kilobytes for the default
4681
 
                window size, plus a few kilobytes for small objects. */
4682
 
                heap = mem_heap_create(40000);
4683
 
                page_zip_set_alloc(&d_stream, heap);
4684
 
 
4685
 
                err = inflateInit(&d_stream);
4686
 
                ut_a(err == Z_OK);
4687
 
 
4688
 
                d_stream.next_out = buf;
4689
 
                d_stream.avail_out = len;
4690
 
                d_stream.avail_in = 0;
4691
 
 
4692
 
                btr_copy_zblob_prefix(&d_stream, zip_size,
4693
 
                                      space_id, page_no, offset);
4694
 
                inflateEnd(&d_stream);
4695
 
                mem_heap_free(heap);
4696
 
                UNIV_MEM_ASSERT_RW(buf, d_stream.total_out);
4697
 
                return(d_stream.total_out);
 
4805
                return(btr_copy_zblob_prefix(buf, len, zip_size,
 
4806
                                             space_id, page_no, offset));
4698
4807
        } else {
4699
4808
                return(btr_copy_blob_prefix(buf, len, space_id,
4700
4809
                                            page_no, offset));
4814
4923
 
4815
4924
/*******************************************************************//**
4816
4925
Copies an externally stored field of a record to mem heap.
4817
 
@return the field copied to heap */
 
4926
@return the field copied to heap, or NULL if the field is incomplete */
4818
4927
UNIV_INTERN
4819
4928
byte*
4820
4929
btr_rec_copy_externally_stored_field(
4844
4953
 
4845
4954
        data = rec_get_nth_field(rec, offsets, no, &local_len);
4846
4955
 
 
4956
        ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
 
4957
 
 
4958
        if (UNIV_UNLIKELY
 
4959
            (!memcmp(data + local_len - BTR_EXTERN_FIELD_REF_SIZE,
 
4960
                     field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) {
 
4961
                /* The externally stored field was not written yet.
 
4962
                This record should only be seen by
 
4963
                recv_recovery_rollback_active() or any
 
4964
                TRX_ISO_READ_UNCOMMITTED transactions. */
 
4965
                return(NULL);
 
4966
        }
 
4967
 
4847
4968
        return(btr_copy_externally_stored_field(len, data,
4848
4969
                                                zip_size, local_len, heap));
4849
4970
}