~ubuntu-branches/ubuntu/lucid/mysql-dfsg-5.1/lucid-security

« back to all changes in this revision

Viewing changes to storage/innodb_plugin/btr/btr0cur.c

  • Committer: Package Import Robot
  • Author(s): Marc Deslauriers
  • Date: 2012-02-22 22:33:55 UTC
  • mto: (1.2.1) (37.1.1 lucid-security)
  • mto: This revision was merged to the branch mainline in revision 36.
  • Revision ID: package-import@ubuntu.com-20120222223355-ku1tb4r70osci6v2
Tags: upstream-5.1.61
Import upstream version 5.1.61

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
/*****************************************************************************
2
2
 
3
 
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
 
3
Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
4
4
Copyright (c) 2008, Google Inc.
5
5
 
6
6
Portions of this file contain modifications contributed and copyrighted by
100
100
/*--------------------------------------*/
101
101
#define BTR_BLOB_HDR_SIZE               8       /*!< Size of a BLOB
102
102
                                                part header, in bytes */
 
103
 
 
104
/** Estimated table level stats from sampled value.
 
105
@param value            sampled stats
 
106
@param index            index being sampled
 
107
@param sample           number of sampled rows
 
108
@param ext_size         external stored data size
 
109
@param not_empty        table not empty
 
110
@return estimated table wide stats from sampled value */
 
111
#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty)\
 
112
        (((value) * (ib_int64_t) index->stat_n_leaf_pages               \
 
113
          + (sample) - 1 + (ext_size) + (not_empty)) / ((sample) + (ext_size)))
 
114
 
103
115
/* @} */
104
116
#endif /* !UNIV_HOTBACKUP */
105
117
 
174
186
ulint
175
187
btr_rec_get_externally_stored_len(
176
188
/*==============================*/
177
 
        rec_t*          rec,    /*!< in: record */
 
189
        const rec_t*    rec,    /*!< in: record */
178
190
        const ulint*    offsets);/*!< in: array returned by rec_get_offsets() */
179
191
#endif /* !UNIV_HOTBACKUP */
180
192
 
226
238
        case BTR_SEARCH_LEAF:
227
239
        case BTR_MODIFY_LEAF:
228
240
                mode = latch_mode == BTR_SEARCH_LEAF ? RW_S_LATCH : RW_X_LATCH;
229
 
                get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
 
241
                get_block = btr_block_get(
 
242
                        space, zip_size, page_no, mode, cursor->index, mtr);
230
243
#ifdef UNIV_BTR_DEBUG
231
244
                ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
232
245
#endif /* UNIV_BTR_DEBUG */
237
250
                left_page_no = btr_page_get_prev(page, mtr);
238
251
 
239
252
                if (left_page_no != FIL_NULL) {
240
 
                        get_block = btr_block_get(space, zip_size,
241
 
                                                  left_page_no,
242
 
                                                  RW_X_LATCH, mtr);
 
253
                        get_block = btr_block_get(
 
254
                                space, zip_size, left_page_no,
 
255
                                RW_X_LATCH, cursor->index, mtr);
243
256
#ifdef UNIV_BTR_DEBUG
244
257
                        ut_a(page_is_comp(get_block->frame)
245
258
                             == page_is_comp(page));
249
262
                        get_block->check_index_page_at_flush = TRUE;
250
263
                }
251
264
 
252
 
                get_block = btr_block_get(space, zip_size, page_no,
253
 
                                          RW_X_LATCH, mtr);
 
265
                get_block = btr_block_get(
 
266
                        space, zip_size, page_no,
 
267
                        RW_X_LATCH, cursor->index, mtr);
254
268
#ifdef UNIV_BTR_DEBUG
255
269
                ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
256
270
#endif /* UNIV_BTR_DEBUG */
259
273
                right_page_no = btr_page_get_next(page, mtr);
260
274
 
261
275
                if (right_page_no != FIL_NULL) {
262
 
                        get_block = btr_block_get(space, zip_size,
263
 
                                                  right_page_no,
264
 
                                                  RW_X_LATCH, mtr);
 
276
                        get_block = btr_block_get(
 
277
                                space, zip_size, right_page_no,
 
278
                                RW_X_LATCH, cursor->index, mtr);
265
279
#ifdef UNIV_BTR_DEBUG
266
280
                        ut_a(page_is_comp(get_block->frame)
267
281
                             == page_is_comp(page));
280
294
                left_page_no = btr_page_get_prev(page, mtr);
281
295
 
282
296
                if (left_page_no != FIL_NULL) {
283
 
                        get_block = btr_block_get(space, zip_size,
284
 
                                                  left_page_no, mode, mtr);
 
297
                        get_block = btr_block_get(
 
298
                                space, zip_size,
 
299
                                left_page_no, mode, cursor->index, mtr);
285
300
                        cursor->left_block = get_block;
286
301
#ifdef UNIV_BTR_DEBUG
287
302
                        ut_a(page_is_comp(get_block->frame)
292
307
                        get_block->check_index_page_at_flush = TRUE;
293
308
                }
294
309
 
295
 
                get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
 
310
                get_block = btr_block_get(
 
311
                        space, zip_size, page_no, mode, cursor->index, mtr);
296
312
#ifdef UNIV_BTR_DEBUG
297
313
                ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
298
314
#endif /* UNIV_BTR_DEBUG */
342
358
        ulint           has_search_latch,/*!< in: info on the latch mode the
343
359
                                caller currently has on btr_search_latch:
344
360
                                RW_S_LATCH, or 0 */
 
361
        const char*     file,   /*!< in: file name */
 
362
        ulint           line,   /*!< in: line where called */
345
363
        mtr_t*          mtr)    /*!< in: mtr */
346
364
{
347
365
        page_cur_t*     page_cursor;
520
538
retry_page_get:
521
539
                block = buf_page_get_gen(space, zip_size, page_no,
522
540
                                         rw_latch, guess, buf_mode,
523
 
                                         __FILE__, __LINE__, mtr);
 
541
                                         file, line, mtr);
524
542
                if (block == NULL) {
525
543
                        /* This must be a search to perform an insert;
526
544
                        try insert to the insert buffer */
558
576
                        ut_a(!page_zip || page_zip_validate(page_zip, page));
559
577
#endif /* UNIV_ZIP_DEBUG */
560
578
 
561
 
                        buf_block_dbg_add_level(block, SYNC_TREE_NODE);
 
579
                        buf_block_dbg_add_level(
 
580
                                block, dict_index_is_ibuf(index)
 
581
                                ? SYNC_IBUF_TREE_NODE : SYNC_TREE_NODE);
562
582
                }
563
583
 
564
584
                ut_ad(0 == ut_dulint_cmp(index->id,
616
636
 
617
637
                        if (level > 0) {
618
638
                                /* x-latch the page */
619
 
                                page = btr_page_get(space, zip_size,
620
 
                                                    page_no, RW_X_LATCH, mtr);
 
639
                                page = btr_page_get(space, zip_size, page_no,
 
640
                                                    RW_X_LATCH, index, mtr);
621
641
                                ut_a((ibool)!!page_is_comp(page)
622
642
                                     == dict_table_is_comp(index->table));
623
643
                        }
677
697
Opens a cursor at either end of an index. */
678
698
UNIV_INTERN
679
699
void
680
 
btr_cur_open_at_index_side(
681
 
/*=======================*/
 
700
btr_cur_open_at_index_side_func(
 
701
/*============================*/
682
702
        ibool           from_left,      /*!< in: TRUE if open to the low end,
683
703
                                        FALSE if to the high end */
684
704
        dict_index_t*   index,          /*!< in: index */
685
705
        ulint           latch_mode,     /*!< in: latch mode */
686
706
        btr_cur_t*      cursor,         /*!< in: cursor */
 
707
        const char*     file,           /*!< in: file name */
 
708
        ulint           line,           /*!< in: line where called */
687
709
        mtr_t*          mtr)            /*!< in: mtr */
688
710
{
689
711
        page_cur_t*     page_cursor;
728
750
                page_t*         page;
729
751
                block = buf_page_get_gen(space, zip_size, page_no,
730
752
                                         RW_NO_LATCH, NULL, BUF_GET,
731
 
                                         __FILE__, __LINE__, mtr);
 
753
                                         file, line, mtr);
732
754
                page = buf_block_get_frame(block);
733
755
                ut_ad(0 == ut_dulint_cmp(index->id,
734
756
                                         btr_page_get_index_id(page)));
808
830
Positions a cursor at a randomly chosen position within a B-tree. */
809
831
UNIV_INTERN
810
832
void
811
 
btr_cur_open_at_rnd_pos(
812
 
/*====================*/
 
833
btr_cur_open_at_rnd_pos_func(
 
834
/*=========================*/
813
835
        dict_index_t*   index,          /*!< in: index */
814
836
        ulint           latch_mode,     /*!< in: BTR_SEARCH_LEAF, ... */
815
837
        btr_cur_t*      cursor,         /*!< in/out: B-tree cursor */
 
838
        const char*     file,           /*!< in: file name */
 
839
        ulint           line,           /*!< in: line where called */
816
840
        mtr_t*          mtr)            /*!< in: mtr */
817
841
{
818
842
        page_cur_t*     page_cursor;
847
871
 
848
872
                block = buf_page_get_gen(space, zip_size, page_no,
849
873
                                         RW_NO_LATCH, NULL, BUF_GET,
850
 
                                         __FILE__, __LINE__, mtr);
 
874
                                         file, line, mtr);
851
875
                page = buf_block_get_frame(block);
852
876
                ut_ad(0 == ut_dulint_cmp(index->id,
853
877
                                         btr_page_get_index_id(page)));
947
971
                                not zero, the parameters index and thr
948
972
                                should be specified */
949
973
        btr_cur_t*      cursor, /*!< in: cursor on page after which to insert */
950
 
        const dtuple_t* entry,  /*!< in: entry to insert */
 
974
        dtuple_t*       entry,  /*!< in/out: entry to insert */
951
975
        que_thr_t*      thr,    /*!< in: query thread or NULL */
952
976
        mtr_t*          mtr,    /*!< in/out: mini-transaction */
953
977
        ibool*          inherit)/*!< out: TRUE if the inserted new record maybe
1058
1082
        ibool           inherit;
1059
1083
        ulint           zip_size;
1060
1084
        ulint           rec_size;
1061
 
        mem_heap_t*     heap            = NULL;
1062
1085
        ulint           err;
1063
1086
 
1064
1087
        *big_rec = NULL;
1138
1161
                                        index, entry, big_rec_vec);
1139
1162
                        }
1140
1163
 
1141
 
                        if (heap) {
1142
 
                                mem_heap_free(heap);
1143
 
                        }
1144
 
 
1145
1164
                        return(DB_TOO_BIG_RECORD);
1146
1165
                }
1147
1166
        }
1164
1183
                        dtuple_convert_back_big_rec(index, entry, big_rec_vec);
1165
1184
                }
1166
1185
 
1167
 
                if (UNIV_LIKELY_NULL(heap)) {
1168
 
                        mem_heap_free(heap);
1169
 
                }
1170
 
 
1171
1186
                return(err);
1172
1187
        }
1173
1188
 
1174
1189
        if (UNIV_UNLIKELY(max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT
1175
 
             || max_size < rec_size)
 
1190
                          || max_size < rec_size)
1176
1191
            && UNIV_LIKELY(page_get_n_recs(page) > 1)
1177
1192
            && page_get_max_insert_size(page, 1) < rec_size) {
1178
1193
 
1238
1253
                }
1239
1254
        }
1240
1255
 
1241
 
        if (UNIV_LIKELY_NULL(heap)) {
1242
 
                mem_heap_free(heap);
1243
 
        }
1244
 
 
1245
1256
#ifdef BTR_CUR_HASH_ADAPT
1246
1257
        if (!reorg && leaf && (cursor->flag == BTR_CUR_HASH)) {
1247
1258
                btr_search_update_hash_node_on_insert(cursor);
1633
1644
See if there is enough place in the page modification log to log
1634
1645
an update-in-place.
1635
1646
@return TRUE if enough place */
1636
 
static
 
1647
UNIV_INTERN
1637
1648
ibool
1638
1649
btr_cur_update_alloc_zip(
1639
1650
/*=====================*/
1716
1727
        roll_ptr_t      roll_ptr        = ut_dulint_zero;
1717
1728
        trx_t*          trx;
1718
1729
        ulint           was_delete_marked;
 
1730
        ibool           is_hashed;
1719
1731
        mem_heap_t*     heap            = NULL;
1720
1732
        ulint           offsets_[REC_OFFS_NORMAL_SIZE];
1721
1733
        ulint*          offsets         = offsets_;
1757
1769
                return(err);
1758
1770
        }
1759
1771
 
1760
 
        if (block->is_hashed) {
 
1772
        if (!(flags & BTR_KEEP_SYS_FLAG)) {
 
1773
                row_upd_rec_sys_fields(rec, NULL,
 
1774
                                       index, offsets, trx, roll_ptr);
 
1775
        }
 
1776
 
 
1777
        was_delete_marked = rec_get_deleted_flag(
 
1778
                rec, page_is_comp(buf_block_get_frame(block)));
 
1779
 
 
1780
        is_hashed = (block->index != NULL);
 
1781
 
 
1782
        if (is_hashed) {
 
1783
                /* TO DO: Can we skip this if none of the fields
 
1784
                index->search_info->curr_n_fields
 
1785
                are being updated? */
 
1786
 
1761
1787
                /* The function row_upd_changes_ord_field_binary works only
1762
1788
                if the update vector was built for a clustered index, we must
1763
1789
                NOT call it if index is secondary */
1764
1790
 
1765
1791
                if (!dict_index_is_clust(index)
1766
 
                    || row_upd_changes_ord_field_binary(NULL, index, update)) {
 
1792
                    || row_upd_changes_ord_field_binary(index, update, thr,
 
1793
                                                        NULL, NULL)) {
1767
1794
 
1768
1795
                        /* Remove possible hash index pointer to this record */
1769
1796
                        btr_search_update_hash_on_delete(cursor);
1772
1799
                rw_lock_x_lock(&btr_search_latch);
1773
1800
        }
1774
1801
 
1775
 
        if (!(flags & BTR_KEEP_SYS_FLAG)) {
1776
 
                row_upd_rec_sys_fields(rec, NULL,
1777
 
                                       index, offsets, trx, roll_ptr);
1778
 
        }
1779
 
 
1780
 
        was_delete_marked = rec_get_deleted_flag(
1781
 
                rec, page_is_comp(buf_block_get_frame(block)));
1782
 
 
1783
1802
        row_upd_rec_in_place(rec, index, offsets, update, page_zip);
1784
1803
 
1785
 
        if (block->is_hashed) {
 
1804
        if (is_hashed) {
1786
1805
                rw_lock_x_unlock(&btr_search_latch);
1787
1806
        }
1788
1807
 
1843
1862
        page_t*         page;
1844
1863
        page_zip_des_t* page_zip;
1845
1864
        rec_t*          rec;
1846
 
        rec_t*          orig_rec;
1847
1865
        ulint           max_size;
1848
1866
        ulint           new_rec_size;
1849
1867
        ulint           old_rec_size;
1850
1868
        dtuple_t*       new_entry;
1851
1869
        roll_ptr_t      roll_ptr;
1852
 
        trx_t*          trx;
1853
1870
        mem_heap_t*     heap;
1854
1871
        ulint           i;
1855
1872
        ulint           n_ext;
1857
1874
 
1858
1875
        block = btr_cur_get_block(cursor);
1859
1876
        page = buf_block_get_frame(block);
1860
 
        orig_rec = rec = btr_cur_get_rec(cursor);
 
1877
        rec = btr_cur_get_rec(cursor);
1861
1878
        index = cursor->index;
1862
1879
        ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
1863
1880
        ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
1866
1883
 
1867
1884
        heap = mem_heap_create(1024);
1868
1885
        offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
 
1886
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 
1887
        ut_a(!rec_offs_any_null_extern(rec, offsets)
 
1888
             || trx_is_recv(thr_get_trx(thr)));
 
1889
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
1869
1890
 
1870
1891
#ifdef UNIV_DEBUG
1871
1892
        if (btr_cur_print_record_ops && thr) {
1966
1987
        err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
1967
1988
                                        thr, mtr, &roll_ptr);
1968
1989
        if (err != DB_SUCCESS) {
1969
 
err_exit:
1970
 
                mem_heap_free(heap);
1971
 
                return(err);
 
1990
 
 
1991
                goto err_exit;
1972
1992
        }
1973
1993
 
1974
1994
        /* Ok, we may do the replacement. Store on the page infimum the
1989
2009
 
1990
2010
        page_cur_move_to_prev(page_cursor);
1991
2011
 
1992
 
        trx = thr_get_trx(thr);
1993
 
 
1994
2012
        if (!(flags & BTR_KEEP_SYS_FLAG)) {
1995
2013
                row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
1996
2014
                                              roll_ptr);
1997
2015
                row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
1998
 
                                              trx->id);
 
2016
                                              thr_get_trx(thr)->id);
1999
2017
        }
2000
2018
 
2001
2019
        /* There are no externally stored columns in new_entry */
2014
2032
 
2015
2033
        page_cur_move_to_next(page_cursor);
2016
2034
 
 
2035
        err = DB_SUCCESS;
 
2036
err_exit:
2017
2037
        mem_heap_free(heap);
2018
 
 
2019
 
        return(DB_SUCCESS);
 
2038
        return(err);
2020
2039
}
2021
2040
 
2022
2041
/*************************************************************//**
2481
2500
 
2482
2501
                /* We do not need to reserve btr_search_latch, as the page
2483
2502
                is only being recovered, and there cannot be a hash index to
2484
 
                it. */
 
2503
                it. Besides, these fields are being updated in place
 
2504
                and the adaptive hash index does not depend on them. */
2485
2505
 
2486
2506
                btr_rec_set_deleted_flag(rec, page_zip, val);
2487
2507
 
2516
2536
btr_cur_del_mark_set_clust_rec(
2517
2537
/*===========================*/
2518
2538
        ulint           flags,  /*!< in: undo logging and locking flags */
2519
 
        btr_cur_t*      cursor, /*!< in: cursor */
 
2539
        buf_block_t*    block,  /*!< in/out: buffer block of the record */
 
2540
        rec_t*          rec,    /*!< in/out: record */
 
2541
        dict_index_t*   index,  /*!< in: clustered index of the record */
 
2542
        const ulint*    offsets,/*!< in: rec_get_offsets(rec) */
2520
2543
        ibool           val,    /*!< in: value to set */
2521
2544
        que_thr_t*      thr,    /*!< in: query thread */
2522
2545
        mtr_t*          mtr)    /*!< in: mtr */
2523
2546
{
2524
 
        dict_index_t*   index;
2525
 
        buf_block_t*    block;
2526
2547
        roll_ptr_t      roll_ptr;
2527
2548
        ulint           err;
2528
 
        rec_t*          rec;
2529
2549
        page_zip_des_t* page_zip;
2530
2550
        trx_t*          trx;
2531
 
        mem_heap_t*     heap            = NULL;
2532
 
        ulint           offsets_[REC_OFFS_NORMAL_SIZE];
2533
 
        ulint*          offsets         = offsets_;
2534
 
        rec_offs_init(offsets_);
2535
2551
 
2536
 
        rec = btr_cur_get_rec(cursor);
2537
 
        index = cursor->index;
 
2552
        ut_ad(dict_index_is_clust(index));
 
2553
        ut_ad(rec_offs_validate(rec, index, offsets));
2538
2554
        ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
2539
 
        offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
 
2555
        ut_ad(buf_block_get_frame(block) == page_align(rec));
 
2556
        ut_ad(page_is_leaf(page_align(rec)));
2540
2557
 
2541
2558
#ifdef UNIV_DEBUG
2542
2559
        if (btr_cur_print_record_ops && thr) {
2548
2565
        ut_ad(dict_index_is_clust(index));
2549
2566
        ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
2550
2567
 
2551
 
        err = lock_clust_rec_modify_check_and_lock(flags,
2552
 
                                                   btr_cur_get_block(cursor),
 
2568
        err = lock_clust_rec_modify_check_and_lock(flags, block,
2553
2569
                                                   rec, index, offsets, thr);
2554
2570
 
2555
2571
        if (err != DB_SUCCESS) {
2556
2572
 
2557
 
                goto func_exit;
 
2573
                return(err);
2558
2574
        }
2559
2575
 
2560
2576
        err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
2562
2578
                                            &roll_ptr);
2563
2579
        if (err != DB_SUCCESS) {
2564
2580
 
2565
 
                goto func_exit;
2566
 
        }
2567
 
 
2568
 
        block = btr_cur_get_block(cursor);
2569
 
 
2570
 
        if (block->is_hashed) {
2571
 
                rw_lock_x_lock(&btr_search_latch);
2572
 
        }
 
2581
                return(err);
 
2582
        }
 
2583
 
 
2584
        /* The btr_search_latch is not needed here, because
 
2585
        the adaptive hash index does not depend on the delete-mark
 
2586
        and the delete-mark is being updated in place. */
2573
2587
 
2574
2588
        page_zip = buf_block_get_page_zip(block);
2575
2589
 
 
2590
        btr_blob_dbg_set_deleted_flag(rec, index, offsets, val);
2576
2591
        btr_rec_set_deleted_flag(rec, page_zip, val);
2577
2592
 
2578
2593
        trx = thr_get_trx(thr);
2582
2597
                                       index, offsets, trx, roll_ptr);
2583
2598
        }
2584
2599
 
2585
 
        if (block->is_hashed) {
2586
 
                rw_lock_x_unlock(&btr_search_latch);
2587
 
        }
2588
 
 
2589
2600
        btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx,
2590
2601
                                           roll_ptr, mtr);
2591
2602
 
2592
 
func_exit:
2593
 
        if (UNIV_LIKELY_NULL(heap)) {
2594
 
                mem_heap_free(heap);
2595
 
        }
2596
2603
        return(err);
2597
2604
}
2598
2605
 
2665
2672
 
2666
2673
                /* We do not need to reserve btr_search_latch, as the page
2667
2674
                is only being recovered, and there cannot be a hash index to
2668
 
                it. */
 
2675
                it. Besides, the delete-mark flag is being updated in place
 
2676
                and the adaptive hash index does not depend on it. */
2669
2677
 
2670
2678
                btr_rec_set_deleted_flag(rec, page_zip, val);
2671
2679
        }
2713
2721
        ut_ad(!!page_rec_is_comp(rec)
2714
2722
              == dict_table_is_comp(cursor->index->table));
2715
2723
 
2716
 
        if (block->is_hashed) {
2717
 
                rw_lock_x_lock(&btr_search_latch);
2718
 
        }
2719
 
 
 
2724
        /* We do not need to reserve btr_search_latch, as the
 
2725
        delete-mark flag is being updated in place and the adaptive
 
2726
        hash index does not depend on it. */
2720
2727
        btr_rec_set_deleted_flag(rec, buf_block_get_page_zip(block), val);
2721
2728
 
2722
 
        if (block->is_hashed) {
2723
 
                rw_lock_x_unlock(&btr_search_latch);
2724
 
        }
2725
 
 
2726
2729
        btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
2727
2730
 
2728
2731
        return(DB_SUCCESS);
2742
2745
                                        uncompressed */
2743
2746
        mtr_t*          mtr)            /*!< in: mtr */
2744
2747
{
2745
 
        /* We do not need to reserve btr_search_latch, as the page has just
2746
 
        been read to the buffer pool and there cannot be a hash index to it. */
 
2748
        /* We do not need to reserve btr_search_latch, as the page
 
2749
        has just been read to the buffer pool and there cannot be
 
2750
        a hash index to it.  Besides, the delete-mark flag is being
 
2751
        updated in place and the adaptive hash index does not depend
 
2752
        on it. */
2747
2753
 
2748
2754
        btr_rec_set_deleted_flag(rec, page_zip, FALSE);
2749
2755
 
3100
3106
 
3101
3107
                btr_cur_search_to_nth_level(index, 0, tuple1, mode1,
3102
3108
                                            BTR_SEARCH_LEAF | BTR_ESTIMATE,
3103
 
                                            &cursor, 0, &mtr);
 
3109
                                            &cursor, 0,
 
3110
                                            __FILE__, __LINE__, &mtr);
3104
3111
        } else {
3105
3112
                btr_cur_open_at_index_side(TRUE, index,
3106
3113
                                           BTR_SEARCH_LEAF | BTR_ESTIMATE,
3117
3124
 
3118
3125
                btr_cur_search_to_nth_level(index, 0, tuple2, mode2,
3119
3126
                                            BTR_SEARCH_LEAF | BTR_ESTIMATE,
3120
 
                                            &cursor, 0, &mtr);
 
3127
                                            &cursor, 0,
 
3128
                                            __FILE__, __LINE__, &mtr);
3121
3129
        } else {
3122
3130
                btr_cur_open_at_index_side(FALSE, index,
3123
3131
                                           BTR_SEARCH_LEAF | BTR_ESTIMATE,
3216
3224
}
3217
3225
 
3218
3226
/*******************************************************************//**
 
3227
Record the number of non_null key values in a given index for
 
3228
each n-column prefix of the index where n < dict_index_get_n_unique(index).
 
3229
The estimates are eventually stored in the array:
 
3230
index->stat_n_non_null_key_vals. */
 
3231
static
 
3232
void
 
3233
btr_record_not_null_field_in_rec(
 
3234
/*=============================*/
 
3235
        ulint           n_unique,       /*!< in: dict_index_get_n_unique(index),
 
3236
                                        number of columns uniquely determine
 
3237
                                        an index entry */
 
3238
        const ulint*    offsets,        /*!< in: rec_get_offsets(rec, index),
 
3239
                                        its size could be for all fields or
 
3240
                                        that of "n_unique" */
 
3241
        ib_int64_t*     n_not_null)     /*!< in/out: array to record number of
 
3242
                                        not null rows for n-column prefix */
 
3243
{
 
3244
        ulint   i;
 
3245
 
 
3246
        ut_ad(rec_offs_n_fields(offsets) >= n_unique);
 
3247
 
 
3248
        if (n_not_null == NULL) {
 
3249
                return;
 
3250
        }
 
3251
 
 
3252
        for (i = 0; i < n_unique; i++) {
 
3253
                if (rec_offs_nth_sql_null(offsets, i)) {
 
3254
                        break;
 
3255
                }
 
3256
 
 
3257
                n_not_null[i]++;
 
3258
        }
 
3259
}
 
3260
 
 
3261
/*******************************************************************//**
3219
3262
Estimates the number of different key values in a given index, for
3220
3263
each n-column prefix of the index where n <= dict_index_get_n_unique(index).
3221
 
The estimates are stored in the array index->stat_n_diff_key_vals. */
 
3264
The estimates are stored in the array index->stat_n_diff_key_vals.
 
3265
If innodb_stats_method is "nulls_ignored", we also record the number of
 
3266
non-null values for each prefix and store the estimates in
 
3267
array index->stat_n_non_null_key_vals. */
3222
3268
UNIV_INTERN
3223
3269
void
3224
3270
btr_estimate_number_of_different_key_vals(
3232
3278
        ulint           matched_fields;
3233
3279
        ulint           matched_bytes;
3234
3280
        ib_int64_t*     n_diff;
 
3281
        ib_int64_t*     n_not_null;
 
3282
        ibool           stats_null_not_equal;
3235
3283
        ullint          n_sample_pages; /* number of pages to sample */
3236
3284
        ulint           not_empty_flag  = 0;
3237
3285
        ulint           total_external_size = 0;
3240
3288
        ullint          add_on;
3241
3289
        mtr_t           mtr;
3242
3290
        mem_heap_t*     heap            = NULL;
3243
 
        ulint           offsets_rec_[REC_OFFS_NORMAL_SIZE];
3244
 
        ulint           offsets_next_rec_[REC_OFFS_NORMAL_SIZE];
3245
 
        ulint*          offsets_rec     = offsets_rec_;
3246
 
        ulint*          offsets_next_rec= offsets_next_rec_;
3247
 
        rec_offs_init(offsets_rec_);
3248
 
        rec_offs_init(offsets_next_rec_);
 
3291
        ulint*          offsets_rec     = NULL;
 
3292
        ulint*          offsets_next_rec = NULL;
3249
3293
 
3250
3294
        n_cols = dict_index_get_n_unique(index);
3251
3295
 
3252
 
        n_diff = mem_zalloc((n_cols + 1) * sizeof(ib_int64_t));
 
3296
        heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null)
 
3297
                               * (n_cols + 1)
 
3298
                               + dict_index_get_n_fields(index)
 
3299
                               * (sizeof *offsets_rec
 
3300
                                  + sizeof *offsets_next_rec));
 
3301
 
 
3302
        n_diff = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t));
 
3303
 
 
3304
        n_not_null = NULL;
 
3305
 
 
3306
        /* Check srv_innodb_stats_method setting, and decide whether we
 
3307
        need to record non-null value and also decide if NULL is
 
3308
        considered equal (by setting stats_null_not_equal value) */
 
3309
        switch (srv_innodb_stats_method) {
 
3310
        case SRV_STATS_NULLS_IGNORED:
 
3311
                n_not_null = mem_heap_zalloc(heap, (n_cols + 1)
 
3312
                                             * sizeof *n_not_null);
 
3313
                /* fall through */
 
3314
 
 
3315
        case SRV_STATS_NULLS_UNEQUAL:
 
3316
                /* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL
 
3317
                case, we will treat NULLs as unequal value */
 
3318
                stats_null_not_equal = TRUE;
 
3319
                break;
 
3320
 
 
3321
        case SRV_STATS_NULLS_EQUAL:
 
3322
                stats_null_not_equal = FALSE;
 
3323
                break;
 
3324
 
 
3325
        default:
 
3326
                ut_error;
 
3327
        }
3253
3328
 
3254
3329
        /* It makes no sense to test more pages than are contained
3255
3330
        in the index, thus we lower the number if it is too high */
3266
3341
        /* We sample some pages in the index to get an estimate */
3267
3342
 
3268
3343
        for (i = 0; i < n_sample_pages; i++) {
3269
 
                rec_t*  supremum;
3270
3344
                mtr_start(&mtr);
3271
3345
 
3272
3346
                btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);
3279
3353
 
3280
3354
                page = btr_cur_get_page(&cursor);
3281
3355
 
3282
 
                supremum = page_get_supremum_rec(page);
3283
3356
                rec = page_rec_get_next(page_get_infimum_rec(page));
3284
3357
 
3285
 
                if (rec != supremum) {
 
3358
                if (!page_rec_is_supremum(rec)) {
3286
3359
                        not_empty_flag = 1;
3287
3360
                        offsets_rec = rec_get_offsets(rec, index, offsets_rec,
3288
3361
                                                      ULINT_UNDEFINED, &heap);
 
3362
 
 
3363
                        if (n_not_null) {
 
3364
                                btr_record_not_null_field_in_rec(
 
3365
                                        n_cols, offsets_rec, n_not_null);
 
3366
                        }
3289
3367
                }
3290
3368
 
3291
 
                while (rec != supremum) {
 
3369
                while (!page_rec_is_supremum(rec)) {
3292
3370
                        rec_t*  next_rec = page_rec_get_next(rec);
3293
 
                        if (next_rec == supremum) {
 
3371
                        if (page_rec_is_supremum(next_rec)) {
 
3372
                                total_external_size +=
 
3373
                                        btr_rec_get_externally_stored_len(
 
3374
                                                rec, offsets_rec);
3294
3375
                                break;
3295
3376
                        }
3296
3377
 
3298
3379
                        matched_bytes = 0;
3299
3380
                        offsets_next_rec = rec_get_offsets(next_rec, index,
3300
3381
                                                           offsets_next_rec,
3301
 
                                                           n_cols, &heap);
 
3382
                                                           ULINT_UNDEFINED,
 
3383
                                                           &heap);
3302
3384
 
3303
3385
                        cmp_rec_rec_with_match(rec, next_rec,
3304
3386
                                               offsets_rec, offsets_next_rec,
3305
 
                                               index, &matched_fields,
 
3387
                                               index, stats_null_not_equal,
 
3388
                                               &matched_fields,
3306
3389
                                               &matched_bytes);
3307
3390
 
3308
3391
                        for (j = matched_fields + 1; j <= n_cols; j++) {
3312
3395
                                n_diff[j]++;
3313
3396
                        }
3314
3397
 
 
3398
                        if (n_not_null) {
 
3399
                                btr_record_not_null_field_in_rec(
 
3400
                                        n_cols, offsets_next_rec, n_not_null);
 
3401
                        }
 
3402
 
3315
3403
                        total_external_size
3316
3404
                                += btr_rec_get_externally_stored_len(
3317
3405
                                        rec, offsets_rec);
3346
3434
                        }
3347
3435
                }
3348
3436
 
3349
 
                offsets_rec = rec_get_offsets(rec, index, offsets_rec,
3350
 
                                              ULINT_UNDEFINED, &heap);
3351
 
                total_external_size += btr_rec_get_externally_stored_len(
3352
 
                        rec, offsets_rec);
3353
3437
                mtr_commit(&mtr);
3354
3438
        }
3355
3439
 
3363
3447
 
3364
3448
        for (j = 0; j <= n_cols; j++) {
3365
3449
                index->stat_n_diff_key_vals[j]
3366
 
                        = ((n_diff[j]
3367
 
                            * (ib_int64_t)index->stat_n_leaf_pages
3368
 
                            + n_sample_pages - 1
3369
 
                            + total_external_size
3370
 
                            + not_empty_flag)
3371
 
                           / (n_sample_pages
3372
 
                              + total_external_size));
 
3450
                        = BTR_TABLE_STATS_FROM_SAMPLE(
 
3451
                                n_diff[j], index, n_sample_pages,
 
3452
                                total_external_size, not_empty_flag); 
3373
3453
 
3374
3454
                /* If the tree is small, smaller than
3375
3455
                10 * n_sample_pages + total_external_size, then
3388
3468
                }
3389
3469
 
3390
3470
                index->stat_n_diff_key_vals[j] += add_on;
3391
 
        }
3392
 
 
3393
 
        mem_free(n_diff);
3394
 
        if (UNIV_LIKELY_NULL(heap)) {
3395
 
                mem_heap_free(heap);
3396
 
        }
 
3471
 
 
3472
                /* Update the stat_n_non_null_key_vals[] with our
 
3473
                sampled result. stat_n_non_null_key_vals[] is created
 
3474
                and initialized to zero in dict_index_add_to_cache(),
 
3475
                along with stat_n_diff_key_vals[] array */
 
3476
                if (n_not_null != NULL && (j < n_cols)) {
 
3477
                        index->stat_n_non_null_key_vals[j] =
 
3478
                                 BTR_TABLE_STATS_FROM_SAMPLE(
 
3479
                                        n_not_null[j], index, n_sample_pages,
 
3480
                                        total_external_size, not_empty_flag);
 
3481
                }
 
3482
        }
 
3483
 
 
3484
        mem_heap_free(heap);
3397
3485
}
3398
3486
 
3399
3487
/*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/
3400
3488
 
3401
3489
/***********************************************************//**
 
3490
Gets the offset of the pointer to the externally stored part of a field.
 
3491
@return offset of the pointer to the externally stored part */
 
3492
static
 
3493
ulint
 
3494
btr_rec_get_field_ref_offs(
 
3495
/*=======================*/
 
3496
        const ulint*    offsets,/*!< in: array returned by rec_get_offsets() */
 
3497
        ulint           n)      /*!< in: index of the external field */
 
3498
{
 
3499
        ulint   field_ref_offs;
 
3500
        ulint   local_len;
 
3501
 
 
3502
        ut_a(rec_offs_nth_extern(offsets, n));
 
3503
        field_ref_offs = rec_get_nth_field_offs(offsets, n, &local_len);
 
3504
        ut_a(local_len != UNIV_SQL_NULL);
 
3505
        ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
 
3506
 
 
3507
        return(field_ref_offs + local_len - BTR_EXTERN_FIELD_REF_SIZE);
 
3508
}
 
3509
 
 
3510
/** Gets a pointer to the externally stored part of a field.
 
3511
@param rec      record
 
3512
@param offsets  rec_get_offsets(rec)
 
3513
@param n        index of the externally stored field
 
3514
@return pointer to the externally stored part */
 
3515
#define btr_rec_get_field_ref(rec, offsets, n)                  \
 
3516
        ((rec) + btr_rec_get_field_ref_offs(offsets, n))
 
3517
 
 
3518
/***********************************************************//**
3402
3519
Gets the externally stored size of a record, in units of a database page.
3403
3520
@return externally stored part, in units of a database page */
3404
3521
static
3405
3522
ulint
3406
3523
btr_rec_get_externally_stored_len(
3407
3524
/*==============================*/
3408
 
        rec_t*          rec,    /*!< in: record */
 
3525
        const rec_t*    rec,    /*!< in: record */
3409
3526
        const ulint*    offsets)/*!< in: array returned by rec_get_offsets() */
3410
3527
{
3411
3528
        ulint   n_fields;
3412
 
        byte*   data;
3413
 
        ulint   local_len;
3414
 
        ulint   extern_len;
3415
3529
        ulint   total_extern_len = 0;
3416
3530
        ulint   i;
3417
3531
 
3418
3532
        ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
 
3533
 
 
3534
        if (!rec_offs_any_extern(offsets)) {
 
3535
                return(0);
 
3536
        }
 
3537
 
3419
3538
        n_fields = rec_offs_n_fields(offsets);
3420
3539
 
3421
3540
        for (i = 0; i < n_fields; i++) {
3422
3541
                if (rec_offs_nth_extern(offsets, i)) {
3423
3542
 
3424
 
                        data = rec_get_nth_field(rec, offsets, i, &local_len);
3425
 
 
3426
 
                        local_len -= BTR_EXTERN_FIELD_REF_SIZE;
3427
 
 
3428
 
                        extern_len = mach_read_from_4(data + local_len
3429
 
                                                      + BTR_EXTERN_LEN + 4);
 
3543
                        ulint   extern_len = mach_read_from_4(
 
3544
                                btr_rec_get_field_ref(rec, offsets, i)
 
3545
                                + BTR_EXTERN_LEN + 4);
3430
3546
 
3431
3547
                        total_extern_len += ut_calc_align(extern_len,
3432
3548
                                                          UNIV_PAGE_SIZE);
3456
3572
        ulint   byte_val;
3457
3573
 
3458
3574
        data = rec_get_nth_field(rec, offsets, i, &local_len);
3459
 
 
 
3575
        ut_ad(rec_offs_nth_extern(offsets, i));
3460
3576
        ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
3461
3577
 
3462
3578
        local_len -= BTR_EXTERN_FIELD_REF_SIZE;
3466
3582
        if (val) {
3467
3583
                byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG);
3468
3584
        } else {
 
3585
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 
3586
                ut_a(!(byte_val & BTR_EXTERN_OWNER_FLAG));
 
3587
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
3469
3588
                byte_val = byte_val | BTR_EXTERN_OWNER_FLAG;
3470
3589
        }
3471
3590
 
3479
3598
        } else {
3480
3599
                mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
3481
3600
        }
 
3601
 
 
3602
        btr_blob_dbg_owner(rec, index, offsets, i, val);
3482
3603
}
3483
3604
 
3484
3605
/*******************************************************************//**
3485
 
Marks not updated extern fields as not-owned by this record. The ownership
3486
 
is transferred to the updated record which is inserted elsewhere in the
 
3606
Marks non-updated off-page fields as disowned by this record. The ownership
 
3607
must be transferred to the updated record which is inserted elsewhere in the
3487
3608
index tree. In purge only the owner of an externally stored field is allowed
3488
3609
to free the field. */
3489
3610
UNIV_INTERN
3490
3611
void
3491
 
btr_cur_mark_extern_inherited_fields(
3492
 
/*=================================*/
 
3612
btr_cur_disown_inherited_fields(
 
3613
/*============================*/
3493
3614
        page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
3494
3615
                                part will be updated, or NULL */
3495
3616
        rec_t*          rec,    /*!< in/out: record in a clustered index */
3496
3617
        dict_index_t*   index,  /*!< in: index of the page */
3497
3618
        const ulint*    offsets,/*!< in: array returned by rec_get_offsets() */
3498
3619
        const upd_t*    update, /*!< in: update vector */
3499
 
        mtr_t*          mtr)    /*!< in: mtr, or NULL if not logged */
 
3620
        mtr_t*          mtr)    /*!< in/out: mini-transaction */
3500
3621
{
3501
 
        ulint   n;
3502
 
        ulint   j;
3503
3622
        ulint   i;
3504
3623
 
3505
 
        ut_ad(rec_offs_validate(rec, NULL, offsets));
 
3624
        ut_ad(rec_offs_validate(rec, index, offsets));
3506
3625
        ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
3507
 
 
3508
 
        if (!rec_offs_any_extern(offsets)) {
3509
 
 
3510
 
                return;
3511
 
        }
3512
 
 
3513
 
        n = rec_offs_n_fields(offsets);
3514
 
 
3515
 
        for (i = 0; i < n; i++) {
3516
 
                if (rec_offs_nth_extern(offsets, i)) {
3517
 
 
3518
 
                        /* Check it is not in updated fields */
3519
 
 
3520
 
                        if (update) {
3521
 
                                for (j = 0; j < upd_get_n_fields(update);
3522
 
                                     j++) {
3523
 
                                        if (upd_get_nth_field(update, j)
3524
 
                                            ->field_no == i) {
3525
 
 
3526
 
                                                goto updated;
3527
 
                                        }
3528
 
                                }
3529
 
                        }
3530
 
 
 
3626
        ut_ad(rec_offs_any_extern(offsets));
 
3627
        ut_ad(mtr);
 
3628
 
 
3629
        for (i = 0; i < rec_offs_n_fields(offsets); i++) {
 
3630
                if (rec_offs_nth_extern(offsets, i)
 
3631
                    && !upd_get_field_by_field_no(update, i)) {
3531
3632
                        btr_cur_set_ownership_of_extern_field(
3532
3633
                                page_zip, rec, index, offsets, i, FALSE, mtr);
3533
 
updated:
3534
 
                        ;
3535
 
                }
3536
 
        }
3537
 
}
3538
 
 
3539
 
/*******************************************************************//**
3540
 
The complement of the previous function: in an update entry may inherit
3541
 
some externally stored fields from a record. We must mark them as inherited
3542
 
in entry, so that they are not freed in a rollback. */
3543
 
UNIV_INTERN
3544
 
void
3545
 
btr_cur_mark_dtuple_inherited_extern(
3546
 
/*=================================*/
3547
 
        dtuple_t*       entry,          /*!< in/out: updated entry to be
3548
 
                                        inserted to clustered index */
3549
 
        const upd_t*    update)         /*!< in: update vector */
3550
 
{
3551
 
        ulint           i;
3552
 
 
3553
 
        for (i = 0; i < dtuple_get_n_fields(entry); i++) {
3554
 
 
3555
 
                dfield_t*       dfield = dtuple_get_nth_field(entry, i);
3556
 
                byte*           data;
3557
 
                ulint           len;
3558
 
                ulint           j;
3559
 
 
3560
 
                if (!dfield_is_ext(dfield)) {
3561
 
                        continue;
3562
 
                }
3563
 
 
3564
 
                /* Check if it is in updated fields */
3565
 
 
3566
 
                for (j = 0; j < upd_get_n_fields(update); j++) {
3567
 
                        if (upd_get_nth_field(update, j)->field_no == i) {
3568
 
 
3569
 
                                goto is_updated;
3570
 
                        }
3571
 
                }
3572
 
 
3573
 
                data = dfield_get_data(dfield);
3574
 
                len = dfield_get_len(dfield);
3575
 
                data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
3576
 
                        |= BTR_EXTERN_INHERITED_FLAG;
3577
 
 
3578
 
is_updated:
3579
 
                ;
 
3634
                }
3580
3635
        }
3581
3636
}
3582
3637
 
3616
3671
}
3617
3672
 
3618
3673
/*******************************************************************//**
3619
 
Marks all extern fields in a dtuple as owned by the record. */
3620
 
UNIV_INTERN
3621
 
void
3622
 
btr_cur_unmark_dtuple_extern_fields(
3623
 
/*================================*/
3624
 
        dtuple_t*       entry)          /*!< in/out: clustered index entry */
3625
 
{
3626
 
        ulint   i;
3627
 
 
3628
 
        for (i = 0; i < dtuple_get_n_fields(entry); i++) {
3629
 
                dfield_t* dfield = dtuple_get_nth_field(entry, i);
3630
 
 
3631
 
                if (dfield_is_ext(dfield)) {
3632
 
                        byte*   data = dfield_get_data(dfield);
3633
 
                        ulint   len = dfield_get_len(dfield);
3634
 
 
3635
 
                        data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
3636
 
                                &= ~BTR_EXTERN_OWNER_FLAG;
3637
 
                }
3638
 
        }
3639
 
}
3640
 
 
3641
 
/*******************************************************************//**
3642
3674
Flags the data tuple fields that are marked as extern storage in the
3643
3675
update vector.  We use this function to remember which fields we must
3644
3676
mark as extern storage in a record inserted for an update.
3771
3803
            && buf_block_get_space(block) == space
3772
3804
            && buf_block_get_page_no(block) == page_no) {
3773
3805
 
3774
 
                if (buf_LRU_free_block(&block->page, all, NULL)
3775
 
                    != BUF_LRU_FREED
 
3806
                if (!buf_LRU_free_block(&block->page, all)
3776
3807
                    && all && block->page.zip.data) {
3777
3808
                        /* Attempt to deallocate the uncompressed page
3778
3809
                        if the whole block cannot be deallocated. */
3779
3810
 
3780
 
                        buf_LRU_free_block(&block->page, FALSE, NULL);
 
3811
                        buf_LRU_free_block(&block->page, FALSE);
3781
3812
                }
3782
3813
        }
3783
3814
 
3790
3821
them in rec.  The extern flags in rec will have to be set beforehand.
3791
3822
The fields are stored on pages allocated from leaf node
3792
3823
file segment of the index tree.
3793
 
@return DB_SUCCESS or error */
 
3824
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
3794
3825
UNIV_INTERN
3795
3826
ulint
3796
 
btr_store_big_rec_extern_fields(
3797
 
/*============================*/
 
3827
btr_store_big_rec_extern_fields_func(
 
3828
/*=================================*/
3798
3829
        dict_index_t*   index,          /*!< in: index of rec; the index tree
3799
3830
                                        MUST be X-latched */
3800
3831
        buf_block_t*    rec_block,      /*!< in/out: block containing rec */
3803
3834
                                        the "external storage" flags in offsets
3804
3835
                                        will not correspond to rec when
3805
3836
                                        this function returns */
3806
 
        big_rec_t*      big_rec_vec,    /*!< in: vector containing fields
 
3837
#ifdef UNIV_DEBUG
 
3838
        mtr_t*          local_mtr,      /*!< in: mtr containing the
 
3839
                                        latch to rec and to the tree */
 
3840
#endif /* UNIV_DEBUG */
 
3841
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 
3842
        ibool           update_in_place,/*! in: TRUE if the record is updated
 
3843
                                        in place (not delete+insert) */
 
3844
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
 
3845
        const big_rec_t*big_rec_vec)    /*!< in: vector containing fields
3807
3846
                                        to be stored externally */
3808
 
        mtr_t*          local_mtr __attribute__((unused))) /*!< in: mtr
3809
 
                                        containing the latch to rec and to the
3810
 
                                        tree */
 
3847
 
3811
3848
{
3812
3849
        ulint   rec_page_no;
3813
3850
        byte*   field_ref;
3825
3862
        z_stream c_stream;
3826
3863
 
3827
3864
        ut_ad(rec_offs_validate(rec, index, offsets));
 
3865
        ut_ad(rec_offs_any_extern(offsets));
3828
3866
        ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
3829
3867
                                MTR_MEMO_X_LOCK));
3830
3868
        ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
3856
3894
                ut_a(err == Z_OK);
3857
3895
        }
3858
3896
 
 
3897
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 
3898
        /* All pointers to externally stored columns in the record
 
3899
        must either be zero or they must be pointers to inherited
 
3900
        columns, owned by this record or an earlier record version. */
 
3901
        for (i = 0; i < rec_offs_n_fields(offsets); i++) {
 
3902
                if (!rec_offs_nth_extern(offsets, i)) {
 
3903
                        continue;
 
3904
                }
 
3905
                field_ref = btr_rec_get_field_ref(rec, offsets, i);
 
3906
 
 
3907
                ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG));
 
3908
                /* Either this must be an update in place,
 
3909
                or the BLOB must be inherited, or the BLOB pointer
 
3910
                must be zero (will be written in this function). */
 
3911
                ut_a(update_in_place
 
3912
                     || (field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_INHERITED_FLAG)
 
3913
                     || !memcmp(field_ref, field_ref_zero,
 
3914
                                BTR_EXTERN_FIELD_REF_SIZE));
 
3915
        }
 
3916
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
3859
3917
        /* We have to create a file segment to the tablespace
3860
3918
        for each field and put the pointer to the field in rec */
3861
3919
 
3862
3920
        for (i = 0; i < big_rec_vec->n_fields; i++) {
3863
 
                ut_ad(rec_offs_nth_extern(offsets,
3864
 
                                          big_rec_vec->fields[i].field_no));
3865
 
                {
3866
 
                        ulint   local_len;
3867
 
                        field_ref = rec_get_nth_field(
3868
 
                                rec, offsets, big_rec_vec->fields[i].field_no,
3869
 
                                &local_len);
3870
 
                        ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
3871
 
                        local_len -= BTR_EXTERN_FIELD_REF_SIZE;
3872
 
                        field_ref += local_len;
3873
 
                }
 
3921
                field_ref = btr_rec_get_field_ref(
 
3922
                        rec, offsets, big_rec_vec->fields[i].field_no);
 
3923
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 
3924
                /* A zero BLOB pointer should have been initially inserted. */
 
3925
                ut_a(!memcmp(field_ref, field_ref_zero,
 
3926
                             BTR_EXTERN_FIELD_REF_SIZE));
 
3927
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
3874
3928
                extern_len = big_rec_vec->fields[i].len;
 
3929
                UNIV_MEM_ASSERT_RW(big_rec_vec->fields[i].data,
 
3930
                                   extern_len);
3875
3931
 
3876
3932
                ut_a(extern_len > 0);
3877
3933
 
4043
4099
                                }
4044
4100
 
4045
4101
                                if (prev_page_no == FIL_NULL) {
 
4102
                                        btr_blob_dbg_add_blob(
 
4103
                                                rec, big_rec_vec->fields[i]
 
4104
                                                .field_no, page_no, index,
 
4105
                                                "store");
 
4106
 
4046
4107
                                        mach_write_to_4(field_ref
4047
4108
                                                        + BTR_EXTERN_SPACE_ID,
4048
4109
                                                        space_id);
4118
4179
                                                 MLOG_4BYTES, &mtr);
4119
4180
 
4120
4181
                                if (prev_page_no == FIL_NULL) {
 
4182
                                        btr_blob_dbg_add_blob(
 
4183
                                                rec, big_rec_vec->fields[i]
 
4184
                                                .field_no, page_no, index,
 
4185
                                                "store");
 
4186
 
4121
4187
                                        mlog_write_ulint(field_ref
4122
4188
                                                         + BTR_EXTERN_SPACE_ID,
4123
4189
                                                         space_id,
4150
4216
                mem_heap_free(heap);
4151
4217
        }
4152
4218
 
 
4219
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 
4220
        /* All pointers to externally stored columns in the record
 
4221
        must be valid. */
 
4222
        for (i = 0; i < rec_offs_n_fields(offsets); i++) {
 
4223
                if (!rec_offs_nth_extern(offsets, i)) {
 
4224
                        continue;
 
4225
                }
 
4226
 
 
4227
                field_ref = btr_rec_get_field_ref(rec, offsets, i);
 
4228
 
 
4229
                /* The pointer must not be zero. */
 
4230
                ut_a(0 != memcmp(field_ref, field_ref_zero,
 
4231
                                 BTR_EXTERN_FIELD_REF_SIZE));
 
4232
                /* The column must not be disowned by this record. */
 
4233
                ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG));
 
4234
        }
 
4235
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
4153
4236
        return(DB_SUCCESS);
4154
4237
}
4155
4238
 
4172
4255
        if (UNIV_UNLIKELY(type != FIL_PAGE_TYPE_BLOB)) {
4173
4256
                ulint   flags = fil_space_get_flags(space_id);
4174
4257
 
 
4258
#ifndef UNIV_DEBUG /* Improve debug test coverage */
4175
4259
                if (UNIV_LIKELY
4176
4260
                    ((flags & DICT_TF_FORMAT_MASK) == DICT_TF_FORMAT_51)) {
4177
4261
                        /* Old versions of InnoDB did not initialize
4180
4264
                        a BLOB page that is in Antelope format.*/
4181
4265
                        return;
4182
4266
                }
 
4267
#endif /* !UNIV_DEBUG */
4183
4268
 
4184
4269
                ut_print_timestamp(stderr);
4185
4270
                fprintf(stderr,
4229
4314
        ulint           page_no;
4230
4315
        ulint           next_page_no;
4231
4316
        mtr_t           mtr;
4232
 
#ifdef UNIV_DEBUG
 
4317
 
4233
4318
        ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
4234
4319
                                MTR_MEMO_X_LOCK));
4235
4320
        ut_ad(mtr_memo_contains_page(local_mtr, field_ref,
4236
4321
                                     MTR_MEMO_PAGE_X_FIX));
4237
4322
        ut_ad(!rec || rec_offs_validate(rec, index, offsets));
4238
 
 
4239
 
        if (rec) {
4240
 
                ulint   local_len;
4241
 
                const byte*     f = rec_get_nth_field(rec, offsets,
4242
 
                                                      i, &local_len);
4243
 
                ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
4244
 
                local_len -= BTR_EXTERN_FIELD_REF_SIZE;
4245
 
                f += local_len;
4246
 
                ut_ad(f == field_ref);
4247
 
        }
4248
 
#endif /* UNIV_DEBUG */
 
4323
        ut_ad(!rec || field_ref == btr_rec_get_field_ref(rec, offsets, i));
4249
4324
 
4250
4325
        if (UNIV_UNLIKELY(!memcmp(field_ref, field_ref_zero,
4251
4326
                                  BTR_EXTERN_FIELD_REF_SIZE))) {
4252
4327
                /* In the rollback of uncommitted transactions, we may
4253
4328
                encounter a clustered index record whose BLOBs have
4254
4329
                not been written.  There is nothing to free then. */
4255
 
                ut_a(rb_ctx == RB_RECOVERY);
 
4330
                ut_a(rb_ctx == RB_RECOVERY || rb_ctx == RB_RECOVERY_PURGE_REC);
4256
4331
                return;
4257
4332
        }
4258
4333
 
4277
4352
                rec_zip_size = 0;
4278
4353
        }
4279
4354
 
 
4355
#ifdef UNIV_BLOB_DEBUG
 
4356
        if (!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG)
 
4357
            && !((field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_INHERITED_FLAG)
 
4358
                 && (rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY))) {
 
4359
                /* This off-page column will be freed.
 
4360
                Check that no references remain. */
 
4361
 
 
4362
                btr_blob_dbg_t  b;
 
4363
 
 
4364
                b.blob_page_no = mach_read_from_4(
 
4365
                        field_ref + BTR_EXTERN_PAGE_NO);
 
4366
 
 
4367
                if (rec) {
 
4368
                        /* Remove the reference from the record to the
 
4369
                        BLOB. If the BLOB were not freed, the
 
4370
                        reference would be removed when the record is
 
4371
                        removed. Freeing the BLOB will overwrite the
 
4372
                        BTR_EXTERN_PAGE_NO in the field_ref of the
 
4373
                        record with FIL_NULL, which would make the
 
4374
                        btr_blob_dbg information inconsistent with the
 
4375
                        record. */
 
4376
                        b.ref_page_no = page_get_page_no(page_align(rec));
 
4377
                        b.ref_heap_no = page_rec_get_heap_no(rec);
 
4378
                        b.ref_field_no = i;
 
4379
                        btr_blob_dbg_rbt_delete(index, &b, "free");
 
4380
                }
 
4381
 
 
4382
                btr_blob_dbg_assert_empty(index, b.blob_page_no);
 
4383
        }
 
4384
#endif /* UNIV_BLOB_DEBUG */
 
4385
 
4280
4386
        for (;;) {
 
4387
#ifdef UNIV_SYNC_DEBUG
4281
4388
                buf_block_t*    rec_block;
 
4389
#endif /* UNIV_SYNC_DEBUG */
4282
4390
                buf_block_t*    ext_block;
4283
4391
 
4284
4392
                mtr_start(&mtr);
4285
4393
 
4286
 
                rec_block = buf_page_get(page_get_space_id(
 
4394
#ifdef UNIV_SYNC_DEBUG
 
4395
                rec_block =
 
4396
#endif /* UNIV_SYNC_DEBUG */
 
4397
                        buf_page_get(page_get_space_id(
4287
4398
                                                 page_align(field_ref)),
4288
4399
                                         rec_zip_size,
4289
4400
                                         page_get_page_no(
4298
4409
                    || (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
4299
4410
                        & BTR_EXTERN_OWNER_FLAG)
4300
4411
                    /* Rollback and inherited field */
4301
 
                    || (rb_ctx != RB_NONE
 
4412
                    || ((rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY)
4302
4413
                        && (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
4303
4414
                            & BTR_EXTERN_INHERITED_FLAG))) {
4304
4415
 
4405
4516
 
4406
4517
        for (i = 0; i < n_fields; i++) {
4407
4518
                if (rec_offs_nth_extern(offsets, i)) {
4408
 
                        ulint   len;
4409
 
                        byte*   data
4410
 
                                = rec_get_nth_field(rec, offsets, i, &len);
4411
 
                        ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
4412
 
 
4413
4519
                        btr_free_externally_stored_field(
4414
 
                                index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
 
4520
                                index, btr_rec_get_field_ref(rec, offsets, i),
4415
4521
                                rec, offsets, page_zip, i, rb_ctx, mtr);
4416
4522
                }
4417
4523
        }
4508
4614
                mtr_commit(&mtr);
4509
4615
 
4510
4616
                if (page_no == FIL_NULL || copy_len != part_len) {
 
4617
                        UNIV_MEM_ASSERT_RW(buf, copied_len);
4511
4618
                        return(copied_len);
4512
4619
                }
4513
4620
 
4522
4629
 
4523
4630
/*******************************************************************//**
4524
4631
Copies the prefix of a compressed BLOB.  The clustered index record
4525
 
that points to this BLOB must be protected by a lock or a page latch. */
 
4632
that points to this BLOB must be protected by a lock or a page latch.
 
4633
@return number of bytes written to buf */
4526
4634
static
4527
 
void
 
4635
ulint
4528
4636
btr_copy_zblob_prefix(
4529
4637
/*==================*/
4530
 
        z_stream*       d_stream,/*!< in/out: the decompressing stream */
 
4638
        byte*           buf,    /*!< out: the externally stored part of
 
4639
                                the field, or a prefix of it */
 
4640
        ulint           len,    /*!< in: length of buf, in bytes */
4531
4641
        ulint           zip_size,/*!< in: compressed BLOB page size */
4532
4642
        ulint           space_id,/*!< in: space id of the BLOB pages */
4533
4643
        ulint           page_no,/*!< in: page number of the first BLOB page */
4534
4644
        ulint           offset) /*!< in: offset on the first BLOB page */
4535
4645
{
4536
 
        ulint   page_type = FIL_PAGE_TYPE_ZBLOB;
 
4646
        ulint           page_type = FIL_PAGE_TYPE_ZBLOB;
 
4647
        mem_heap_t*     heap;
 
4648
        int             err;
 
4649
        z_stream        d_stream;
 
4650
 
 
4651
        d_stream.next_out = buf;
 
4652
        d_stream.avail_out = len;
 
4653
        d_stream.next_in = Z_NULL;
 
4654
        d_stream.avail_in = 0;
 
4655
 
 
4656
        /* Zlib inflate needs 32 kilobytes for the default
 
4657
        window size, plus a few kilobytes for small objects. */
 
4658
        heap = mem_heap_create(40000);
 
4659
        page_zip_set_alloc(&d_stream, heap);
4537
4660
 
4538
4661
        ut_ad(ut_is_2pow(zip_size));
4539
4662
        ut_ad(zip_size >= PAGE_ZIP_MIN_SIZE);
4540
4663
        ut_ad(zip_size <= UNIV_PAGE_SIZE);
4541
4664
        ut_ad(space_id);
4542
4665
 
 
4666
        err = inflateInit(&d_stream);
 
4667
        ut_a(err == Z_OK);
 
4668
 
4543
4669
        for (;;) {
4544
4670
                buf_page_t*     bpage;
4545
 
                int             err;
4546
4671
                ulint           next_page_no;
4547
4672
 
4548
4673
                /* There is no latch on bpage directly.  Instead,
4558
4683
                                " compressed BLOB"
4559
4684
                                " page %lu space %lu\n",
4560
4685
                                (ulong) page_no, (ulong) space_id);
4561
 
                        return;
 
4686
                        goto func_exit;
4562
4687
                }
4563
4688
 
4564
4689
                if (UNIV_UNLIKELY
4584
4709
                        offset += 4;
4585
4710
                }
4586
4711
 
4587
 
                d_stream->next_in = bpage->zip.data + offset;
4588
 
                d_stream->avail_in = zip_size - offset;
 
4712
                d_stream.next_in = bpage->zip.data + offset;
 
4713
                d_stream.avail_in = zip_size - offset;
4589
4714
 
4590
 
                err = inflate(d_stream, Z_NO_FLUSH);
 
4715
                err = inflate(&d_stream, Z_NO_FLUSH);
4591
4716
                switch (err) {
4592
4717
                case Z_OK:
4593
 
                        if (!d_stream->avail_out) {
 
4718
                        if (!d_stream.avail_out) {
4594
4719
                                goto end_of_blob;
4595
4720
                        }
4596
4721
                        break;
4607
4732
                                " compressed BLOB"
4608
4733
                                " page %lu space %lu returned %d (%s)\n",
4609
4734
                                (ulong) page_no, (ulong) space_id,
4610
 
                                err, d_stream->msg);
 
4735
                                err, d_stream.msg);
4611
4736
                case Z_BUF_ERROR:
4612
4737
                        goto end_of_blob;
4613
4738
                }
4614
4739
 
4615
4740
                if (next_page_no == FIL_NULL) {
4616
 
                        if (!d_stream->avail_in) {
 
4741
                        if (!d_stream.avail_in) {
4617
4742
                                ut_print_timestamp(stderr);
4618
4743
                                fprintf(stderr,
4619
4744
                                        "  InnoDB: unexpected end of"
4622
4747
                                        (ulong) page_no,
4623
4748
                                        (ulong) space_id);
4624
4749
                        } else {
4625
 
                                err = inflate(d_stream, Z_FINISH);
 
4750
                                err = inflate(&d_stream, Z_FINISH);
4626
4751
                                switch (err) {
4627
4752
                                case Z_STREAM_END:
4628
4753
                                case Z_BUF_ERROR:
4634
4759
 
4635
4760
end_of_blob:
4636
4761
                        buf_page_release_zip(bpage);
4637
 
                        return;
 
4762
                        goto func_exit;
4638
4763
                }
4639
4764
 
4640
4765
                buf_page_release_zip(bpage);
4646
4771
                offset = FIL_PAGE_NEXT;
4647
4772
                page_type = FIL_PAGE_TYPE_ZBLOB2;
4648
4773
        }
 
4774
 
 
4775
func_exit:
 
4776
        inflateEnd(&d_stream);
 
4777
        mem_heap_free(heap);
 
4778
        UNIV_MEM_ASSERT_RW(buf, d_stream.total_out);
 
4779
        return(d_stream.total_out);
4649
4780
}
4650
4781
 
4651
4782
/*******************************************************************//**
4671
4802
        }
4672
4803
 
4673
4804
        if (UNIV_UNLIKELY(zip_size)) {
4674
 
                int             err;
4675
 
                z_stream        d_stream;
4676
 
                mem_heap_t*     heap;
4677
 
 
4678
 
                /* Zlib inflate needs 32 kilobytes for the default
4679
 
                window size, plus a few kilobytes for small objects. */
4680
 
                heap = mem_heap_create(40000);
4681
 
                page_zip_set_alloc(&d_stream, heap);
4682
 
 
4683
 
                err = inflateInit(&d_stream);
4684
 
                ut_a(err == Z_OK);
4685
 
 
4686
 
                d_stream.next_out = buf;
4687
 
                d_stream.avail_out = len;
4688
 
                d_stream.avail_in = 0;
4689
 
 
4690
 
                btr_copy_zblob_prefix(&d_stream, zip_size,
4691
 
                                      space_id, page_no, offset);
4692
 
                inflateEnd(&d_stream);
4693
 
                mem_heap_free(heap);
4694
 
                return(d_stream.total_out);
 
4805
                return(btr_copy_zblob_prefix(buf, len, zip_size,
 
4806
                                             space_id, page_no, offset));
4695
4807
        } else {
4696
4808
                return(btr_copy_blob_prefix(buf, len, space_id,
4697
4809
                                            page_no, offset));
4811
4923
 
4812
4924
/*******************************************************************//**
4813
4925
Copies an externally stored field of a record to mem heap.
4814
 
@return the field copied to heap */
 
4926
@return the field copied to heap, or NULL if the field is incomplete */
4815
4927
UNIV_INTERN
4816
4928
byte*
4817
4929
btr_rec_copy_externally_stored_field(
4841
4953
 
4842
4954
        data = rec_get_nth_field(rec, offsets, no, &local_len);
4843
4955
 
 
4956
        ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
 
4957
 
 
4958
        if (UNIV_UNLIKELY
 
4959
            (!memcmp(data + local_len - BTR_EXTERN_FIELD_REF_SIZE,
 
4960
                     field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) {
 
4961
                /* The externally stored field was not written yet.
 
4962
                This record should only be seen by
 
4963
                recv_recovery_rollback_active() or any
 
4964
                TRX_ISO_READ_UNCOMMITTED transactions. */
 
4965
                return(NULL);
 
4966
        }
 
4967
 
4844
4968
        return(btr_copy_externally_stored_field(len, data,
4845
4969
                                                zip_size, local_len, heap));
4846
4970
}