1
/*******************************************************
6
Created 12/19/1997 Heikki Tuuri
7
*******************************************************/
15
#include "dict0dict.h"
16
#include "dict0boot.h"
22
#include "mach0data.h"
28
#include "lock0lock.h"
29
#include "eval0eval.h"
31
#include "pars0pars.h"
32
#include "row0mysql.h"
33
#include "read0read.h"
35
#include "ha_prototypes.h"
37
/* Maximum number of rows to prefetch; MySQL interface has another parameter.
This is also the number of sel_buf_t slots that sel_col_prefetch_buf_alloc()
allocates for each column, and sel_push_prefetched_row() asserts that the
push position stays below it. */
38
#define SEL_MAX_N_PREFETCH 16
40
/* Number of rows fetched, after which to start prefetching; MySQL interface
41
has another parameter */
42
#define SEL_PREFETCH_LIMIT 1
44
/* When a select has accessed about this many pages, it returns control back
45
to que_run_threads: this is to allow canceling runaway queries */
47
#define SEL_COST_LIMIT 100
49
/* Flags for search shortcut: result codes of row_sel_try_search_shortcut() */
51
/* SEL_EXHAUSTED: the shortcut search found no qualifying record, i.e. the
record under the cursor matches fewer than plan->n_exact_match fields, is
delete-marked, or fails the other search conditions. The caller then treats
the table as exhausted. */
#define SEL_EXHAUSTED 1
54
/************************************************************************
55
Returns TRUE if the user-defined column values in a secondary index record
56
are alphabetically the same as the corresponding columns in the clustered index record.
58
NOTE: the comparison is NOT done as a binary comparison, but character
59
fields are compared with collation! */
62
row_sel_sec_rec_is_for_clust_rec(
63
/*=============================*/
64
/* out: TRUE if the secondary
65
record is equal to the corresponding
66
fields in the clustered record,
67
when compared with collation */
68
rec_t* sec_rec, /* in: secondary index record */
69
dict_index_t* sec_index, /* in: secondary index */
70
rec_t* clust_rec, /* in: clustered index record */
71
dict_index_t* clust_index) /* in: clustered index */
79
mem_heap_t* heap = NULL;
80
ulint clust_offsets_[REC_OFFS_NORMAL_SIZE];
81
ulint sec_offsets_[REC_OFFS_SMALL_SIZE];
82
ulint* clust_offs = clust_offsets_;
83
ulint* sec_offs = sec_offsets_;
84
ibool is_equal = TRUE;
86
*clust_offsets_ = (sizeof clust_offsets_) / sizeof *clust_offsets_;
87
*sec_offsets_ = (sizeof sec_offsets_) / sizeof *sec_offsets_;
89
clust_offs = rec_get_offsets(clust_rec, clust_index, clust_offs,
90
ULINT_UNDEFINED, &heap);
91
sec_offs = rec_get_offsets(sec_rec, sec_index, sec_offs,
92
ULINT_UNDEFINED, &heap);
94
n = dict_index_get_n_ordering_defined_by_user(sec_index);
96
for (i = 0; i < n; i++) {
97
const dict_field_t* ifield;
98
const dict_col_t* col;
100
ifield = dict_index_get_nth_field(sec_index, i);
101
col = dict_field_get_col(ifield);
103
clust_field = rec_get_nth_field(
104
clust_rec, clust_offs,
105
dict_col_get_clust_pos(col, clust_index), &clust_len);
106
sec_field = rec_get_nth_field(sec_rec, sec_offs, i, &sec_len);
108
if (ifield->prefix_len > 0 && clust_len != UNIV_SQL_NULL) {
110
clust_len = dtype_get_at_most_n_mbchars(
111
col->prtype, col->mbminlen, col->mbmaxlen,
113
clust_len, (char*) clust_field);
116
if (0 != cmp_data_data(col->mtype, col->prtype,
117
clust_field, clust_len,
118
sec_field, sec_len)) {
125
if (UNIV_LIKELY_NULL(heap)) {
131
/*************************************************************************
132
Creates a select node struct. */
137
/* out, own: select node struct */
138
mem_heap_t* heap) /* in: memory heap where created */
142
node = mem_heap_alloc(heap, sizeof(sel_node_t));
143
node->common.type = QUE_NODE_SELECT;
144
node->state = SEL_NODE_OPEN;
146
node->select_will_do_update = FALSE;
147
node->latch_mode = BTR_SEARCH_LEAF;
154
/*************************************************************************
155
Frees the memory private to a select node when a query graph is freed,
156
does not free the heap where the node was originally created. */
159
sel_node_free_private(
160
/*==================*/
161
sel_node_t* node) /* in: select node struct */
166
if (node->plans != NULL) {
167
for (i = 0; i < node->n_tables; i++) {
168
plan = sel_node_get_nth_plan(node, i);
170
btr_pcur_close(&(plan->pcur));
171
btr_pcur_close(&(plan->clust_pcur));
173
if (plan->old_vers_heap) {
174
mem_heap_free(plan->old_vers_heap);
180
/*************************************************************************
181
Evaluates the values in a select list. If there are aggregate functions,
182
their argument value is added to the aggregate total. */
185
sel_eval_select_list(
186
/*=================*/
187
sel_node_t* node) /* in: select node */
191
exp = node->select_list;
196
exp = que_node_get_next(exp);
200
/*************************************************************************
201
Assigns the values in the select list to the possible into-variables in
202
SELECT ... INTO ... */
205
sel_assign_into_var_values(
206
/*=======================*/
207
sym_node_t* var, /* in: first variable in a list of variables */
208
sel_node_t* node) /* in: select node */
217
exp = node->select_list;
222
eval_node_copy_val(var->alias, exp);
224
exp = que_node_get_next(exp);
225
var = que_node_get_next(var);
229
/*************************************************************************
230
Resets the aggregate value totals in the select list of an aggregate type
query. */
234
sel_reset_aggregate_vals(
235
/*=====================*/
236
sel_node_t* node) /* in: select node */
238
func_node_t* func_node;
240
ut_ad(node->is_aggregate);
242
func_node = node->select_list;
245
eval_node_set_int_val(func_node, 0);
247
func_node = que_node_get_next(func_node);
250
node->aggregate_already_fetched = FALSE;
253
/*************************************************************************
254
Copies the input variable values when an explicit cursor is opened. */
257
row_sel_copy_input_variable_vals(
258
/*=============================*/
259
sel_node_t* node) /* in: select node */
263
var = UT_LIST_GET_FIRST(node->copy_variables);
266
eval_node_copy_val(var, var->alias);
268
var->indirection = NULL;
270
var = UT_LIST_GET_NEXT(col_var_list, var);
274
/*************************************************************************
275
Fetches the column values from a record. */
278
row_sel_fetch_columns(
279
/*==================*/
280
dict_index_t* index, /* in: record index */
281
rec_t* rec, /* in: record in a clustered or non-clustered
283
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
284
sym_node_t* column) /* in: first column in a column list, or
293
ut_ad(rec_offs_validate(rec, index, offsets));
295
if (index->type & DICT_CLUSTERED) {
296
index_type = SYM_CLUST_FIELD_NO;
298
index_type = SYM_SEC_FIELD_NO;
302
mem_heap_t* heap = NULL;
305
field_no = column->field_nos[index_type];
307
if (field_no != ULINT_UNDEFINED) {
309
if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets,
312
/* Copy an externally stored field to the
315
heap = mem_heap_create(1);
317
data = btr_rec_copy_externally_stored_field(
318
rec, offsets, field_no, &len, heap);
320
ut_a(len != UNIV_SQL_NULL);
324
data = rec_get_nth_field(rec, offsets,
327
needs_copy = column->copy_val;
331
eval_node_copy_and_alloc_val(column, data,
334
val = que_node_get_val(column);
335
dfield_set_data(val, data, len);
338
if (UNIV_LIKELY_NULL(heap)) {
343
column = UT_LIST_GET_NEXT(col_var_list, column);
347
/*************************************************************************
348
Allocates a prefetch buffer for a column the first time a prefetch is done. */
351
sel_col_prefetch_buf_alloc(
352
/*=======================*/
353
sym_node_t* column) /* in: symbol table node for a column */
358
ut_ad(que_node_get_type(column) == QUE_NODE_SYMBOL);
360
column->prefetch_buf = mem_alloc(SEL_MAX_N_PREFETCH
361
* sizeof(sel_buf_t));
362
for (i = 0; i < SEL_MAX_N_PREFETCH; i++) {
363
sel_buf = column->prefetch_buf + i;
365
sel_buf->data = NULL;
367
sel_buf->val_buf_size = 0;
371
/*************************************************************************
372
Frees a prefetch buffer for a column, including the dynamically allocated
373
memory for data stored there. */
376
sel_col_prefetch_buf_free(
377
/*======================*/
378
sel_buf_t* prefetch_buf) /* in, own: prefetch buffer */
383
for (i = 0; i < SEL_MAX_N_PREFETCH; i++) {
384
sel_buf = prefetch_buf + i;
386
if (sel_buf->val_buf_size > 0) {
388
mem_free(sel_buf->data);
393
/*************************************************************************
394
Pops the column values for a prefetched, cached row from the column prefetch
395
buffers and places them to the val fields in the column nodes. */
398
sel_pop_prefetched_row(
399
/*===================*/
400
plan_t* plan) /* in: plan node for a table */
409
ut_ad(plan->n_rows_prefetched > 0);
411
column = UT_LIST_GET_FIRST(plan->columns);
414
val = que_node_get_val(column);
416
if (!column->copy_val) {
417
/* We did not really push any value for the
420
ut_ad(!column->prefetch_buf);
421
ut_ad(que_node_get_val_buf_size(column) == 0);
423
dfield_set_data(val, NULL, 0);
428
ut_ad(column->prefetch_buf);
430
sel_buf = column->prefetch_buf + plan->first_prefetched;
432
data = sel_buf->data;
434
val_buf_size = sel_buf->val_buf_size;
436
/* We must keep track of the allocated memory for
437
column values to be able to free it later: therefore
438
we swap the values for sel_buf and val */
440
sel_buf->data = dfield_get_data(val);
441
sel_buf->len = dfield_get_len(val);
442
sel_buf->val_buf_size = que_node_get_val_buf_size(column);
444
dfield_set_data(val, data, len);
445
que_node_set_val_buf_size(column, val_buf_size);
447
column = UT_LIST_GET_NEXT(col_var_list, column);
450
plan->n_rows_prefetched--;
452
plan->first_prefetched++;
455
/*************************************************************************
456
Pushes the column values for a prefetched, cached row to the column prefetch
457
buffers from the val fields in the column nodes. */
460
sel_push_prefetched_row(
461
/*====================*/
462
plan_t* plan) /* in: plan node for a table */
472
if (plan->n_rows_prefetched == 0) {
474
plan->first_prefetched = 0;
476
pos = plan->n_rows_prefetched;
478
/* We have the convention that pushing new rows starts only
479
after the prefetch stack has been emptied: */
481
ut_ad(plan->first_prefetched == 0);
484
plan->n_rows_prefetched++;
486
ut_ad(pos < SEL_MAX_N_PREFETCH);
488
column = UT_LIST_GET_FIRST(plan->columns);
491
if (!column->copy_val) {
492
/* There is no sense to push pointers to database
493
page fields when we do not keep latch on the page! */
498
if (!column->prefetch_buf) {
499
/* Allocate a new prefetch buffer */
501
sel_col_prefetch_buf_alloc(column);
504
sel_buf = column->prefetch_buf + pos;
506
val = que_node_get_val(column);
508
data = dfield_get_data(val);
509
len = dfield_get_len(val);
510
val_buf_size = que_node_get_val_buf_size(column);
512
/* We must keep track of the allocated memory for
513
column values to be able to free it later: therefore
514
we swap the values for sel_buf and val */
516
dfield_set_data(val, sel_buf->data, sel_buf->len);
517
que_node_set_val_buf_size(column, sel_buf->val_buf_size);
519
sel_buf->data = data;
521
sel_buf->val_buf_size = val_buf_size;
523
column = UT_LIST_GET_NEXT(col_var_list, column);
527
/*************************************************************************
528
Builds a previous version of a clustered index record for a consistent read */
531
row_sel_build_prev_vers(
532
/*====================*/
533
/* out: DB_SUCCESS or error code */
534
read_view_t* read_view, /* in: read view */
535
dict_index_t* index, /* in: clustered index of rec */
536
rec_t* rec, /* in: record in a clustered index */
537
ulint** offsets, /* in/out: offsets returned by
538
rec_get_offsets(rec, index) */
539
mem_heap_t** offset_heap, /* in/out: memory heap from which
540
the offsets are allocated */
541
mem_heap_t** old_vers_heap, /* in/out: old version heap to use;
an already existing heap is emptied
for reuse */
542
rec_t** old_vers, /* out: old version, or NULL if the
543
record does not exist in the view:
544
i.e., it was freshly inserted
546
mtr_t* mtr) /* in: mtr */
550
if (*old_vers_heap) {
551
mem_heap_empty(*old_vers_heap);
553
*old_vers_heap = mem_heap_create(512);
556
err = row_vers_build_for_consistent_read(
557
rec, mtr, index, offsets, read_view, offset_heap,
558
*old_vers_heap, old_vers);
562
/*************************************************************************
563
Builds the last committed version of a clustered index record for a
564
semi-consistent read. */
567
row_sel_build_committed_vers_for_mysql(
568
/*===================================*/
569
/* out: DB_SUCCESS or error code */
570
dict_index_t* clust_index, /* in: clustered index */
571
row_prebuilt_t* prebuilt, /* in: prebuilt struct */
572
rec_t* rec, /* in: record in a clustered index */
573
ulint** offsets, /* in/out: offsets returned by
574
rec_get_offsets(rec, clust_index) */
575
mem_heap_t** offset_heap, /* in/out: memory heap from which
576
the offsets are allocated */
577
rec_t** old_vers, /* out: old version, or NULL if the
578
record does not exist in the view:
579
i.e., it was freshly inserted
581
mtr_t* mtr) /* in: mtr */
585
if (prebuilt->old_vers_heap) {
586
mem_heap_empty(prebuilt->old_vers_heap);
588
prebuilt->old_vers_heap = mem_heap_create(200);
591
err = row_vers_build_for_semi_consistent_read(
592
rec, mtr, clust_index, offsets, offset_heap,
593
prebuilt->old_vers_heap, old_vers);
597
/*************************************************************************
598
Tests the conditions which determine when the index segment we are searching
599
through has been exhausted. */
602
row_sel_test_end_conds(
603
/*===================*/
604
/* out: TRUE if row passed the tests */
605
plan_t* plan) /* in: plan for the table; the column values must
606
already have been retrieved and the right sides of
607
comparisons evaluated */
611
/* All conditions in end_conds are comparisons of a column to an
614
cond = UT_LIST_GET_FIRST(plan->end_conds);
617
/* Evaluate the left side of the comparison, i.e., get the
618
column value if there is an indirection */
620
eval_sym(cond->args);
622
/* Do the comparison */
624
if (!eval_cmp(cond)) {
629
cond = UT_LIST_GET_NEXT(cond_list, cond);
635
/*************************************************************************
636
Tests the other conditions. */
639
row_sel_test_other_conds(
640
/*=====================*/
641
/* out: TRUE if row passed the tests */
642
plan_t* plan) /* in: plan for the table; the column values must
643
already have been retrieved */
647
cond = UT_LIST_GET_FIRST(plan->other_conds);
652
if (!eval_node_get_ibool_val(cond)) {
657
cond = UT_LIST_GET_NEXT(cond_list, cond);
663
/*************************************************************************
664
Retrieves the clustered index record corresponding to a record in a
665
non-clustered index. Does the necessary locking. */
668
row_sel_get_clust_rec(
669
/*==================*/
670
/* out: DB_SUCCESS or error code */
671
sel_node_t* node, /* in: select_node */
672
plan_t* plan, /* in: plan node for table */
673
rec_t* rec, /* in: record in a non-clustered index */
674
que_thr_t* thr, /* in: query thread */
675
rec_t** out_rec,/* out: clustered record or an old version of
676
it, NULL if the old version did not exist
677
in the read view, i.e., it was a fresh
679
mtr_t* mtr) /* in: mtr used to get access to the
680
non-clustered record; the same mtr is used to
681
access the clustered index */
687
mem_heap_t* heap = NULL;
688
ulint offsets_[REC_OFFS_NORMAL_SIZE];
689
ulint* offsets = offsets_;
690
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
694
offsets = rec_get_offsets(rec,
695
btr_pcur_get_btr_cur(&plan->pcur)->index,
696
offsets, ULINT_UNDEFINED, &heap);
698
row_build_row_ref_fast(plan->clust_ref, plan->clust_map, rec, offsets);
700
index = dict_table_get_first_index(plan->table);
702
btr_pcur_open_with_no_init(index, plan->clust_ref, PAGE_CUR_LE,
703
node->latch_mode, &(plan->clust_pcur),
706
clust_rec = btr_pcur_get_rec(&(plan->clust_pcur));
708
/* Note: only if the search ends up on a non-infimum record is the
709
low_match value the real match to the search tuple */
711
if (!page_rec_is_user_rec(clust_rec)
712
|| btr_pcur_get_low_match(&(plan->clust_pcur))
713
< dict_index_get_n_unique(index)) {
715
ut_a(rec_get_deleted_flag(rec,
716
dict_table_is_comp(plan->table)));
717
ut_a(node->read_view);
719
/* In a rare case it is possible that no clust rec is found
720
for a delete-marked secondary index record: if in row0umod.c
721
in row_undo_mod_remove_clust_low() we have already removed
722
the clust rec, while purge is still cleaning and removing
723
secondary index records associated with earlier versions of
724
the clustered index record. In that case we know that the
725
clustered index record did not exist in the read view of
731
offsets = rec_get_offsets(clust_rec, index, offsets,
732
ULINT_UNDEFINED, &heap);
734
if (!node->read_view) {
735
/* Try to place a lock on the index record */
737
/* If innodb_locks_unsafe_for_binlog option is used
738
or this session is using READ COMMITTED isolation level
739
we lock only the record, i.e., next-key locking is
744
trx = thr_get_trx(thr);
746
if (srv_locks_unsafe_for_binlog
747
|| trx->isolation_level == TRX_ISO_READ_COMMITTED) {
748
lock_type = LOCK_REC_NOT_GAP;
750
lock_type = LOCK_ORDINARY;
753
err = lock_clust_rec_read_check_and_lock(
754
0, clust_rec, index, offsets,
755
node->row_lock_mode, lock_type, thr);
757
if (err != DB_SUCCESS) {
762
/* This is a non-locking consistent read: if necessary, fetch
763
a previous version of the record */
767
if (!lock_clust_rec_cons_read_sees(clust_rec, index, offsets,
770
err = row_sel_build_prev_vers(
771
node->read_view, index, clust_rec,
772
&offsets, &heap, &plan->old_vers_heap,
775
if (err != DB_SUCCESS) {
780
clust_rec = old_vers;
782
if (clust_rec == NULL) {
787
/* If we had to go to an earlier version of row or the
788
secondary index record is delete marked, then it may be that
789
the secondary index record corresponding to clust_rec
790
(or old_vers) is not rec; in that case we must ignore
791
such row because in our snapshot rec would not have existed.
792
Remember that from rec we cannot see directly which transaction
793
id corresponds to it: we have to go to the clustered index
794
record. A query where we want to fetch all rows where
795
the secondary index value is in some interval would return
796
a wrong result if we would not drop rows which we come to
797
visit through secondary index records that would not really
798
exist in our snapshot. */
801
|| rec_get_deleted_flag(rec, dict_table_is_comp(
803
&& !row_sel_sec_rec_is_for_clust_rec(rec, plan->index,
809
/* Fetch the columns needed in test conditions */
811
row_sel_fetch_columns(index, clust_rec, offsets,
812
UT_LIST_GET_FIRST(plan->columns));
813
*out_rec = clust_rec;
817
if (UNIV_LIKELY_NULL(heap)) {
823
/*************************************************************************
824
Sets a lock on a record. */
829
/* out: DB_SUCCESS or error code */
830
rec_t* rec, /* in: record */
831
dict_index_t* index, /* in: index */
832
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
833
ulint mode, /* in: lock mode */
834
ulint type, /* in: LOCK_ORDINARY, LOCK_GAP, or
836
que_thr_t* thr) /* in: query thread */
841
trx = thr_get_trx(thr);
843
if (UT_LIST_GET_LEN(trx->trx_locks) > 10000) {
844
if (buf_LRU_buf_pool_running_out()) {
846
return(DB_LOCK_TABLE_FULL);
850
if (index->type & DICT_CLUSTERED) {
851
err = lock_clust_rec_read_check_and_lock(
852
0, rec, index, offsets, mode, type, thr);
854
err = lock_sec_rec_read_check_and_lock(
855
0, rec, index, offsets, mode, type, thr);
861
/*************************************************************************
862
Opens a pcur to a table index. */
867
sel_node_t* node, /* in: select node */
868
plan_t* plan, /* in: table plan */
869
ibool search_latch_locked,
870
/* in: TRUE if the thread currently
871
has the search latch locked in
873
mtr_t* mtr) /* in: mtr */
879
ulint has_search_latch = 0; /* RW_S_LATCH or 0 */
882
if (search_latch_locked) {
883
has_search_latch = RW_S_LATCH;
888
/* Calculate the value of the search tuple: the exact match columns
889
get their expressions evaluated when we evaluate the right sides of
892
cond = UT_LIST_GET_FIRST(plan->end_conds);
895
eval_exp(que_node_get_next(cond->args));
897
cond = UT_LIST_GET_NEXT(cond_list, cond);
901
n_fields = dtuple_get_n_fields(plan->tuple);
903
if (plan->n_exact_match < n_fields) {
904
/* There is a non-exact match field which must be
905
evaluated separately */
907
eval_exp(plan->tuple_exps[n_fields - 1]);
910
for (i = 0; i < n_fields; i++) {
911
exp = plan->tuple_exps[i];
913
dfield_copy_data(dtuple_get_nth_field(plan->tuple, i),
914
que_node_get_val(exp));
917
/* Open pcur to the index */
919
btr_pcur_open_with_no_init(index, plan->tuple, plan->mode,
920
node->latch_mode, &(plan->pcur),
921
has_search_latch, mtr);
923
/* Open the cursor to the start or the end of the index
926
btr_pcur_open_at_index_side(plan->asc, index, node->latch_mode,
927
&(plan->pcur), FALSE, mtr);
930
ut_ad(plan->n_rows_prefetched == 0);
931
ut_ad(plan->n_rows_fetched == 0);
932
ut_ad(plan->cursor_at_end == FALSE);
934
plan->pcur_is_open = TRUE;
937
/*************************************************************************
938
Restores a stored pcur position to a table index. */
941
row_sel_restore_pcur_pos(
942
/*=====================*/
943
/* out: TRUE if the cursor should be moved to
944
the next record after we return from this
945
function (moved to the previous, in the case
946
of a descending cursor) without processing
947
again the current cursor record */
948
sel_node_t* node, /* in: select node */
949
plan_t* plan, /* in: table plan */
950
mtr_t* mtr) /* in: mtr */
952
ibool equal_position;
953
ulint relative_position;
955
ut_ad(!plan->cursor_at_end);
957
relative_position = btr_pcur_get_rel_pos(&(plan->pcur));
959
equal_position = btr_pcur_restore_position(node->latch_mode,
962
/* If the cursor is traveling upwards, and relative_position is
964
(1) BTR_PCUR_BEFORE: this is not allowed, as we did not have a lock
965
yet on the successor of the page infimum;
966
(2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the
967
first record GREATER than the predecessor of a page supremum; we have
968
not yet processed the cursor record: no need to move the cursor to the
970
(3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the
971
last record LESS or EQUAL to the old stored user record; (a) if
972
equal_position is FALSE, this means that the cursor is now on a record
973
less than the old user record, and we must move to the next record;
974
(b) if equal_position is TRUE, then if
975
plan->stored_cursor_rec_processed is TRUE, we must move to the next
976
record, else there is no need to move the cursor. */
979
if (relative_position == BTR_PCUR_ON) {
981
if (equal_position) {
983
return(plan->stored_cursor_rec_processed);
989
ut_ad(relative_position == BTR_PCUR_AFTER
990
|| relative_position == BTR_PCUR_AFTER_LAST_IN_TREE);
995
/* If the cursor is traveling downwards, and relative_position is
997
(1) BTR_PCUR_BEFORE: btr_pcur_restore_position placed the cursor on
998
the last record LESS than the successor of a page infimum; we have not
999
processed the cursor record: no need to move the cursor;
1000
(2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the
1001
first record GREATER than the predecessor of a page supremum; we have
1002
processed the cursor record: we should move the cursor to the previous
1004
(3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the
1005
last record LESS or EQUAL to the old stored user record; (a) if
1006
equal_position is FALSE, this means that the cursor is now on a record
1007
less than the old user record, and we need not move to the previous
1008
record; (b) if equal_position is TRUE, then if
1009
plan->stored_cursor_rec_processed is TRUE, we must move to the previous
1010
record, else there is no need to move the cursor. */
1012
if (relative_position == BTR_PCUR_BEFORE
1013
|| relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
1018
if (relative_position == BTR_PCUR_ON) {
1020
if (equal_position) {
1022
return(plan->stored_cursor_rec_processed);
1028
ut_ad(relative_position == BTR_PCUR_AFTER
1029
|| relative_position == BTR_PCUR_AFTER_LAST_IN_TREE);
1034
/*************************************************************************
1035
Resets a plan cursor to a closed state. */
1040
plan_t* plan) /* in: plan */
1042
plan->pcur_is_open = FALSE;
1043
plan->cursor_at_end = FALSE;
1044
plan->n_rows_fetched = 0;
1045
plan->n_rows_prefetched = 0;
1048
/*************************************************************************
1049
Tries to do a shortcut to fetch a clustered index record with a unique key,
1050
using the hash index if possible (not always). */
1053
row_sel_try_search_shortcut(
1054
/*========================*/
1055
/* out: SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
1056
sel_node_t* node, /* in: select node for a consistent read */
1057
plan_t* plan, /* in: plan for a unique search in clustered
1059
mtr_t* mtr) /* in: mtr */
1061
dict_index_t* index;
1063
mem_heap_t* heap = NULL;
1064
ulint offsets_[REC_OFFS_NORMAL_SIZE];
1065
ulint* offsets = offsets_;
1067
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
1069
index = plan->index;
1071
ut_ad(node->read_view);
1072
ut_ad(plan->unique_search);
1073
ut_ad(!plan->must_get_clust);
1074
#ifdef UNIV_SYNC_DEBUG
1075
ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
1076
#endif /* UNIV_SYNC_DEBUG */
1078
row_sel_open_pcur(node, plan, TRUE, mtr);
1080
rec = btr_pcur_get_rec(&(plan->pcur));
1082
if (!page_rec_is_user_rec(rec)) {
1087
ut_ad(plan->mode == PAGE_CUR_GE);
1089
/* As the cursor is now placed on a user record after a search with
1090
the mode PAGE_CUR_GE, the up_match field in the cursor tells how many
1091
fields in the user record matched to the search tuple */
1093
if (btr_pcur_get_up_match(&(plan->pcur)) < plan->n_exact_match) {
1095
return(SEL_EXHAUSTED);
1098
/* This is a non-locking consistent read: if necessary, fetch
1099
a previous version of the record */
1101
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
1103
if (index->type & DICT_CLUSTERED) {
1104
if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
1109
} else if (!lock_sec_rec_cons_read_sees(rec, index, node->read_view)) {
1115
/* Test deleted flag. Fetch the columns needed in test conditions. */
1117
row_sel_fetch_columns(index, rec, offsets,
1118
UT_LIST_GET_FIRST(plan->columns));
1120
if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))) {
1122
ret = SEL_EXHAUSTED;
1126
/* Test the rest of search conditions */
1128
if (!row_sel_test_other_conds(plan)) {
1130
ret = SEL_EXHAUSTED;
1134
ut_ad(plan->pcur.latch_mode == node->latch_mode);
1136
plan->n_rows_fetched++;
1139
if (UNIV_LIKELY_NULL(heap)) {
1140
mem_heap_free(heap);
1145
/*************************************************************************
1146
Performs a select step. */
1151
/* out: DB_SUCCESS or error code */
1152
sel_node_t* node, /* in: select node */
1153
que_thr_t* thr) /* in: query thread */
1155
dict_index_t* index;
1162
ibool search_latch_locked;
1163
ibool consistent_read;
1165
/* The following flag becomes TRUE when we are doing a
1166
consistent read from a non-clustered index and we must look
1167
at the clustered index to find out the previous delete mark
1168
state of the non-clustered record: */
1170
ibool cons_read_requires_clust_rec = FALSE;
1171
ulint cost_counter = 0;
1172
ibool cursor_just_opened;
1173
ibool must_go_to_next;
1174
ibool leaf_contains_updates = FALSE;
1175
/* TRUE if select_will_do_update is
1176
TRUE and the current clustered index
1177
leaf page has been updated during
1178
the current mtr: mtr must be committed
1179
at the same time as the leaf x-latch
1181
ibool mtr_has_extra_clust_latch = FALSE;
1182
/* TRUE if the search was made using
1183
a non-clustered index, and we had to
1184
access the clustered record: now &mtr
1185
contains a clustered index latch, and
1186
&mtr must be committed before we move
1187
to the next non-clustered record */
1190
mem_heap_t* heap = NULL;
1191
ulint offsets_[REC_OFFS_NORMAL_SIZE];
1192
ulint* offsets = offsets_;
1193
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
1195
ut_ad(thr->run_node == node);
1197
search_latch_locked = FALSE;
1199
if (node->read_view) {
1200
/* In consistent reads, we try to do with the hash index and
1201
not to use the buffer page get. This is to reduce memory bus
1202
load resulting from semaphore operations. The search latch
1203
will be s-locked when we access an index with a unique search
1204
condition, but not locked when we access an index with a
1205
less selective search condition. */
1207
consistent_read = TRUE;
1209
consistent_read = FALSE;
1215
This is the outer major loop in calculating a join. We come here when
1216
node->fetch_table changes, and after adding a row to aggregate totals
1217
and, of course, when this function is called. */
1219
ut_ad(leaf_contains_updates == FALSE);
1220
ut_ad(mtr_has_extra_clust_latch == FALSE);
1222
plan = sel_node_get_nth_plan(node, node->fetch_table);
1223
index = plan->index;
1225
if (plan->n_rows_prefetched > 0) {
1226
sel_pop_prefetched_row(plan);
1228
goto next_table_no_mtr;
1231
if (plan->cursor_at_end) {
1232
/* The cursor has already reached the result set end: no more
1233
rows to process for this table cursor, as also the prefetch
1236
ut_ad(plan->pcur_is_open);
1238
goto table_exhausted_no_mtr;
1241
/* Open a cursor to index, or restore an open cursor position */
1245
if (consistent_read && plan->unique_search && !plan->pcur_is_open
1246
&& !plan->must_get_clust
1247
&& !plan->table->big_rows) {
1248
if (!search_latch_locked) {
1249
rw_lock_s_lock(&btr_search_latch);
1251
search_latch_locked = TRUE;
1252
} else if (btr_search_latch.writer_is_wait_ex) {
1254
/* There is an x-latch request waiting: release the
1255
s-latch for a moment; as an s-latch here is often
1256
kept for some 10 searches before being released,
1257
a waiting x-latch request would block other threads
1258
from acquiring an s-latch for a long time, lowering
1259
performance significantly in multiprocessors. */
1261
rw_lock_s_unlock(&btr_search_latch);
1262
rw_lock_s_lock(&btr_search_latch);
1265
found_flag = row_sel_try_search_shortcut(node, plan, &mtr);
1267
if (found_flag == SEL_FOUND) {
1271
} else if (found_flag == SEL_EXHAUSTED) {
1273
goto table_exhausted;
1276
ut_ad(found_flag == SEL_RETRY);
1278
plan_reset_cursor(plan);
1284
if (search_latch_locked) {
1285
rw_lock_s_unlock(&btr_search_latch);
1287
search_latch_locked = FALSE;
1290
if (!plan->pcur_is_open) {
1291
/* Evaluate the expressions to build the search tuple and
1294
row_sel_open_pcur(node, plan, search_latch_locked, &mtr);
1296
cursor_just_opened = TRUE;
1298
/* A new search was made: increment the cost counter */
1301
/* Restore pcur position to the index */
1303
must_go_to_next = row_sel_restore_pcur_pos(node, plan, &mtr);
1305
cursor_just_opened = FALSE;
1307
if (must_go_to_next) {
1308
/* We have already processed the cursor record: move
1318
In this loop we use pcur and try to fetch a qualifying row, and
1319
also fill the prefetch buffer for this table if n_rows_fetched has
1320
exceeded a threshold. While we are inside this loop, the following
1322
(1) &mtr is started,
1323
(2) pcur is positioned and open.
1325
NOTE that if cursor_just_opened is TRUE here, it means that we came
1326
to this point right after row_sel_open_pcur. */
1328
ut_ad(mtr_has_extra_clust_latch == FALSE);
1330
rec = btr_pcur_get_rec(&(plan->pcur));
1332
/* PHASE 1: Set a lock if specified */
1334
if (!node->asc && cursor_just_opened
1335
&& !page_rec_is_supremum(rec)) {
1337
/* When we open a cursor for a descending search, we must set
1338
a next-key lock on the successor record: otherwise it would
1339
be possible to insert new records next to the cursor position,
1340
and it might be that these new records should appear in the
1341
search result set, resulting in the phantom problem. */
1343
if (!consistent_read) {
1345
/* If innodb_locks_unsafe_for_binlog option is used
1346
or this session is using READ COMMITTED isolation
1347
level, we lock only the record, i.e., next-key
1348
locking is not used. */
1350
rec_t* next_rec = page_rec_get_next(rec);
1354
trx = thr_get_trx(thr);
1356
offsets = rec_get_offsets(next_rec, index, offsets,
1357
ULINT_UNDEFINED, &heap);
1359
if (srv_locks_unsafe_for_binlog
1360
|| trx->isolation_level
1361
== TRX_ISO_READ_COMMITTED) {
1363
if (page_rec_is_supremum(next_rec)) {
1368
lock_type = LOCK_REC_NOT_GAP;
1370
lock_type = LOCK_ORDINARY;
1373
err = sel_set_rec_lock(next_rec, index, offsets,
1374
node->row_lock_mode,
1377
if (err != DB_SUCCESS) {
1378
/* Note that in this case we will store in pcur
1379
the PREDECESSOR of the record we are waiting
1382
goto lock_wait_or_error;
1388
if (page_rec_is_infimum(rec)) {
1390
/* The infimum record on a page cannot be in the result set,
1391
and neither can a record lock be placed on it: we skip such
1392
a record. We also increment the cost counter as we may have
1393
processed yet another page of index. */
1400
if (!consistent_read) {
1401
/* Try to place a lock on the index record */
1403
/* If innodb_locks_unsafe_for_binlog option is used
1404
or this session is using READ COMMITTED isolation level,
1405
we lock only the record, i.e., next-key locking is
1411
offsets = rec_get_offsets(rec, index, offsets,
1412
ULINT_UNDEFINED, &heap);
1414
trx = thr_get_trx(thr);
1416
if (srv_locks_unsafe_for_binlog
1417
|| trx->isolation_level == TRX_ISO_READ_COMMITTED) {
1419
if (page_rec_is_supremum(rec)) {
1424
lock_type = LOCK_REC_NOT_GAP;
1426
lock_type = LOCK_ORDINARY;
1429
err = sel_set_rec_lock(rec, index, offsets,
1430
node->row_lock_mode, lock_type, thr);
1432
if (err != DB_SUCCESS) {
1434
goto lock_wait_or_error;
1438
if (page_rec_is_supremum(rec)) {
1440
/* A page supremum record cannot be in the result set: skip
1441
it now when we have placed a possible lock on it */
1446
ut_ad(page_rec_is_user_rec(rec));
1448
if (cost_counter > SEL_COST_LIMIT) {
1450
/* Now that we have placed the necessary locks, we can stop
1451
for a while and store the cursor position; NOTE that if we
1452
would store the cursor position BEFORE placing a record lock,
1453
it might happen that the cursor would jump over some records
1454
that another transaction could meanwhile insert adjacent to
1455
the cursor: this would result in the phantom problem. */
1457
goto stop_for_a_while;
1460
/* PHASE 2: Check a mixed index mix id if needed */
1462
if (plan->unique_search && cursor_just_opened) {
1464
ut_ad(plan->mode == PAGE_CUR_GE);
1466
/* As the cursor is now placed on a user record after a search
1467
with the mode PAGE_CUR_GE, the up_match field in the cursor
1468
tells how many fields in the user record matched to the search
1471
if (btr_pcur_get_up_match(&(plan->pcur))
1472
< plan->n_exact_match) {
1473
goto table_exhausted;
1476
/* Ok, no need to test end_conds or mix id */
1480
/* We are ready to look at a possible new index entry in the result
1481
set: the cursor is now placed on a user record */
1483
/* PHASE 3: Get previous version in a consistent read */
1485
cons_read_requires_clust_rec = FALSE;
1486
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
1488
if (consistent_read) {
1489
/* This is a non-locking consistent read: if necessary, fetch
1490
a previous version of the record */
1492
if (index->type & DICT_CLUSTERED) {
1494
if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
1497
err = row_sel_build_prev_vers(
1498
node->read_view, index, rec,
1499
&offsets, &heap, &plan->old_vers_heap,
1502
if (err != DB_SUCCESS) {
1504
goto lock_wait_or_error;
1507
if (old_vers == NULL) {
1508
offsets = rec_get_offsets(
1509
rec, index, offsets,
1510
ULINT_UNDEFINED, &heap);
1511
row_sel_fetch_columns(
1512
index, rec, offsets,
1516
if (!row_sel_test_end_conds(plan)) {
1518
goto table_exhausted;
1526
} else if (!lock_sec_rec_cons_read_sees(rec, index,
1528
cons_read_requires_clust_rec = TRUE;
1532
/* PHASE 4: Test search end conditions and deleted flag */
1534
/* Fetch the columns needed in test conditions */
1536
row_sel_fetch_columns(index, rec, offsets,
1537
UT_LIST_GET_FIRST(plan->columns));
1539
/* Test the selection end conditions: these can only contain columns
1540
which already are found in the index, even though the index might be
1543
if (plan->unique_search && cursor_just_opened) {
1545
/* No test necessary: the test was already made above */
1547
} else if (!row_sel_test_end_conds(plan)) {
1549
goto table_exhausted;
1552
if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))
1553
&& !cons_read_requires_clust_rec) {
1555
/* The record is delete marked: we can skip it if this is
1556
not a consistent read which might see an earlier version
1557
of a non-clustered index record */
1559
if (plan->unique_search) {
1561
goto table_exhausted;
1567
/* PHASE 5: Get the clustered index record, if needed and if we did
1568
not do the search using the clustered index */
1570
if (plan->must_get_clust || cons_read_requires_clust_rec) {
1572
/* It was a non-clustered index and we must fetch also the
1573
clustered index record */
1575
err = row_sel_get_clust_rec(node, plan, rec, thr, &clust_rec,
1577
mtr_has_extra_clust_latch = TRUE;
1579
if (err != DB_SUCCESS) {
1581
goto lock_wait_or_error;
1584
/* Retrieving the clustered record required a search:
1585
increment the cost counter */
1589
if (clust_rec == NULL) {
1590
/* The record did not exist in the read view */
1591
ut_ad(consistent_read);
1596
if (rec_get_deleted_flag(clust_rec,
1597
dict_table_is_comp(plan->table))) {
1599
/* The record is delete marked: we can skip it */
1604
if (node->can_get_updated) {
1606
btr_pcur_store_position(&(plan->clust_pcur), &mtr);
1610
/* PHASE 6: Test the rest of search conditions */
1612
if (!row_sel_test_other_conds(plan)) {
1614
if (plan->unique_search) {
1616
goto table_exhausted;
1622
/* PHASE 7: We found a new qualifying row for the current table; push
1623
the row if prefetch is on, or move to the next table in the join */
1625
plan->n_rows_fetched++;
1627
ut_ad(plan->pcur.latch_mode == node->latch_mode);
1629
if (node->select_will_do_update) {
1630
/* This is a searched update and we can do the update in-place,
1633
row_upd_in_place_in_select(node, thr, &mtr);
1635
leaf_contains_updates = TRUE;
1637
/* When the database is in the online backup mode, the number
1638
of log records for a single mtr should be small: increment the
1639
cost counter to ensure it */
1641
cost_counter += 1 + (SEL_COST_LIMIT / 8);
1643
if (plan->unique_search) {
1645
goto table_exhausted;
1651
if ((plan->n_rows_fetched <= SEL_PREFETCH_LIMIT)
1652
|| plan->unique_search || plan->no_prefetch
1653
|| plan->table->big_rows) {
1655
/* No prefetch in operation: go to the next table */
1660
sel_push_prefetched_row(plan);
1662
if (plan->n_rows_prefetched == SEL_MAX_N_PREFETCH) {
1664
/* The prefetch buffer is now full */
1666
sel_pop_prefetched_row(plan);
1672
ut_ad(!search_latch_locked);
1674
if (mtr_has_extra_clust_latch) {
1676
/* We must commit &mtr if we are moving to the next
1677
non-clustered index record, because we could break the
1678
latching order if we would access a different clustered
1679
index page right away without releasing the previous. */
1681
goto commit_mtr_for_a_while;
1684
if (leaf_contains_updates
1685
&& btr_pcur_is_after_last_on_page(&(plan->pcur), &mtr)) {
1687
/* We must commit &mtr if we are moving to a different page,
1688
because we have done updates to the x-latched leaf page, and
1689
the latch would be released in btr_pcur_move_to_next, without
1690
&mtr getting committed there */
1694
goto commit_mtr_for_a_while;
1698
moved = btr_pcur_move_to_next(&(plan->pcur), &mtr);
1700
moved = btr_pcur_move_to_prev(&(plan->pcur), &mtr);
1705
goto table_exhausted;
1708
cursor_just_opened = FALSE;
1710
/* END OF RECORD LOOP
1711
------------------ */
1715
/* We found a record which satisfies the conditions: we can move to
1716
the next table or return a row in the result set */
1718
ut_ad(btr_pcur_is_on_user_rec(&(plan->pcur), &mtr));
1720
if (plan->unique_search && !node->can_get_updated) {
1722
plan->cursor_at_end = TRUE;
1724
ut_ad(!search_latch_locked);
1726
plan->stored_cursor_rec_processed = TRUE;
1728
btr_pcur_store_position(&(plan->pcur), &mtr);
1733
leaf_contains_updates = FALSE;
1734
mtr_has_extra_clust_latch = FALSE;
1737
/* If we use 'goto' to this label, it means that the row was popped
1738
from the prefetched rows stack, and &mtr is already committed */
1740
if (node->fetch_table + 1 == node->n_tables) {
1742
sel_eval_select_list(node);
1744
if (node->is_aggregate) {
1749
sel_assign_into_var_values(node->into_list, node);
1751
thr->run_node = que_node_get_parent(node);
1753
if (search_latch_locked) {
1754
rw_lock_s_unlock(&btr_search_latch);
1761
node->fetch_table++;
1763
/* When we move to the next table, we first reset the plan cursor:
1764
we do not care about resetting it when we backtrack from a table */
1766
plan_reset_cursor(sel_node_get_nth_plan(node, node->fetch_table));
1771
/* The table cursor pcur reached the result set end: backtrack to the
1772
previous table in the join if we do not have cached prefetched rows */
1774
plan->cursor_at_end = TRUE;
1778
leaf_contains_updates = FALSE;
1779
mtr_has_extra_clust_latch = FALSE;
1781
if (plan->n_rows_prefetched > 0) {
1782
/* The table became exhausted during a prefetch */
1784
sel_pop_prefetched_row(plan);
1786
goto next_table_no_mtr;
1789
table_exhausted_no_mtr:
1790
if (node->fetch_table == 0) {
1793
if (node->is_aggregate && !node->aggregate_already_fetched) {
1795
node->aggregate_already_fetched = TRUE;
1797
sel_assign_into_var_values(node->into_list, node);
1799
thr->run_node = que_node_get_parent(node);
1801
if (search_latch_locked) {
1802
rw_lock_s_unlock(&btr_search_latch);
1808
node->state = SEL_NODE_NO_MORE_ROWS;
1810
thr->run_node = que_node_get_parent(node);
1812
if (search_latch_locked) {
1813
rw_lock_s_unlock(&btr_search_latch);
1819
node->fetch_table--;
1824
/* Return control for a while to que_run_threads, so that runaway
1825
queries can be canceled. NOTE that when we come here, we must, in a
1826
locking read, have placed the necessary (possibly waiting request)
1827
record lock on the cursor record or its successor: when we reposition
1828
the cursor, this record lock guarantees that nobody can meanwhile have
1829
inserted new records which should have appeared in the result set,
1830
which would result in the phantom problem. */
1832
ut_ad(!search_latch_locked);
1834
plan->stored_cursor_rec_processed = FALSE;
1835
btr_pcur_store_position(&(plan->pcur), &mtr);
1839
#ifdef UNIV_SYNC_DEBUG
1840
ut_ad(sync_thread_levels_empty_gen(TRUE));
1841
#endif /* UNIV_SYNC_DEBUG */
1845
commit_mtr_for_a_while:
1846
/* Stores the cursor position and commits &mtr; this is used if
1847
&mtr may contain latches which would break the latching order if
1848
&mtr would not be committed and the latches released. */
1850
plan->stored_cursor_rec_processed = TRUE;
1852
ut_ad(!search_latch_locked);
1853
btr_pcur_store_position(&(plan->pcur), &mtr);
1857
leaf_contains_updates = FALSE;
1858
mtr_has_extra_clust_latch = FALSE;
1860
#ifdef UNIV_SYNC_DEBUG
1861
ut_ad(sync_thread_levels_empty_gen(TRUE));
1862
#endif /* UNIV_SYNC_DEBUG */
1867
/* See the note at stop_for_a_while: the same holds for this case */
1869
ut_ad(!btr_pcur_is_before_first_on_page(&(plan->pcur), &mtr)
1871
ut_ad(!search_latch_locked);
1873
plan->stored_cursor_rec_processed = FALSE;
1874
btr_pcur_store_position(&(plan->pcur), &mtr);
1878
#ifdef UNIV_SYNC_DEBUG
1879
ut_ad(sync_thread_levels_empty_gen(TRUE));
1880
#endif /* UNIV_SYNC_DEBUG */
1883
if (UNIV_LIKELY_NULL(heap)) {
1884
mem_heap_free(heap);
1889
/* Entry point for running a select node. When the node is in state
SEL_NODE_OPEN, the transaction is started if needed, the cursor of plan 0 is
reset, a read view is assigned for a consistent read, and LOCK_IX or LOCK_IS
table locks are requested on the tables in node->table_list; the node then
moves to SEL_NODE_FETCH with fetch_table 0. After that row_sel() does the
fetch. An error code other than DB_SUCCESS from lock_table() or row_sel() is
stored in the error_state of the transaction.
NOTE(review): the branch separating the read view assignment from the table
locking is not visible in this text; confirm whether the locks are requested
only for locking reads. */
/**************************************************************************
1890
Performs a select step. This is a high-level function used in SQL execution
1896
/* out: query thread to run next or NULL */
1897
que_thr_t* thr) /* in: query thread */
1900
sym_node_t* table_node;
1906
node = thr->run_node;
1908
ut_ad(que_node_get_type(node) == QUE_NODE_SELECT);
1910
/* If this is a new time this node is executed (or when execution
1911
resumes after wait for a table intention lock), set intention locks
1912
on the tables, or assign a read view */
1914
/* A select with an INTO list that is entered from its parent node is
put back to the open state, so the initialization below runs again */
if (node->into_list && (thr->prev_node == que_node_get_parent(node))) {
1916
node->state = SEL_NODE_OPEN;
1919
if (node->state == SEL_NODE_OPEN) {
1921
/* It may be that the current session has not yet started
1922
its transaction, or it has been committed: */
1924
trx_start_if_not_started(thr_get_trx(thr));
1926
plan_reset_cursor(sel_node_get_nth_plan(node, 0));
1928
if (node->consistent_read) {
1929
/* Assign a read view for the query */
1930
node->read_view = trx_assign_read_view(
1933
/* Pick the table intention lock mode from node->set_x_locks:
LOCK_IX or LOCK_IS */
if (node->set_x_locks) {
1934
i_lock_mode = LOCK_IX;
1936
i_lock_mode = LOCK_IS;
1939
table_node = node->table_list;
1941
/* Request the lock on every table in the list; a failure is recorded
in the transaction */
while (table_node) {
1942
err = lock_table(0, table_node->table,
1944
if (err != DB_SUCCESS) {
1945
thr_get_trx(thr)->error_state = err;
1950
table_node = que_node_get_next(table_node);
1954
/* If this is an explicit cursor, copy stored procedure
1955
variable values, so that the values cannot change between
1956
fetches (currently, we copy them also for non-explicit
1959
if (node->explicit_cursor
1960
&& UT_LIST_GET_FIRST(node->copy_variables)) {
1962
row_sel_copy_input_variable_vals(node);
1965
node->state = SEL_NODE_FETCH;
1966
node->fetch_table = 0;
1968
if (node->is_aggregate) {
1969
/* Reset the aggregate total values */
1970
sel_reset_aggregate_vals(node);
1974
err = row_sel(node, thr);
1976
/* NOTE! if queries are parallelized, the following assignment may
1977
have problems; the assignment should be made only if thr is the
1978
only top-level thr in the graph: */
1980
thr->graph->last_sel_node = node;
1982
/* Propagate a failure from row_sel() to the transaction */
if (err != DB_SUCCESS) {
1983
thr_get_trx(thr)->error_state = err;
1991
/* Runs a fetch node for the cursor in node->cursor_def. Two cases are
distinguished by thr->prev_node. If it is not the parent of the fetch node
and the cursor is not in state SEL_NODE_NO_MORE_ROWS, the row is delivered
either through node->into_list or through the callback in node->func, and
control then goes back to the parent. Otherwise the fetch node is made the
parent of the cursor select node and the select node is run next; a cursor in
state SEL_NODE_CLOSED is reported as DB_ERROR in the transaction. */
/**************************************************************************
1992
Performs a fetch for a cursor. */
1997
/* out: query thread to run next or NULL */
1998
que_thr_t* thr) /* in: query thread */
2000
sel_node_t* sel_node;
2005
node = thr->run_node;
2006
sel_node = node->cursor_def;
2008
ut_ad(que_node_get_type(node) == QUE_NODE_FETCH);
2010
/* prev_node is not the parent, so this step was not entered from above.
NOTE(review): presumably control is returning from the cursor select node
with a row or with the end of the result set; confirm */
if (thr->prev_node != que_node_get_parent(node)) {
2012
if (sel_node->state != SEL_NODE_NO_MORE_ROWS) {
2014
/* Deliver the row: into the INTO variables when a list is
given, and through the user callback otherwise */
if (node->into_list) {
2015
sel_assign_into_var_values(node->into_list,
2018
/* NOTE(review): the test on ret and the target of the
assignment of SEL_NODE_NO_MORE_ROWS below are not visible in
this text; confirm how the callback result ends the fetch */
void* ret = (*node->func->func)(
2019
sel_node, node->func->arg);
2023
= SEL_NODE_NO_MORE_ROWS;
2028
/* Hand control back to the parent node */
thr->run_node = que_node_get_parent(node);
2033
/* Make the fetch node the parent of the cursor definition for
2034
the time of the fetch, so that execution knows to return to this
2035
fetch node after a row has been selected or we know that there is
2038
sel_node->common.parent = node;
2040
if (sel_node->state == SEL_NODE_CLOSED) {
2042
"InnoDB: Error: fetch called on a closed cursor\n");
2044
thr_get_trx(thr)->error_state = DB_ERROR;
2049
thr->run_node = sel_node;
2054
/********************************************************************
2055
Sample callback function for fetch that prints each row.*/
/* Diagnostic fetch callback. The row argument is the select node; the
function writes the row pointer to stderr and then walks node->select_list
with que_node_get_next(), writing a column header for each expression
followed by either the field bytes via ut_print_buf() or the text <NULL>;
for an SQL NULL value. user_arg is ignored. */
2060
/* out: always returns non-NULL */
2061
void* row, /* in: sel_node_t* */
2062
void* user_arg) /* in: not used */
2064
sel_node_t* node = row;
2068
UT_NOT_USED(user_arg);
2070
fprintf(stderr, "row_fetch_print: row %p\n", row);
2072
exp = node->select_list;
2075
dfield_t* dfield = que_node_get_val(exp);
2076
dtype_t* type = dfield_get_type(dfield);
2078
fprintf(stderr, " column %lu:\n", (ulong)i);
2081
fprintf(stderr, "\n");
2083
/* A length of UNIV_SQL_NULL marks an SQL NULL value: only fields with
another length have data to print */
if (dfield_get_len(dfield) != UNIV_SQL_NULL) {
2084
ut_print_buf(stderr, dfield_get_data(dfield),
2085
dfield_get_len(dfield));
2087
fprintf(stderr, " <NULL>;");
2090
fprintf(stderr, "\n");
2092
/* Move on to the next expression of the select list */
exp = que_node_get_next(exp);
2099
/* Fetch callback that stores the first select list value of the row as a
32-bit unsigned integer. The row argument is the select node and user_arg is
the ib_uint32_t location to write. The field type is asserted to be DATA_INT
with the DATA_UNSIGNED flag, the value is decoded with mach_read_from_4() and
written through user_arg. */
/********************************************************************
2100
Callback function for fetch that stores an unsigned 4 byte integer to the
2101
location pointed. The column's type must be DATA_INT, DATA_UNSIGNED, length
2105
row_fetch_store_uint4(
2106
/*==================*/
2107
/* out: always returns NULL */
2108
void* row, /* in: sel_node_t* */
2109
void* user_arg) /* in: data pointer */
2111
sel_node_t* node = row;
2112
ib_uint32_t* val = user_arg;
2115
/* Only the first expression of the select list is read */
dfield_t* dfield = que_node_get_val(node->select_list);
2116
dtype_t* type = dfield_get_type(dfield);
2117
ulint len = dfield_get_len(dfield);
2119
/* The column must be an unsigned integer type */
ut_a(dtype_get_mtype(type) == DATA_INT);
2120
ut_a(dtype_get_prtype(type) & DATA_UNSIGNED);
2123
/* Decode the stored bytes with mach_read_from_4() and narrow the result
to 32 bits for the caller */
tmp = mach_read_from_4(dfield_get_data(dfield));
2124
*val = (ib_uint32_t) tmp;
2129
/* Runs a row printf node, which prints the rows produced by node->sel_node.
When entered from the parent node, the select node is put to state
SEL_NODE_OPEN and run next. When the select node is no longer in state
SEL_NODE_FETCH, there is nothing left to print and control goes to the
parent. Otherwise every value of the select list is printed with
dfield_print_also_hex(), separated by the text written with fputs() to
stderr, and the select node is run again for the next row. */
/***************************************************************
2130
Prints a row in a select result. */
2135
/* out: query thread to run next or NULL */
2136
que_thr_t* thr) /* in: query thread */
2138
row_printf_node_t* node;
2139
sel_node_t* sel_node;
2144
node = thr->run_node;
2146
sel_node = node->sel_node;
2148
ut_ad(que_node_get_type(node) == QUE_NODE_ROW_PRINTF);
2150
/* Entered from above: start the select from the beginning */
if (thr->prev_node == que_node_get_parent(node)) {
2152
/* Reset the cursor */
2153
sel_node->state = SEL_NODE_OPEN;
2155
/* Fetch next row to print */
2157
thr->run_node = sel_node;
2162
/* Any state other than SEL_NODE_FETCH is expected to be
SEL_NODE_NO_MORE_ROWS, which ends the printing */
if (sel_node->state != SEL_NODE_FETCH) {
2164
ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS);
2166
/* No more rows to print */
2168
thr->run_node = que_node_get_parent(node);
2173
/* Print every value of the select list of the current row */
arg = sel_node->select_list;
2176
dfield_print_also_hex(que_node_get_val(arg));
2178
fputs(" ::: ", stderr);
2180
arg = que_node_get_next(arg);
2185
/* Fetch next row to print */
2187
thr->run_node = sel_node;
2192
/* Builds the search tuple for an index lookup from a key value in the MySQL
key format. The key is walked from key_ptr up to key_ptr + key_len one index
field at a time. A nullable field starts with an SQL NULL marker byte. The
data length then depends on the field: a BLOB prefix has a 2-byte stored
length followed by prefix_len reserved bytes, another column prefix takes
prefix_len bytes, and a full column takes the type length, plus 2 length
bytes for a true VARCHAR. A tuple whose first field has mtype DATA_SYS is
treated as a lookup by row id with a single field. If the last field runs
past the end of the key, a warning is printed and the field length is
reduced. On exit the field count of the tuple is set to n_fields. */
/********************************************************************
2193
Converts a key value stored in MySQL format to an Innobase dtuple. The last
2194
field of the key value may be just a prefix of a fixed length field: hence
2195
the parameter key_len. But currently we do not allow search keys where the
2196
last field is only a prefix of the full key field len and print a warning if
2197
such appears. A counterpart of this function is
2198
ha_innobase::store_key_val_for_row() in ha_innodb.cc. */
2201
row_sel_convert_mysql_key_to_innobase(
2202
/*==================================*/
2203
dtuple_t* tuple, /* in: tuple where to build;
2204
NOTE: we assume that the type info
2205
in the tuple is already according
2207
byte* buf, /* in: buffer to use in field
2209
ulint buf_len, /* in: buffer length */
2210
dict_index_t* index, /* in: index of the key value */
2211
byte* key_ptr, /* in: MySQL key value */
2212
ulint key_len, /* in: MySQL key value length */
2213
trx_t* trx) /* in: transaction */
2215
/* The start positions are kept for the final bounds assertion on buf and
for printing the whole key in the warning */
byte* original_buf = buf;
2216
byte* original_key_ptr = key_ptr;
2217
dict_field_t* field;
2221
ulint data_field_len;
2227
/* For documentation of the key value storage format in MySQL, see
2228
ha_innobase::store_key_val_for_row() in ha_innodb.cc. */
2230
/* key_end is one past the last byte of the MySQL key value */
key_end = key_ptr + key_len;
2232
/* Permit us to access any field in the tuple (ULINT_MAX): */
2234
dtuple_set_n_fields(tuple, ULINT_MAX);
2236
dfield = dtuple_get_nth_field(tuple, 0);
2237
field = dict_index_get_nth_field(index, 0);
2239
if (dfield_get_type(dfield)->mtype == DATA_SYS) {
2240
/* A special case: we are looking for a position in the
2241
generated clustered index which InnoDB automatically added
2242
to a table with no primary key: the first and the only
2243
ordering column is ROW_ID which InnoDB stored to the key_ptr
2246
ut_a(key_len == DATA_ROW_ID_LEN);
2248
dfield_set_data(dfield, key_ptr, DATA_ROW_ID_LEN);
2250
dtuple_set_n_fields(tuple, 1);
2255
while (key_ptr < key_end) {
2257
ut_a(field->col->mtype == dfield_get_type(dfield)->mtype);
2262
if (!(dfield_get_type(dfield)->prtype & DATA_NOT_NULL)) {
2263
/* The first byte in the field tells if this is
2264
an SQL NULL value */
2268
if (*key_ptr != 0) {
2269
dfield_set_data(dfield, NULL, UNIV_SQL_NULL);
2275
/* The main type of the tuple field selects the length rules below */
type = dfield_get_type(dfield)->mtype;
2277
/* Calculate data length and data field total length */
2279
if (type == DATA_BLOB) {
2280
/* The key field is a column prefix of a BLOB or
2283
ut_a(field->prefix_len > 0);
2285
/* MySQL stores the actual data length to the first 2
2286
bytes after the optional SQL NULL marker byte. The
2287
storage format is little-endian, that is, the most
2288
significant byte at a higher address. In UTF-8, MySQL
2289
seems to reserve field->prefix_len bytes for
2290
storing this field in the key value buffer, even
2291
though the actual value only takes data_len bytes
2294
data_len = key_ptr[data_offset]
2295
+ 256 * key_ptr[data_offset + 1];
2296
data_field_len = data_offset + 2 + field->prefix_len;
2300
/* Now that we know the length, we store the column
2301
value like it would be a fixed char field */
2303
} else if (field->prefix_len > 0) {
2304
/* Looks like MySQL pads unused end bytes in the
2305
prefix with space. Therefore, also in UTF-8, it is ok
2306
to compare with a prefix containing full prefix_len
2307
bytes, and no need to take at most prefix_len / 3
2308
UTF-8 characters from the start.
2309
If the prefix is used as the upper end of a LIKE
2310
'abc%' query, then MySQL pads the end with chars
2311
0xff. TODO: in that case does it any harm to compare
2312
with the full prefix_len bytes. How do characters
2313
0xff in UTF-8 behave? */
2315
data_len = field->prefix_len;
2316
data_field_len = data_offset + data_len;
2318
/* Neither a BLOB nor a column prefix: the data length is the length
given by the type of the tuple field */
data_len = dfield_get_type(dfield)->len;
2319
data_field_len = data_offset + data_len;
2322
if (dtype_get_mysql_type(dfield_get_type(dfield))
2323
== DATA_MYSQL_TRUE_VARCHAR
2324
&& dfield_get_type(dfield)->mtype != DATA_INT) {
2325
/* In a MySQL key value format, a true VARCHAR is
2326
always preceded by 2 bytes of a length field.
2327
dfield_get_type(dfield)->len returns the maximum
2328
'payload' len in bytes. That does not include the
2329
2 bytes that tell the actual data length.
2331
We added the check != DATA_INT to make sure we do
2332
not treat MySQL ENUM or SET as a true VARCHAR! */
2335
data_field_len += 2;
2338
/* Storing may use at most data_len bytes of buf */
2341
row_mysql_store_col_in_innobase_format(
2343
FALSE, /* MySQL key value format col */
2344
key_ptr + data_offset, data_len,
2345
dict_table_is_comp(index->table));
2349
/* Advance to the next field of the MySQL key value */
key_ptr += data_field_len;
2351
if (key_ptr > key_end) {
2352
/* The last field in key was not a complete key field
2355
Print a warning about this! HA_READ_PREFIX_LAST does
2356
not currently work in InnoDB with partial-field key
2357
value prefixes. Since MySQL currently uses a padding
2358
trick to calculate LIKE 'abc%' type queries there
2359
should never be partial-field prefixes in searches. */
2361
ut_print_timestamp(stderr);
2363
fputs(" InnoDB: Warning: using a partial-field"
2364
" key prefix in search.\n"
2365
"InnoDB: ", stderr);
2366
dict_index_name_print(stderr, trx, index);
2367
fprintf(stderr, ". Last data field length %lu bytes,\n"
2368
"InnoDB: key ptr now exceeds"
2369
" key end by %lu bytes.\n"
2370
"InnoDB: Key value in the MySQL format:\n",
2371
(ulong) data_field_len,
2372
(ulong) (key_ptr - key_end));
2374
ut_print_buf(stderr, original_key_ptr, key_len);
2375
fprintf(stderr, "\n");
2378
/* Shrink the last field by the number of bytes that key_ptr overshot
key_end */
dfield->len -= (ulint)(key_ptr - key_end);
2387
/* buf must still be within the buf_len bytes that the caller provided */
ut_a(buf <= original_buf + buf_len);
2389
/* We set the length of tuple to n_fields: we assume that the memory
2390
area allocated for it is big enough (usually bigger than n_fields). */
2392
dtuple_set_n_fields(tuple, n_fields);
2395
/* Copies the DATA_ROW_ID system column of index_rec into prebuilt->row_id.
The column position comes from dict_index_get_sys_col_pos(). If the stored
length is not DATA_ROW_ID_LEN, a diagnostic with the index name, the field
number and the record is written to stderr.
NOTE(review): whatever follows the diagnostic is not visible in this text;
confirm whether the copy below is reached in that case. */
/******************************************************************
2396
Stores the row id to the prebuilt struct. */
2399
row_sel_store_row_id_to_prebuilt(
2400
/*=============================*/
2401
row_prebuilt_t* prebuilt, /* in: prebuilt */
2402
rec_t* index_rec, /* in: record */
2403
dict_index_t* index, /* in: index of the record */
2404
const ulint* offsets) /* in: rec_get_offsets
2405
(index_rec, index) */
2410
ut_ad(rec_offs_validate(index_rec, index, offsets));
2412
/* Locate the row id field of the record; len receives its length */
data = rec_get_nth_field(
2414
dict_index_get_sys_col_pos(index, DATA_ROW_ID), &len);
2416
/* A row id of any other length indicates a problem with the record */
if (len != DATA_ROW_ID_LEN) {
2418
"InnoDB: Error: Row id field is"
2419
" wrong length %lu in ", (ulong) len);
2420
dict_index_name_print(stderr, prebuilt->trx, index);
2421
fprintf(stderr, "\n"
2422
"InnoDB: Field number %lu, record:\n",
2423
(ulong) dict_index_get_sys_col_pos(index,
2425
rec_print_new(stderr, index_rec, offsets);
2430
ut_memcpy(prebuilt->row_id, data, len);
2433
/* Writes one column value that is not SQL NULL into the MySQL row buffer at
dest, dispatching on templ->type. Integers are converted to the MySQL byte
order with the sign bit restored. VARCHAR, VARMYSQL and BINARY values are
copied, after a length prefix for a true VARCHAR, and the rest of the
mysql_col_len bytes is padded with spaces. A BLOB is stored as a reference to
data. A DATA_MYSQL value is copied and padded with 0x20 when mbminlen differs
from mbmaxlen. All remaining types are copied unchanged. */
/******************************************************************
2434
Stores a non-SQL-NULL field in the MySQL format. The counterpart of this
2435
function is row_mysql_store_col_in_innobase_format() in row0mysql.c. */
2438
row_sel_field_store_in_mysql_format(
2439
/*================================*/
2440
byte* dest, /* in/out: buffer where to store; NOTE that BLOBs
2441
are not in themselves stored here: the caller must
2442
allocate and copy the BLOB into buffer before, and pass
2443
the pointer to the BLOB in 'data' */
2444
const mysql_row_templ_t* templ, /* in: MySQL column template.
2445
Its following fields are referenced:
2446
type, is_unsigned, mysql_col_len, mbminlen, mbmaxlen */
2447
byte* data, /* in: data to store */
2448
ulint len) /* in: length of the data */
2454
ut_ad(len != UNIV_SQL_NULL);
2456
if (templ->type == DATA_INT) {
2457
/* Convert integer data from Innobase to a little-endian
2458
format, sign bit restored to normal */
2471
/* Signed values: flip the top bit of the last byte of dest, which is the
most significant byte in the little-endian result */
if (!templ->is_unsigned) {
2472
dest[len - 1] = (byte) (dest[len - 1] ^ 128);
2475
ut_ad(templ->mysql_col_len == len);
2476
} else if (templ->type == DATA_VARCHAR
2477
|| templ->type == DATA_VARMYSQL
2478
|| templ->type == DATA_BINARY) {
2480
/* field_end is one past the mysql_col_len bytes of the MySQL column */
field_end = dest + templ->mysql_col_len;
2482
if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
2483
/* This is a >= 5.0.3 type true VARCHAR. Store the
2484
length of the data to the first byte or the first
2485
two bytes of dest. */
2487
dest = row_mysql_store_true_var_len(
2488
dest, len, templ->mysql_length_bytes);
2491
/* Copy the actual data */
2492
ut_memcpy(dest, data, len);
2494
/* Pad with trailing spaces. We pad with spaces also the
2495
unused end of a >= 5.0.3 true VARCHAR column, just in case
2496
MySQL expects its contents to be deterministic. */
2498
pad_ptr = dest + len;
2500
ut_ad(templ->mbminlen <= templ->mbmaxlen);
2502
/* We handle UCS2 charset strings differently. */
2503
if (templ->mbminlen == 2) {
2504
/* A space char is two bytes, 0x0020 in UCS2 */
2507
/* A 0x20 has been stripped from the column.
2510
if (pad_ptr < field_end) {
2516
/* Pad the rest of the string with 0x0020 */
2518
while (pad_ptr < field_end) {
2525
/* Minimum character length of one byte: the remainder of the column is
filled with single 0x20 bytes */
ut_ad(templ->mbminlen == 1);
2528
memset(pad_ptr, 0x20, field_end - pad_ptr);
2530
} else if (templ->type == DATA_BLOB) {
2531
/* Store a pointer to the BLOB buffer to dest: the BLOB was
2532
already copied to the buffer in row_sel_store_mysql_rec */
2534
row_mysql_store_blob_ref(dest, templ->mysql_col_len, data,
2536
} else if (templ->type == DATA_MYSQL) {
2537
/* Copy the stored bytes first; padding, if any, is added further below */
memcpy(dest, data, len);
2539
ut_ad(templ->mysql_col_len >= len);
2540
ut_ad(templ->mbmaxlen >= templ->mbminlen);
2542
ut_ad(templ->mbmaxlen > templ->mbminlen
2543
|| templ->mysql_col_len == len);
2544
/* The following assertion would fail for old tables
2545
containing UTF-8 ENUM columns due to Bug #9526. */
2546
ut_ad(!templ->mbmaxlen
2547
|| !(templ->mysql_col_len % templ->mbmaxlen));
2548
ut_ad(len * templ->mbmaxlen >= templ->mysql_col_len);
2550
if (templ->mbminlen != templ->mbmaxlen) {
2551
/* Pad with spaces. This undoes the stripping
2552
done in row0mysql.ic, function
2553
row_mysql_store_col_in_innobase_format(). */
2555
memset(dest + len, 0x20, templ->mysql_col_len - len);
2558
/* Remaining types are expected to have the same length in both formats
and are copied byte for byte */
ut_ad(templ->type == DATA_CHAR
2559
|| templ->type == DATA_FIXBINARY
2560
/*|| templ->type == DATA_SYS_CHILD
2561
|| templ->type == DATA_SYS*/
2562
|| templ->type == DATA_FLOAT
2563
|| templ->type == DATA_DOUBLE
2564
|| templ->type == DATA_DECIMAL);
2565
ut_ad(templ->mysql_col_len == len);
2567
memcpy(dest, data, len);
2571
/******************************************************************
2572
Convert a row in the Innobase format to a row in the MySQL format.
2573
Note that the template in prebuilt may advise us to copy only a few
2574
columns to mysql_rec, other columns are left blank. All columns may not
2575
be needed in the query.

Summary of the visible body: an existing prebuilt->blob_heap is freed and
reset to NULL on entry. Then, for each of the prebuilt->n_template entries
of prebuilt->mysql_template, the field value is obtained either with
btr_rec_copy_externally_stored_field() (when rec_offs_nth_extern() reports
the field as externally stored) or with rec_get_nth_field(). A value whose
len is not UNIV_SQL_NULL is handed to row_sel_field_store_in_mysql_format()
at mysql_rec + templ->mysql_col_offset, and the column's NULL bit is cleared
when templ->mysql_null_bit_mask is nonzero. For an SQL NULL the NULL bit is
set and the column bytes are copied from prebuilt->default_rec.

NOTE(review): this listing has lost lines (lone braces, else branches,
short declarations and statements, tails of comments and of some calls) and
has bare line-number lines interleaved with the code. It does not compile as
shown; restore the function from the upstream source before relying on it. */
2578
row_sel_store_mysql_rec(
2579
/*====================*/
2580
/* out: TRUE if success, FALSE if
2581
could not allocate memory for a BLOB
2582
(though we may also assert in that
2584
byte* mysql_rec, /* out: row in the MySQL format */
2585
row_prebuilt_t* prebuilt, /* in: prebuilt struct */
2586
rec_t* rec, /* in: Innobase record in the index
2587
which was described in prebuilt's
2589
const ulint* offsets) /* in: array returned by
2590
rec_get_offsets() */
2592
mysql_row_templ_t* templ;
2593
mem_heap_t* extern_field_heap = NULL;
2599
ut_ad(prebuilt->mysql_template);
2600
ut_ad(prebuilt->default_rec);
2601
ut_ad(rec_offs_validate(rec, NULL, offsets));
2603
if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
2604
mem_heap_free(prebuilt->blob_heap);
2605
prebuilt->blob_heap = NULL;
2608
for (i = 0; i < prebuilt->n_template; i++) {
2610
templ = prebuilt->mysql_template + i;
2612
if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets,
2613
templ->rec_field_no))) {
2615
/* Copy an externally stored field to the temporary
2618
ut_a(!prebuilt->trx->has_search_latch);
2620
if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) {
2621
if (prebuilt->blob_heap == NULL) {
2622
prebuilt->blob_heap = mem_heap_create(
2626
heap = prebuilt->blob_heap;
2629
= mem_heap_create(UNIV_PAGE_SIZE);
2631
heap = extern_field_heap;
2634
/* NOTE: if we are retrieving a big BLOB, we may
2635
already run out of memory in the next call, which
2638
data = btr_rec_copy_externally_stored_field(
2639
rec, offsets, templ->rec_field_no,
2642
ut_a(len != UNIV_SQL_NULL);
2644
/* Field is stored in the row. */
2646
data = rec_get_nth_field(rec, offsets,
2647
templ->rec_field_no, &len);
2649
if (UNIV_UNLIKELY(templ->type == DATA_BLOB)
2650
&& len != UNIV_SQL_NULL) {
2652
/* It is a BLOB field locally stored in the
2653
InnoDB record: we MUST copy its contents to
2654
prebuilt->blob_heap here because later code
2655
assumes all BLOB values have been copied to a
2658
if (prebuilt->blob_heap == NULL) {
2659
prebuilt->blob_heap = mem_heap_create(
2663
data = memcpy(mem_heap_alloc(
2664
prebuilt->blob_heap, len),
2669
if (len != UNIV_SQL_NULL) {
2670
row_sel_field_store_in_mysql_format(
2671
mysql_rec + templ->mysql_col_offset,
2675
if (extern_field_heap) {
2676
mem_heap_free(extern_field_heap);
2677
extern_field_heap = NULL;
2680
if (templ->mysql_null_bit_mask) {
2681
/* It is a nullable column with a non-NULL
2683
mysql_rec[templ->mysql_null_byte_offset]
2684
&= ~(byte) templ->mysql_null_bit_mask;
2687
/* MySQL assumes that the field for an SQL
2688
NULL value is set to the default value. */
2690
mysql_rec[templ->mysql_null_byte_offset]
2691
|= (byte) templ->mysql_null_bit_mask;
2692
memcpy(mysql_rec + templ->mysql_col_offset,
2693
prebuilt->default_rec + templ->mysql_col_offset,
2694
templ->mysql_col_len);
2701
/*************************************************************************
2702
Builds a previous version of a clustered index record for a consistent read.

If prebuilt->old_vers_heap already exists it is emptied with
mem_heap_empty(); the listing also shows the heap being created with
mem_heap_create(200), presumably in an else branch whose braces are not
visible here (confirm against the upstream source). The heap is then passed,
together with rec, mtr, clust_index, offsets, read_view and offset_heap, to
row_vers_build_for_consistent_read(), which receives old_vers as its output
argument; the call's result is stored in err.

NOTE(review): this listing has lost short lines (braces, the declaration of
err, the return statement, the tail of the old_vers comment) and has bare
line-number lines interleaved with the code. It does not compile as shown. */
2705
row_sel_build_prev_vers_for_mysql(
2706
/*==============================*/
2707
/* out: DB_SUCCESS or error code */
2708
read_view_t* read_view, /* in: read view */
2709
dict_index_t* clust_index, /* in: clustered index */
2710
row_prebuilt_t* prebuilt, /* in: prebuilt struct */
2711
rec_t* rec, /* in: record in a clustered index */
2712
ulint** offsets, /* in/out: offsets returned by
2713
rec_get_offsets(rec, clust_index) */
2714
mem_heap_t** offset_heap, /* in/out: memory heap from which
2715
the offsets are allocated */
2716
rec_t** old_vers, /* out: old version, or NULL if the
2717
record does not exist in the view:
2718
i.e., it was freshly inserted
2720
mtr_t* mtr) /* in: mtr */
2724
if (prebuilt->old_vers_heap) {
2725
mem_heap_empty(prebuilt->old_vers_heap);
2727
prebuilt->old_vers_heap = mem_heap_create(200);
2730
err = row_vers_build_for_consistent_read(
2731
rec, mtr, clust_index, offsets, read_view, offset_heap,
2732
prebuilt->old_vers_heap, old_vers);
2736
/*************************************************************************
2737
Retrieves the clustered index record corresponding to a record in a
2738
non-clustered index. Does the necessary locking. Used in the MySQL

(NOTE(review): the rest of the sentence above and the terminator of this
header comment are missing from this listing, as are many short lines in the
body: braces, else branches, declarations, goto and return statements, and
labels. Bare line-number lines are interleaved with the code. The function
does not compile as shown; restore it from the upstream source.)

Steps visible in the body:
1. The row reference is built from rec into prebuilt->clust_ref with
row_build_row_ref_in_tuple(), and prebuilt->clust_pcur is opened on the
first index of sec_index->table with PAGE_CUR_LE and BTR_SEARCH_LEAF.
2. If the cursor is not on a user record, or its low_match is smaller than
dict_index_get_n_unique(clust_index), no clustered record was found. A
diagnostic is printed to stderr unless rec is delete-marked and
prebuilt->select_lock_type is LOCK_NONE.
3. The offsets of clust_rec are computed into *offsets.
4. When prebuilt->select_lock_type is not LOCK_NONE, the record is locked
with lock_clust_rec_read_check_and_lock() using LOCK_REC_NOT_GAP.
5. Otherwise, when trx->isolation_level is above TRX_ISO_READ_UNCOMMITTED
and lock_clust_rec_cons_read_sees() fails, an older version is built with
row_sel_build_prev_vers_for_mysql() and clust_rec is replaced by old_vers.
The result is then checked against rec with
row_sel_sec_rec_is_for_clust_rec() when an old version was used or a
delete flag is set.
6. clust_rec is stored in *out_rec, and for a locking read the position of
prebuilt->clust_pcur is saved with btr_pcur_store_position().
2742
row_sel_get_clust_rec_for_mysql(
2743
/*============================*/
2744
/* out: DB_SUCCESS or error code */
2745
row_prebuilt_t* prebuilt,/* in: prebuilt struct in the handle */
2746
dict_index_t* sec_index,/* in: secondary index where rec resides */
2747
rec_t* rec, /* in: record in a non-clustered index; if
2748
this is a locking read, then rec is not
2749
allowed to be delete-marked, and that would
2750
not make sense either */
2751
que_thr_t* thr, /* in: query thread */
2752
rec_t** out_rec,/* out: clustered record or an old version of
2753
it, NULL if the old version did not exist
2754
in the read view, i.e., it was a fresh
2756
ulint** offsets,/* out: offsets returned by
2757
rec_get_offsets(out_rec, clust_index) */
2758
mem_heap_t** offset_heap,/* in/out: memory heap from which
2759
the offsets are allocated */
2760
mtr_t* mtr) /* in: mtr used to get access to the
2761
non-clustered record; the same mtr is used to
2762
access the clustered index */
2764
dict_index_t* clust_index;
2771
trx = thr_get_trx(thr);
2773
row_build_row_ref_in_tuple(prebuilt->clust_ref, sec_index, rec, trx);
2775
clust_index = dict_table_get_first_index(sec_index->table);
2777
btr_pcur_open_with_no_init(clust_index, prebuilt->clust_ref,
2778
PAGE_CUR_LE, BTR_SEARCH_LEAF,
2779
prebuilt->clust_pcur, 0, mtr);
2781
clust_rec = btr_pcur_get_rec(prebuilt->clust_pcur);
2783
prebuilt->clust_pcur->trx_if_known = trx;
2785
/* Note: only if the search ends up on a non-infimum record is the
2786
low_match value the real match to the search tuple */
2788
if (!page_rec_is_user_rec(clust_rec)
2789
|| btr_pcur_get_low_match(prebuilt->clust_pcur)
2790
< dict_index_get_n_unique(clust_index)) {
2792
/* In a rare case it is possible that no clust rec is found
2793
for a delete-marked secondary index record: if in row0umod.c
2794
in row_undo_mod_remove_clust_low() we have already removed
2795
the clust rec, while purge is still cleaning and removing
2796
secondary index records associated with earlier versions of
2797
the clustered index record. In that case we know that the
2798
clustered index record did not exist in the read view of
2801
if (!rec_get_deleted_flag(rec,
2802
dict_table_is_comp(sec_index->table))
2803
|| prebuilt->select_lock_type != LOCK_NONE) {
2804
ut_print_timestamp(stderr);
2805
fputs(" InnoDB: error clustered record"
2806
" for sec rec not found\n"
2807
"InnoDB: ", stderr);
2808
dict_index_name_print(stderr, trx, sec_index);
2810
"InnoDB: sec index record ", stderr);
2811
rec_print(stderr, rec, sec_index);
2813
"InnoDB: clust index record ", stderr);
2814
rec_print(stderr, clust_rec, clust_index);
2816
trx_print(stderr, trx, 600);
2819
"InnoDB: Submit a detailed bug report"
2820
" to http://bugs.mysql.com\n", stderr);
2828
*offsets = rec_get_offsets(clust_rec, clust_index, *offsets,
2829
ULINT_UNDEFINED, offset_heap);
2831
if (prebuilt->select_lock_type != LOCK_NONE) {
2832
/* Try to place a lock on the index record; we are searching
2833
the clust rec with a unique condition, hence
2834
we set a LOCK_REC_NOT_GAP type lock */
2836
err = lock_clust_rec_read_check_and_lock(
2837
0, clust_rec, clust_index, *offsets,
2838
prebuilt->select_lock_type, LOCK_REC_NOT_GAP, thr);
2839
if (err != DB_SUCCESS) {
2844
/* This is a non-locking consistent read: if necessary, fetch
2845
a previous version of the record */
2849
/* If the isolation level allows reading of uncommitted data,
2850
then we never look for an earlier version */
2852
if (trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
2853
&& !lock_clust_rec_cons_read_sees(
2854
clust_rec, clust_index, *offsets,
2857
/* The following call returns 'offsets' associated with
2859
err = row_sel_build_prev_vers_for_mysql(
2860
trx->read_view, clust_index, prebuilt,
2861
clust_rec, offsets, offset_heap, &old_vers,
2864
if (err != DB_SUCCESS) {
2869
clust_rec = old_vers;
2872
/* If we had to go to an earlier version of row or the
2873
secondary index record is delete marked, then it may be that
2874
the secondary index record corresponding to clust_rec
2875
(or old_vers) is not rec; in that case we must ignore
2876
such row because in our snapshot rec would not have existed.
2877
Remember that from rec we cannot see directly which transaction
2878
id corresponds to it: we have to go to the clustered index
2879
record. A query where we want to fetch all rows where
2880
the secondary index value is in some interval would return
2881
a wrong result if we would not drop rows which we come to
2882
visit through secondary index records that would not really
2883
exist in our snapshot. */
2885
if (clust_rec && (old_vers || rec_get_deleted_flag(
2889
&& !row_sel_sec_rec_is_for_clust_rec(
2890
rec, sec_index, clust_rec, clust_index)) {
2893
#ifdef UNIV_SEARCH_DEBUG
2894
ut_a(clust_rec == NULL
2895
|| row_sel_sec_rec_is_for_clust_rec(
2896
rec, sec_index, clust_rec, clust_index));
2902
*out_rec = clust_rec;
2904
if (prebuilt->select_lock_type != LOCK_NONE) {
2905
/* We may use the cursor in update or in unlock_row():
2906
store its position */
2908
btr_pcur_store_position(prebuilt->clust_pcur, mtr);
2916
/************************************************************************
2917
Restores cursor position after it has been stored. We have to take into
2918
account that the record cursor was positioned on may have been deleted.
2919
Then we may have to move the cursor one step up or down.

Summary of the visible body: pcur->rel_pos is saved before
btr_pcur_restore_position() is called, and the result of that call is
written to *same_user_rec. The code then dispatches on the saved relative
position: first BTR_PCUR_ON, then BTR_PCUR_AFTER or
BTR_PCUR_AFTER_LAST_IN_TREE, and finally (checked with ut_ad)
BTR_PCUR_BEFORE or BTR_PCUR_BEFORE_FIRST_IN_TREE. In these branches the
cursor may be moved with btr_pcur_move_to_next() or btr_pcur_move_to_prev().

NOTE(review): this listing has lost short lines, including the conditions
guarding some of the cursor moves, every return statement, braces, the
declaration of success and the tails of several parameter comments. Bare
line-number lines are interleaved with the code. The function does not
compile as shown; restore it from the upstream source. */
2922
sel_restore_position_for_mysql(
2923
/*===========================*/
2924
/* out: TRUE if we may need to
2925
process the record the cursor is
2926
now positioned on (i.e. we should
2927
not go to the next record yet) */
2928
ibool* same_user_rec, /* out: TRUE if we were able to restore
2929
the cursor on a user record with the
2930
same ordering prefix in the
2932
ulint latch_mode, /* in: latch mode wished in
2934
btr_pcur_t* pcur, /* in: cursor whose position
2936
ibool moves_up, /* in: TRUE if the cursor moves up
2938
mtr_t* mtr) /* in: mtr; CAUTION: may commit
2942
ulint relative_position;
2944
relative_position = pcur->rel_pos;
2946
success = btr_pcur_restore_position(latch_mode, pcur, mtr);
2948
*same_user_rec = success;
2950
if (relative_position == BTR_PCUR_ON) {
2956
btr_pcur_move_to_next(pcur, mtr);
2962
if (relative_position == BTR_PCUR_AFTER
2963
|| relative_position == BTR_PCUR_AFTER_LAST_IN_TREE) {
2969
if (btr_pcur_is_on_user_rec(pcur, mtr)) {
2970
btr_pcur_move_to_prev(pcur, mtr);
2976
ut_ad(relative_position == BTR_PCUR_BEFORE
2977
|| relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE);
2979
if (moves_up && btr_pcur_is_on_user_rec(pcur, mtr)) {
2980
btr_pcur_move_to_next(pcur, mtr);
2986
/************************************************************************
2987
Pops a cached row for MySQL from the fetch cache.

The row is taken from prebuilt->fetch_cache[prebuilt->fetch_cache_first].
When prebuilt->keep_other_fields_on_keyread is set, only the columns listed
in prebuilt->mysql_template are copied, one by one, so that other bytes of
buf are left untouched. For a nullable column the expression
buf ^= (buf ^ cached) & mask replaces just the bits selected by
templ->mysql_null_bit_mask in the NULL byte of buf with the corresponding
bits of the cached row. The listing also shows the arguments of a copy of
prebuilt->mysql_prefix_len bytes from the same cache slot, presumably the
else branch of that test (the first line of the call is not visible here;
confirm against the upstream source).

Afterwards n_fetch_cached is decremented and fetch_cache_first incremented;
fetch_cache_first is reset to 0 once n_fetch_cached reaches 0.

NOTE(review): this listing has lost short lines (braces, the else, the
declarations of i and cached_rec, comment tails) and has bare line-number
lines interleaved with the code. It does not compile as shown. */
2990
row_sel_pop_cached_row_for_mysql(
2991
/*=============================*/
2992
byte* buf, /* in/out: buffer where to copy the
2994
row_prebuilt_t* prebuilt) /* in: prebuilt struct */
2997
mysql_row_templ_t* templ;
2999
ut_ad(prebuilt->n_fetch_cached > 0);
3000
ut_ad(prebuilt->mysql_prefix_len <= prebuilt->mysql_row_len);
3002
if (UNIV_UNLIKELY(prebuilt->keep_other_fields_on_keyread)) {
3003
/* Copy cache record field by field, don't touch fields that
3004
are not covered by current key */
3005
cached_rec = prebuilt->fetch_cache[
3006
prebuilt->fetch_cache_first];
3008
for (i = 0; i < prebuilt->n_template; i++) {
3009
templ = prebuilt->mysql_template + i;
3010
ut_memcpy(buf + templ->mysql_col_offset,
3011
cached_rec + templ->mysql_col_offset,
3012
templ->mysql_col_len);
3013
/* Copy NULL bit of the current field from cached_rec
3015
if (templ->mysql_null_bit_mask) {
3016
buf[templ->mysql_null_byte_offset]
3017
^= (buf[templ->mysql_null_byte_offset]
3018
^ cached_rec[templ->mysql_null_byte_offset])
3019
& (byte)templ->mysql_null_bit_mask;
3025
prebuilt->fetch_cache[prebuilt->fetch_cache_first],
3026
prebuilt->mysql_prefix_len);
3028
prebuilt->n_fetch_cached--;
3029
prebuilt->fetch_cache_first++;
3031
if (prebuilt->n_fetch_cached == 0) {
3032
prebuilt->fetch_cache_first = 0;
3036
/************************************************************************
3037
Pushes a row for MySQL to the fetch cache.

The caller must leave room in the cache (n_fetch_cached is asserted to be
below MYSQL_FETCH_CACHE_SIZE) and the template must not contain a BLOB
(checked with ut_a).

On first use, detected by prebuilt->fetch_cache[0] being NULL, all
MYSQL_FETCH_CACHE_SIZE buffers are allocated. Each allocation is
prebuilt->mysql_row_len + 8 bytes: the cache slot pointer is set to buf + 4,
and ROW_PREBUILT_FETCH_MAGIC_N is written with mach_write_to_4() both at buf
and at buf + 4 + mysql_row_len, i.e. directly before and after the row area.

rec is then converted with row_sel_store_mysql_rec() into slot
prebuilt->n_fetch_cached, and n_fetch_cached is incremented.

NOTE(review): this listing has lost short lines (braces, declarations of i
and buf, the statement executed when row_sel_store_mysql_rec() fails) and has
bare line-number lines interleaved with the code. It does not compile as
shown; restore the function from the upstream source. */
3040
row_sel_push_cache_row_for_mysql(
3041
/*=============================*/
3042
row_prebuilt_t* prebuilt, /* in: prebuilt struct */
3043
rec_t* rec, /* in: record to push */
3044
const ulint* offsets) /* in: rec_get_offsets() */
3049
ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE);
3050
ut_ad(rec_offs_validate(rec, NULL, offsets));
3051
ut_a(!prebuilt->templ_contains_blob);
3053
if (prebuilt->fetch_cache[0] == NULL) {
3054
/* Allocate memory for the fetch cache */
3056
for (i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) {
3058
/* A user has reported memory corruption in these
3059
buffers in Linux. Put magic numbers there to help
3060
to track a possible bug. */
3062
buf = mem_alloc(prebuilt->mysql_row_len + 8);
3064
prebuilt->fetch_cache[i] = buf + 4;
3066
mach_write_to_4(buf, ROW_PREBUILT_FETCH_MAGIC_N);
3067
mach_write_to_4(buf + 4 + prebuilt->mysql_row_len,
3068
ROW_PREBUILT_FETCH_MAGIC_N);
3072
ut_ad(prebuilt->fetch_cache_first == 0);
3074
if (UNIV_UNLIKELY(!row_sel_store_mysql_rec(
3075
prebuilt->fetch_cache[
3076
prebuilt->n_fetch_cached],
3077
prebuilt, rec, offsets))) {
3081
prebuilt->n_fetch_cached++;
3084
/*************************************************************************
3085
Tries to do a shortcut to fetch a clustered index record with a unique key,
3086
using the hash index if possible (not always). We assume that the search
3087
mode is PAGE_CUR_GE, it is a consistent read, there is a read view in trx,
3088
btr search latch has been locked in S-mode.

Summary of the visible body: the index is asserted to be clustered and the
template to contain no BLOB. prebuilt->pcur is opened on prebuilt->index
with prebuilt->search_tuple, PAGE_CUR_GE and BTR_SEARCH_LEAF. The cursor
record is tested with page_rec_is_user_rec(). SEL_EXHAUSTED is returned when
btr_pcur_get_up_match(pcur) is smaller than the number of fields in the
search tuple. The offsets of the record are then computed into *offsets and
lock_clust_rec_cons_read_sees() is consulted with trx->read_view.
SEL_EXHAUSTED is also returned when the record is delete-marked.

NOTE(review): this listing has lost short lines, including the return
statements for the other outcomes named in the out: comment below, the
assignment to *out_rec, braces, the declaration of rec and the last
arguments of the cursor-open call. Bare line-number lines are interleaved
with the code. The function does not compile as shown; restore it from the
upstream source. */
3091
row_sel_try_search_shortcut_for_mysql(
3092
/*==================================*/
3093
/* out: SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
3094
rec_t** out_rec,/* out: record if found */
3095
row_prebuilt_t* prebuilt,/* in: prebuilt struct */
3096
ulint** offsets,/* in/out: for rec_get_offsets(*out_rec) */
3097
mem_heap_t** heap, /* in/out: heap for rec_get_offsets() */
3098
mtr_t* mtr) /* in: started mtr */
3100
dict_index_t* index = prebuilt->index;
3101
dtuple_t* search_tuple = prebuilt->search_tuple;
3102
btr_pcur_t* pcur = prebuilt->pcur;
3103
trx_t* trx = prebuilt->trx;
3106
ut_ad(index->type & DICT_CLUSTERED);
3107
ut_ad(!prebuilt->templ_contains_blob);
3109
btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
3110
BTR_SEARCH_LEAF, pcur,
3111
#ifndef UNIV_SEARCH_DEBUG
3117
rec = btr_pcur_get_rec(pcur);
3119
if (!page_rec_is_user_rec(rec)) {
3124
/* As the cursor is now placed on a user record after a search with
3125
the mode PAGE_CUR_GE, the up_match field in the cursor tells how many
3126
fields in the user record matched to the search tuple */
3128
if (btr_pcur_get_up_match(pcur) < dtuple_get_n_fields(search_tuple)) {
3130
return(SEL_EXHAUSTED);
3133
/* This is a non-locking consistent read: if necessary, fetch
3134
a previous version of the record */
3136
*offsets = rec_get_offsets(rec, index, *offsets,
3137
ULINT_UNDEFINED, heap);
3139
if (!lock_clust_rec_cons_read_sees(rec, index,
3140
*offsets, trx->read_view)) {
3145
if (rec_get_deleted_flag(rec, dict_table_is_comp(index->table))) {
3147
return(SEL_EXHAUSTED);
3155
/************************************************************************
3156
Searches for rows in the database. This is used in the interface to
3157
MySQL. This function opens a cursor, and also implements fetch next
3158
and fetch prev. NOTE that if we do a search with a full key value
3159
from a unique index (ROW_SEL_EXACT), then we will not store the cursor
3160
position and fetch next or fetch prev must not be tried to the cursor! */
3163
row_search_for_mysql(
3164
/*=================*/
3166
DB_RECORD_NOT_FOUND,
3167
DB_END_OF_INDEX, DB_DEADLOCK,
3168
DB_LOCK_TABLE_FULL, DB_CORRUPTION,
3169
or DB_TOO_BIG_RECORD */
3170
byte* buf, /* in/out: buffer for the fetched
3171
row in the MySQL format */
3172
ulint mode, /* in: search mode PAGE_CUR_L, ... */
3173
row_prebuilt_t* prebuilt, /* in: prebuilt struct for the
3174
table handle; this contains the info
3175
of search_tuple, index; if search
3176
tuple contains 0 fields then we
3177
position the cursor at the start or
3178
the end of the index, depending on
3180
ulint match_mode, /* in: 0 or ROW_SEL_EXACT or
3181
ROW_SEL_EXACT_PREFIX */
3182
ulint direction) /* in: 0 or ROW_SEL_NEXT or
3183
ROW_SEL_PREV; NOTE: if this is != 0,
3184
then prebuilt must have a pcur
3185
with stored position! In opening of a
3186
cursor 'direction' should be 0. */
3188
dict_index_t* index = prebuilt->index;
3189
ibool comp = dict_table_is_comp(index->table);
3190
dtuple_t* search_tuple = prebuilt->search_tuple;
3191
btr_pcur_t* pcur = prebuilt->pcur;
3192
trx_t* trx = prebuilt->trx;
3193
dict_index_t* clust_index;
3198
ulint err = DB_SUCCESS;
3199
ibool unique_search = FALSE;
3200
ibool unique_search_from_clust_index = FALSE;
3201
ibool mtr_has_extra_clust_latch = FALSE;
3202
ibool moves_up = FALSE;
3203
ibool set_also_gap_locks = TRUE;
3204
/* if the query is a plain locking SELECT, and the isolation level
3205
is <= TRX_ISO_READ_COMMITTED, then this is set to FALSE */
3206
ibool did_semi_consistent_read = FALSE;
3207
/* if the returned record was locked and we did a semi-consistent
3208
read (fetch the newest committed version), then this is set to
3210
#ifdef UNIV_SEARCH_DEBUG
3212
#endif /* UNIV_SEARCH_DEBUG */
3214
ibool same_user_rec;
3216
mem_heap_t* heap = NULL;
3217
ulint offsets_[REC_OFFS_NORMAL_SIZE];
3218
ulint* offsets = offsets_;
3220
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
3222
ut_ad(index && pcur && search_tuple);
3223
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
3225
if (UNIV_UNLIKELY(prebuilt->table->ibd_file_missing)) {
3226
ut_print_timestamp(stderr);
3227
fprintf(stderr, " InnoDB: Error:\n"
3228
"InnoDB: MySQL is trying to use a table handle"
3229
" but the .ibd file for\n"
3230
"InnoDB: table %s does not exist.\n"
3231
"InnoDB: Have you deleted the .ibd file"
3232
" from the database directory under\n"
3233
"InnoDB: the MySQL datadir, or have you used"
3234
" DISCARD TABLESPACE?\n"
3235
"InnoDB: Look from\n"
3236
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
3237
"innodb-troubleshooting.html\n"
3238
"InnoDB: how you can resolve the problem.\n",
3239
prebuilt->table->name);
3244
if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
3246
"InnoDB: Error: trying to free a corrupt\n"
3247
"InnoDB: table handle. Magic n %lu, table name ",
3248
(ulong) prebuilt->magic_n);
3249
ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
3252
mem_analyze_corruption(prebuilt);
3258
/* August 19, 2005 by Heikki: temporarily disable this error
3259
print until the cursor lock count is done correctly.
3260
See bugs #12263 and #12456!*/
3262
if (trx->n_mysql_tables_in_use == 0
3263
&& UNIV_UNLIKELY(prebuilt->select_lock_type == LOCK_NONE)) {
3264
/* Note that if MySQL uses an InnoDB temp table that it
3265
created inside LOCK TABLES, then n_mysql_tables_in_use can
3266
be zero; in that case select_lock_type is set to LOCK_X in
3269
fputs("InnoDB: Error: MySQL is trying to perform a SELECT\n"
3270
"InnoDB: but it has not locked"
3271
" any tables in ::external_lock()!\n",
3273
trx_print(stderr, trx, 600);
3274
fputc('\n', stderr);
3279
fprintf(stderr, "Match mode %lu\n search tuple ",
3280
(ulong) match_mode);
3281
dtuple_print(search_tuple);
3282
fprintf(stderr, "N tables locked %lu\n",
3283
(ulong) trx->mysql_n_tables_locked);
3285
/*-------------------------------------------------------------*/
3286
/* PHASE 0: Release a possible s-latch we are holding on the
3287
adaptive hash index latch if there is someone waiting behind */
3289
if (UNIV_UNLIKELY(btr_search_latch.writer != RW_LOCK_NOT_LOCKED)
3290
&& trx->has_search_latch) {
3292
/* There is an x-latch request on the adaptive hash index:
3293
release the s-latch to reduce starvation and wait for
3294
BTR_SEA_TIMEOUT rounds before trying to keep it again over
3297
rw_lock_s_unlock(&btr_search_latch);
3298
trx->has_search_latch = FALSE;
3300
trx->search_latch_timeout = BTR_SEA_TIMEOUT;
3303
/* Reset the new record lock info if srv_locks_unsafe_for_binlog
3304
is set or session is using a READ COMMITTED isolation level. Then
3305
we are able to remove the record locks set here on an individual
3307
prebuilt->new_rec_locks = 0;
3309
/*-------------------------------------------------------------*/
3310
/* PHASE 1: Try to pop the row from the prefetch cache */
3312
if (UNIV_UNLIKELY(direction == 0)) {
3313
trx->op_info = "starting index read";
3315
prebuilt->n_rows_fetched = 0;
3316
prebuilt->n_fetch_cached = 0;
3317
prebuilt->fetch_cache_first = 0;
3319
if (prebuilt->sel_graph == NULL) {
3320
/* Build a dummy select query graph */
3321
row_prebuild_sel_graph(prebuilt);
3324
trx->op_info = "fetching rows";
3326
if (prebuilt->n_rows_fetched == 0) {
3327
prebuilt->fetch_direction = direction;
3330
if (UNIV_UNLIKELY(direction != prebuilt->fetch_direction)) {
3331
if (UNIV_UNLIKELY(prebuilt->n_fetch_cached > 0)) {
3333
/* TODO: scrollable cursor: restore cursor to
3334
the place of the latest returned row,
3335
or better: prevent caching for a scroll
3339
prebuilt->n_rows_fetched = 0;
3340
prebuilt->n_fetch_cached = 0;
3341
prebuilt->fetch_cache_first = 0;
3343
} else if (UNIV_LIKELY(prebuilt->n_fetch_cached > 0)) {
3344
row_sel_pop_cached_row_for_mysql(buf, prebuilt);
3346
prebuilt->n_rows_fetched++;
3353
if (prebuilt->fetch_cache_first > 0
3354
&& prebuilt->fetch_cache_first < MYSQL_FETCH_CACHE_SIZE) {
3356
/* The previous returned row was popped from the fetch
3357
cache, but the cache was not full at the time of the
3358
popping: no more rows can exist in the result set */
3360
err = DB_RECORD_NOT_FOUND;
3364
prebuilt->n_rows_fetched++;
3366
if (prebuilt->n_rows_fetched > 1000000000) {
3367
/* Prevent wrap-over */
3368
prebuilt->n_rows_fetched = 500000000;
3371
mode = pcur->search_mode;
3374
/* In a search where at most one record in the index may match, we
3375
can use a LOCK_REC_NOT_GAP type record lock when locking a
3376
non-delete-marked matching record.
3378
Note that in a unique secondary index there may be different
3379
delete-marked versions of a record where only the primary key
3380
values differ: thus in a secondary index we must use next-key
3381
locks when locking delete-marked records. */
3383
if (match_mode == ROW_SEL_EXACT
3384
&& index->type & DICT_UNIQUE
3385
&& dtuple_get_n_fields(search_tuple)
3386
== dict_index_get_n_unique(index)
3387
&& (index->type & DICT_CLUSTERED
3388
|| !dtuple_contains_null(search_tuple))) {
3390
/* Note above that a UNIQUE secondary index can contain many
3391
rows with the same key value if one of the columns is the SQL
3392
null. A clustered index under MySQL can never contain null
3393
columns because we demand that all the columns in primary key
3396
unique_search = TRUE;
3398
/* Even if the condition is unique, MySQL seems to try to
3399
retrieve also a second row if a primary key contains more than
3400
1 column. Return immediately if this is not a HANDLER
3403
if (UNIV_UNLIKELY(direction != 0
3404
&& !prebuilt->used_in_HANDLER)) {
3406
err = DB_RECORD_NOT_FOUND;
3413
/*-------------------------------------------------------------*/
3414
/* PHASE 2: Try fast adaptive hash index search if possible */
3416
/* Next test if this is the special case where we can use the fast
3417
adaptive hash index to try the search. Since we must release the
3418
search system latch when we retrieve an externally stored field, we
3419
cannot use the adaptive hash index in a search in the case the row
3420
may be long and there may be externally stored fields */
3422
if (UNIV_UNLIKELY(direction == 0)
3424
&& index->type & DICT_CLUSTERED
3425
&& !prebuilt->templ_contains_blob
3426
&& !prebuilt->used_in_HANDLER
3427
&& (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)) {
3431
unique_search_from_clust_index = TRUE;
3433
if (trx->mysql_n_tables_locked == 0
3434
&& prebuilt->select_lock_type == LOCK_NONE
3435
&& trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
3436
&& trx->read_view) {
3438
/* This is a SELECT query done as a consistent read,
3439
and the read view has already been allocated:
3440
let us try a search shortcut through the hash
3442
NOTE that we must also test that
3443
mysql_n_tables_locked == 0, because this might
3444
also be INSERT INTO ... SELECT ... or
3445
CREATE TABLE ... SELECT ... . Our algorithm is
3446
NOT prepared for inserts interleaved with the SELECT,
3447
and if we try that, we can deadlock on the adaptive
3448
hash index semaphore! */
3450
#ifndef UNIV_SEARCH_DEBUG
3451
if (!trx->has_search_latch) {
3452
rw_lock_s_lock(&btr_search_latch);
3453
trx->has_search_latch = TRUE;
3456
switch (row_sel_try_search_shortcut_for_mysql(
3457
&rec, prebuilt, &offsets, &heap,
3460
#ifdef UNIV_SEARCH_DEBUG
3461
ut_a(0 == cmp_dtuple_rec(search_tuple,
3464
if (!row_sel_store_mysql_rec(buf, prebuilt,
3466
err = DB_TOO_BIG_RECORD;
3468
/* We let the main loop do the
3470
goto shortcut_fails_too_big_rec;
3475
/* ut_print_name(stderr, index->name);
3476
fputs(" shortcut\n", stderr); */
3480
if (trx->search_latch_timeout > 0
3481
&& trx->has_search_latch) {
3483
trx->search_latch_timeout--;
3485
rw_lock_s_unlock(&btr_search_latch);
3486
trx->has_search_latch = FALSE;
3489
/* NOTE that we do NOT store the cursor
3497
/* ut_print_name(stderr, index->name);
3498
fputs(" record not found 2\n", stderr); */
3500
if (trx->search_latch_timeout > 0
3501
&& trx->has_search_latch) {
3503
trx->search_latch_timeout--;
3505
rw_lock_s_unlock(&btr_search_latch);
3506
trx->has_search_latch = FALSE;
3509
/* NOTE that we do NOT store the cursor
3512
err = DB_RECORD_NOT_FOUND;
3515
shortcut_fails_too_big_rec:
3521
/*-------------------------------------------------------------*/
3522
/* PHASE 3: Open or restore index cursor position */
3524
if (trx->has_search_latch) {
3525
rw_lock_s_unlock(&btr_search_latch);
3526
trx->has_search_latch = FALSE;
3529
trx_start_if_not_started(trx);
3531
if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
3532
&& prebuilt->select_lock_type != LOCK_NONE
3533
&& trx->mysql_thd != NULL
3534
&& thd_is_select(trx->mysql_thd)) {
3535
/* It is a plain locking SELECT and the isolation
3536
level is low: do not lock gaps */
3538
set_also_gap_locks = FALSE;
3541
/* Note that if the search mode was GE or G, then the cursor
3542
naturally moves upward (in fetch next) in alphabetical order,
3543
otherwise downward */
3545
if (UNIV_UNLIKELY(direction == 0)) {
3546
if (mode == PAGE_CUR_GE || mode == PAGE_CUR_G) {
3549
} else if (direction == ROW_SEL_NEXT) {
3553
thr = que_fork_get_first_thr(prebuilt->sel_graph);
3555
que_thr_move_to_run_state_for_mysql(thr, trx);
3557
clust_index = dict_table_get_first_index(index->table);
3559
if (UNIV_LIKELY(direction != 0)) {
3560
ibool need_to_process = sel_restore_position_for_mysql(
3561
&same_user_rec, BTR_SEARCH_LEAF,
3562
pcur, moves_up, &mtr);
3564
if (UNIV_UNLIKELY(need_to_process)) {
3565
if (UNIV_UNLIKELY(prebuilt->row_read_type
3566
== ROW_READ_DID_SEMI_CONSISTENT)) {
3567
/* We did a semi-consistent read,
3568
but the record was removed in
3570
prebuilt->row_read_type
3571
= ROW_READ_TRY_SEMI_CONSISTENT;
3573
} else if (UNIV_LIKELY(prebuilt->row_read_type
3574
!= ROW_READ_DID_SEMI_CONSISTENT)) {
3576
/* The cursor was positioned on the record
3577
that we returned previously. If we need
3578
to repeat a semi-consistent read as a
3579
pessimistic locking read, the record
3580
cannot be skipped. */
3585
} else if (dtuple_get_n_fields(search_tuple) > 0) {
3587
btr_pcur_open_with_no_init(index, search_tuple, mode,
3591
pcur->trx_if_known = trx;
3593
rec = btr_pcur_get_rec(pcur);
3596
&& !page_rec_is_supremum(rec)
3597
&& set_also_gap_locks
3598
&& !(srv_locks_unsafe_for_binlog
3599
|| trx->isolation_level == TRX_ISO_READ_COMMITTED)
3600
&& prebuilt->select_lock_type != LOCK_NONE) {
3602
/* Try to place a gap lock on the next index record
3603
to prevent phantoms in ORDER BY ... DESC queries */
3605
offsets = rec_get_offsets(page_rec_get_next(rec),
3607
ULINT_UNDEFINED, &heap);
3608
err = sel_set_rec_lock(page_rec_get_next(rec),
3610
prebuilt->select_lock_type,
3613
if (err != DB_SUCCESS) {
3615
goto lock_wait_or_error;
3619
if (mode == PAGE_CUR_G) {
3620
btr_pcur_open_at_index_side(
3621
TRUE, index, BTR_SEARCH_LEAF, pcur, FALSE,
3623
} else if (mode == PAGE_CUR_L) {
3624
btr_pcur_open_at_index_side(
3625
FALSE, index, BTR_SEARCH_LEAF, pcur, FALSE,
3630
if (!prebuilt->sql_stat_start) {
3631
/* No need to set an intention lock or assign a read view */
3633
if (trx->read_view == NULL
3634
&& prebuilt->select_lock_type == LOCK_NONE) {
3636
fputs("InnoDB: Error: MySQL is trying to"
3637
" perform a consistent read\n"
3638
"InnoDB: but the read view is not assigned!\n",
3640
trx_print(stderr, trx, 600);
3641
fputc('\n', stderr);
3644
} else if (prebuilt->select_lock_type == LOCK_NONE) {
3645
/* This is a consistent read */
3646
/* Assign a read view for the query */
3648
trx_assign_read_view(trx);
3649
prebuilt->sql_stat_start = FALSE;
3652
if (prebuilt->select_lock_type == LOCK_S) {
3653
lock_mode = LOCK_IS;
3655
lock_mode = LOCK_IX;
3657
err = lock_table(0, index->table, lock_mode, thr);
3659
if (err != DB_SUCCESS) {
3661
goto lock_wait_or_error;
3663
prebuilt->sql_stat_start = FALSE;
3667
/*-------------------------------------------------------------*/
3668
/* PHASE 4: Look for matching records in a loop */
3670
rec = btr_pcur_get_rec(pcur);
3671
ut_ad(!!page_rec_is_comp(rec) == comp);
3672
#ifdef UNIV_SEARCH_DEBUG
3674
fputs("Using ", stderr);
3675
dict_index_name_print(stderr, index);
3676
fprintf(stderr, " cnt %lu ; Page no %lu\n", cnt,
3677
buf_frame_get_page_no(buf_frame_align(rec)));
3680
#endif /* UNIV_SEARCH_DEBUG */
3682
if (page_rec_is_infimum(rec)) {
3684
/* The infimum record on a page cannot be in the result set,
3685
and neither can a record lock be placed on it: we skip such
3691
if (page_rec_is_supremum(rec)) {
3693
if (set_also_gap_locks
3694
&& !(srv_locks_unsafe_for_binlog
3695
|| trx->isolation_level == TRX_ISO_READ_COMMITTED)
3696
&& prebuilt->select_lock_type != LOCK_NONE) {
3698
/* Try to place a lock on the index record */
3700
/* If innodb_locks_unsafe_for_binlog option is used
3701
or this session is using a READ COMMITTED isolation
3702
level we do not lock gaps. Supremum record is really
3703
a gap and therefore we do not set locks there. */
3705
offsets = rec_get_offsets(rec, index, offsets,
3706
ULINT_UNDEFINED, &heap);
3707
err = sel_set_rec_lock(rec, index, offsets,
3708
prebuilt->select_lock_type,
3709
LOCK_ORDINARY, thr);
3711
if (err != DB_SUCCESS) {
3713
goto lock_wait_or_error;
3716
/* A page supremum record cannot be in the result set: skip
3717
it now that we have placed a possible lock on it */
3722
/*-------------------------------------------------------------*/
3723
/* Do sanity checks in case our cursor has bumped into page
3727
next_offs = rec_get_next_offs(rec, TRUE);
3728
if (UNIV_UNLIKELY(next_offs < PAGE_NEW_SUPREMUM)) {
3733
next_offs = rec_get_next_offs(rec, FALSE);
3734
if (UNIV_UNLIKELY(next_offs < PAGE_OLD_SUPREMUM)) {
3740
if (UNIV_UNLIKELY(next_offs >= UNIV_PAGE_SIZE - PAGE_DIR)) {
3743
if (srv_force_recovery == 0 || moves_up == FALSE) {
3744
ut_print_timestamp(stderr);
3745
buf_page_print(buf_frame_align(rec));
3747
"\nInnoDB: rec address %p, first"
3748
" buffer frame %p\n"
3749
"InnoDB: buffer pool high end %p,"
3750
" buf block fix count %lu\n",
3751
(void*) rec, (void*) buf_pool->frame_zero,
3752
(void*) buf_pool->high_end,
3753
(ulong)buf_block_align(rec)->buf_fix_count);
3755
"InnoDB: Index corruption: rec offs %lu"
3756
" next offs %lu, page no %lu,\n"
3758
(ulong) page_offset(rec),
3760
(ulong) buf_frame_get_page_no(rec));
3761
dict_index_name_print(stderr, trx, index);
3762
fputs(". Run CHECK TABLE. You may need to\n"
3763
"InnoDB: restore from a backup, or"
3764
" dump + drop + reimport the table.\n",
3767
err = DB_CORRUPTION;
3769
goto lock_wait_or_error;
3771
/* The user may be dumping a corrupt table. Jump
3772
over the corruption to recover as much as possible. */
3775
"InnoDB: Index corruption: rec offs %lu"
3776
" next offs %lu, page no %lu,\n"
3778
(ulong) page_offset(rec),
3780
(ulong) buf_frame_get_page_no(rec));
3781
dict_index_name_print(stderr, trx, index);
3782
fputs(". We try to skip the rest of the page.\n",
3785
btr_pcur_move_to_last_on_page(pcur, &mtr);
3790
/*-------------------------------------------------------------*/
3792
/* Calculate the 'offsets' associated with 'rec' */
3794
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
3796
if (UNIV_UNLIKELY(srv_force_recovery > 0)) {
3797
if (!rec_validate(rec, offsets)
3798
|| !btr_index_rec_validate(rec, index, FALSE)) {
3800
"InnoDB: Index corruption: rec offs %lu"
3801
" next offs %lu, page no %lu,\n"
3803
(ulong) page_offset(rec),
3805
(ulong) buf_frame_get_page_no(rec));
3806
dict_index_name_print(stderr, trx, index);
3807
fputs(". We try to skip the record.\n",
3814
/* Note that we cannot trust the up_match value in the cursor at this
3815
place because we can arrive here after moving the cursor! Thus
3816
we have to recompare rec and search_tuple to determine if they
3819
if (match_mode == ROW_SEL_EXACT) {
3820
/* Test if the index record matches completely to search_tuple
3821
in prebuilt: if not, then we return with DB_RECORD_NOT_FOUND */
3823
/* fputs("Comparing rec and search tuple\n", stderr); */
3825
if (0 != cmp_dtuple_rec(search_tuple, rec, offsets)) {
3827
if (set_also_gap_locks
3828
&& !(srv_locks_unsafe_for_binlog
3829
|| trx->isolation_level
3830
== TRX_ISO_READ_COMMITTED)
3831
&& prebuilt->select_lock_type != LOCK_NONE) {
3833
/* Try to place a gap lock on the index
3834
record only if innodb_locks_unsafe_for_binlog
3835
option is not set or this session is not
3836
using a READ COMMITTED isolation level. */
3838
err = sel_set_rec_lock(
3839
rec, index, offsets,
3840
prebuilt->select_lock_type, LOCK_GAP,
3843
if (err != DB_SUCCESS) {
3845
goto lock_wait_or_error;
3849
btr_pcur_store_position(pcur, &mtr);
3851
err = DB_RECORD_NOT_FOUND;
3852
/* ut_print_name(stderr, index->name);
3853
fputs(" record not found 3\n", stderr); */
3858
} else if (match_mode == ROW_SEL_EXACT_PREFIX) {
3860
if (!cmp_dtuple_is_prefix_of_rec(search_tuple, rec, offsets)) {
3862
if (set_also_gap_locks
3863
&& !(srv_locks_unsafe_for_binlog
3864
|| trx->isolation_level
3865
== TRX_ISO_READ_COMMITTED)
3866
&& prebuilt->select_lock_type != LOCK_NONE) {
3868
/* Try to place a gap lock on the index
3869
record only if innodb_locks_unsafe_for_binlog
3870
option is not set or this session is not
3871
using a READ COMMITTED isolation level. */
3873
err = sel_set_rec_lock(
3874
rec, index, offsets,
3875
prebuilt->select_lock_type, LOCK_GAP,
3878
if (err != DB_SUCCESS) {
3880
goto lock_wait_or_error;
3884
btr_pcur_store_position(pcur, &mtr);
3886
err = DB_RECORD_NOT_FOUND;
3887
/* ut_print_name(stderr, index->name);
3888
fputs(" record not found 4\n", stderr); */
3894
/* We are ready to look at a possible new index entry in the result
3895
set: the cursor is now placed on a user record */
3897
if (prebuilt->select_lock_type != LOCK_NONE) {
3898
/* Try to place a lock on the index record; note that delete
3899
marked records are a special case in a unique search. If there
3900
is a non-delete marked record, then it is enough to lock its
3901
existence with LOCK_REC_NOT_GAP. */
3903
/* If innodb_locks_unsafe_for_binlog option is used
3904
or this session is using a READ COMMITTED isolation
3905
level we lock only the record, i.e., next-key locking is
3910
if (!set_also_gap_locks
3911
|| srv_locks_unsafe_for_binlog
3912
|| trx->isolation_level == TRX_ISO_READ_COMMITTED
3914
&& !UNIV_UNLIKELY(rec_get_deleted_flag(rec, comp)))) {
3918
lock_type = LOCK_ORDINARY;
3921
/* If we are doing a 'greater or equal than a primary key
3922
value' search from a clustered index, and we find a record
3923
that has that exact primary key value, then there is no need
3924
to lock the gap before the record, because no insert in the
3925
gap can be in our search range. That is, no phantom row can
3928
An example: if col1 is the primary key, the search is WHERE
3929
col1 >= 100, and we find a record where col1 = 100, then no
3930
need to lock the gap before that record. */
3932
if (index == clust_index
3933
&& mode == PAGE_CUR_GE
3935
&& dtuple_get_n_fields_cmp(search_tuple)
3936
== dict_index_get_n_unique(index)
3937
&& 0 == cmp_dtuple_rec(search_tuple, rec, offsets)) {
3939
lock_type = LOCK_REC_NOT_GAP;
3942
err = sel_set_rec_lock(rec, index, offsets,
3943
prebuilt->select_lock_type,
3949
if (srv_locks_unsafe_for_binlog
3950
|| trx->isolation_level == TRX_ISO_READ_COMMITTED) {
3951
/* Note that a record of
3952
prebuilt->index was locked. */
3953
prebuilt->new_rec_locks = 1;
3957
if (UNIV_LIKELY(prebuilt->row_read_type
3958
!= ROW_READ_TRY_SEMI_CONSISTENT)
3959
|| index != clust_index) {
3961
goto lock_wait_or_error;
3964
/* The following call returns 'offsets'
3965
associated with 'old_vers' */
3966
err = row_sel_build_committed_vers_for_mysql(
3967
clust_index, prebuilt, rec,
3968
&offsets, &heap, &old_vers, &mtr);
3970
if (err != DB_SUCCESS) {
3972
goto lock_wait_or_error;
3975
mutex_enter(&kernel_mutex);
3976
if (trx->was_chosen_as_deadlock_victim) {
3977
mutex_exit(&kernel_mutex);
3980
goto lock_wait_or_error;
3982
if (UNIV_LIKELY(trx->wait_lock != NULL)) {
3983
lock_cancel_waiting_and_release(
3985
prebuilt->new_rec_locks = 0;
3987
mutex_exit(&kernel_mutex);
3989
/* The lock was granted while we were
3990
searching for the last committed version.
3991
Do a normal locking read. */
3993
offsets = rec_get_offsets(rec, index, offsets,
3997
/* Note that a record of
3998
prebuilt->index was locked. */
3999
prebuilt->new_rec_locks = 1;
4002
mutex_exit(&kernel_mutex);
4004
if (old_vers == NULL) {
4005
/* The row was not yet committed */
4010
did_semi_consistent_read = TRUE;
4015
goto lock_wait_or_error;
4018
/* This is a non-locking consistent read: if necessary, fetch
4019
a previous version of the record */
4021
if (trx->isolation_level == TRX_ISO_READ_UNCOMMITTED) {
4023
/* Do nothing: we let a non-locking SELECT read the
4024
latest version of the record */
4026
} else if (index == clust_index) {
4028
/* Fetch a previous version of the row if the current
4029
one is not visible in the snapshot; if we have a very
4030
high force recovery level set, we try to avoid crashes
4031
by skipping this lookup */
4033
if (UNIV_LIKELY(srv_force_recovery < 5)
4034
&& !lock_clust_rec_cons_read_sees(
4035
rec, index, offsets, trx->read_view)) {
4038
/* The following call returns 'offsets'
4039
associated with 'old_vers' */
4040
err = row_sel_build_prev_vers_for_mysql(
4041
trx->read_view, clust_index,
4042
prebuilt, rec, &offsets, &heap,
4045
if (err != DB_SUCCESS) {
4047
goto lock_wait_or_error;
4050
if (old_vers == NULL) {
4051
/* The row did not exist yet in
4059
} else if (!lock_sec_rec_cons_read_sees(rec, index,
4061
/* We are looking into a non-clustered index,
4062
and to get the right version of the record we
4063
have to look also into the clustered index: this
4064
is necessary, because we can only get the undo
4065
information via the clustered index record. */
4067
ut_ad(index != clust_index);
4069
goto requires_clust_rec;
4073
/* NOTE that at this point rec can be an old version of a clustered
4074
index record built for a consistent read. We cannot assume after this
4075
point that rec is on a buffer pool page. Functions like
4076
page_rec_is_comp() cannot be used! */
4078
if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, comp))) {
4080
/* The record is delete-marked: we can skip it */
4082
if ((srv_locks_unsafe_for_binlog
4083
|| trx->isolation_level == TRX_ISO_READ_COMMITTED)
4084
&& prebuilt->select_lock_type != LOCK_NONE
4085
&& !did_semi_consistent_read) {
4087
/* No need to keep a lock on a delete-marked record
4088
if we do not want to use next-key locking. */
4090
row_unlock_for_mysql(prebuilt, TRUE);
4093
/* This is an optimization to skip setting the next key lock
4094
on the record that follows this delete-marked record. This
4095
optimization works because of the unique search criteria
4096
which precludes the presence of a range lock between this
4097
delete marked record and the record following it.
4099
For now this is applicable only to clustered indexes while
4100
doing a unique search. There is scope for further optimization
4101
applicable to unique secondary indexes. Current behaviour is
4102
to widen the scope of a lock on an already delete marked record
4103
if the same record is deleted twice by the same transaction */
4104
if (index == clust_index && unique_search) {
4105
err = DB_RECORD_NOT_FOUND;
4113
/* Get the clustered index record if needed, if we did not do the
4114
search using the clustered index. */
4116
if (index != clust_index && prebuilt->need_to_access_clustered) {
4119
/* We use a 'goto' to the preceding label if a consistent
4120
read of a secondary index record requires us to look up old
4121
versions of the associated clustered index record. */
4123
ut_ad(rec_offs_validate(rec, index, offsets));
4125
/* It was a non-clustered index and we must fetch also the
4126
clustered index record */
4128
mtr_has_extra_clust_latch = TRUE;
4130
/* The following call returns 'offsets' associated with
4131
'clust_rec'. Note that 'clust_rec' can be an old version
4132
built for a consistent read. */
4134
err = row_sel_get_clust_rec_for_mysql(prebuilt, index, rec,
4136
&offsets, &heap, &mtr);
4137
if (err != DB_SUCCESS) {
4139
goto lock_wait_or_error;
4142
if (clust_rec == NULL) {
4143
/* The record did not exist in the read view */
4144
ut_ad(prebuilt->select_lock_type == LOCK_NONE);
4149
if ((srv_locks_unsafe_for_binlog
4150
|| trx->isolation_level == TRX_ISO_READ_COMMITTED)
4151
&& prebuilt->select_lock_type != LOCK_NONE) {
4152
/* Note that both the secondary index record
4153
and the clustered index record were locked. */
4154
ut_ad(prebuilt->new_rec_locks == 1);
4155
prebuilt->new_rec_locks = 2;
4158
if (UNIV_UNLIKELY(rec_get_deleted_flag(clust_rec, comp))) {
4160
/* The record is delete marked: we can skip it */
4162
if ((srv_locks_unsafe_for_binlog
4163
|| trx->isolation_level == TRX_ISO_READ_COMMITTED)
4164
&& prebuilt->select_lock_type != LOCK_NONE) {
4166
/* No need to keep a lock on a delete-marked
4167
record if we do not want to use next-key
4170
row_unlock_for_mysql(prebuilt, TRUE);
4176
if (prebuilt->need_to_access_clustered) {
4178
result_rec = clust_rec;
4180
ut_ad(rec_offs_validate(result_rec, clust_index,
4183
/* We used 'offsets' for the clust rec, recalculate
4185
offsets = rec_get_offsets(rec, index, offsets,
4186
ULINT_UNDEFINED, &heap);
4193
/* We found a qualifying record 'result_rec'. At this point,
4194
'offsets' are associated with 'result_rec'. */
4196
ut_ad(rec_offs_validate(result_rec,
4197
result_rec != rec ? clust_index : index,
4200
if ((match_mode == ROW_SEL_EXACT
4201
|| prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD)
4202
&& prebuilt->select_lock_type == LOCK_NONE
4203
&& !prebuilt->templ_contains_blob
4204
&& !prebuilt->clust_index_was_generated
4205
&& !prebuilt->used_in_HANDLER
4206
&& prebuilt->template_type
4207
!= ROW_MYSQL_DUMMY_TEMPLATE) {
4209
/* Inside an update, for example, we do not cache rows,
4210
since we may use the cursor position to do the actual
4211
update, that is why we require ...lock_type == LOCK_NONE.
4212
Since we keep space in prebuilt only for the BLOBs of
4213
a single row, we cannot cache rows in the case there
4214
are BLOBs in the fields to be fetched. In HANDLER we do
4215
not cache rows because there the cursor is a scrollable
4218
row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
4220
if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) {
4227
if (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE) {
4228
memcpy(buf + 4, result_rec
4229
- rec_offs_extra_size(offsets),
4230
rec_offs_size(offsets));
4231
mach_write_to_4(buf,
4232
rec_offs_extra_size(offsets) + 4);
4234
if (!row_sel_store_mysql_rec(buf, prebuilt,
4235
result_rec, offsets)) {
4236
err = DB_TOO_BIG_RECORD;
4238
goto lock_wait_or_error;
4242
if (prebuilt->clust_index_was_generated) {
4243
if (result_rec != rec) {
4244
offsets = rec_get_offsets(
4245
rec, index, offsets, ULINT_UNDEFINED,
4248
row_sel_store_row_id_to_prebuilt(prebuilt, rec,
4253
/* From this point on, 'offsets' are invalid. */
4256
/* We have an optimization to save CPU time: if this is a consistent
4257
read on a unique condition on the clustered index, then we do not
4258
store the pcur position, because any fetch next or prev will anyway
4259
return 'end of file'. Exceptions are locking reads and the MySQL
4260
HANDLER command where the user can move the cursor with PREV or NEXT
4261
even after a unique search. */
4263
if (!unique_search_from_clust_index
4264
|| prebuilt->select_lock_type != LOCK_NONE
4265
|| prebuilt->used_in_HANDLER) {
4267
/* Inside an update always store the cursor position */
4269
btr_pcur_store_position(pcur, &mtr);
4277
/* Reset the old and new "did semi-consistent read" flags. */
4278
if (UNIV_UNLIKELY(prebuilt->row_read_type
4279
== ROW_READ_DID_SEMI_CONSISTENT)) {
4280
prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
4282
did_semi_consistent_read = FALSE;
4283
prebuilt->new_rec_locks = 0;
4285
/*-------------------------------------------------------------*/
4286
/* PHASE 5: Move the cursor to the next index record */
4288
if (UNIV_UNLIKELY(mtr_has_extra_clust_latch)) {
4289
/* We must commit mtr if we are moving to the next
4290
non-clustered index record, because we could break the
4291
latching order if we would access a different clustered
4292
index page right away without releasing the previous. */
4294
btr_pcur_store_position(pcur, &mtr);
4297
mtr_has_extra_clust_latch = FALSE;
4300
if (sel_restore_position_for_mysql(&same_user_rec,
4302
pcur, moves_up, &mtr)) {
4303
#ifdef UNIV_SEARCH_DEBUG
4305
#endif /* UNIV_SEARCH_DEBUG */
4312
if (UNIV_UNLIKELY(!btr_pcur_move_to_next(pcur, &mtr))) {
4314
btr_pcur_store_position(pcur, &mtr);
4316
if (match_mode != 0) {
4317
err = DB_RECORD_NOT_FOUND;
4319
err = DB_END_OF_INDEX;
4325
if (UNIV_UNLIKELY(!btr_pcur_move_to_prev(pcur, &mtr))) {
4330
#ifdef UNIV_SEARCH_DEBUG
4332
#endif /* UNIV_SEARCH_DEBUG */
4337
/* Reset the old and new "did semi-consistent read" flags. */
4338
if (UNIV_UNLIKELY(prebuilt->row_read_type
4339
== ROW_READ_DID_SEMI_CONSISTENT)) {
4340
prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
4342
did_semi_consistent_read = FALSE;
4344
/*-------------------------------------------------------------*/
4346
btr_pcur_store_position(pcur, &mtr);
4349
mtr_has_extra_clust_latch = FALSE;
4351
trx->error_state = err;
4353
/* The following is a patch for MySQL */
4355
que_thr_stop_for_mysql(thr);
4357
thr->lock_state = QUE_THR_LOCK_ROW;
4359
if (row_mysql_handle_errors(&err, trx, thr, NULL)) {
4360
/* It was a lock wait, and it ended */
4362
thr->lock_state = QUE_THR_LOCK_NOLOCK;
4365
sel_restore_position_for_mysql(&same_user_rec,
4366
BTR_SEARCH_LEAF, pcur,
4369
if ((srv_locks_unsafe_for_binlog
4370
|| trx->isolation_level == TRX_ISO_READ_COMMITTED)
4371
&& !same_user_rec) {
4373
/* Since we were not able to restore the cursor
4374
on the same user record, we cannot use
4375
row_unlock_for_mysql() to unlock any records, and
4376
we must thus reset the new rec lock info. Since
4377
in lock0lock.c we have blocked the inheriting of gap
4378
X-locks, we actually do not have any new record locks
4381
Note that if we were able to restore on the 'same'
4382
user record, it is still possible that we were actually
4383
waiting on a delete-marked record, and meanwhile
4384
it was removed by purge and inserted again by some
4385
other user. But that is no problem, because in
4386
rec_loop we will again try to set a lock, and
4387
new_rec_lock_info in trx will be right at the end. */
4389
prebuilt->new_rec_locks = 0;
4392
mode = pcur->search_mode;
4397
thr->lock_state = QUE_THR_LOCK_NOLOCK;
4399
#ifdef UNIV_SEARCH_DEBUG
4400
/* fputs("Using ", stderr);
4401
dict_index_name_print(stderr, index);
4402
fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */
4403
#endif /* UNIV_SEARCH_DEBUG */
4407
/*-------------------------------------------------------------*/
4408
que_thr_stop_for_mysql_no_error(thr, trx);
4412
if (prebuilt->n_fetch_cached > 0) {
4413
row_sel_pop_cached_row_for_mysql(buf, prebuilt);
4418
#ifdef UNIV_SEARCH_DEBUG
4419
/* fputs("Using ", stderr);
4420
dict_index_name_print(stderr, index);
4421
fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */
4422
#endif /* UNIV_SEARCH_DEBUG */
4423
if (err == DB_SUCCESS) {
4429
if (UNIV_LIKELY_NULL(heap)) {
4430
mem_heap_free(heap);
4433
/* Set or reset the "did semi-consistent read" flag on return.
4434
The flag did_semi_consistent_read is set if and only if
4435
the record being returned was fetched with a semi-consistent read. */
4436
ut_ad(prebuilt->row_read_type != ROW_READ_WITH_LOCKS
4437
|| !did_semi_consistent_read);
4439
if (UNIV_UNLIKELY(prebuilt->row_read_type != ROW_READ_WITH_LOCKS)) {
4440
if (UNIV_UNLIKELY(did_semi_consistent_read)) {
4441
prebuilt->row_read_type = ROW_READ_DID_SEMI_CONSISTENT;
4443
prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
4449
/***********************************************************************
4450
Checks if MySQL at the moment is allowed for this table to retrieve a
4451
consistent read result, or store it to the query cache.
The table is obtained from norm_name via dict_table_get(). The check of
the table's lock list and of query_cache_inv_trx_id, and the possible
assignment of a read view to trx, are all done between
mutex_enter(&kernel_mutex) and mutex_exit(&kernel_mutex). */
4454
row_search_check_if_query_cache_permitted(
4455
/*======================================*/
4456
/* out: TRUE if storing or retrieving
4457
from the query cache is permitted */
4458
trx_t* trx, /* in: transaction object */
4459
const char* norm_name) /* in: concatenation of database name,
4460
'/' char, table name */
4462
dict_table_t* table;
4465
/* Fetch the table object that corresponds to norm_name. */
table = dict_table_get(norm_name, FALSE);
4467
/* dict_table_get() gave us no table object.
NOTE(review): the body of this branch is not visible in this excerpt;
presumably the function reports "not permitted" here -- confirm. */
if (table == NULL) {
4472
/* Everything from here down to the matching mutex_exit() runs with
kernel_mutex held. */
mutex_enter(&kernel_mutex);
4474
/* Start the transaction if it is not started yet */
4476
trx_start_if_not_started_low(trx);
4478
/* If there are locks on the table or some trx has invalidated the
4479
cache up to our trx id, then ret = FALSE.
4480
We do not check what type locks there are on the table, though only
4481
IX type locks actually would require ret = FALSE. */
4483
/* Both conditions must hold: the table's lock list is empty, and
trx->id compares greater than or equal to the table's
query_cache_inv_trx_id.
NOTE(review): the statement that sets the result for this branch is not
visible in this excerpt. */
if (UT_LIST_GET_LEN(table->locks) == 0
4484
&& ut_dulint_cmp(trx->id,
4485
table->query_cache_inv_trx_id) >= 0) {
4489
/* If the isolation level is high, assign a read view for the
4490
transaction if it does not yet have one */
4492
if (trx->isolation_level >= TRX_ISO_REPEATABLE_READ
4493
&& !trx->read_view) {
4495
/* Open a read view for trx->id using the transaction's
global_read_view_heap, and remember the same view as the transaction's
global read view. */
trx->read_view = read_view_open_now(
4496
trx->id, trx->global_read_view_heap);
4497
trx->global_read_view = trx->read_view;
4501
mutex_exit(&kernel_mutex);
/* NOTE(review): the lines that close the blocks above and return the
result are not visible in this excerpt. */
4506
/***********************************************************************
4507
Read the AUTOINC column from the current row. If the value is less than
4508
0 and the type is not unsigned then we reset the value to 0.
The field is located with rec_get_offsets() and rec_get_nth_field(), must
not be SQL NULL, and is decoded with mach_read_int_type(),
mach_float_read() or mach_double_read(). */
4511
row_search_autoinc_read_column(
4512
/*===========================*/
4513
/* out: value read from the column */
4514
dict_index_t* index, /* in: index to read from */
4515
const rec_t* rec, /* in: current rec */
4516
ulint col_no, /* in: column number */
4517
ulint mtype, /* in: column main type */
4518
ibool unsigned_type) /* in: signed or unsigned flag */
4523
/* 'heap' is handed to rec_get_offsets() by address below and is freed
at the end of the function if it is non-NULL by then. */
mem_heap_t* heap = NULL;
4524
ulint offsets_[REC_OFFS_NORMAL_SIZE];
4525
ulint* offsets = offsets_;
4527
/* Record the number of elements of the stack array in its first slot
before the array is passed to rec_get_offsets(). */
*offsets_ = sizeof offsets_ / sizeof *offsets_;
4529
/* TODO: We have to cast away the const of rec for now. This needs
4530
to be fixed later.*/
4531
offsets = rec_get_offsets(
4532
(rec_t*) rec, index, offsets, ULINT_UNDEFINED, &heap);
4534
/* TODO: We have to cast away the const of rec for now. This needs
4535
to be fixed later.*/
4536
/* Fetch a pointer to field number col_no of the record; its length
is stored in 'len'. */
data = rec_get_nth_field((rec_t*)rec, offsets, col_no, &len);
4538
/* The AUTOINC field must not be SQL NULL. */
ut_a(len != UNIV_SQL_NULL);
4542
/* NOTE(review): the lines that choose between the three decoders below
are not visible in this excerpt; presumably the choice is made on
'mtype' -- confirm. */
/* Integer decoding: the stored length must fit in 'value'. */
ut_a(len <= sizeof value);
4543
value = mach_read_int_type(data, len, unsigned_type);
4547
/* Float decoding: the stored length must equal sizeof(float). */
ut_a(len == sizeof(float));
4548
value = mach_float_read(data);
4552
/* Double decoding: the stored length must equal sizeof(double). */
ut_a(len == sizeof(double));
4553
value = mach_double_read(data);
4560
/* Release the heap if one was created through the &heap argument. */
if (UNIV_LIKELY_NULL(heap)) {
4561
mem_heap_free(heap);
4564
/* We assume that the autoinc counter can't be negative. */
4565
/* For a signed column, test whether the value is negative when viewed
as ib_longlong.
NOTE(review): the body of this branch is not visible in this excerpt;
per the function header the value is reset to 0. */
if (!unsigned_type && (ib_longlong) value < 0) {
4572
/***********************************************************************
4573
Get the last row.
Starting from the record the cursor currently points to, tests whether it
is a user record and otherwise keeps moving the cursor with
btr_pcur_move_to_prev() for as long as that call succeeds. */
4576
row_search_autoinc_get_rec(
4577
/*=======================*/
4578
/* out: current rec or NULL */
4579
btr_pcur_t* pcur, /* in: the current cursor */
4580
mtr_t* mtr) /* in: mini transaction */
4583
/* NOTE(review): the loop opener that pairs with the "} while" below is
not visible in this excerpt. */
const rec_t* rec = btr_pcur_get_rec(pcur);
4585
/* NOTE(review): the body of this branch is not visible in this
excerpt; presumably the user record is returned here -- confirm. */
if (page_rec_is_user_rec(rec)) {
4588
/* Step the cursor one record backwards; the loop ends when
btr_pcur_move_to_prev() returns false. */
} while (btr_pcur_move_to_prev(pcur, mtr));
4593
/***********************************************************************
4594
Read the max AUTOINC value from an index. */
4597
row_search_max_autoinc(
4598
/*===================*/
4599
/* out: DB_SUCCESS if all OK else
4600
error code, DB_RECORD_NOT_FOUND if
4601
column name can't be found in index */
4602
dict_index_t* index, /* in: index to search */
4603
const char* col_name, /* in: name of autoinc column */
4604
ib_ulonglong* value) /* out: AUTOINC value read */
4608
dict_field_t* dfield = NULL;
4609
ulint error = DB_SUCCESS;
4611
n_cols = dict_index_get_n_ordering_defined_by_user(index);
4613
/* Search the index for the AUTOINC column name */
4614
for (i = 0; i < n_cols; ++i) {
4615
dfield = dict_index_get_nth_field(index, i);
4617
if (strcmp(col_name, dfield->name) == 0) {
4624
/* Must find the AUTOINC column name */
4625
if (i < n_cols && dfield) {
4631
/* Open at the high/right end (FALSE), and INIT
4633
btr_pcur_open_at_index_side(
4634
FALSE, index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
4636
if (page_get_n_recs(btr_pcur_get_page(&pcur)) > 0) {
4639
rec = row_search_autoinc_get_rec(&pcur, &mtr);
4642
ibool unsigned_type = (
4643
dfield->col->prtype & DATA_UNSIGNED);
4645
*value = row_search_autoinc_read_column(
4647
dfield->col->mtype, unsigned_type);
4651
btr_pcur_close(&pcur);
4655
error = DB_RECORD_NOT_FOUND;