1
/******************************************************
6
Created 9/20/1997 Heikki Tuuri
7
*******************************************************/
12
#include "log0recv.ic"
20
#include "srv0start.h"
23
#include "page0page.h"
27
#include "ibuf0ibuf.h"
34
#include "dict0boot.h"
36
#include "sync0sync.h"
39
/* This is set to FALSE if the backup was originally taken with the
40
ibbackup --include regexp option: then we do not want to create tables in
41
directories which were not included */
42
ibool recv_replay_file_ops = TRUE;
43
#endif /* UNIV_HOTBACKUP */
45
/* Log records are stored in the hash table in chunks at most of this size;
46
this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
47
#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
49
/* Read-ahead area in applying log records to file pages */
50
#define RECV_READ_AHEAD_AREA 32
52
/* The recovery system object: NULL until recv_sys_create() allocates it */
recv_sys_t* recv_sys = NULL;
53
/* NOTE(review): presumably TRUE while log recovery is in progress; the
assignment is not in the visible part of this file -- confirm */
ibool recv_recovery_on = FALSE;
54
/* NOTE(review): presumably TRUE while recovering from a backup; the
assignment is not in the visible part of this file -- confirm */
ibool recv_recovery_from_backup_on = FALSE;
56
/* According to the comment on recv_init_crash_recovery(), that function
can be called only while this flag is FALSE */
ibool recv_needed_recovery = FALSE;
58
/* NOTE(review): presumably enables lsn sanity checks once recovery has
progressed far enough; not set in the visible part of this file -- confirm */
ibool recv_lsn_checks_on = FALSE;
60
/* There are two conditions under which we scan the logs, the first
61
is normal startup and the second is when we do a recovery from an
63
This flag is set if we are doing a scan from the last checkpoint during
64
startup. If we find log entries that were written after the last checkpoint
65
we know that the server was not cleanly shut down. We must then initialize
66
the crash recovery environment before attempting to store these entries in
67
the log hash table. */
68
ibool recv_log_scan_is_startup_type = FALSE;
70
/* If the following is TRUE, the buffer pool file pages must be invalidated
71
after recovery and no ibuf operations are allowed; this becomes TRUE if
72
the log record hash table becomes too full, and log records must be merged
73
to file pages already before the recovery is finished: in this case no
74
ibuf operations are allowed, as they could modify the pages read in the
75
buffer pool before the pages have been recovered to the up-to-date state */
77
/* Recovery is running and no operations on the log files are allowed
78
yet: the variable name is misleading */
80
ibool recv_no_ibuf_operations = FALSE;
82
/* The following counter is used to decide when to print info on
84
ulint recv_scan_print_counter = 0;
86
ibool recv_is_from_backup = FALSE;
88
ibool recv_is_making_a_backup = FALSE;
90
# define recv_is_making_a_backup FALSE
91
#endif /* UNIV_HOTBACKUP */
93
/* NOTE(review): the three variables below look like diagnostic bookkeeping
about the most recently parsed log record (type, offset, multi-record
flag); 999999 is presumably a "nothing parsed yet" placeholder -- confirm
against the parser code, which is not in the visible part of this file */
ulint recv_previous_parsed_rec_type = 999999;
94
ulint recv_previous_parsed_rec_offset = 0;
95
ulint recv_previous_parsed_rec_is_multi = 0;
97
/* Reported by recv_sys_empty_hash() as the "Maximum page number with log
records on it" when pages were left unprocessed */
ulint recv_max_parsed_page_no = 0;
99
/* This many frames must be left free in the buffer pool when we scan
100
the log and store the scanned log records in the buffer pool: we will
101
use these free frames to read in pages when we start applying the
102
log records to the database. */
104
ulint recv_n_pool_free_frames = 256;
106
/* The maximum lsn we see for a page during the recovery process. If this
107
is bigger than the lsn we are able to scan up to, that is an indication that
108
the recovery failed and the database may be corrupt. */
110
dulint recv_max_page_lsn;
114
/***********************************************************
115
Initialize crash recovery environment. Can be called iff
116
recv_needed_recovery == FALSE. */
119
recv_init_crash_recovery(void);
120
/*===========================*/
122
/************************************************************
123
Creates the recovery system. */
126
recv_sys_create(void)
127
/*=================*/
129
if (recv_sys != NULL) {
134
recv_sys = mem_alloc(sizeof(recv_sys_t));
136
mutex_create(&recv_sys->mutex, SYNC_RECV);
138
recv_sys->heap = NULL;
139
recv_sys->addr_hash = NULL;
142
/************************************************************
143
Inits the recovery system for a recovery operation. */
148
ibool recover_from_backup, /* in: TRUE if this is called
149
to recover from a hot backup */
150
ulint available_memory) /* in: available memory in bytes */
152
if (recv_sys->heap != NULL) {
157
mutex_enter(&(recv_sys->mutex));
159
if (!recover_from_backup) {
160
recv_sys->heap = mem_heap_create_in_buffer(256);
162
recv_sys->heap = mem_heap_create(256);
163
recv_is_from_backup = TRUE;
166
recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
168
recv_sys->recovered_offset = 0;
170
recv_sys->addr_hash = hash_create(available_memory / 64);
171
recv_sys->n_addrs = 0;
173
recv_sys->apply_log_recs = FALSE;
174
recv_sys->apply_batch_on = FALSE;
176
recv_sys->last_block_buf_start = mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE);
178
recv_sys->last_block = ut_align(recv_sys->last_block_buf_start,
179
OS_FILE_LOG_BLOCK_SIZE);
180
recv_sys->found_corrupt_log = FALSE;
182
recv_max_page_lsn = ut_dulint_zero;
184
mutex_exit(&(recv_sys->mutex));
187
/************************************************************
188
Empties the hash table when it has been fully processed. */
191
recv_sys_empty_hash(void)
192
/*=====================*/
194
ut_ad(mutex_own(&(recv_sys->mutex)));
196
if (recv_sys->n_addrs != 0) {
198
"InnoDB: Error: %lu pages with log records"
199
" were left unprocessed!\n"
200
"InnoDB: Maximum page number with"
201
" log records on it %lu\n",
202
(ulong) recv_sys->n_addrs,
203
(ulong) recv_max_parsed_page_no);
207
hash_table_free(recv_sys->addr_hash);
208
mem_heap_empty(recv_sys->heap);
210
recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 256);
213
#ifndef UNIV_LOG_DEBUG
214
/************************************************************
215
Frees the recovery system. */
221
mutex_enter(&(recv_sys->mutex));
223
hash_table_free(recv_sys->addr_hash);
224
mem_heap_free(recv_sys->heap);
225
ut_free(recv_sys->buf);
226
mem_free(recv_sys->last_block_buf_start);
228
recv_sys->addr_hash = NULL;
229
recv_sys->heap = NULL;
231
mutex_exit(&(recv_sys->mutex));
233
#endif /* UNIV_LOG_DEBUG */
235
/************************************************************
236
Truncates possibly corrupted or extra records from a log group. */
241
log_group_t* group, /* in: log group */
242
dulint recovered_lsn, /* in: recovery succeeded up to this
244
dulint limit_lsn, /* in: this was the limit for
246
dulint checkpoint_lsn, /* in: recovery was started from this
248
dulint archived_lsn) /* in: the log has been archived up to
259
if (ut_dulint_cmp(archived_lsn, ut_dulint_max) == 0) {
260
/* Checkpoint was taken in the NOARCHIVELOG mode */
261
archived_lsn = checkpoint_lsn;
264
finish_lsn1 = ut_dulint_add(ut_dulint_align_down(
266
OS_FILE_LOG_BLOCK_SIZE),
267
log_group_get_capacity(group));
269
finish_lsn2 = ut_dulint_add(ut_dulint_align_up(
271
OS_FILE_LOG_BLOCK_SIZE),
272
recv_sys->last_log_buf_size);
274
if (ut_dulint_cmp(limit_lsn, ut_dulint_max) != 0) {
275
/* We do not know how far we should erase log records: erase
276
as much as possible */
278
finish_lsn = finish_lsn1;
280
/* It is enough to erase the length of the log buffer */
281
finish_lsn = ut_dulint_get_min(finish_lsn1, finish_lsn2);
284
ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
286
/* Write the log buffer full of zeros */
287
for (i = 0; i < RECV_SCAN_SIZE; i++) {
289
*(log_sys->buf + i) = '\0';
292
start_lsn = ut_dulint_align_down(recovered_lsn,
293
OS_FILE_LOG_BLOCK_SIZE);
295
if (ut_dulint_cmp(start_lsn, recovered_lsn) != 0) {
296
/* Copy the last incomplete log block to the log buffer and
297
edit its data length: */
299
ut_memcpy(log_sys->buf, recv_sys->last_block,
300
OS_FILE_LOG_BLOCK_SIZE);
301
log_block_set_data_len(log_sys->buf, ut_dulint_minus(
302
recovered_lsn, start_lsn));
305
if (ut_dulint_cmp(start_lsn, finish_lsn) >= 0) {
311
end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
313
if (ut_dulint_cmp(end_lsn, finish_lsn) > 0) {
315
end_lsn = finish_lsn;
318
len = ut_dulint_minus(end_lsn, start_lsn);
320
log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
321
if (ut_dulint_cmp(end_lsn, finish_lsn) >= 0) {
326
/* Write the log buffer full of zeros */
327
for (i = 0; i < RECV_SCAN_SIZE; i++) {
329
*(log_sys->buf + i) = '\0';
336
/************************************************************
337
Copies the log segment between group->recovered_lsn and recovered_lsn from the
338
most up-to-date log group to group, so that it contains the latest log data. */
343
log_group_t* up_to_date_group, /* in: the most up-to-date log
345
log_group_t* group, /* in: copy to this log
347
dulint recovered_lsn) /* in: recovery succeeded up
354
if (ut_dulint_cmp(group->scanned_lsn, recovered_lsn) >= 0) {
359
ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
361
start_lsn = ut_dulint_align_down(group->scanned_lsn,
362
OS_FILE_LOG_BLOCK_SIZE);
364
end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
366
if (ut_dulint_cmp(end_lsn, recovered_lsn) > 0) {
367
end_lsn = ut_dulint_align_up(recovered_lsn,
368
OS_FILE_LOG_BLOCK_SIZE);
371
log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
372
up_to_date_group, start_lsn, end_lsn);
374
len = ut_dulint_minus(end_lsn, start_lsn);
376
log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
378
if (ut_dulint_cmp(end_lsn, recovered_lsn) >= 0) {
387
/************************************************************
388
Copies a log segment from the most up-to-date log group to the other log
389
groups, so that they all contain the latest log data. Also writes the info
390
about the latest checkpoint to the groups, and inits the fields in the group
391
memory structs to up-to-date values. */
394
recv_synchronize_groups(
395
/*====================*/
396
log_group_t* up_to_date_group) /* in: the most up-to-date
402
dulint recovered_lsn;
405
recovered_lsn = recv_sys->recovered_lsn;
406
limit_lsn = recv_sys->limit_lsn;
408
/* Read the last recovered log block to the recovery system buffer:
409
the block is always incomplete */
411
start_lsn = ut_dulint_align_down(recovered_lsn,
412
OS_FILE_LOG_BLOCK_SIZE);
413
end_lsn = ut_dulint_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
415
ut_a(ut_dulint_cmp(start_lsn, end_lsn) != 0);
417
log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
418
up_to_date_group, start_lsn, end_lsn);
420
group = UT_LIST_GET_FIRST(log_sys->log_groups);
423
if (group != up_to_date_group) {
425
/* Copy log data if needed */
427
recv_copy_group(group, up_to_date_group,
431
/* Update the fields in the group struct to correspond to
434
log_group_set_fields(group, recovered_lsn);
436
group = UT_LIST_GET_NEXT(log_groups, group);
439
/* Copy the checkpoint info to the groups; remember that we have
440
incremented checkpoint_no by one, and the info will not be written
441
over the max checkpoint info, thus making the preservation of max
442
checkpoint info on disk certain */
444
log_groups_write_checkpoint_info();
446
mutex_exit(&(log_sys->mutex));
448
/* Wait for the checkpoint write to complete */
449
rw_lock_s_lock(&(log_sys->checkpoint_lock));
450
rw_lock_s_unlock(&(log_sys->checkpoint_lock));
452
mutex_enter(&(log_sys->mutex));
455
/***************************************************************************
456
Checks the consistency of the checkpoint info */
459
recv_check_cp_is_consistent(
460
/*========================*/
461
/* out: TRUE if ok */
462
byte* buf) /* in: buffer containing checkpoint info */
466
fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
468
if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
469
buf + LOG_CHECKPOINT_CHECKSUM_1)) {
473
fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
474
LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
476
if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
477
buf + LOG_CHECKPOINT_CHECKSUM_2)) {
484
/************************************************************
485
Looks for the maximum consistent checkpoint from the log groups. */
488
recv_find_max_checkpoint(
489
/*=====================*/
490
/* out: error code or DB_SUCCESS */
491
log_group_t** max_group, /* out: max group */
492
ulint* max_field) /* out: LOG_CHECKPOINT_1 or
497
dulint checkpoint_no;
501
group = UT_LIST_GET_FIRST(log_sys->log_groups);
503
max_no = ut_dulint_zero;
507
buf = log_sys->checkpoint_buf;
510
group->state = LOG_GROUP_CORRUPTED;
512
for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
513
field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
515
log_group_read_checkpoint_info(group, field);
517
if (!recv_check_cp_is_consistent(buf)) {
519
if (log_debug_writes) {
521
"InnoDB: Checkpoint in group"
522
" %lu at %lu invalid, %lu\n",
525
(ulong) mach_read_from_4(
527
+ LOG_CHECKPOINT_CHECKSUM_1));
530
#endif /* UNIV_DEBUG */
534
group->state = LOG_GROUP_OK;
536
group->lsn = mach_read_from_8(
537
buf + LOG_CHECKPOINT_LSN);
538
group->lsn_offset = mach_read_from_4(
539
buf + LOG_CHECKPOINT_OFFSET);
540
checkpoint_no = mach_read_from_8(
541
buf + LOG_CHECKPOINT_NO);
544
if (log_debug_writes) {
546
"InnoDB: Checkpoint number %lu"
547
" found in group %lu\n",
548
(ulong) ut_dulint_get_low(
552
#endif /* UNIV_DEBUG */
554
if (ut_dulint_cmp(checkpoint_no, max_no) >= 0) {
557
max_no = checkpoint_no;
564
group = UT_LIST_GET_NEXT(log_groups, group);
567
if (*max_group == NULL) {
570
"InnoDB: No valid checkpoint found.\n"
571
"InnoDB: If this error appears when you are"
572
" creating an InnoDB database,\n"
573
"InnoDB: the problem may be that during"
574
" an earlier attempt you managed\n"
575
"InnoDB: to create the InnoDB data files,"
576
" but log file creation failed.\n"
577
"InnoDB: If that is the case, please refer to\n"
578
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
579
"error-creating-innodb.html\n");
586
/***********************************************************************
587
Reads the checkpoint info needed in hot backup. */
590
recv_read_cp_info_for_backup(
591
/*=========================*/
592
/* out: TRUE if success */
593
byte* hdr, /* in: buffer containing the log group header */
594
dulint* lsn, /* out: checkpoint lsn */
595
ulint* offset, /* out: checkpoint offset in the log group */
596
ulint* fsp_limit,/* out: fsp limit of space 0, 1000000000 if the
597
database is running with < version 3.23.50 of InnoDB */
598
dulint* cp_no, /* out: checkpoint number */
599
dulint* first_header_lsn)
600
/* out: lsn of the start of the first log file */
603
dulint max_cp_no = ut_dulint_zero;
606
cp_buf = hdr + LOG_CHECKPOINT_1;
608
if (recv_check_cp_is_consistent(cp_buf)) {
609
max_cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
610
max_cp = LOG_CHECKPOINT_1;
613
cp_buf = hdr + LOG_CHECKPOINT_2;
615
if (recv_check_cp_is_consistent(cp_buf)) {
616
if (ut_dulint_cmp(mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO),
618
max_cp = LOG_CHECKPOINT_2;
626
cp_buf = hdr + max_cp;
628
*lsn = mach_read_from_8(cp_buf + LOG_CHECKPOINT_LSN);
629
*offset = mach_read_from_4(cp_buf + LOG_CHECKPOINT_OFFSET);
631
/* If the user is running a pre-3.23.50 version of InnoDB, its
632
checkpoint data does not contain the fsp limit info */
633
if (mach_read_from_4(cp_buf + LOG_CHECKPOINT_FSP_MAGIC_N)
634
== LOG_CHECKPOINT_FSP_MAGIC_N_VAL) {
636
*fsp_limit = mach_read_from_4(
637
cp_buf + LOG_CHECKPOINT_FSP_FREE_LIMIT);
639
if (*fsp_limit == 0) {
640
*fsp_limit = 1000000000;
643
*fsp_limit = 1000000000;
646
/* fprintf(stderr, "fsp limit %lu MB\n", *fsp_limit); */
648
*cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
650
*first_header_lsn = mach_read_from_8(hdr + LOG_FILE_START_LSN);
655
/**********************************************************
656
Checks the 4-byte checksum against the trailer checksum field of a log block.
657
We also accept a log block in the old format < InnoDB-3.23.52 where the
658
checksum field contains the log block number. */
661
log_block_checksum_is_ok_or_old_format(
662
/*===================================*/
663
/* out: TRUE if ok, or if the log block may be in the
664
format of InnoDB version < 3.23.52 */
665
byte* block) /* in: pointer to a log block */
667
#ifdef UNIV_LOG_DEBUG
669
#endif /* UNIV_LOG_DEBUG */
670
if (log_block_calc_checksum(block) == log_block_get_checksum(block)) {
675
if (log_block_get_hdr_no(block) == log_block_get_checksum(block)) {
677
/* We assume the log block is in the format of
678
InnoDB version < 3.23.52 and the block is ok */
681
"InnoDB: Scanned old format < InnoDB-3.23.52"
682
" log block number %lu\n",
683
log_block_get_hdr_no(block));
691
/***********************************************************************
692
Scans the log segment and n_bytes_scanned is set to the length of valid
696
recv_scan_log_seg_for_backup(
697
/*=========================*/
698
byte* buf, /* in: buffer containing log data */
699
ulint buf_len, /* in: data length in that buffer */
700
dulint* scanned_lsn, /* in/out: lsn of buffer start,
701
we return scanned lsn */
702
ulint* scanned_checkpoint_no,
703
/* in/out: 4 lowest bytes of the
704
highest scanned checkpoint number so
706
ulint* n_bytes_scanned)/* out: how much we were able to
707
scan, smaller than buf_len if log
714
*n_bytes_scanned = 0;
716
for (log_block = buf; log_block < buf + buf_len;
717
log_block += OS_FILE_LOG_BLOCK_SIZE) {
719
no = log_block_get_hdr_no(log_block);
722
fprintf(stderr, "Log block header no %lu\n", no);
725
if (no != log_block_convert_lsn_to_no(*scanned_lsn)
726
|| !log_block_checksum_is_ok_or_old_format(log_block)) {
729
"Log block n:o %lu, scanned lsn n:o %lu\n",
730
no, log_block_convert_lsn_to_no(*scanned_lsn));
732
/* Garbage or an incompletely written log block */
734
log_block += OS_FILE_LOG_BLOCK_SIZE;
737
"Next log block n:o %lu\n",
738
log_block_get_hdr_no(log_block));
743
if (*scanned_checkpoint_no > 0
744
&& log_block_get_checkpoint_no(log_block)
745
< *scanned_checkpoint_no
746
&& *scanned_checkpoint_no
747
- log_block_get_checkpoint_no(log_block)
750
/* Garbage from a log buffer flush which was made
751
before the most recent database recovery */
754
"Scanned cp n:o %lu, block cp n:o %lu\n",
755
*scanned_checkpoint_no,
756
log_block_get_checkpoint_no(log_block));
761
data_len = log_block_get_data_len(log_block);
763
*scanned_checkpoint_no
764
= log_block_get_checkpoint_no(log_block);
765
*scanned_lsn = ut_dulint_add(*scanned_lsn, data_len);
767
*n_bytes_scanned += data_len;
769
if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
770
/* Log data ends here */
773
fprintf(stderr, "Log block data len %lu\n",
781
/***********************************************************************
782
Tries to parse a single log record body and also applies it to a page if
783
specified. File ops are parsed, but not applied in this function. */
786
recv_parse_or_apply_log_rec_body(
787
/*=============================*/
788
/* out: log record end, NULL if not a complete
790
byte type, /* in: type */
791
byte* ptr, /* in: pointer to a buffer */
792
byte* end_ptr,/* in: pointer to the buffer end */
793
page_t* page, /* in: buffer page or NULL; if not NULL, then the log
794
record is applied to the page, and the log record
795
should be complete then */
796
mtr_t* mtr) /* in: mtr or NULL; should be non-NULL if and only if
799
dict_index_t* index = NULL;
802
case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES:
803
ptr = mlog_parse_nbytes(type, ptr, end_ptr, page);
805
case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT:
806
if (NULL != (ptr = mlog_parse_index(
808
type == MLOG_COMP_REC_INSERT,
811
|| (ibool)!!page_is_comp(page)
812
== dict_table_is_comp(index->table));
813
ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
817
case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
818
if (NULL != (ptr = mlog_parse_index(
820
type == MLOG_COMP_REC_CLUST_DELETE_MARK,
823
|| (ibool)!!page_is_comp(page)
824
== dict_table_is_comp(index->table));
825
ptr = btr_cur_parse_del_mark_set_clust_rec(
826
ptr, end_ptr, index, page);
829
case MLOG_COMP_REC_SEC_DELETE_MARK:
830
/* This log record type is obsolete, but we process it for
831
backward compatibility with MySQL 5.0.3 and 5.0.4. */
832
ut_a(!page || page_is_comp(page));
833
ptr = mlog_parse_index(ptr, end_ptr, TRUE, &index);
838
case MLOG_REC_SEC_DELETE_MARK:
839
ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr, page);
841
case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
842
if (NULL != (ptr = mlog_parse_index(
844
type == MLOG_COMP_REC_UPDATE_IN_PLACE,
847
|| (ibool)!!page_is_comp(page)
848
== dict_table_is_comp(index->table));
849
ptr = btr_cur_parse_update_in_place(ptr, end_ptr,
853
case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE:
854
case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE:
855
if (NULL != (ptr = mlog_parse_index(
857
type == MLOG_COMP_LIST_END_DELETE
858
|| type == MLOG_COMP_LIST_START_DELETE,
861
|| (ibool)!!page_is_comp(page)
862
== dict_table_is_comp(index->table));
863
ptr = page_parse_delete_rec_list(type, ptr, end_ptr,
867
case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED:
868
if (NULL != (ptr = mlog_parse_index(
870
type == MLOG_COMP_LIST_END_COPY_CREATED,
873
|| (ibool)!!page_is_comp(page)
874
== dict_table_is_comp(index->table));
875
ptr = page_parse_copy_rec_list_to_created_page(
876
ptr, end_ptr, index, page, mtr);
879
case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE:
880
if (NULL != (ptr = mlog_parse_index(
882
type == MLOG_COMP_PAGE_REORGANIZE,
885
|| (ibool)!!page_is_comp(page)
886
== dict_table_is_comp(index->table));
887
ptr = btr_parse_page_reorganize(ptr, end_ptr, index,
891
case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
892
ptr = page_parse_create(ptr, end_ptr,
893
type == MLOG_COMP_PAGE_CREATE,
896
case MLOG_UNDO_INSERT:
897
ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
899
case MLOG_UNDO_ERASE_END:
900
ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr);
903
ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
905
case MLOG_UNDO_HDR_DISCARD:
906
ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr);
908
case MLOG_UNDO_HDR_CREATE:
909
case MLOG_UNDO_HDR_REUSE:
910
ptr = trx_undo_parse_page_header(type, ptr, end_ptr,
913
case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK:
914
ptr = btr_parse_set_min_rec_mark(
915
ptr, end_ptr, type == MLOG_COMP_REC_MIN_MARK,
918
case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE:
919
if (NULL != (ptr = mlog_parse_index(
921
type == MLOG_COMP_REC_DELETE,
924
|| (ibool)!!page_is_comp(page)
925
== dict_table_is_comp(index->table));
926
ptr = page_cur_parse_delete_rec(ptr, end_ptr,
930
case MLOG_IBUF_BITMAP_INIT:
931
ptr = ibuf_parse_bitmap_init(ptr, end_ptr, page, mtr);
933
case MLOG_INIT_FILE_PAGE:
934
ptr = fsp_parse_init_file_page(ptr, end_ptr, page);
936
case MLOG_WRITE_STRING:
937
ptr = mlog_parse_string(ptr, end_ptr, page);
939
case MLOG_FILE_CREATE:
940
case MLOG_FILE_RENAME:
941
case MLOG_FILE_DELETE:
942
ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, 0, 0);
946
recv_sys->found_corrupt_log = TRUE;
950
dict_table_t* table = index->table;
952
dict_mem_index_free(index);
953
dict_mem_table_free(table);
959
/*************************************************************************
960
Calculates the fold value of a page file address: used in inserting or
961
searching for a log record in the hash table. */
966
/* out: folded value */
967
ulint space, /* in: space */
968
ulint page_no)/* in: page number */
970
return(ut_fold_ulint_pair(space, page_no));
973
/*************************************************************************
974
Calculates the hash value of a page file address: used in inserting or
975
searching for a log record in the hash table. */
980
/* out: folded value */
981
ulint space, /* in: space */
982
ulint page_no)/* in: page number */
984
return(hash_calc_hash(recv_fold(space, page_no), recv_sys->addr_hash));
987
/*************************************************************************
988
Gets the hashed file address struct for a page. */
991
recv_get_fil_addr_struct(
992
/*=====================*/
993
/* out: file address struct, NULL if not found from
995
ulint space, /* in: space id */
996
ulint page_no)/* in: page number */
998
recv_addr_t* recv_addr;
1000
recv_addr = HASH_GET_FIRST(recv_sys->addr_hash,
1001
recv_hash(space, page_no));
1003
if ((recv_addr->space == space)
1004
&& (recv_addr->page_no == page_no)) {
1009
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
1015
/***********************************************************************
1016
Adds a new log record to the hash table of log records. */
1019
recv_add_to_hash_table(
1020
/*===================*/
1021
byte type, /* in: log record type */
1022
ulint space, /* in: space id */
1023
ulint page_no, /* in: page number */
1024
byte* body, /* in: log record body */
1025
byte* rec_end, /* in: log record end */
1026
dulint start_lsn, /* in: start lsn of the mtr */
1027
dulint end_lsn) /* in: end lsn of the mtr */
1031
recv_data_t* recv_data;
1032
recv_data_t** prev_field;
1033
recv_addr_t* recv_addr;
1035
if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
1036
/* The tablespace does not exist any more: do not store the
1042
len = rec_end - body;
1044
recv = mem_heap_alloc(recv_sys->heap, sizeof(recv_t));
1046
recv->len = rec_end - body;
1047
recv->start_lsn = start_lsn;
1048
recv->end_lsn = end_lsn;
1050
recv_addr = recv_get_fil_addr_struct(space, page_no);
1052
if (recv_addr == NULL) {
1053
recv_addr = mem_heap_alloc(recv_sys->heap,
1054
sizeof(recv_addr_t));
1055
recv_addr->space = space;
1056
recv_addr->page_no = page_no;
1057
recv_addr->state = RECV_NOT_PROCESSED;
1059
UT_LIST_INIT(recv_addr->rec_list);
1061
HASH_INSERT(recv_addr_t, addr_hash, recv_sys->addr_hash,
1062
recv_fold(space, page_no), recv_addr);
1063
recv_sys->n_addrs++;
1065
fprintf(stderr, "Inserting log rec for space %lu, page %lu\n",
1070
UT_LIST_ADD_LAST(rec_list, recv_addr->rec_list, recv);
1072
prev_field = &(recv->data);
1074
/* Store the log record body in chunks of less than UNIV_PAGE_SIZE:
1075
recv_sys->heap grows into the buffer pool, and bigger chunks could not
1078
while (rec_end > body) {
1080
len = rec_end - body;
1082
if (len > RECV_DATA_BLOCK_SIZE) {
1083
len = RECV_DATA_BLOCK_SIZE;
1086
recv_data = mem_heap_alloc(recv_sys->heap,
1087
sizeof(recv_data_t) + len);
1088
*prev_field = recv_data;
1090
ut_memcpy(((byte*)recv_data) + sizeof(recv_data_t), body, len);
1092
prev_field = &(recv_data->next);
1100
/*************************************************************************
1101
Copies the log record body from recv to buf. */
1104
recv_data_copy_to_buf(
1105
/*==================*/
1106
byte* buf, /* in: buffer of length at least recv->len */
1107
recv_t* recv) /* in: log record */
1109
recv_data_t* recv_data;
1114
recv_data = recv->data;
1117
if (len > RECV_DATA_BLOCK_SIZE) {
1118
part_len = RECV_DATA_BLOCK_SIZE;
1123
ut_memcpy(buf, ((byte*)recv_data) + sizeof(recv_data_t),
1128
recv_data = recv_data->next;
1132
/****************************************************************************
1133
Applies the hashed log records to the page, if the page lsn is less than the
1134
lsn of a log record. This can be called when a buffer page has just been
1135
read in, or also for a page already in the buffer pool. */
1140
ibool recover_backup, /* in: TRUE if we are recovering a backup
1141
page: then we do not acquire any latches
1142
since the page was read in outside the
1144
ibool just_read_in, /* in: TRUE if the i/o-handler calls this for
1145
a freshly read page */
1146
page_t* page, /* in: buffer page */
1147
ulint space, /* in: space id */
1148
ulint page_no) /* in: page number */
1150
buf_block_t* block = NULL;
1151
recv_addr_t* recv_addr;
1157
dulint page_newest_lsn;
1158
ibool modification_to_page;
1162
mutex_enter(&(recv_sys->mutex));
1164
if (recv_sys->apply_log_recs == FALSE) {
1166
/* Log records should not be applied now */
1168
mutex_exit(&(recv_sys->mutex));
1173
recv_addr = recv_get_fil_addr_struct(space, page_no);
1175
if ((recv_addr == NULL)
1176
|| (recv_addr->state == RECV_BEING_PROCESSED)
1177
|| (recv_addr->state == RECV_PROCESSED)) {
1179
mutex_exit(&(recv_sys->mutex));
1185
fprintf(stderr, "Recovering space %lu, page %lu\n", space, page_no);
1188
recv_addr->state = RECV_BEING_PROCESSED;
1190
mutex_exit(&(recv_sys->mutex));
1193
mtr_set_log_mode(&mtr, MTR_LOG_NONE);
1195
if (!recover_backup) {
1196
block = buf_block_align(page);
1199
/* Move the ownership of the x-latch on the
1200
page to this OS thread, so that we can acquire
1201
a second x-latch on it. This is needed for the
1202
operations to the page to pass the debug
1205
rw_lock_x_lock_move_ownership(&(block->lock));
1208
success = buf_page_get_known_nowait(RW_X_LATCH, page,
1214
#ifdef UNIV_SYNC_DEBUG
1215
buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
1216
#endif /* UNIV_SYNC_DEBUG */
1219
/* Read the newest modification lsn from the page */
1220
page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
1222
if (!recover_backup) {
1223
/* It may be that the page has been modified in the buffer
1224
pool: read the newest modification lsn there */
1226
page_newest_lsn = buf_frame_get_newest_modification(page);
1228
if (!ut_dulint_is_zero(page_newest_lsn)) {
1230
page_lsn = page_newest_lsn;
1233
/* In recovery from a backup we do not really use the buffer
1236
page_newest_lsn = ut_dulint_zero;
1239
modification_to_page = FALSE;
1240
start_lsn = end_lsn = ut_dulint_zero;
1242
recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
1245
end_lsn = recv->end_lsn;
1247
if (recv->len > RECV_DATA_BLOCK_SIZE) {
1248
/* We have to copy the record body to a separate
1251
buf = mem_alloc(recv->len);
1253
recv_data_copy_to_buf(buf, recv);
1255
buf = ((byte*)(recv->data)) + sizeof(recv_data_t);
1258
if (recv->type == MLOG_INIT_FILE_PAGE) {
1259
page_lsn = page_newest_lsn;
1261
mach_write_to_8(page + UNIV_PAGE_SIZE
1262
- FIL_PAGE_END_LSN_OLD_CHKSUM,
1264
mach_write_to_8(page + FIL_PAGE_LSN, ut_dulint_zero);
1267
if (ut_dulint_cmp(recv->start_lsn, page_lsn) >= 0) {
1269
if (!modification_to_page) {
1271
modification_to_page = TRUE;
1272
start_lsn = recv->start_lsn;
1276
if (log_debug_writes) {
1278
"InnoDB: Applying log rec"
1280
" to space %lu page no %lu\n",
1281
(ulong) recv->type, (ulong) recv->len,
1282
(ulong) recv_addr->space,
1283
(ulong) recv_addr->page_no);
1285
#endif /* UNIV_DEBUG */
1287
recv_parse_or_apply_log_rec_body(recv->type, buf,
1290
mach_write_to_8(page + UNIV_PAGE_SIZE
1291
- FIL_PAGE_END_LSN_OLD_CHKSUM,
1292
ut_dulint_add(recv->start_lsn,
1294
mach_write_to_8(page + FIL_PAGE_LSN,
1295
ut_dulint_add(recv->start_lsn,
1299
if (recv->len > RECV_DATA_BLOCK_SIZE) {
1303
recv = UT_LIST_GET_NEXT(rec_list, recv);
1306
mutex_enter(&(recv_sys->mutex));
1308
if (ut_dulint_cmp(recv_max_page_lsn, page_lsn) < 0) {
1309
recv_max_page_lsn = page_lsn;
1312
recv_addr->state = RECV_PROCESSED;
1314
ut_a(recv_sys->n_addrs);
1315
recv_sys->n_addrs--;
1317
mutex_exit(&(recv_sys->mutex));
1319
if (!recover_backup && modification_to_page) {
1322
buf_flush_recv_note_modification(block, start_lsn, end_lsn);
1325
/* Make sure that committing mtr does not change the modification
1326
lsn values of page */
1328
mtr.modifications = FALSE;
1333
/***********************************************************************
1334
Reads in pages which have hashed log records, from an area around a given
1340
/* out: number of pages found */
1341
ulint space, /* in: space */
1342
ulint page_no)/* in: page number */
1344
recv_addr_t* recv_addr;
1345
ulint page_nos[RECV_READ_AHEAD_AREA];
1349
low_limit = page_no - (page_no % RECV_READ_AHEAD_AREA);
1353
for (page_no = low_limit; page_no < low_limit + RECV_READ_AHEAD_AREA;
1355
recv_addr = recv_get_fil_addr_struct(space, page_no);
1357
if (recv_addr && !buf_page_peek(space, page_no)) {
1359
mutex_enter(&(recv_sys->mutex));
1361
if (recv_addr->state == RECV_NOT_PROCESSED) {
1362
recv_addr->state = RECV_BEING_READ;
1364
page_nos[n] = page_no;
1369
mutex_exit(&(recv_sys->mutex));
1373
buf_read_recv_pages(FALSE, space, page_nos, n);
1375
fprintf(stderr, "Recv pages at %lu n %lu\n", page_nos[0], n);
1380
/***********************************************************************
1381
Empties the hash table of stored log records, applying them to appropriate
1385
recv_apply_hashed_log_recs(
1386
/*=======================*/
1387
ibool allow_ibuf) /* in: if TRUE, also ibuf operations are
1388
allowed during the application; if FALSE,
1389
no ibuf operations are allowed, and after
1390
the application all file pages are flushed to
1391
disk and invalidated in buffer pool: this
1392
alternative means that no new log records
1393
can be generated during the application;
1394
the caller must in this case own the log
1397
recv_addr_t* recv_addr;
1403
ibool has_printed = FALSE;
1406
mutex_enter(&(recv_sys->mutex));
1408
if (recv_sys->apply_batch_on) {
1410
mutex_exit(&(recv_sys->mutex));
1412
os_thread_sleep(500000);
1417
ut_ad(!allow_ibuf == mutex_own(&log_sys->mutex));
1420
recv_no_ibuf_operations = TRUE;
1423
recv_sys->apply_log_recs = TRUE;
1424
recv_sys->apply_batch_on = TRUE;
1426
for (i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
1428
recv_addr = HASH_GET_FIRST(recv_sys->addr_hash, i);
1431
space = recv_addr->space;
1432
page_no = recv_addr->page_no;
1434
if (recv_addr->state == RECV_NOT_PROCESSED) {
1436
ut_print_timestamp(stderr);
1437
fputs(" InnoDB: Starting an"
1438
" apply batch of log records"
1439
" to the database...\n"
1440
"InnoDB: Progress in percents: ",
1445
mutex_exit(&(recv_sys->mutex));
1447
if (buf_page_peek(space, page_no)) {
1451
page = buf_page_get(space, page_no,
1454
#ifdef UNIV_SYNC_DEBUG
1455
buf_page_dbg_add_level(
1456
page, SYNC_NO_ORDER_CHECK);
1457
#endif /* UNIV_SYNC_DEBUG */
1458
recv_recover_page(FALSE, FALSE, page,
1462
recv_read_in_area(space, page_no);
1465
mutex_enter(&(recv_sys->mutex));
1468
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
1472
&& (i * 100) / hash_get_n_cells(recv_sys->addr_hash)
1474
/ hash_get_n_cells(recv_sys->addr_hash)) {
1476
fprintf(stderr, "%lu ", (ulong)
1478
/ hash_get_n_cells(recv_sys->addr_hash)));
1482
/* Wait until all the pages have been processed */
1484
while (recv_sys->n_addrs != 0) {
1486
mutex_exit(&(recv_sys->mutex));
1488
os_thread_sleep(500000);
1490
mutex_enter(&(recv_sys->mutex));
1495
fprintf(stderr, "\n");
1499
/* Flush all the file pages to disk and invalidate them in
the buffer pool */
1502
mutex_exit(&(recv_sys->mutex));
1503
mutex_exit(&(log_sys->mutex));
1505
n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX,
1507
ut_a(n_pages != ULINT_UNDEFINED);
1509
buf_flush_wait_batch_end(BUF_FLUSH_LIST);
1511
buf_pool_invalidate();
1513
mutex_enter(&(log_sys->mutex));
1514
mutex_enter(&(recv_sys->mutex));
1516
recv_no_ibuf_operations = FALSE;
1519
recv_sys->apply_log_recs = FALSE;
1520
recv_sys->apply_batch_on = FALSE;
1522
recv_sys_empty_hash();
1525
fprintf(stderr, "InnoDB: Apply batch completed\n");
1528
mutex_exit(&(recv_sys->mutex));
1531
/* This page is allocated from the buffer pool and used in the function
recv_apply_log_recs_for_backup() below */
1533
static page_t* recv_backup_application_page = NULL;
1535
/***********************************************************************
1536
Applies log records in the hash table to a backup. */
1539
recv_apply_log_recs_for_backup(void)
1540
/*================================*/
1542
recv_addr_t* recv_addr;
1550
recv_sys->apply_log_recs = TRUE;
1551
recv_sys->apply_batch_on = TRUE;
1553
if (recv_backup_application_page == NULL) {
1554
recv_backup_application_page = buf_frame_alloc();
1557
page = recv_backup_application_page;
1559
fputs("InnoDB: Starting an apply batch of log records"
1560
" to the database...\n"
1561
"InnoDB: Progress in percents: ", stderr);
1563
n_hash_cells = hash_get_n_cells(recv_sys->addr_hash);
1565
for (i = 0; i < n_hash_cells; i++) {
1566
/* The address hash table is externally chained */
1567
recv_addr = hash_get_nth_cell(recv_sys->addr_hash, i)->node;
1569
while (recv_addr != NULL) {
1571
if (!fil_tablespace_exists_in_mem(recv_addr->space)) {
1574
"InnoDB: Warning: cannot apply"
1576
" tablespace %lu page %lu,\n"
1577
"InnoDB: because tablespace with"
1578
" that id does not exist.\n",
1579
recv_addr->space, recv_addr->page_no);
1581
recv_addr->state = RECV_PROCESSED;
1583
ut_a(recv_sys->n_addrs);
1584
recv_sys->n_addrs--;
1586
goto skip_this_recv_addr;
1589
/* We simulate a page read made by the buffer pool, to
1590
make sure the recovery apparatus works ok, for
1591
example, the buf_frame_align() function. We must init
1592
the block corresponding to buf_pool->frame_zero
for the page */
1595
buf_page_init_for_backup_restore(
1596
recv_addr->space, recv_addr->page_no,
1597
buf_block_align(page));
1599
/* Extend the tablespace's last file if the page_no
1600
does not fall inside its bounds; we assume the last
1601
file is auto-extending, and ibbackup copied the file
1602
when it still was smaller */
1604
success = fil_extend_space_to_desired_size(
1606
recv_addr->space, recv_addr->page_no + 1);
1609
"InnoDB: Fatal error: cannot extend"
1610
" tablespace %lu to hold %lu pages\n",
1611
recv_addr->space, recv_addr->page_no);
1616
/* Read the page from the tablespace file using the
1617
fil0fil.c routines */
1619
error = fil_io(OS_FILE_READ, TRUE, recv_addr->space,
1620
recv_addr->page_no, 0, UNIV_PAGE_SIZE,
1622
if (error != DB_SUCCESS) {
1624
"InnoDB: Fatal error: cannot read"
1626
" %lu page number %lu\n",
1627
(ulong) recv_addr->space,
1628
(ulong) recv_addr->page_no);
1633
/* Apply the log records to this page */
1634
recv_recover_page(TRUE, FALSE, page, recv_addr->space,
1635
recv_addr->page_no);
1637
/* Write the page back to the tablespace file using the
1638
fil0fil.c routines */
1640
buf_flush_init_for_writing(
1641
page, mach_read_from_8(page + FIL_PAGE_LSN),
1642
recv_addr->space, recv_addr->page_no);
1644
error = fil_io(OS_FILE_WRITE, TRUE, recv_addr->space,
1645
recv_addr->page_no, 0, UNIV_PAGE_SIZE,
1647
skip_this_recv_addr:
1648
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
1651
if ((100 * i) / n_hash_cells
1652
!= (100 * (i + 1)) / n_hash_cells) {
1653
fprintf(stderr, "%lu ",
1654
(ulong) ((100 * i) / n_hash_cells));
1659
recv_sys_empty_hash();
1662
/***********************************************************************
1663
Tries to parse a single log record and returns its length. */
1668
/* out: length of the record, or 0 if the record was
not complete */
1670
byte* ptr, /* in: pointer to a buffer */
1671
byte* end_ptr,/* in: pointer to the buffer end */
1672
byte* type, /* out: type */
1673
ulint* space, /* out: space id */
1674
ulint* page_no,/* out: page number */
1675
byte** body) /* out: log record body start */
1681
if (ptr == end_ptr) {
1686
if (*ptr == MLOG_MULTI_REC_END) {
1693
if (*ptr == MLOG_DUMMY_RECORD) {
1696
*space = ULINT_UNDEFINED - 1; /* For debugging */
1701
new_ptr = mlog_parse_initial_log_record(ptr, end_ptr, type, space,
1705
if (UNIV_UNLIKELY(!new_ptr)) {
1710
/* Check that page_no is sensible */
1712
if (UNIV_UNLIKELY(*page_no > 0x8FFFFFFFUL)) {
1714
recv_sys->found_corrupt_log = TRUE;
1719
new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
1721
if (UNIV_UNLIKELY(new_ptr == NULL)) {
1726
if (*page_no > recv_max_parsed_page_no) {
1727
recv_max_parsed_page_no = *page_no;
1730
return(new_ptr - ptr);
1733
/***********************************************************
1734
Calculates the new value for lsn when more data is added to the log.
The lsn is advanced by len, plus LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE
for every time the added data fills up the payload area of a log block. */
1737
recv_calc_lsn_on_data_add(
1738
/*======================*/
/* out: lsn advanced by len and by the block
header and trailer bytes computed below */
1739
dulint lsn, /* in: old lsn */
1740
ulint len) /* in: this many bytes of data is added, log block
1741
headers not included */
1746
/* frag_len is the offset of lsn inside its log block, not counting
the block header */
frag_len = (ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE)
1747
- LOG_BLOCK_HDR_SIZE;
1748
/* Assert that frag_len is smaller than the payload size of one
log block (block size minus header and trailer) */
ut_ad(frag_len < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
1749
- LOG_BLOCK_TRL_SIZE);
1750
/* (len + frag_len) divided by the payload size of a block is the
number of times a block payload gets filled up; each of those adds
one header plus one trailer to the lsn distance */
lsn_len = len + ((len + frag_len)
1751
/ (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
1752
- LOG_BLOCK_TRL_SIZE))
1753
* (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
1755
return(ut_dulint_add(lsn, lsn_len));
1758
/***********************************************************
1759
Checks that the parser recognizes incomplete initial segments of a log
1760
record as incomplete. For every length i in 0 .. len - 1 the buffer
[ptr, ptr + i) is passed to recv_parse_log_rec() and the result is
asserted to be 0. */
1763
recv_check_incomplete_log_recs(
1764
/*===========================*/
1765
byte* ptr, /* in: pointer to a complete log record */
1766
ulint len) /* in: length of the log record */
1774
/* i is the end offset of the truncated record: it never reaches len,
so the complete record itself is not parsed here */
for (i = 0; i < len; i++) {
1775
ut_a(0 == recv_parse_log_rec(ptr, ptr + i, &type, &space,
1780
/***********************************************************
1781
Prints diagnostic info of corrupt log: the type, space id and page number
of the corrupt record, recv_sys->recovered_lsn, the type and the multi
flag of the previously parsed record, the offset of ptr in recv_sys->buf
and the offset of the previously parsed record. When the condition on
those two offsets holds, a hex dump of the parsing buffer around the two
records is printed as well. A warning that points to the
forcing-recovery documentation is written to stderr. */
1784
recv_report_corrupt_log(
1785
/*====================*/
1786
byte* ptr, /* in: pointer to corrupt log record */
1787
byte type, /* in: type of the record */
1788
ulint space, /* in: space id, this may also be garbage */
1789
ulint page_no)/* in: page number, this may also be garbage */
1792
"InnoDB: ############### CORRUPT LOG RECORD FOUND\n"
1793
"InnoDB: Log record type %lu, space id %lu, page number %lu\n"
1794
"InnoDB: Log parsing proceeded successfully up to %lu %lu\n"
1795
"InnoDB: Previous log record type %lu, is multi %lu\n"
1796
"InnoDB: Recv offset %lu, prev %lu\n",
1797
(ulong) type, (ulong) space, (ulong) page_no,
1798
(ulong) ut_dulint_get_high(recv_sys->recovered_lsn),
1799
(ulong) ut_dulint_get_low(recv_sys->recovered_lsn),
1800
(ulong) recv_previous_parsed_rec_type,
1801
(ulong) recv_previous_parsed_rec_is_multi,
1802
(ulong) (ptr - recv_sys->buf),
1803
(ulong) recv_previous_parsed_rec_offset);
1805
/* Guard for the hex dump: ptr's offset in recv_sys->buf plus 100 must
exceed the offset of the previously parsed record; the second operand
bounds the distance between the two (the limit itself is not visible
in this excerpt) */
if ((ulint)(ptr - recv_sys->buf + 100)
1806
> recv_previous_parsed_rec_offset
1807
&& (ulint)(ptr - recv_sys->buf + 100
1808
- recv_previous_parsed_rec_offset)
1810
fputs("InnoDB: Hex dump of corrupt log starting"
1811
" 100 bytes before the start\n"
1812
"InnoDB: of the previous log rec,\n"
1813
"InnoDB: and ending 100 bytes after the start"
1814
" of the corrupt rec:\n",
1817
/* The dumped length is the distance from the previous record to ptr
plus 200 bytes, i.e. 100 bytes of context on each side */
ut_print_buf(stderr,
1819
+ recv_previous_parsed_rec_offset - 100,
1820
ptr - recv_sys->buf + 200
1821
- recv_previous_parsed_rec_offset);
1825
fputs("InnoDB: WARNING: the log file may have been corrupt and it\n"
1826
"InnoDB: is possible that the log scan did not proceed\n"
1827
"InnoDB: far enough in recovery! Please run CHECK TABLE\n"
1828
"InnoDB: on your InnoDB tables to check that they are ok!\n"
1829
"InnoDB: If mysqld crashes after this recovery, look at\n"
1830
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
1831
"forcing-recovery.html\n"
1832
"InnoDB: about forcing recovery.\n", stderr);
1837
/***********************************************************
1838
Parses log records from a buffer and stores them to a hash table to wait
1839
merging to file pages. */
1842
recv_parse_log_recs(
1843
/*================*/
1844
/* out: currently always returns FALSE */
1845
ibool store_to_hash) /* in: TRUE if the records should be stored
1846
to the hash table; this is set to FALSE if just
1847
debug checking is needed */
1854
dulint new_recovered_lsn;
1862
ut_ad(mutex_own(&(log_sys->mutex)));
1863
ut_ad(!ut_dulint_is_zero(recv_sys->parse_start_lsn));
1865
ptr = recv_sys->buf + recv_sys->recovered_offset;
1867
end_ptr = recv_sys->buf + recv_sys->len;
1869
if (ptr == end_ptr) {
1874
single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG;
1876
if (single_rec || *ptr == MLOG_DUMMY_RECORD) {
1877
/* The mtr only modified a single page, or this is a file op */
1879
old_lsn = recv_sys->recovered_lsn;
1881
/* Try to parse a log record, fetching its type, space id,
1882
page no, and a pointer to the body of the log record */
1884
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
1887
if (len == 0 || recv_sys->found_corrupt_log) {
1888
if (recv_sys->found_corrupt_log) {
1890
recv_report_corrupt_log(ptr,
1891
type, space, page_no);
1897
new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
1899
if (ut_dulint_cmp(new_recovered_lsn, recv_sys->scanned_lsn)
1901
/* The log record filled a log block, and we require
1902
that also the next log block should have been scanned
in */
1908
recv_previous_parsed_rec_type = (ulint)type;
1909
recv_previous_parsed_rec_offset = recv_sys->recovered_offset;
1910
recv_previous_parsed_rec_is_multi = 0;
1912
recv_sys->recovered_offset += len;
1913
recv_sys->recovered_lsn = new_recovered_lsn;
1916
if (log_debug_writes) {
1918
"InnoDB: Parsed a single log rec"
1919
" type %lu len %lu space %lu page no %lu\n",
1920
(ulong) type, (ulong) len, (ulong) space,
1923
#endif /* UNIV_DEBUG */
1925
if (type == MLOG_DUMMY_RECORD) {
1928
} else if (store_to_hash && (type == MLOG_FILE_CREATE
1929
|| type == MLOG_FILE_RENAME
1930
|| type == MLOG_FILE_DELETE)) {
1931
#ifdef UNIV_HOTBACKUP
1932
if (recv_replay_file_ops) {
1934
/* In ibbackup --apply-log, replay an .ibd file
1935
operation, if possible; note that
1936
fil_path_to_mysql_datadir is set in ibbackup to
1937
point to the datadir we should use there */
1939
if (NULL == fil_op_log_parse_or_replay(
1940
body, end_ptr, type,
1943
"InnoDB: Error: file op"
1944
" log record of type %lu"
1945
" space %lu not complete in\n"
1946
"InnoDB: the replay phase."
1955
/* In normal mysqld crash recovery we do not try to
1956
replay file operations */
1957
} else if (store_to_hash) {
1958
recv_add_to_hash_table(type, space, page_no, body,
1960
recv_sys->recovered_lsn);
1962
#ifdef UNIV_LOG_DEBUG
1963
recv_check_incomplete_log_recs(ptr, len);
1964
#endif/* UNIV_LOG_DEBUG */
1967
/* Check that all the records associated with the single mtr
1968
are included within the buffer */
1974
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
1976
if (len == 0 || recv_sys->found_corrupt_log) {
1978
if (recv_sys->found_corrupt_log) {
1980
recv_report_corrupt_log(
1981
ptr, type, space, page_no);
1987
recv_previous_parsed_rec_type = (ulint)type;
1988
recv_previous_parsed_rec_offset
1989
= recv_sys->recovered_offset + total_len;
1990
recv_previous_parsed_rec_is_multi = 1;
1992
if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
1993
#ifdef UNIV_LOG_DEBUG
1994
recv_check_incomplete_log_recs(ptr, len);
1995
#endif /* UNIV_LOG_DEBUG */
1999
if (log_debug_writes) {
2001
"InnoDB: Parsed a multi log rec"
2003
" space %lu page no %lu\n",
2004
(ulong) type, (ulong) len,
2005
(ulong) space, (ulong) page_no);
2007
#endif /* UNIV_DEBUG */
2014
if (type == MLOG_MULTI_REC_END) {
2016
/* Found the end mark for the records */
2022
new_recovered_lsn = recv_calc_lsn_on_data_add(
2023
recv_sys->recovered_lsn, total_len);
2025
if (ut_dulint_cmp(new_recovered_lsn, recv_sys->scanned_lsn)
2027
/* The log record filled a log block, and we require
2028
that also the next log block should have been scanned
in */
2034
/* Add all the records to the hash table */
2036
ptr = recv_sys->buf + recv_sys->recovered_offset;
2039
old_lsn = recv_sys->recovered_lsn;
2040
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
2042
if (recv_sys->found_corrupt_log) {
2044
recv_report_corrupt_log(ptr,
2045
type, space, page_no);
2049
ut_a(0 == ((ulint)*ptr & MLOG_SINGLE_REC_FLAG));
2051
recv_sys->recovered_offset += len;
2052
recv_sys->recovered_lsn
2053
= recv_calc_lsn_on_data_add(old_lsn, len);
2054
if (type == MLOG_MULTI_REC_END) {
2056
/* Found the end mark for the records */
2061
if (store_to_hash) {
2062
recv_add_to_hash_table(type, space, page_no,
2075
/***********************************************************
2076
Adds data from a new log block to the parsing buffer of recv_sys if
2077
recv_sys->parse_start_lsn is non-zero. */
2080
recv_sys_add_to_parsing_buf(
2081
/*========================*/
2082
/* out: TRUE if more data added */
2083
byte* log_block, /* in: log block */
2084
dulint scanned_lsn) /* in: lsn of how far we were able to find
2085
data in this log block */
2092
ut_ad(ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) >= 0);
2094
if (ut_dulint_is_zero(recv_sys->parse_start_lsn)) {
2095
/* Cannot start parsing yet because no start point for
it found */
2101
data_len = log_block_get_data_len(log_block);
2103
if (ut_dulint_cmp(recv_sys->parse_start_lsn, scanned_lsn) >= 0) {
2107
} else if (ut_dulint_cmp(recv_sys->scanned_lsn, scanned_lsn) >= 0) {
2111
} else if (ut_dulint_cmp(recv_sys->parse_start_lsn,
2112
recv_sys->scanned_lsn) > 0) {
2113
more_len = ut_dulint_minus(scanned_lsn,
2114
recv_sys->parse_start_lsn);
2116
more_len = ut_dulint_minus(scanned_lsn, recv_sys->scanned_lsn);
2119
if (more_len == 0) {
2124
ut_ad(data_len >= more_len);
2126
start_offset = data_len - more_len;
2128
if (start_offset < LOG_BLOCK_HDR_SIZE) {
2129
start_offset = LOG_BLOCK_HDR_SIZE;
2132
end_offset = data_len;
2134
if (end_offset > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
2135
end_offset = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
2138
ut_ad(start_offset <= end_offset);
2140
if (start_offset < end_offset) {
2141
ut_memcpy(recv_sys->buf + recv_sys->len,
2142
log_block + start_offset, end_offset - start_offset);
2144
recv_sys->len += end_offset - start_offset;
2146
ut_a(recv_sys->len <= RECV_PARSING_BUF_SIZE);
2152
/***********************************************************
2153
Moves the parsing buffer data left to the buffer start: the bytes of
recv_sys->buf from recv_sys->recovered_offset up to recv_sys->len are
moved to offset 0, and len and recovered_offset are adjusted to match. */
2156
recv_sys_justify_left_parsing_buf(void)
2157
/*===================================*/
2159
/* Source and destination may overlap, hence a memmove-style copy */
ut_memmove(recv_sys->buf, recv_sys->buf + recv_sys->recovered_offset,
2160
recv_sys->len - recv_sys->recovered_offset);
2162
/* The buffer now holds only the bytes that were at or after
recovered_offset */
recv_sys->len -= recv_sys->recovered_offset;
2164
recv_sys->recovered_offset = 0;
2167
/***********************************************************
2168
Scans log from a buffer and stores new log data to the parsing buffer. Parses
2169
and hashes the log records if new data found. */
2174
/* out: TRUE if limit_lsn has been reached, or
2175
not able to scan any more in this log group */
2176
ibool apply_automatically,/* in: TRUE if we want this function to
2177
apply log records automatically when the
2178
hash table becomes full; in the hot backup tool
2179
the tool does the applying, not this
function */
2181
ulint available_memory,/* in: we let the hash table of recs to grow
2182
to this size, at the maximum */
2183
ibool store_to_hash, /* in: TRUE if the records should be stored
2184
to the hash table; this is set to FALSE if just
2185
debug checking is needed */
2186
byte* buf, /* in: buffer containing a log segment or
garbage */
2188
ulint len, /* in: buffer length */
2189
dulint start_lsn, /* in: buffer start lsn */
2190
dulint* contiguous_lsn, /* in/out: it is known that all log groups
2191
contain contiguous log data up to this lsn */
2192
dulint* group_scanned_lsn)/* out: scanning succeeded up to this lsn */
2201
ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
2202
ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
2204
ut_a(apply_automatically <= TRUE);
2205
ut_a(store_to_hash <= TRUE);
2210
scanned_lsn = start_lsn;
2213
while (log_block < buf + len && !finished) {
2215
no = log_block_get_hdr_no(log_block);
2217
fprintf(stderr, "Log block header no %lu\n", no);
2219
fprintf(stderr, "Scanned lsn no %lu\n",
2220
log_block_convert_lsn_to_no(scanned_lsn));
2222
if (no != log_block_convert_lsn_to_no(scanned_lsn)
2223
|| !log_block_checksum_is_ok_or_old_format(log_block)) {
2225
if (no == log_block_convert_lsn_to_no(scanned_lsn)
2226
&& !log_block_checksum_is_ok_or_old_format(
2229
"InnoDB: Log block no %lu at"
2230
" lsn %lu %lu has\n"
2231
"InnoDB: ok header, but checksum field"
2232
" contains %lu, should be %lu\n",
2234
(ulong) ut_dulint_get_high(
2236
(ulong) ut_dulint_get_low(scanned_lsn),
2237
(ulong) log_block_get_checksum(
2239
(ulong) log_block_calc_checksum(
2243
/* Garbage or an incompletely written log block */
2250
if (log_block_get_flush_bit(log_block)) {
2251
/* This block was a start of a log flush operation:
2252
we know that the previous flush operation must have
2253
been completed for all log groups before this block
2254
can have been flushed to any of the groups. Therefore,
2255
we know that log data is contiguous up to scanned_lsn
2256
in all non-corrupt log groups. */
2258
if (ut_dulint_cmp(scanned_lsn, *contiguous_lsn) > 0) {
2259
*contiguous_lsn = scanned_lsn;
2263
data_len = log_block_get_data_len(log_block);
2265
if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE))
2266
&& (ut_dulint_cmp(ut_dulint_add(scanned_lsn, data_len),
2267
recv_sys->scanned_lsn) > 0)
2268
&& (recv_sys->scanned_checkpoint_no > 0)
2269
&& (log_block_get_checkpoint_no(log_block)
2270
< recv_sys->scanned_checkpoint_no)
2271
&& (recv_sys->scanned_checkpoint_no
2272
- log_block_get_checkpoint_no(log_block)
2275
/* Garbage from a log buffer flush which was made
2276
before the most recent database recovery */
2279
#ifdef UNIV_LOG_DEBUG
2280
/* This is not really an error, but currently
2281
we stop here in the debug version: */
2288
if (ut_dulint_is_zero(recv_sys->parse_start_lsn)
2289
&& (log_block_get_first_rec_group(log_block) > 0)) {
2291
/* We found a point from which to start the parsing
of log records */
2294
recv_sys->parse_start_lsn
2295
= ut_dulint_add(scanned_lsn,
2296
log_block_get_first_rec_group(
2298
recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
2299
recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
2302
scanned_lsn = ut_dulint_add(scanned_lsn, data_len);
2304
if (ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) > 0) {
2306
/* We have found more entries. If this scan is
2307
of startup type, we must initiate crash recovery
2308
environment before parsing these log records. */
2310
if (recv_log_scan_is_startup_type
2311
&& !recv_needed_recovery) {
2314
"InnoDB: Log scan progressed"
2315
" past the checkpoint lsn %lu %lu\n",
2316
(ulong) ut_dulint_get_high(
2317
recv_sys->scanned_lsn),
2318
(ulong) ut_dulint_get_low(
2319
recv_sys->scanned_lsn));
2320
recv_init_crash_recovery();
2323
/* We were able to find more log data: add it to the
2324
parsing buffer if parse_start_lsn is already
non-zero */
2327
if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE
2328
>= RECV_PARSING_BUF_SIZE) {
2330
"InnoDB: Error: log parsing"
2332
" Recovery may have failed!\n");
2334
recv_sys->found_corrupt_log = TRUE;
2336
} else if (!recv_sys->found_corrupt_log) {
2337
more_data = recv_sys_add_to_parsing_buf(
2338
log_block, scanned_lsn);
2341
recv_sys->scanned_lsn = scanned_lsn;
2342
recv_sys->scanned_checkpoint_no
2343
= log_block_get_checkpoint_no(log_block);
2346
if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
2347
/* Log data for this group ends here */
2351
log_block += OS_FILE_LOG_BLOCK_SIZE;
2355
*group_scanned_lsn = scanned_lsn;
2357
if (recv_needed_recovery
2358
|| (recv_is_from_backup && !recv_is_making_a_backup)) {
2359
recv_scan_print_counter++;
2361
if (finished || (recv_scan_print_counter % 80 == 0)) {
2364
"InnoDB: Doing recovery: scanned up to"
2365
" log sequence number %lu %lu\n",
2366
(ulong) ut_dulint_get_high(*group_scanned_lsn),
2367
(ulong) ut_dulint_get_low(*group_scanned_lsn));
2371
if (more_data && !recv_sys->found_corrupt_log) {
2372
/* Try to parse more log records */
2374
recv_parse_log_recs(store_to_hash);
2376
if (store_to_hash && mem_heap_get_size(recv_sys->heap)
2378
&& apply_automatically) {
2380
/* Hash table of log records has grown too big:
2381
empty it; FALSE means no ibuf operations
2382
allowed, as we cannot add new records to the
2383
log yet: they would be produced by ibuf
operations */
2386
recv_apply_hashed_log_recs(FALSE);
2389
if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
2390
/* Move parsing buffer data to the buffer start */
2392
recv_sys_justify_left_parsing_buf();
2399
/***********************************************************
2400
Scans log from a log group. Starting at *contiguous_lsn, a segment of
2401
RECV_SCAN_SIZE bytes is read from the group into log_sys->buf and passed
to recv_scan_log_recs(), which stores new log data to the parsing buffer
and parses and hashes the log records if new data is found. */
2404
recv_group_scan_log_recs(
2405
/*=====================*/
2406
log_group_t* group, /* in: log group */
2407
dulint* contiguous_lsn, /* in/out: it is known that all log groups
2408
contain contiguous log data up to this lsn */
2409
dulint* group_scanned_lsn)/* out: scanning succeeded up to this lsn */
2417
start_lsn = *contiguous_lsn;
2420
/* One scan step covers the lsn range [start_lsn, end_lsn) */
end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
2422
log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
2423
group, start_lsn, end_lsn);
2425
/* Arguments: apply_automatically = TRUE; available_memory = the
buffer pool frames other than recv_n_pool_free_frames, in bytes;
store_to_hash = TRUE; then the segment just read and its start lsn */
finished = recv_scan_log_recs(
2426
TRUE, (buf_pool->n_frames - recv_n_pool_free_frames)
2427
* UNIV_PAGE_SIZE, TRUE, log_sys->buf, RECV_SCAN_SIZE,
2428
start_lsn, contiguous_lsn, group_scanned_lsn);
2429
/* NOTE(review): advancing start_lsn suggests the steps above repeat
until finished is set; the loop header is not visible here - confirm */
start_lsn = end_lsn;
2433
if (log_debug_writes) {
2435
"InnoDB: Scanned group %lu up to"
2436
" log sequence number %lu %lu\n",
2438
(ulong) ut_dulint_get_high(*group_scanned_lsn),
2439
(ulong) ut_dulint_get_low(*group_scanned_lsn));
2441
#endif /* UNIV_DEBUG */
2444
/***********************************************************
2445
Initialize crash recovery environment. Can be called iff
2446
recv_needed_recovery == FALSE. */
2449
recv_init_crash_recovery(void)
2450
/*==========================*/
2452
ut_a(!recv_needed_recovery);
2454
recv_needed_recovery = TRUE;
2456
ut_print_timestamp(stderr);
2459
" InnoDB: Database was not"
2460
" shut down normally!\n"
2461
"InnoDB: Starting crash recovery.\n");
2464
"InnoDB: Reading tablespace information"
2465
" from the .ibd files...\n");
2467
fil_load_single_table_tablespaces();
2469
/* If we are using the doublewrite method, we will
2470
check if there are half-written pages in data files,
2471
and restore them from the doublewrite buffer if
possible */
2474
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
2477
"InnoDB: Restoring possible"
2478
" half-written data pages from"
2479
" the doublewrite\n"
2480
"InnoDB: buffer...\n");
2481
trx_sys_doublewrite_init_or_restore_pages(TRUE);
2485
/************************************************************
2486
Recovers from a checkpoint. When this function returns, the database is able
2487
to start processing of new user transactions, but the function
2488
recv_recovery_from_checkpoint_finish should be called later to complete
2489
the recovery and free the resources used in it. */
2492
recv_recovery_from_checkpoint_start(
2493
/*================================*/
2494
/* out: error code or DB_SUCCESS */
2495
ulint type, /* in: LOG_CHECKPOINT or LOG_ARCHIVE */
2496
dulint limit_lsn, /* in: recover up to this lsn if possible */
2497
dulint min_flushed_lsn,/* in: min flushed lsn from data files */
2498
dulint max_flushed_lsn)/* in: max flushed lsn from data files */
2501
log_group_t* max_cp_group;
2502
log_group_t* up_to_date_group;
2504
dulint checkpoint_lsn;
2505
dulint checkpoint_no;
2506
dulint old_scanned_lsn;
2507
dulint group_scanned_lsn;
2508
dulint contiguous_lsn;
2509
dulint archived_lsn;
2512
byte log_hdr_buf[LOG_FILE_HDR_SIZE];
2515
ut_ad((type != LOG_CHECKPOINT)
2516
|| (ut_dulint_cmp(limit_lsn, ut_dulint_max) == 0));
2518
if (type == LOG_CHECKPOINT) {
2520
recv_sys_init(FALSE, buf_pool_get_curr_size());
2523
if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
2525
"InnoDB: The user has set SRV_FORCE_NO_LOG_REDO on\n");
2527
"InnoDB: Skipping log redo\n");
2532
recv_recovery_on = TRUE;
2534
recv_sys->limit_lsn = limit_lsn;
2536
mutex_enter(&(log_sys->mutex));
2538
/* Look for the latest checkpoint from any of the log groups */
2540
err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);
2542
if (err != DB_SUCCESS) {
2544
mutex_exit(&(log_sys->mutex));
2549
log_group_read_checkpoint_info(max_cp_group, max_cp_field);
2551
buf = log_sys->checkpoint_buf;
2553
checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
2554
checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
2555
archived_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
2557
/* Read the first log file header to print a note if this is
2558
a recovery from a restored InnoDB Hot Backup */
2560
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id,
2561
0, 0, LOG_FILE_HDR_SIZE,
2562
log_hdr_buf, max_cp_group);
2564
if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
2565
(byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
2566
/* This log file was created by ibbackup --restore: print
2567
a note to the user about it */
2570
"InnoDB: The log file was created by"
2571
" ibbackup --apply-log at\n"
2573
log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP);
2575
"InnoDB: NOTE: the following crash recovery"
2576
" is part of a normal restore.\n");
2578
/* Wipe over the label now */
2580
memset(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
2582
/* Write to the log file to wipe over the label */
2583
fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE,
2584
max_cp_group->space_id,
2585
0, 0, OS_FILE_LOG_BLOCK_SIZE,
2586
log_hdr_buf, max_cp_group);
2589
#ifdef UNIV_LOG_ARCHIVE
2590
group = UT_LIST_GET_FIRST(log_sys->log_groups);
2593
log_checkpoint_get_nth_group_info(buf, group->id,
2594
&(group->archived_file_no),
2595
&(group->archived_offset));
2597
group = UT_LIST_GET_NEXT(log_groups, group);
2599
#endif /* UNIV_LOG_ARCHIVE */
2601
if (type == LOG_CHECKPOINT) {
2602
/* Start reading the log groups from the checkpoint lsn up. The
2603
variable contiguous_lsn contains an lsn up to which the log is
2604
known to be contiguously written to all log groups. */
2606
recv_sys->parse_start_lsn = checkpoint_lsn;
2607
recv_sys->scanned_lsn = checkpoint_lsn;
2608
recv_sys->scanned_checkpoint_no = 0;
2609
recv_sys->recovered_lsn = checkpoint_lsn;
2611
srv_start_lsn = checkpoint_lsn;
2614
contiguous_lsn = ut_dulint_align_down(recv_sys->scanned_lsn,
2615
OS_FILE_LOG_BLOCK_SIZE);
2616
if (type == LOG_ARCHIVE) {
2617
/* Try to recover the remaining part from logs: first from
2618
the logs of the archived group */
2620
group = recv_sys->archive_group;
2621
capacity = log_group_get_capacity(group);
2623
if ((ut_dulint_cmp(recv_sys->scanned_lsn, ut_dulint_add(
2624
checkpoint_lsn, capacity)) > 0)
2625
|| (ut_dulint_cmp(checkpoint_lsn, ut_dulint_add(
2626
recv_sys->scanned_lsn, capacity))
2629
mutex_exit(&(log_sys->mutex));
2631
/* The group does not contain enough log: probably
2632
an archived log file was missing or corrupt */
2637
recv_group_scan_log_recs(group, &contiguous_lsn,
2638
&group_scanned_lsn);
2639
if (ut_dulint_cmp(recv_sys->scanned_lsn, checkpoint_lsn) < 0) {
2641
mutex_exit(&(log_sys->mutex));
2643
/* The group did not contain enough log: an archived
2644
log file was missing or invalid, or the log group
was corrupt */
2650
group->scanned_lsn = group_scanned_lsn;
2651
up_to_date_group = group;
2653
up_to_date_group = max_cp_group;
2656
ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
2658
group = UT_LIST_GET_FIRST(log_sys->log_groups);
2660
if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) {
2661
group = UT_LIST_GET_NEXT(log_groups, group);
2664
/* Set the flag to publish that we are doing startup scan. */
2665
recv_log_scan_is_startup_type = (type == LOG_CHECKPOINT);
2667
old_scanned_lsn = recv_sys->scanned_lsn;
2669
recv_group_scan_log_recs(group, &contiguous_lsn,
2670
&group_scanned_lsn);
2671
group->scanned_lsn = group_scanned_lsn;
2673
if (ut_dulint_cmp(old_scanned_lsn, group_scanned_lsn) < 0) {
2674
/* We found a more up-to-date group */
2676
up_to_date_group = group;
2679
if ((type == LOG_ARCHIVE)
2680
&& (group == recv_sys->archive_group)) {
2681
group = UT_LIST_GET_NEXT(log_groups, group);
2684
group = UT_LIST_GET_NEXT(log_groups, group);
2687
/* Done with startup scan. Clear the flag. */
2688
recv_log_scan_is_startup_type = FALSE;
2689
if (type == LOG_CHECKPOINT) {
2690
/* NOTE: we always do a 'recovery' at startup, but only if
2691
there is something wrong we will print a message to the
2692
user about recovery: */
2694
if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) != 0
2695
|| ut_dulint_cmp(checkpoint_lsn, min_flushed_lsn) != 0) {
2697
if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn)
2700
"InnoDB: #########################"
2701
"#################################\n"
2704
"InnoDB: The log sequence number"
2705
" in ibdata files is higher\n"
2706
"InnoDB: than the log sequence number"
2707
" in the ib_logfiles! Are you sure\n"
2708
"InnoDB: you are using the right"
2709
" ib_logfiles to start up"
2711
"InnoDB: Log sequence number in"
2712
" ib_logfiles is %lu %lu, log\n"
2713
"InnoDB: sequence numbers stamped"
2714
" to ibdata file headers are between\n"
2715
"InnoDB: %lu %lu and %lu %lu.\n"
2716
"InnoDB: #########################"
2717
"#################################\n",
2718
(ulong) ut_dulint_get_high(
2720
(ulong) ut_dulint_get_low(
2722
(ulong) ut_dulint_get_high(
2724
(ulong) ut_dulint_get_low(
2726
(ulong) ut_dulint_get_high(
2728
(ulong) ut_dulint_get_low(
2734
if (!recv_needed_recovery) {
2736
"InnoDB: The log sequence number"
2737
" in ibdata files does not match\n"
2738
"InnoDB: the log sequence number"
2739
" in the ib_logfiles!\n");
2740
recv_init_crash_recovery();
2744
if (!recv_needed_recovery) {
2745
/* Init the doublewrite buffer memory structure */
2746
trx_sys_doublewrite_init_or_restore_pages(FALSE);
2750
/* We currently have only one log group */
2751
if (ut_dulint_cmp(group_scanned_lsn, checkpoint_lsn) < 0) {
2752
ut_print_timestamp(stderr);
2754
" InnoDB: ERROR: We were only able to scan the log"
2756
"InnoDB: %lu %lu, but a checkpoint was at %lu %lu.\n"
2757
"InnoDB: It is possible that"
2758
" the database is now corrupt!\n",
2759
(ulong) ut_dulint_get_high(group_scanned_lsn),
2760
(ulong) ut_dulint_get_low(group_scanned_lsn),
2761
(ulong) ut_dulint_get_high(checkpoint_lsn),
2762
(ulong) ut_dulint_get_low(checkpoint_lsn));
2765
if (ut_dulint_cmp(group_scanned_lsn, recv_max_page_lsn) < 0) {
2766
ut_print_timestamp(stderr);
2768
" InnoDB: ERROR: We were only able to scan the log"
2770
"InnoDB: but a database page a had an lsn %lu %lu."
2771
" It is possible that the\n"
2772
"InnoDB: database is now corrupt!\n",
2773
(ulong) ut_dulint_get_high(group_scanned_lsn),
2774
(ulong) ut_dulint_get_low(group_scanned_lsn),
2775
(ulong) ut_dulint_get_high(recv_max_page_lsn),
2776
(ulong) ut_dulint_get_low(recv_max_page_lsn));
2779
if (ut_dulint_cmp(recv_sys->recovered_lsn, checkpoint_lsn) < 0) {
2781
mutex_exit(&(log_sys->mutex));
2783
if (ut_dulint_cmp(recv_sys->recovered_lsn, limit_lsn) >= 0) {
2793
/* Synchronize the uncorrupted log groups to the most up-to-date log
2794
group; we also copy checkpoint info to groups */
2796
log_sys->next_checkpoint_lsn = checkpoint_lsn;
2797
log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1);
2799
#ifdef UNIV_LOG_ARCHIVE
2800
log_sys->archived_lsn = archived_lsn;
2801
#endif /* UNIV_LOG_ARCHIVE */
2803
recv_synchronize_groups(up_to_date_group);
2805
if (!recv_needed_recovery) {
2806
ut_a(ut_dulint_cmp(checkpoint_lsn,
2807
recv_sys->recovered_lsn) == 0);
2810
srv_start_lsn = recv_sys->recovered_lsn;
2813
log_sys->lsn = recv_sys->recovered_lsn;
2815
ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE);
2817
log_sys->buf_free = ut_dulint_get_low(log_sys->lsn)
2818
% OS_FILE_LOG_BLOCK_SIZE;
2819
log_sys->buf_next_to_write = log_sys->buf_free;
2820
log_sys->written_to_some_lsn = log_sys->lsn;
2821
log_sys->written_to_all_lsn = log_sys->lsn;
2823
log_sys->last_checkpoint_lsn = checkpoint_lsn;
2825
log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1);
2827
#ifdef UNIV_LOG_ARCHIVE
2828
if (ut_dulint_cmp(archived_lsn, ut_dulint_max) == 0) {
2830
log_sys->archiving_state = LOG_ARCH_OFF;
2832
#endif /* UNIV_LOG_ARCHIVE */
2834
mutex_enter(&(recv_sys->mutex));
2836
recv_sys->apply_log_recs = TRUE;
2838
mutex_exit(&(recv_sys->mutex));
2840
mutex_exit(&(log_sys->mutex));
2842
recv_lsn_checks_on = TRUE;
2844
/* The database is now ready to start almost normal processing of user
2845
transactions: transaction rollbacks and the application of the log
2846
records in the hash table can be run in background. */
2851
/************************************************************
2852
Completes recovery from a checkpoint. As far as this copy of the file
shows, the function: applies the hashed log records to the file pages
unless srv_force_recovery is SRV_FORCE_NO_LOG_REDO or higher; calls
trx_sys_print_mysql_master_log_pos() and
trx_sys_print_mysql_binlog_offset() if recv_needed_recovery is set;
emits a warning if recv_sys->found_corrupt_log is set; clears
recv_recovery_on; and, when srv_force_recovery is below
SRV_FORCE_NO_TRX_UNDO, goes on to the rollback of uncommitted
transactions.
NOTE(review): several original lines (return type, braces, fprintf
openers, some preprocessor partners) are absent from this copy. */
2855
recv_recovery_from_checkpoint_finish(void)
2856
/*======================================*/
2860
/* Apply the hashed log records to the respective file pages */
2862
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
2864
recv_apply_hashed_log_recs(TRUE);
2868
if (log_debug_writes) {
2870
"InnoDB: Log records applied to the database\n");
2872
#endif /* UNIV_DEBUG */
2874
/* Only when the startup scan decided that recovery was needed */
if (recv_needed_recovery) {
2875
trx_sys_print_mysql_master_log_pos();
2876
trx_sys_print_mysql_binlog_offset();
2879
/* The flag is set elsewhere in the recovery system; here it only
triggers the warning text below */
if (recv_sys->found_corrupt_log) {
2882
"InnoDB: WARNING: the log file may have been"
2884
"InnoDB: is possible that the log scan or parsing"
2885
" did not proceed\n"
2886
"InnoDB: far enough in recovery. Please run"
2888
"InnoDB: on your InnoDB tables to check that"
2890
"InnoDB: It may be safest to recover your"
2891
" InnoDB database from\n"
2892
"InnoDB: a backup!\n");
2895
/* Free the resources of the recovery system */
2897
recv_recovery_on = FALSE;
2899
#ifndef UNIV_LOG_DEBUG
2903
#ifdef UNIV_SYNC_DEBUG
2904
/* Wait for a while so that created threads have time to suspend
2905
themselves before we switch the latching order checks on */
2906
/* NOTE(review): 1000000 is presumably in microseconds (one second);
confirm against os_thread_sleep() */
os_thread_sleep(1000000);
2908
/* Switch latching order checks on in sync0sync.c */
2909
sync_order_checks_on = TRUE;
2911
/* NOTE(review): the comment that follows the next line has lost its
closing delimiter in this copy of the file; it must be restored before
the os_thread_create() call can be compiled */
if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
2912
/* Rollback the uncommitted transactions which have no user
2915
os_thread_create(trx_rollback_or_clean_all_without_sess,
2920
/**********************************************************
2921
Resets the logs. The contents of log files will be lost!
The caller must own log_sys->mutex (asserted below). The mutex is
released while the checkpoints are made and is acquired again
afterwards.
NOTE(review): the line carrying the function name is absent from this
copy; the parameter list matches the recv_reset_logs() call made from
the archive recovery code later in this file. */
2926
dulint lsn, /* in: reset to this lsn rounded up to
2927
be divisible by OS_FILE_LOG_BLOCK_SIZE,
2928
after which we add LOG_BLOCK_HDR_SIZE */
2929
#ifdef UNIV_LOG_ARCHIVE
2930
ulint arch_log_no, /* in: next archived log file number */
2931
#endif /* UNIV_LOG_ARCHIVE */
2932
ibool new_logs_created)/* in: TRUE if resetting logs is done
2933
at the log creation; FALSE if it is done
2934
after archive recovery */
2938
ut_ad(mutex_own(&(log_sys->mutex)));
2940
/* Round the requested lsn up to a log block boundary */
log_sys->lsn = ut_dulint_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
2942
/* Walk the log groups (the loop header itself is not present in this
copy): each group is made to start at the new lsn, positioned right
after the log file header */
group = UT_LIST_GET_FIRST(log_sys->log_groups);
2945
group->lsn = log_sys->lsn;
2946
group->lsn_offset = LOG_FILE_HDR_SIZE;
2947
#ifdef UNIV_LOG_ARCHIVE
2948
group->archived_file_no = arch_log_no;
2949
group->archived_offset = 0;
2950
#endif /* UNIV_LOG_ARCHIVE */
2952
/* Existing log files (reset after archive recovery) are additionally
passed to recv_truncate_group() with every lsn argument equal to the
new group lsn */
if (!new_logs_created) {
2953
recv_truncate_group(group, group->lsn, group->lsn,
2954
group->lsn, group->lsn);
2957
group = UT_LIST_GET_NEXT(log_groups, group);
2960
/* Restart the log system bookkeeping: nothing is pending in the
buffer, everything up to log_sys->lsn counts as written, and the
checkpoint number and lsn start again from zero */
log_sys->buf_next_to_write = 0;
2961
log_sys->written_to_some_lsn = log_sys->lsn;
2962
log_sys->written_to_all_lsn = log_sys->lsn;
2964
log_sys->next_checkpoint_no = ut_dulint_zero;
2965
log_sys->last_checkpoint_lsn = ut_dulint_zero;
2967
#ifdef UNIV_LOG_ARCHIVE
2968
log_sys->archived_lsn = log_sys->lsn;
2969
#endif /* UNIV_LOG_ARCHIVE */
2971
/* Initialize the first block in the log buffer, then move buf_free
and log_sys->lsn past the block header */
log_block_init(log_sys->buf, log_sys->lsn);
2972
log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
2974
log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
2975
log_sys->lsn = ut_dulint_add(log_sys->lsn, LOG_BLOCK_HDR_SIZE);
2977
/* The mutex is dropped around the checkpoint calls; presumably
log_make_checkpoint_at() acquires it itself - TODO confirm */
mutex_exit(&(log_sys->mutex));
2979
/* Reset the checkpoint fields in logs */
2981
/* NOTE(review): two identical calls; presumably one per checkpoint
slot kept in the log file header - confirm before removing either */
log_make_checkpoint_at(ut_dulint_max, TRUE);
2982
log_make_checkpoint_at(ut_dulint_max, TRUE);
2984
mutex_enter(&(log_sys->mutex));
2987
#ifdef UNIV_HOTBACKUP
2988
/**********************************************************
2989
Creates new log files after a backup has been restored.
For each of the n_log_files files the path is built as
log_dir + "ib_logfile" + index, the file is created and its size is set
to log_file_size. A log file header followed by one initial log block
is then prepared in a zero-filled buffer and written to the start of
file number 0.
NOTE(review): log_dir is concatenated with the base name without any
separator, so it apparently has to end with a path separator already -
confirm with the callers.
NOTE(review): local declarations, braces, fprintf openers and the
failure branches are absent from this copy of the file. */
2992
recv_reset_log_files_for_backup(
2993
/*============================*/
2994
const char* log_dir, /* in: log file directory path */
2995
ulint n_log_files, /* in: number of log files */
2996
ulint log_file_size, /* in: log file size */
2997
dulint lsn) /* in: new start lsn, must be
2998
divisible by OS_FILE_LOG_BLOCK_SIZE */
3006
static const char ib_logfile_basename[] = "ib_logfile";
3008
log_dir_len = strlen(log_dir);
3009
/* NOTE(review): the comment below has lost its closing delimiter in
this copy of the file; until it is restored, everything down to the end
of the next comment is lexically part of it */
/* full path name of ib_logfile consists of log dir path + basename
3010
+ number. This must fit in the name buffer.
3012
ut_a(log_dir_len + strlen(ib_logfile_basename) + 11 < sizeof(name));
3014
buf = ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
3015
memset(buf, '\0', LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
3017
for (i = 0; i < n_log_files; i++) {
3019
sprintf(name, "%s%s%lu", log_dir,
3020
ib_logfile_basename, (ulong)i);
3022
log_file = os_file_create_simple(name, OS_FILE_CREATE,
3023
OS_FILE_READ_WRITE, &success);
3026
"InnoDB: Cannot create %s. Check that"
3027
" the file does not exist yet.\n", name);
3033
"Setting log file size to %lu %lu\n",
3034
(ulong) ut_get_high32(log_file_size),
3035
(ulong) log_file_size & 0xFFFFFFFFUL);
3037
success = os_file_set_size(name, log_file,
3038
log_file_size & 0xFFFFFFFFUL,
3039
ut_get_high32(log_file_size));
3043
"InnoDB: Cannot set %s size to %lu %lu\n",
3044
name, (ulong) ut_get_high32(log_file_size),
3045
(ulong) (log_file_size & 0xFFFFFFFFUL));
3049
os_file_flush(log_file);
3050
os_file_close(log_file);
3053
/* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */
3055
/* buf holds the file header first and, at offset LOG_FILE_HDR_SIZE,
one log block; both are filled in here */
log_reset_first_header_and_checkpoint(buf, lsn);
3057
log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn);
3058
log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
3059
LOG_BLOCK_HDR_SIZE);
3060
/* Reopen log file number 0, which was created above */
sprintf(name, "%s%s%lu", log_dir, ib_logfile_basename, (ulong)0);
3062
log_file = os_file_create_simple(name, OS_FILE_OPEN,
3063
OS_FILE_READ_WRITE, &success);
3065
fprintf(stderr, "InnoDB: Cannot open %s.\n", name);
3070
/* Write header + first block at offset 0 of the file.
NOTE(review): the result of os_file_write() is not checked in the
lines visible here */
os_file_write(name, log_file, buf, 0, 0,
3071
LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
3072
os_file_flush(log_file);
3073
os_file_close(log_file);
3077
#endif /* UNIV_HOTBACKUP */
3079
#ifdef UNIV_LOG_ARCHIVE
3080
/**********************************************************
3081
Reads from the archive of a log group and performs recovery.
The archived file named after group->id and group->archived_file_no is
opened; if that fails the operator is asked on the terminal whether
more files will be copied. The file is then added to the archive file
space, its header is validated (group id, file number, completion
flag, start lsn) and its contents are read in chunks of at most
RECV_SCAN_SIZE bytes and passed to recv_scan_log_recs().
NOTE(review): return statements, braces, fprintf openers, the read loop
header and the try_open_again label are absent from this copy. */
3084
log_group_recover_from_archive_file(
3085
/*================================*/
3086
/* out: TRUE if no more complete
3087
consistent archive files */
3088
log_group_t* group) /* in: log group */
3090
os_file_t file_handle;
3092
dulint file_end_lsn;
3100
ulint file_size_high;
3109
/* Add the file to the archive file space; open the file */
3111
log_archived_file_name_gen(name, group->id, group->archived_file_no);
3113
file_handle = os_file_create(name, OS_FILE_OPEN,
3114
OS_FILE_LOG, OS_FILE_AIO, &ret);
3119
"InnoDB: Do you want to copy additional"
3120
" archived log files\n"
3121
"InnoDB: to the directory\n");
3123
"InnoDB: or were these all the files needed"
3126
"InnoDB: (Y == copy more files; N == this is all)?");
3128
/* Interactive: waits for a character on standard input. Only the
upper-case answers 'N' and 'Y' are compared below */
input_char = getchar();
3130
if (input_char == (int) 'N') {
3133
} else if (input_char == (int) 'Y') {
3135
goto try_open_again;
3141
ret = os_file_get_size(file_handle, &file_size, &file_size_high);
3144
/* The high word of the size must be zero, i.e. the file size has to
fit in file_size alone */
ut_a(file_size_high == 0);
3146
fprintf(stderr, "InnoDB: Opened archived log file %s\n", name);
3148
/* The handle was needed only for the size query; the reads below go
through fil_io() on the archive space */
ret = os_file_close(file_handle);
3150
if (file_size < LOG_FILE_HDR_SIZE) {
3152
"InnoDB: Archive file header incomplete %s\n", name);
3159
/* Add the archive file as a node to the space */
3161
/* Node size in pages: the number of whole pages in the file plus
one, so that a trailing partial page is covered as well */
fil_node_create(name, 1 + file_size / UNIV_PAGE_SIZE,
3162
group->archive_space_id, FALSE);
3163
/* Compile-time guard relating the scan chunk size to the header size;
presumably buf is RECV_SCAN_SIZE bytes and must hold the header read
below - its declaration is not visible here, TODO confirm */
#if RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE
3164
# error "RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE"
3167
/* Read the archive file header */
3168
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->archive_space_id, 0, 0,
3169
LOG_FILE_HDR_SIZE, buf, NULL);
3171
/* Check if the archive file header is consistent */
3173
if (mach_read_from_4(buf + LOG_GROUP_ID) != group->id
3174
|| mach_read_from_4(buf + LOG_FILE_NO)
3175
!= group->archived_file_no) {
3177
"InnoDB: Archive file header inconsistent %s\n", name);
3182
if (!mach_read_from_4(buf + LOG_FILE_ARCH_COMPLETED)) {
3184
"InnoDB: Archive file not completely written %s\n",
3190
start_lsn = mach_read_from_8(buf + LOG_FILE_START_LSN);
3191
file_end_lsn = mach_read_from_8(buf + LOG_FILE_END_LSN);
3193
/* scanned_lsn still zero: no archive file has been scanned yet. The
file must then not start after parse_start_lsn, and scanning begins at
the start lsn of the file */
if (ut_dulint_is_zero(recv_sys->scanned_lsn)) {
3195
if (ut_dulint_cmp(recv_sys->parse_start_lsn, start_lsn) < 0) {
3197
"InnoDB: Archive log file %s"
3198
" starts from too big a lsn\n",
3203
recv_sys->scanned_lsn = start_lsn;
3206
/* The file has to begin exactly at the lsn scanned so far */
if (ut_dulint_cmp(recv_sys->scanned_lsn, start_lsn) != 0) {
3209
"InnoDB: Archive log file %s starts from"
3215
/* Log data starts right after the file header */
read_offset = LOG_FILE_HDR_SIZE;
3218
/* Read RECV_SCAN_SIZE bytes at a time; the last read is cut down to
the whole log blocks that remain in the file */
len = RECV_SCAN_SIZE;
3220
if (read_offset + len > file_size) {
3221
len = ut_calc_align_down(file_size - read_offset,
3222
OS_FILE_LOG_BLOCK_SIZE);
3231
if (log_debug_writes) {
3233
"InnoDB: Archive read starting at"
3234
" lsn %lu %lu, len %lu from file %s\n",
3235
(ulong) ut_dulint_get_high(start_lsn),
3236
(ulong) ut_dulint_get_low(start_lsn),
3239
#endif /* UNIV_DEBUG */
3241
/* read_offset is split into a page number and a byte offset within
that page */
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE,
3242
group->archive_space_id, read_offset / UNIV_PAGE_SIZE,
3243
read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
3245
/* The second argument is a byte count derived from the buffer pool
frames not reserved by recv_n_pool_free_frames; presumably the memory
the scanner may use for stored records - TODO confirm */
ret = recv_scan_log_recs(
3246
TRUE, (buf_pool->n_frames - recv_n_pool_free_frames)
3247
* UNIV_PAGE_SIZE, TRUE, buf, len, start_lsn,
3248
&dummy_lsn, &scanned_lsn);
3250
/* The scan has reached the end lsn recorded in the file header */
if (ut_dulint_cmp(scanned_lsn, file_end_lsn) == 0) {
3257
"InnoDB: Archive log file %s"
3258
" does not scan right\n",
3264
/* Advance to the next chunk; debug builds verify that the scanner
consumed exactly the bytes that were read */
start_lsn = ut_dulint_add(start_lsn, len);
3266
ut_ad(ut_dulint_cmp(start_lsn, scanned_lsn) == 0);
3272
/************************************************************
3273
Recovers from archived log files, and also from log files, if they exist.
The recovery system is initialized, the log group with the wanted id is
looked up, and its archived files are processed one after another
starting from first_log_no. If limit_lsn has not been reached after
that, recovery continues through recv_recovery_from_checkpoint_start().
If limit_lsn is not ut_dulint_max, the hashed log records are applied
and the logs are reset to the recovered lsn.
NOTE(review): the comment on min_flushed_lsn below has lost its end in
this copy of the file, and local declarations, loop headers, braces and
return statements are absent. */
3276
recv_recovery_from_archive_start(
3277
/*=============================*/
3278
/* out: error code or DB_SUCCESS */
3279
dulint min_flushed_lsn,/* in: min flushed lsn field from the
3281
dulint limit_lsn, /* in: recover up to this lsn if possible */
3282
ulint first_log_no) /* in: number of the first archived log file
3283
to use in the recovery; the file will be
3284
searched from INNOBASE_LOG_ARCH_DIR specified
3285
in server config file */
3296
recv_sys_init(FALSE, buf_pool_get_curr_size());
3298
/* Both flags are raised for the duration of the archive recovery;
recv_recovery_from_backup_on is cleared again by
recv_recovery_from_archive_finish() */
recv_recovery_on = TRUE;
3299
recv_recovery_from_backup_on = TRUE;
3301
recv_sys->limit_lsn = limit_lsn;
3305
/* Look for the log group whose id equals group_id (neither the
declaration of group_id nor the loop header is visible in this copy) */
group = UT_LIST_GET_FIRST(log_sys->log_groups);
3308
if (group->id == group_id) {
3313
group = UT_LIST_GET_NEXT(log_groups, group);
3318
"InnoDB: There is no log group defined with id %lu!\n",
3323
group->archived_file_no = first_log_no;
3325
/* Parsing starts at min_flushed_lsn. scanned_lsn is zeroed, which the
archive file reader treats as "nothing scanned yet" */
recv_sys->parse_start_lsn = min_flushed_lsn;
3327
recv_sys->scanned_lsn = ut_dulint_zero;
3328
recv_sys->scanned_checkpoint_no = 0;
3329
recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
3331
recv_sys->archive_group = group;
3335
/* log_sys->mutex is held while the archive files are processed. Each
pass handles one file, removes it from the archive space and moves on
to the next file number (the loop construct is not present here) */
mutex_enter(&(log_sys->mutex));
3338
ret = log_group_recover_from_archive_file(group);
3340
/* Close and truncate a possible processed archive file
3341
from the file space */
3343
trunc_len = UNIV_PAGE_SIZE
3344
* fil_space_get_size(group->archive_space_id);
3345
if (trunc_len > 0) {
3346
fil_space_truncate_start(group->archive_space_id,
3350
group->archived_file_no++;
3353
/* The archive files did not take us up to limit_lsn: continue from
the ordinary log files */
if (ut_dulint_cmp(recv_sys->recovered_lsn, limit_lsn) < 0) {
3355
if (ut_dulint_is_zero(recv_sys->scanned_lsn)) {
3357
recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
3360
/* The mutex is released before calling into the checkpoint recovery
and is taken again after it returns successfully */
mutex_exit(&(log_sys->mutex));
3362
err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE,
3366
if (err != DB_SUCCESS) {
3371
mutex_enter(&(log_sys->mutex));
3374
/* A specific limit lsn was requested: apply the hashed log records
now and restart the logs at the recovered lsn */
if (ut_dulint_cmp(limit_lsn, ut_dulint_max) != 0) {
3376
recv_apply_hashed_log_recs(FALSE);
3378
recv_reset_logs(recv_sys->recovered_lsn, 0, FALSE);
3381
mutex_exit(&(log_sys->mutex));
3386
/************************************************************
3387
Completes recovery from archive. Runs the common completion step
recv_recovery_from_checkpoint_finish() and then clears
recv_recovery_from_backup_on.
NOTE(review): the return type line and the braces are absent from this
copy of the file. */
3390
recv_recovery_from_archive_finish(void)
3391
/*===================================*/
3393
recv_recovery_from_checkpoint_finish();
3395
/* Archive recovery is over: drop the flag */
recv_recovery_from_backup_on = FALSE;
3397
#endif /* UNIV_LOG_ARCHIVE */