1
/*****************************************************************************
3
Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
5
This program is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free Software
7
Foundation; version 2 of the License.
9
This program is distributed in the hope that it will be useful, but WITHOUT
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
You should have received a copy of the GNU General Public License along with
14
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15
Place, Suite 330, Boston, MA 02111-1307 USA
17
*****************************************************************************/
19
/**************************************************//**
23
Created 9/20/1997 Heikki Tuuri
24
*******************************************************/
29
#include "log0recv.ic"
41
#include "ibuf0ibuf.h"
45
#ifndef UNIV_HOTBACKUP
48
# include "srv0start.h"
49
# include "trx0roll.h"
50
# include "row0merge.h"
51
# include "sync0sync.h"
52
#else /* !UNIV_HOTBACKUP */
54
/** This is set to FALSE if the backup was originally taken with the
55
ibbackup --include regexp option: then we do not want to create tables in
56
directories which were not included */
57
UNIV_INTERN ibool recv_replay_file_ops = TRUE;
58
#endif /* !UNIV_HOTBACKUP */
60
/** Log records are stored in the hash table in chunks at most of this size;
61
this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
62
#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
64
/** Read-ahead area in applying log records to file pages */
65
#define RECV_READ_AHEAD_AREA 32
67
/** The recovery system */
68
UNIV_INTERN recv_sys_t* recv_sys = NULL;
69
/** TRUE when applying redo log records during crash recovery; FALSE
70
otherwise. Note that this is FALSE while a background thread is
71
rolling back incomplete transactions. */
72
UNIV_INTERN ibool recv_recovery_on;
73
#ifdef UNIV_LOG_ARCHIVE
74
/** TRUE when applying redo log records from an archived log file */
75
UNIV_INTERN ibool recv_recovery_from_backup_on;
76
#endif /* UNIV_LOG_ARCHIVE */
78
#ifndef UNIV_HOTBACKUP
79
/** TRUE when recv_init_crash_recovery() has been called. */
80
UNIV_INTERN ibool recv_needed_recovery;
82
/** TRUE if writing to the redo log (mtr_commit) is forbidden.
83
Protected by log_sys->mutex. */
84
UNIV_INTERN ibool recv_no_log_write = FALSE;
85
# endif /* UNIV_DEBUG */
87
/** TRUE if buf_page_is_corrupted() should check if the log sequence
88
number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by
89
recv_recovery_from_checkpoint_start_func(). */
90
UNIV_INTERN ibool recv_lsn_checks_on;
92
/** There are two conditions under which we scan the logs, the first
93
is normal startup and the second is when we do a recovery from an archive.
95
This flag is set if we are doing a scan from the last checkpoint during
96
startup. If we find log entries that were written after the last checkpoint
97
we know that the server was not cleanly shutdown. We must then initialize
98
the crash recovery environment before attempting to store these entries in
99
the log hash table. */
100
static ibool recv_log_scan_is_startup_type;
102
/** If the following is TRUE, the buffer pool file pages must be invalidated
103
after recovery and no ibuf operations are allowed; this becomes TRUE if
104
the log record hash table becomes too full, and log records must be merged
105
to file pages already before the recovery is finished: in this case no
106
ibuf operations are allowed, as they could modify the pages read in the
107
buffer pool before the pages have been recovered to the up-to-date state.
109
TRUE means that recovery is running and no operations on the log files
110
are allowed yet: the variable name is misleading. */
111
UNIV_INTERN ibool recv_no_ibuf_operations;
112
/** TRUE when the redo log is being backed up */
113
# define recv_is_making_a_backup FALSE
114
/** TRUE when recovering from a backed up redo log file */
115
# define recv_is_from_backup FALSE
116
#else /* !UNIV_HOTBACKUP */
117
# define recv_needed_recovery FALSE
118
/** TRUE when the redo log is being backed up */
119
UNIV_INTERN ibool recv_is_making_a_backup = FALSE;
120
/** TRUE when recovering from a backed up redo log file */
121
UNIV_INTERN ibool recv_is_from_backup = FALSE;
122
# define buf_pool_get_curr_size() (5 * 1024 * 1024)
123
#endif /* !UNIV_HOTBACKUP */
124
/** The following counter is used to decide when to print info on the
progress of the log scan */
126
static ulint recv_scan_print_counter;
128
/** The type of the previous parsed redo log record */
129
static ulint recv_previous_parsed_rec_type;
130
/** The offset of the previous parsed redo log record */
131
static ulint recv_previous_parsed_rec_offset;
132
/** The 'multi' flag of the previous parsed redo log record */
133
static ulint recv_previous_parsed_rec_is_multi;
135
/** Maximum page number encountered in the redo log */
136
UNIV_INTERN ulint recv_max_parsed_page_no;
138
/** This many frames must be left free in the buffer pool when we scan
139
the log and store the scanned log records in the buffer pool: we will
140
use these free frames to read in pages when we start applying the
141
log records to the database. */
142
UNIV_INTERN ulint recv_n_pool_free_frames;
144
/** The maximum lsn we see for a page during the recovery process. If this
145
is bigger than the lsn we are able to scan up to, that is an indication that
146
the recovery failed and the database may be corrupt. */
147
UNIV_INTERN ib_uint64_t recv_max_page_lsn;
151
#ifndef UNIV_HOTBACKUP
152
/*******************************************************//**
153
Initialize crash recovery environment. Can be called iff
154
recv_needed_recovery == FALSE. */
157
recv_init_crash_recovery(void);
158
/*===========================*/
159
#endif /* !UNIV_HOTBACKUP */
161
/********************************************************//**
162
Creates the recovery system. */
165
recv_sys_create(void)
166
/*=================*/
168
if (recv_sys != NULL) {
173
recv_sys = mem_alloc(sizeof(*recv_sys));
174
memset(recv_sys, 0x0, sizeof(*recv_sys));
176
mutex_create(&recv_sys->mutex, SYNC_RECV);
178
recv_sys->heap = NULL;
179
recv_sys->addr_hash = NULL;
182
/********************************************************//**
183
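Releases the resources held by the recovery system: the address hash table,
the heap, the parsing and last-block buffers, and finally the mutex. */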
189
if (recv_sys != NULL) {
190
if (recv_sys->addr_hash != NULL) {
191
hash_table_free(recv_sys->addr_hash);
194
if (recv_sys->heap != NULL) {
195
mem_heap_free(recv_sys->heap);
198
if (recv_sys->buf != NULL) {
199
ut_free(recv_sys->buf);
202
if (recv_sys->last_block_buf_start != NULL) {
203
mem_free(recv_sys->last_block_buf_start);
206
mutex_free(&recv_sys->mutex);
213
/********************************************************//**
214
Frees the recovery system memory. */
217
recv_sys_mem_free(void)
218
/*===================*/
220
if (recv_sys != NULL) {
221
if (recv_sys->addr_hash != NULL) {
222
hash_table_free(recv_sys->addr_hash);
225
if (recv_sys->heap != NULL) {
226
mem_heap_free(recv_sys->heap);
229
if (recv_sys->buf != NULL) {
230
ut_free(recv_sys->buf);
233
if (recv_sys->last_block_buf_start != NULL) {
234
mem_free(recv_sys->last_block_buf_start);
242
/************************************************************
243
Reset the state of the recovery system variables. */
246
recv_sys_var_init(void)
247
/*===================*/
249
/* Puts every file-level recovery flag and counter back to its start-up
value. Each variable is assigned exactly once. */
recv_lsn_checks_on = FALSE;
251
recv_n_pool_free_frames = 256;
253
recv_recovery_on = FALSE;
255
#ifdef UNIV_LOG_ARCHIVE
256
recv_recovery_from_backup_on = FALSE;
257
#endif /* UNIV_LOG_ARCHIVE */
259
recv_needed_recovery = FALSE;
263
recv_log_scan_is_startup_type = FALSE;
265
recv_no_ibuf_operations = FALSE;
267
recv_scan_print_counter = 0;
269
/* NOTE(review): 999999 is presumably a value that matches no real log
record type -- confirm against the MLOG_* constants */
recv_previous_parsed_rec_type = 999999;
271
recv_previous_parsed_rec_offset = 0;
273
recv_previous_parsed_rec_is_multi = 0;
275
recv_max_parsed_page_no = 0;
279
recv_max_page_lsn = 0;
282
/************************************************************
283
Inits the recovery system for a recovery operation. */
288
ulint available_memory) /*!< in: available memory in bytes */
290
if (recv_sys->heap != NULL) {
295
mutex_enter(&(recv_sys->mutex));
297
#ifndef UNIV_HOTBACKUP
298
recv_sys->heap = mem_heap_create_in_buffer(256);
299
#else /* !UNIV_HOTBACKUP */
300
recv_sys->heap = mem_heap_create(256);
301
recv_is_from_backup = TRUE;
302
#endif /* !UNIV_HOTBACKUP */
304
recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
306
recv_sys->recovered_offset = 0;
308
recv_sys->addr_hash = hash_create(available_memory / 64);
309
recv_sys->n_addrs = 0;
311
recv_sys->apply_log_recs = FALSE;
312
recv_sys->apply_batch_on = FALSE;
314
recv_sys->last_block_buf_start = mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE);
316
recv_sys->last_block = ut_align(recv_sys->last_block_buf_start,
317
OS_FILE_LOG_BLOCK_SIZE);
318
recv_sys->found_corrupt_log = FALSE;
320
recv_max_page_lsn = 0;
322
mutex_exit(&(recv_sys->mutex));
325
/********************************************************//**
326
Empties the hash table when it has been fully processed. */
329
recv_sys_empty_hash(void)
330
/*=====================*/
332
ut_ad(mutex_own(&(recv_sys->mutex)));
334
if (recv_sys->n_addrs != 0) {
336
"InnoDB: Error: %lu pages with log records"
337
" were left unprocessed!\n"
338
"InnoDB: Maximum page number with"
339
" log records on it %lu\n",
340
(ulong) recv_sys->n_addrs,
341
(ulong) recv_max_parsed_page_no);
345
hash_table_free(recv_sys->addr_hash);
346
mem_heap_empty(recv_sys->heap);
348
recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 256);
351
#ifndef UNIV_HOTBACKUP
352
# ifndef UNIV_LOG_DEBUG
353
/********************************************************//**
354
Frees the recovery system. */
357
recv_sys_debug_free(void)
358
/*=====================*/
360
mutex_enter(&(recv_sys->mutex));
362
hash_table_free(recv_sys->addr_hash);
363
mem_heap_free(recv_sys->heap);
364
ut_free(recv_sys->buf);
365
mem_free(recv_sys->last_block_buf_start);
367
recv_sys->buf = NULL;
368
recv_sys->heap = NULL;
369
recv_sys->addr_hash = NULL;
370
recv_sys->last_block_buf_start = NULL;
372
mutex_exit(&(recv_sys->mutex));
374
# endif /* UNIV_LOG_DEBUG */
376
/********************************************************//**
377
Truncates possible corrupted or extra records from a log group. */
382
log_group_t* group, /*!< in: log group */
383
ib_uint64_t recovered_lsn, /*!< in: recovery succeeded up to this
385
ib_uint64_t limit_lsn, /*!< in: this was the limit for
387
ib_uint64_t checkpoint_lsn, /*!< in: recovery was started from this
389
ib_uint64_t archived_lsn) /*!< in: the log has been archived up to
392
ib_uint64_t start_lsn;
394
ib_uint64_t finish_lsn1;
395
ib_uint64_t finish_lsn2;
396
ib_uint64_t finish_lsn;
400
if (archived_lsn == IB_ULONGLONG_MAX) {
401
/* Checkpoint was taken in the NOARCHIVELOG mode */
402
archived_lsn = checkpoint_lsn;
405
finish_lsn1 = ut_uint64_align_down(archived_lsn,
406
OS_FILE_LOG_BLOCK_SIZE)
407
+ log_group_get_capacity(group);
409
finish_lsn2 = ut_uint64_align_up(recovered_lsn,
410
OS_FILE_LOG_BLOCK_SIZE)
411
+ recv_sys->last_log_buf_size;
413
if (limit_lsn != IB_ULONGLONG_MAX) {
414
/* We do not know how far we should erase log records: erase
415
as much as possible */
417
finish_lsn = finish_lsn1;
419
/* It is enough to erase the length of the log buffer */
420
finish_lsn = finish_lsn1 < finish_lsn2
421
? finish_lsn1 : finish_lsn2;
424
ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
426
/* Write the log buffer full of zeros */
427
for (i = 0; i < RECV_SCAN_SIZE; i++) {
429
*(log_sys->buf + i) = '\0';
432
start_lsn = ut_uint64_align_down(recovered_lsn,
433
OS_FILE_LOG_BLOCK_SIZE);
435
if (start_lsn != recovered_lsn) {
436
/* Copy the last incomplete log block to the log buffer and
437
edit its data length: */
439
ut_memcpy(log_sys->buf, recv_sys->last_block,
440
OS_FILE_LOG_BLOCK_SIZE);
441
log_block_set_data_len(log_sys->buf,
442
(ulint) (recovered_lsn - start_lsn));
445
if (start_lsn >= finish_lsn) {
451
end_lsn = start_lsn + RECV_SCAN_SIZE;
453
if (end_lsn > finish_lsn) {
455
end_lsn = finish_lsn;
458
len = (ulint) (end_lsn - start_lsn);
460
log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
461
if (end_lsn >= finish_lsn) {
466
/* Write the log buffer full of zeros */
467
for (i = 0; i < RECV_SCAN_SIZE; i++) {
469
*(log_sys->buf + i) = '\0';
476
/********************************************************//**
477
Copies the log segment between group->recovered_lsn and recovered_lsn from the
478
most up-to-date log group to group, so that it contains the latest log data. */
483
log_group_t* up_to_date_group, /*!< in: the most up-to-date log
485
log_group_t* group, /*!< in: copy to this log
487
ib_uint64_t recovered_lsn) /*!< in: recovery succeeded up
490
ib_uint64_t start_lsn;
494
if (group->scanned_lsn >= recovered_lsn) {
499
ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
501
start_lsn = ut_uint64_align_down(group->scanned_lsn,
502
OS_FILE_LOG_BLOCK_SIZE);
504
end_lsn = start_lsn + RECV_SCAN_SIZE;
506
if (end_lsn > recovered_lsn) {
507
end_lsn = ut_uint64_align_up(recovered_lsn,
508
OS_FILE_LOG_BLOCK_SIZE);
511
log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
512
up_to_date_group, start_lsn, end_lsn);
514
len = (ulint) (end_lsn - start_lsn);
516
log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
518
if (end_lsn >= recovered_lsn) {
527
/********************************************************//**
528
Copies a log segment from the most up-to-date log group to the other log
529
groups, so that they all contain the latest log data. Also writes the info
530
about the latest checkpoint to the groups, and inits the fields in the group
531
memory structs to up-to-date values. */
534
recv_synchronize_groups(
535
/*====================*/
536
log_group_t* up_to_date_group) /*!< in: the most up-to-date log group */
540
ib_uint64_t start_lsn;
542
ib_uint64_t recovered_lsn;
543
ib_uint64_t limit_lsn;
545
recovered_lsn = recv_sys->recovered_lsn;
546
limit_lsn = recv_sys->limit_lsn;
548
/* Read the last recovered log block to the recovery system buffer:
549
the block is always incomplete */
551
start_lsn = ut_uint64_align_down(recovered_lsn,
552
OS_FILE_LOG_BLOCK_SIZE);
553
end_lsn = ut_uint64_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
555
/* recovered_lsn must lie strictly inside a log block: if it were block
aligned, align_down and align_up would return the same value */
ut_a(start_lsn != end_lsn);
557
log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
558
up_to_date_group, start_lsn, end_lsn);
560
group = UT_LIST_GET_FIRST(log_sys->log_groups);
563
if (group != up_to_date_group) {
565
/* Copy log data if needed. The copy routine takes the source (the
most up-to-date group) as its first argument and the destination group
as its second; passing them the other way round would copy from the
stale group into the up-to-date one. */
567
recv_copy_group(up_to_date_group, group,
571
/* Update the fields in the group struct to correspond to recovered_lsn */
574
log_group_set_fields(group, recovered_lsn);
576
group = UT_LIST_GET_NEXT(log_groups, group);
579
/* Copy the checkpoint info to the groups; remember that we have
580
incremented checkpoint_no by one, and the info will not be written
581
over the max checkpoint info, thus making the preservation of max
582
checkpoint info on disk certain */
584
log_groups_write_checkpoint_info();
586
/* The log mutex is released while waiting and re-acquired afterwards */
mutex_exit(&(log_sys->mutex));
588
/* Wait for the checkpoint write to complete: the s-lock is taken and
released at once, so it only serves to block until the lock is free.
NOTE(review): presumably the checkpoint writer holds checkpoint_lock
until its i/o finishes -- confirm */
589
rw_lock_s_lock(&(log_sys->checkpoint_lock));
590
rw_lock_s_unlock(&(log_sys->checkpoint_lock));
592
mutex_enter(&(log_sys->mutex));
594
#endif /* !UNIV_HOTBACKUP */
596
/***********************************************************************//**
597
Checks the consistency of the checkpoint info
598
@return TRUE if ok */
601
recv_check_cp_is_consistent(
602
/*========================*/
603
const byte* buf) /*!< in: buffer containing checkpoint info */
607
/* First checksum: fold the first LOG_CHECKPOINT_CHECKSUM_1 bytes of buf
and compare the low 32 bits of the fold with the 4-byte value stored at
offset LOG_CHECKPOINT_CHECKSUM_1 */
fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
609
if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
610
buf + LOG_CHECKPOINT_CHECKSUM_1)) {
614
/* Second checksum: fold the bytes from LOG_CHECKPOINT_LSN up to (not
including) LOG_CHECKPOINT_CHECKSUM_2 and compare the low 32 bits with the
4-byte value stored at offset LOG_CHECKPOINT_CHECKSUM_2 */
fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
615
LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
617
if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
618
buf + LOG_CHECKPOINT_CHECKSUM_2)) {
625
#ifndef UNIV_HOTBACKUP
626
/********************************************************//**
627
Looks for the maximum consistent checkpoint from the log groups.
628
@return error code or DB_SUCCESS */
631
recv_find_max_checkpoint(
632
/*=====================*/
633
log_group_t** max_group, /*!< out: max group */
634
ulint* max_field) /*!< out: LOG_CHECKPOINT_1 or
639
ib_uint64_t checkpoint_no;
643
group = UT_LIST_GET_FIRST(log_sys->log_groups);
649
buf = log_sys->checkpoint_buf;
652
group->state = LOG_GROUP_CORRUPTED;
654
for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
655
field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
657
log_group_read_checkpoint_info(group, field);
659
if (!recv_check_cp_is_consistent(buf)) {
661
if (log_debug_writes) {
663
"InnoDB: Checkpoint in group"
664
" %lu at %lu invalid, %lu\n",
667
(ulong) mach_read_from_4(
669
+ LOG_CHECKPOINT_CHECKSUM_1));
672
#endif /* UNIV_DEBUG */
676
group->state = LOG_GROUP_OK;
678
group->lsn = mach_read_ull(
679
buf + LOG_CHECKPOINT_LSN);
680
group->lsn_offset = mach_read_from_4(
681
buf + LOG_CHECKPOINT_OFFSET);
682
checkpoint_no = mach_read_ull(
683
buf + LOG_CHECKPOINT_NO);
686
if (log_debug_writes) {
688
"InnoDB: Checkpoint number %lu"
689
" found in group %lu\n",
690
(ulong) checkpoint_no,
693
#endif /* UNIV_DEBUG */
695
if (checkpoint_no >= max_no) {
698
max_no = checkpoint_no;
705
group = UT_LIST_GET_NEXT(log_groups, group);
708
if (*max_group == NULL) {
711
"InnoDB: No valid checkpoint found.\n"
712
"InnoDB: If this error appears when you are"
713
" creating an InnoDB database,\n"
714
"InnoDB: the problem may be that during"
715
" an earlier attempt you managed\n"
716
"InnoDB: to create the InnoDB data files,"
717
" but log file creation failed.\n"
718
"InnoDB: If that is the case, please refer to\n"
719
"InnoDB: " REFMAN "error-creating-innodb.html\n");
725
#else /* !UNIV_HOTBACKUP */
726
/*******************************************************************//**
727
Reads the checkpoint info needed in hot backup.
728
@return TRUE if success */
731
recv_read_cp_info_for_backup(
732
/*=========================*/
733
const byte* hdr, /*!< in: buffer containing the log group
735
ib_uint64_t* lsn, /*!< out: checkpoint lsn */
736
ulint* offset, /*!< out: checkpoint offset in the log group */
737
ulint* fsp_limit,/*!< out: fsp limit of space 0,
738
1000000000 if the database is running
739
with < version 3.23.50 of InnoDB */
740
ib_uint64_t* cp_no, /*!< out: checkpoint number */
741
ib_uint64_t* first_header_lsn)
742
/*!< out: lsn of the start of the log file
(the LOG_FILE_START_LSN field of hdr) */
746
ib_uint64_t max_cp_no = 0;
749
cp_buf = hdr + LOG_CHECKPOINT_1;
751
if (recv_check_cp_is_consistent(cp_buf)) {
752
max_cp_no = mach_read_ull(cp_buf + LOG_CHECKPOINT_NO);
753
max_cp = LOG_CHECKPOINT_1;
756
cp_buf = hdr + LOG_CHECKPOINT_2;
758
if (recv_check_cp_is_consistent(cp_buf)) {
759
if (mach_read_ull(cp_buf + LOG_CHECKPOINT_NO) > max_cp_no) {
760
max_cp = LOG_CHECKPOINT_2;
768
cp_buf = hdr + max_cp;
770
*lsn = mach_read_ull(cp_buf + LOG_CHECKPOINT_LSN);
771
*offset = mach_read_from_4(cp_buf + LOG_CHECKPOINT_OFFSET);
773
/* If the user is running a pre-3.23.50 version of InnoDB, its
774
checkpoint data does not contain the fsp limit info */
775
if (mach_read_from_4(cp_buf + LOG_CHECKPOINT_FSP_MAGIC_N)
776
== LOG_CHECKPOINT_FSP_MAGIC_N_VAL) {
778
*fsp_limit = mach_read_from_4(
779
cp_buf + LOG_CHECKPOINT_FSP_FREE_LIMIT);
781
if (*fsp_limit == 0) {
782
*fsp_limit = 1000000000;
785
*fsp_limit = 1000000000;
788
/* fprintf(stderr, "fsp limit %lu MB\n", *fsp_limit); */
790
*cp_no = mach_read_ull(cp_buf + LOG_CHECKPOINT_NO);
792
*first_header_lsn = mach_read_ull(hdr + LOG_FILE_START_LSN);
796
#endif /* !UNIV_HOTBACKUP */
798
/******************************************************//**
799
Checks the 4-byte checksum to the trailer checksum field of a log
800
block. We also accept a log block in the old format before
801
InnoDB-3.23.52 where the checksum field contains the log block number.
802
@return TRUE if ok, or if the log block may be in the format of InnoDB
803
version predating 3.23.52 */
806
log_block_checksum_is_ok_or_old_format(
807
/*===================================*/
808
const byte* block) /*!< in: pointer to a log block */
810
#ifdef UNIV_LOG_DEBUG
812
#endif /* UNIV_LOG_DEBUG */
813
if (log_block_calc_checksum(block) == log_block_get_checksum(block)) {
818
if (log_block_get_hdr_no(block) == log_block_get_checksum(block)) {
820
/* We assume the log block is in the format of
821
InnoDB version < 3.23.52 and the block is ok */
824
"InnoDB: Scanned old format < InnoDB-3.23.52"
825
" log block number %lu\n",
826
log_block_get_hdr_no(block));
834
#ifdef UNIV_HOTBACKUP
835
/*******************************************************************//**
836
Scans the log segment and n_bytes_scanned is set to the length of valid
840
recv_scan_log_seg_for_backup(
841
/*=========================*/
842
byte* buf, /*!< in: buffer containing log data */
843
ulint buf_len, /*!< in: data length in that buffer */
844
ib_uint64_t* scanned_lsn, /*!< in/out: lsn of buffer start,
845
we return scanned lsn */
846
ulint* scanned_checkpoint_no,
847
/*!< in/out: 4 lowest bytes of the
848
highest scanned checkpoint number so
850
ulint* n_bytes_scanned)/*!< out: how much we were able to
851
scan, smaller than buf_len if log
858
*n_bytes_scanned = 0;
860
for (log_block = buf; log_block < buf + buf_len;
861
log_block += OS_FILE_LOG_BLOCK_SIZE) {
863
no = log_block_get_hdr_no(log_block);
866
fprintf(stderr, "Log block header no %lu\n", no);
869
if (no != log_block_convert_lsn_to_no(*scanned_lsn)
870
|| !log_block_checksum_is_ok_or_old_format(log_block)) {
873
"Log block n:o %lu, scanned lsn n:o %lu\n",
874
no, log_block_convert_lsn_to_no(*scanned_lsn));
876
/* Garbage or an incompletely written log block */
878
log_block += OS_FILE_LOG_BLOCK_SIZE;
881
"Next log block n:o %lu\n",
882
log_block_get_hdr_no(log_block));
887
if (*scanned_checkpoint_no > 0
888
&& log_block_get_checkpoint_no(log_block)
889
< *scanned_checkpoint_no
890
&& *scanned_checkpoint_no
891
- log_block_get_checkpoint_no(log_block)
894
/* Garbage from a log buffer flush which was made
895
before the most recent database recovery */
898
"Scanned cp n:o %lu, block cp n:o %lu\n",
899
*scanned_checkpoint_no,
900
log_block_get_checkpoint_no(log_block));
905
data_len = log_block_get_data_len(log_block);
907
*scanned_checkpoint_no
908
= log_block_get_checkpoint_no(log_block);
909
*scanned_lsn += data_len;
911
*n_bytes_scanned += data_len;
913
if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
914
/* Log data ends here */
917
fprintf(stderr, "Log block data len %lu\n",
924
#endif /* UNIV_HOTBACKUP */
926
/*******************************************************************//**
927
Tries to parse a single log record body and also applies it to a page if
928
specified. File ops are parsed, but not applied in this function.
929
@return log record end, NULL if not a complete record */
932
recv_parse_or_apply_log_rec_body(
933
/*=============================*/
934
byte type, /*!< in: type */
935
byte* ptr, /*!< in: pointer to a buffer */
936
byte* end_ptr,/*!< in: pointer to the buffer end */
937
buf_block_t* block, /*!< in/out: buffer block or NULL; if
938
not NULL, then the log record is
939
applied to the page, and the log
940
record should be complete then */
941
mtr_t* mtr) /*!< in: mtr or NULL; should be non-NULL
942
if and only if block is non-NULL */
944
dict_index_t* index = NULL;
946
page_zip_des_t* page_zip;
949
#endif /* UNIV_DEBUG */
951
ut_ad(!block == !mtr);
955
page_zip = buf_block_get_page_zip(block);
956
ut_d(page_type = fil_page_get_type(page));
960
ut_d(page_type = FIL_PAGE_TYPE_ALLOCATED);
964
#ifdef UNIV_LOG_LSN_DEBUG
966
/* The LSN is checked in recv_parse_log_rec(). */
968
#endif /* UNIV_LOG_LSN_DEBUG */
969
case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES:
971
if (page && page_type == FIL_PAGE_TYPE_ALLOCATED
972
&& end_ptr >= ptr + 2) {
973
/* It is OK to set FIL_PAGE_TYPE and certain
974
list node fields on an empty page. Any other
977
/* NOTE: There may be bogus assertion failures for
978
dict_hdr_create(), trx_rseg_header_create(),
979
trx_sys_create_doublewrite_buf(), and
981
These are only called during database creation. */
982
ulint offs = mach_read_from_2(ptr);
988
/* Note that this can fail when the
989
redo log has been written with something
990
older than InnoDB Plugin 1.0.4. */
991
ut_ad(offs == FIL_PAGE_TYPE
992
|| offs == IBUF_TREE_SEG_HEADER
993
+ IBUF_HEADER + FSEG_HDR_OFFSET
994
|| offs == PAGE_BTR_IBUF_FREE_LIST
995
+ PAGE_HEADER + FIL_ADDR_BYTE
996
|| offs == PAGE_BTR_IBUF_FREE_LIST
997
+ PAGE_HEADER + FIL_ADDR_BYTE
999
|| offs == PAGE_BTR_SEG_LEAF
1000
+ PAGE_HEADER + FSEG_HDR_OFFSET
1001
|| offs == PAGE_BTR_SEG_TOP
1002
+ PAGE_HEADER + FSEG_HDR_OFFSET
1003
|| offs == PAGE_BTR_IBUF_FREE_LIST_NODE
1004
+ PAGE_HEADER + FIL_ADDR_BYTE
1006
|| offs == PAGE_BTR_IBUF_FREE_LIST_NODE
1007
+ PAGE_HEADER + FIL_ADDR_BYTE
1008
+ FIL_ADDR_SIZE /*FLST_NEXT*/);
1011
/* Note that this can fail when the
1012
redo log has been written with something
1013
older than InnoDB Plugin 1.0.4. */
1015
|| offs == IBUF_TREE_SEG_HEADER
1016
+ IBUF_HEADER + FSEG_HDR_SPACE
1017
|| offs == IBUF_TREE_SEG_HEADER
1018
+ IBUF_HEADER + FSEG_HDR_PAGE_NO
1019
|| offs == PAGE_BTR_IBUF_FREE_LIST
1020
+ PAGE_HEADER/* flst_init */
1021
|| offs == PAGE_BTR_IBUF_FREE_LIST
1022
+ PAGE_HEADER + FIL_ADDR_PAGE
1023
|| offs == PAGE_BTR_IBUF_FREE_LIST
1024
+ PAGE_HEADER + FIL_ADDR_PAGE
1026
|| offs == PAGE_BTR_SEG_LEAF
1027
+ PAGE_HEADER + FSEG_HDR_PAGE_NO
1028
|| offs == PAGE_BTR_SEG_LEAF
1029
+ PAGE_HEADER + FSEG_HDR_SPACE
1030
|| offs == PAGE_BTR_SEG_TOP
1031
+ PAGE_HEADER + FSEG_HDR_PAGE_NO
1032
|| offs == PAGE_BTR_SEG_TOP
1033
+ PAGE_HEADER + FSEG_HDR_SPACE
1034
|| offs == PAGE_BTR_IBUF_FREE_LIST_NODE
1035
+ PAGE_HEADER + FIL_ADDR_PAGE
1037
|| offs == PAGE_BTR_IBUF_FREE_LIST_NODE
1038
+ PAGE_HEADER + FIL_ADDR_PAGE
1039
+ FIL_ADDR_SIZE /*FLST_NEXT*/);
1043
#endif /* UNIV_DEBUG */
1044
ptr = mlog_parse_nbytes(type, ptr, end_ptr, page, page_zip);
1046
case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT:
1047
ut_ad(!page || page_type == FIL_PAGE_INDEX);
1049
if (NULL != (ptr = mlog_parse_index(
1051
type == MLOG_COMP_REC_INSERT,
1054
|| (ibool)!!page_is_comp(page)
1055
== dict_table_is_comp(index->table));
1056
ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
1060
case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
1061
ut_ad(!page || page_type == FIL_PAGE_INDEX);
1063
if (NULL != (ptr = mlog_parse_index(
1065
type == MLOG_COMP_REC_CLUST_DELETE_MARK,
1068
|| (ibool)!!page_is_comp(page)
1069
== dict_table_is_comp(index->table));
1070
ptr = btr_cur_parse_del_mark_set_clust_rec(
1071
ptr, end_ptr, page, page_zip, index);
1074
case MLOG_COMP_REC_SEC_DELETE_MARK:
1075
ut_ad(!page || page_type == FIL_PAGE_INDEX);
1076
/* This log record type is obsolete, but we process it for
1077
backward compatibility with MySQL 5.0.3 and 5.0.4. */
1078
ut_a(!page || page_is_comp(page));
1080
ptr = mlog_parse_index(ptr, end_ptr, TRUE, &index);
1085
case MLOG_REC_SEC_DELETE_MARK:
1086
ut_ad(!page || page_type == FIL_PAGE_INDEX);
1087
ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr,
1090
case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
1091
ut_ad(!page || page_type == FIL_PAGE_INDEX);
1093
if (NULL != (ptr = mlog_parse_index(
1095
type == MLOG_COMP_REC_UPDATE_IN_PLACE,
1098
|| (ibool)!!page_is_comp(page)
1099
== dict_table_is_comp(index->table));
1100
ptr = btr_cur_parse_update_in_place(ptr, end_ptr, page,
1104
case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE:
1105
case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE:
1106
ut_ad(!page || page_type == FIL_PAGE_INDEX);
1108
if (NULL != (ptr = mlog_parse_index(
1110
type == MLOG_COMP_LIST_END_DELETE
1111
|| type == MLOG_COMP_LIST_START_DELETE,
1114
|| (ibool)!!page_is_comp(page)
1115
== dict_table_is_comp(index->table));
1116
ptr = page_parse_delete_rec_list(type, ptr, end_ptr,
1120
case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED:
1121
ut_ad(!page || page_type == FIL_PAGE_INDEX);
1123
if (NULL != (ptr = mlog_parse_index(
1125
type == MLOG_COMP_LIST_END_COPY_CREATED,
1128
|| (ibool)!!page_is_comp(page)
1129
== dict_table_is_comp(index->table));
1130
ptr = page_parse_copy_rec_list_to_created_page(
1131
ptr, end_ptr, block, index, mtr);
1134
case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE:
1135
ut_ad(!page || page_type == FIL_PAGE_INDEX);
1137
if (NULL != (ptr = mlog_parse_index(
1139
type == MLOG_COMP_PAGE_REORGANIZE,
1142
|| (ibool)!!page_is_comp(page)
1143
== dict_table_is_comp(index->table));
1144
ptr = btr_parse_page_reorganize(ptr, end_ptr, index,
1148
case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
1149
/* Allow anything in page_type when creating a page. */
1151
ptr = page_parse_create(ptr, end_ptr,
1152
type == MLOG_COMP_PAGE_CREATE,
1155
case MLOG_UNDO_INSERT:
1156
ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
1157
ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
1159
case MLOG_UNDO_ERASE_END:
1160
ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
1161
ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr);
1163
case MLOG_UNDO_INIT:
1164
/* Allow anything in page_type when creating a page. */
1165
ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
1167
case MLOG_UNDO_HDR_DISCARD:
1168
ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
1169
ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr);
1171
case MLOG_UNDO_HDR_CREATE:
1172
case MLOG_UNDO_HDR_REUSE:
1173
ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
1174
ptr = trx_undo_parse_page_header(type, ptr, end_ptr,
1177
case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK:
1178
ut_ad(!page || page_type == FIL_PAGE_INDEX);
1179
/* On a compressed page, MLOG_COMP_REC_MIN_MARK
1180
will be followed by MLOG_COMP_REC_DELETE
1181
or MLOG_ZIP_WRITE_HEADER(FIL_PAGE_PREV, FIL_NULL)
1182
in the same mini-transaction. */
1183
ut_a(type == MLOG_COMP_REC_MIN_MARK || !page_zip);
1184
ptr = btr_parse_set_min_rec_mark(
1185
ptr, end_ptr, type == MLOG_COMP_REC_MIN_MARK,
1188
case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE:
1189
ut_ad(!page || page_type == FIL_PAGE_INDEX);
1191
if (NULL != (ptr = mlog_parse_index(
1193
type == MLOG_COMP_REC_DELETE,
1196
|| (ibool)!!page_is_comp(page)
1197
== dict_table_is_comp(index->table));
1198
ptr = page_cur_parse_delete_rec(ptr, end_ptr,
1202
case MLOG_IBUF_BITMAP_INIT:
1203
/* Allow anything in page_type when creating a page. */
1204
ptr = ibuf_parse_bitmap_init(ptr, end_ptr, block, mtr);
1206
case MLOG_INIT_FILE_PAGE:
1207
/* Allow anything in page_type when creating a page. */
1208
ptr = fsp_parse_init_file_page(ptr, end_ptr, block);
1210
case MLOG_WRITE_STRING:
1211
ut_ad(!page || page_type != FIL_PAGE_TYPE_ALLOCATED);
1212
ptr = mlog_parse_string(ptr, end_ptr, page, page_zip);
1214
case MLOG_FILE_CREATE:
1215
case MLOG_FILE_RENAME:
1216
case MLOG_FILE_DELETE:
1217
case MLOG_FILE_CREATE2:
1218
ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, 0, 0);
1220
case MLOG_ZIP_WRITE_NODE_PTR:
1221
ut_ad(!page || page_type == FIL_PAGE_INDEX);
1222
ptr = page_zip_parse_write_node_ptr(ptr, end_ptr,
1225
case MLOG_ZIP_WRITE_BLOB_PTR:
1226
ut_ad(!page || page_type == FIL_PAGE_INDEX);
1227
ptr = page_zip_parse_write_blob_ptr(ptr, end_ptr,
1230
case MLOG_ZIP_WRITE_HEADER:
1231
ut_ad(!page || page_type == FIL_PAGE_INDEX);
1232
ptr = page_zip_parse_write_header(ptr, end_ptr,
1235
case MLOG_ZIP_PAGE_COMPRESS:
1236
/* Allow anything in page_type when creating a page. */
1237
ptr = page_zip_parse_compress(ptr, end_ptr,
1242
recv_sys->found_corrupt_log = TRUE;
1246
dict_table_t* table = index->table;
1248
dict_mem_index_free(index);
1249
dict_mem_table_free(table);
1255
/*********************************************************************//**
1256
Calculates the fold value of a page file address: used in inserting or
1257
searching for a log record in the hash table.
1258
@return folded value */
1263
ulint space, /*!< in: space */
1264
ulint page_no)/*!< in: page number */
1266
return(ut_fold_ulint_pair(space, page_no));
1269
/*********************************************************************//**
1270
Calculates the hash value of a page file address: used in inserting or
1271
searching for a log record in the hash table.
1272
@return hash value */
1277
ulint space, /*!< in: space */
1278
ulint page_no)/*!< in: page number */
1280
return(hash_calc_hash(recv_fold(space, page_no), recv_sys->addr_hash));
1283
/*********************************************************************//**
1284
Gets the hashed file address struct for a page.
1285
@return file address struct, or NULL if not found in the hash table */
1288
recv_get_fil_addr_struct(
1289
/*=====================*/
1290
ulint space, /*!< in: space id */
1291
ulint page_no)/*!< in: page number */
1293
recv_addr_t* recv_addr;
1295
recv_addr = HASH_GET_FIRST(recv_sys->addr_hash,
1296
recv_hash(space, page_no));
1298
if ((recv_addr->space == space)
1299
&& (recv_addr->page_no == page_no)) {
1304
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
1310
/*******************************************************************//**
1311
Adds a new log record to the hash table of log records. */
1314
recv_add_to_hash_table(
1315
/*===================*/
1316
byte type, /*!< in: log record type */
1317
ulint space, /*!< in: space id */
1318
ulint page_no, /*!< in: page number */
1319
byte* body, /*!< in: log record body */
1320
byte* rec_end, /*!< in: log record end */
1321
ib_uint64_t start_lsn, /*!< in: start lsn of the mtr */
1322
ib_uint64_t end_lsn) /*!< in: end lsn of the mtr */
1326
recv_data_t* recv_data;
1327
recv_data_t** prev_field;
1328
recv_addr_t* recv_addr;
1330
if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
1331
/* The tablespace does not exist any more: do not store the
1337
len = rec_end - body;
1339
recv = mem_heap_alloc(recv_sys->heap, sizeof(recv_t));
1341
recv->len = rec_end - body;
1342
recv->start_lsn = start_lsn;
1343
recv->end_lsn = end_lsn;
1345
recv_addr = recv_get_fil_addr_struct(space, page_no);
1347
if (recv_addr == NULL) {
1348
recv_addr = mem_heap_alloc(recv_sys->heap,
1349
sizeof(recv_addr_t));
1350
recv_addr->space = space;
1351
recv_addr->page_no = page_no;
1352
recv_addr->state = RECV_NOT_PROCESSED;
1354
UT_LIST_INIT(recv_addr->rec_list);
1356
HASH_INSERT(recv_addr_t, addr_hash, recv_sys->addr_hash,
1357
recv_fold(space, page_no), recv_addr);
1358
recv_sys->n_addrs++;
1360
fprintf(stderr, "Inserting log rec for space %lu, page %lu\n",
1365
UT_LIST_ADD_LAST(rec_list, recv_addr->rec_list, recv);
1367
prev_field = &(recv->data);
1369
/* Store the log record body in chunks of less than UNIV_PAGE_SIZE:
1370
recv_sys->heap grows into the buffer pool, and bigger chunks could not
1373
while (rec_end > body) {
1375
len = rec_end - body;
1377
if (len > RECV_DATA_BLOCK_SIZE) {
1378
len = RECV_DATA_BLOCK_SIZE;
1381
recv_data = mem_heap_alloc(recv_sys->heap,
1382
sizeof(recv_data_t) + len);
1383
*prev_field = recv_data;
1385
memcpy(recv_data + 1, body, len);
1387
prev_field = &(recv_data->next);
1395
/*********************************************************************//**
1396
Copies the log record body from recv to buf. */
1399
recv_data_copy_to_buf(
1400
/*==================*/
1401
byte* buf, /*!< in: buffer of length at least recv->len */
1402
recv_t* recv) /*!< in: log record */
1404
recv_data_t* recv_data;
1409
recv_data = recv->data;
1412
if (len > RECV_DATA_BLOCK_SIZE) {
1413
part_len = RECV_DATA_BLOCK_SIZE;
1418
ut_memcpy(buf, ((byte*)recv_data) + sizeof(recv_data_t),
1423
recv_data = recv_data->next;
1427
/************************************************************************//**
1428
Applies the hashed log records to the page, if the page lsn is less than the
1429
lsn of a log record. This can be called when a buffer page has just been
1430
read in, or also for a page already in the buffer pool. */
1433
recv_recover_page_func(
1434
/*===================*/
1435
#ifndef UNIV_HOTBACKUP
1437
/*!< in: TRUE if the i/o handler calls
1438
this for a freshly read page */
1439
#endif /* !UNIV_HOTBACKUP */
1440
buf_block_t* block) /*!< in/out: buffer block */
1443
page_zip_des_t* page_zip;
1444
recv_addr_t* recv_addr;
1447
ib_uint64_t start_lsn;
1448
ib_uint64_t end_lsn;
1449
ib_uint64_t page_lsn;
1450
ib_uint64_t page_newest_lsn;
1451
ibool modification_to_page;
1452
#ifndef UNIV_HOTBACKUP
1454
#endif /* !UNIV_HOTBACKUP */
1457
mutex_enter(&(recv_sys->mutex));
1459
if (recv_sys->apply_log_recs == FALSE) {
1461
/* Log records should not be applied now */
1463
mutex_exit(&(recv_sys->mutex));
1468
recv_addr = recv_get_fil_addr_struct(buf_block_get_space(block),
1469
buf_block_get_page_no(block));
1471
if ((recv_addr == NULL)
1472
|| (recv_addr->state == RECV_BEING_PROCESSED)
1473
|| (recv_addr->state == RECV_PROCESSED)) {
1475
mutex_exit(&(recv_sys->mutex));
1481
fprintf(stderr, "Recovering space %lu, page %lu\n",
1482
buf_block_get_space(block), buf_block_get_page_no(block));
1485
recv_addr->state = RECV_BEING_PROCESSED;
1487
mutex_exit(&(recv_sys->mutex));
1490
mtr_set_log_mode(&mtr, MTR_LOG_NONE);
1492
page = block->frame;
1493
page_zip = buf_block_get_page_zip(block);
1495
#ifndef UNIV_HOTBACKUP
1497
/* Move the ownership of the x-latch on the page to
1498
this OS thread, so that we can acquire a second
1499
x-latch on it. This is needed for the operations to
1500
the page to pass the debug checks. */
1502
rw_lock_x_lock_move_ownership(&block->lock);
1505
success = buf_page_get_known_nowait(RW_X_LATCH, block,
1511
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
1512
#endif /* !UNIV_HOTBACKUP */
1514
/* Read the newest modification lsn from the page */
1515
page_lsn = mach_read_ull(page + FIL_PAGE_LSN);
1517
#ifndef UNIV_HOTBACKUP
1518
/* It may be that the page has been modified in the buffer
1519
pool: read the newest modification lsn there */
1521
page_newest_lsn = buf_page_get_newest_modification(&block->page);
1523
if (page_newest_lsn) {
1525
page_lsn = page_newest_lsn;
1527
#else /* !UNIV_HOTBACKUP */
1528
/* In recovery from a backup we do not really use the buffer pool */
1529
page_newest_lsn = 0;
1530
#endif /* !UNIV_HOTBACKUP */
1532
modification_to_page = FALSE;
1533
start_lsn = end_lsn = 0;
1535
recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
1538
end_lsn = recv->end_lsn;
1540
if (recv->len > RECV_DATA_BLOCK_SIZE) {
1541
/* We have to copy the record body to a separate
1544
buf = mem_alloc(recv->len);
1546
recv_data_copy_to_buf(buf, recv);
1548
buf = ((byte*)(recv->data)) + sizeof(recv_data_t);
1551
if (recv->type == MLOG_INIT_FILE_PAGE) {
1552
page_lsn = page_newest_lsn;
1554
memset(FIL_PAGE_LSN + page, 0, 8);
1555
memset(UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM
1559
memset(FIL_PAGE_LSN + page_zip->data, 0, 8);
1563
if (recv->start_lsn >= page_lsn) {
1565
ib_uint64_t end_lsn;
1567
if (!modification_to_page) {
1569
modification_to_page = TRUE;
1570
start_lsn = recv->start_lsn;
1574
if (log_debug_writes) {
1576
"InnoDB: Applying log rec"
1578
" to space %lu page no %lu\n",
1579
(ulong) recv->type, (ulong) recv->len,
1580
(ulong) recv_addr->space,
1581
(ulong) recv_addr->page_no);
1583
#endif /* UNIV_DEBUG */
1585
recv_parse_or_apply_log_rec_body(recv->type, buf,
1589
end_lsn = recv->start_lsn + recv->len;
1590
mach_write_ull(FIL_PAGE_LSN + page, end_lsn);
1591
mach_write_ull(UNIV_PAGE_SIZE
1592
- FIL_PAGE_END_LSN_OLD_CHKSUM
1596
mach_write_ull(FIL_PAGE_LSN
1597
+ page_zip->data, end_lsn);
1601
if (recv->len > RECV_DATA_BLOCK_SIZE) {
1605
recv = UT_LIST_GET_NEXT(rec_list, recv);
1608
#ifdef UNIV_ZIP_DEBUG
1609
if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
1610
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
1613
ut_a(page_zip_validate_low(page_zip, page, FALSE));
1616
#endif /* UNIV_ZIP_DEBUG */
1618
mutex_enter(&(recv_sys->mutex));
1620
if (recv_max_page_lsn < page_lsn) {
1621
recv_max_page_lsn = page_lsn;
1624
recv_addr->state = RECV_PROCESSED;
1626
ut_a(recv_sys->n_addrs);
1627
recv_sys->n_addrs--;
1629
mutex_exit(&(recv_sys->mutex));
1631
#ifndef UNIV_HOTBACKUP
1632
if (modification_to_page) {
1635
buf_flush_recv_note_modification(block, start_lsn, end_lsn);
1637
#endif /* !UNIV_HOTBACKUP */
1639
/* Make sure that committing mtr does not change the modification
1640
lsn values of page */
1642
mtr.modifications = FALSE;
1647
#ifndef UNIV_HOTBACKUP
1648
/*******************************************************************//**
1649
Reads in pages which have hashed log records, from an area around a given
1651
@return number of pages found */
1656
ulint space, /*!< in: space */
1657
ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
1658
ulint page_no)/*!< in: page number */
1660
recv_addr_t* recv_addr;
1661
ulint page_nos[RECV_READ_AHEAD_AREA];
1665
low_limit = page_no - (page_no % RECV_READ_AHEAD_AREA);
1669
for (page_no = low_limit; page_no < low_limit + RECV_READ_AHEAD_AREA;
1671
recv_addr = recv_get_fil_addr_struct(space, page_no);
1673
if (recv_addr && !buf_page_peek(space, page_no)) {
1675
mutex_enter(&(recv_sys->mutex));
1677
if (recv_addr->state == RECV_NOT_PROCESSED) {
1678
recv_addr->state = RECV_BEING_READ;
1680
page_nos[n] = page_no;
1685
mutex_exit(&(recv_sys->mutex));
1689
buf_read_recv_pages(FALSE, space, zip_size, page_nos, n);
1691
fprintf(stderr, "Recv pages at %lu n %lu\n", page_nos[0], n);
1696
/*******************************************************************//**
1697
Empties the hash table of stored log records, applying them to appropriate
1701
recv_apply_hashed_log_recs(
1702
/*=======================*/
1703
ibool allow_ibuf) /*!< in: if TRUE, also ibuf operations are
1704
allowed during the application; if FALSE,
1705
no ibuf operations are allowed, and after
1706
the application all file pages are flushed to
1707
disk and invalidated in buffer pool: this
1708
alternative means that no new log records
1709
can be generated during the application;
1710
the caller must in this case own the log
1713
recv_addr_t* recv_addr;
1716
ibool has_printed = FALSE;
1719
mutex_enter(&(recv_sys->mutex));
1721
if (recv_sys->apply_batch_on) {
1723
mutex_exit(&(recv_sys->mutex));
1725
os_thread_sleep(500000);
1730
ut_ad(!allow_ibuf == mutex_own(&log_sys->mutex));
1733
recv_no_ibuf_operations = TRUE;
1736
recv_sys->apply_log_recs = TRUE;
1737
recv_sys->apply_batch_on = TRUE;
1739
for (i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
1741
recv_addr = HASH_GET_FIRST(recv_sys->addr_hash, i);
1744
ulint space = recv_addr->space;
1745
ulint zip_size = fil_space_get_zip_size(space);
1746
ulint page_no = recv_addr->page_no;
1748
if (recv_addr->state == RECV_NOT_PROCESSED) {
1750
ut_print_timestamp(stderr);
1751
fputs(" InnoDB: Starting an"
1752
" apply batch of log records"
1753
" to the database...\n"
1754
"InnoDB: Progress in percents: ",
1759
mutex_exit(&(recv_sys->mutex));
1761
if (buf_page_peek(space, page_no)) {
1766
block = buf_page_get(
1767
space, zip_size, page_no,
1769
buf_block_dbg_add_level(
1770
block, SYNC_NO_ORDER_CHECK);
1772
recv_recover_page(FALSE, block);
1775
recv_read_in_area(space, zip_size,
1779
mutex_enter(&(recv_sys->mutex));
1782
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
1786
&& (i * 100) / hash_get_n_cells(recv_sys->addr_hash)
1788
/ hash_get_n_cells(recv_sys->addr_hash)) {
1790
fprintf(stderr, "%lu ", (ulong)
1792
/ hash_get_n_cells(recv_sys->addr_hash)));
1796
/* Wait until all the pages have been processed */
1798
while (recv_sys->n_addrs != 0) {
1800
mutex_exit(&(recv_sys->mutex));
1802
os_thread_sleep(500000);
1804
mutex_enter(&(recv_sys->mutex));
1809
fprintf(stderr, "\n");
1813
/* Flush all the file pages to disk and invalidate them in
1816
ut_d(recv_no_log_write = TRUE);
1817
mutex_exit(&(recv_sys->mutex));
1818
mutex_exit(&(log_sys->mutex));
1820
n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX,
1822
ut_a(n_pages != ULINT_UNDEFINED);
1824
buf_flush_wait_batch_end(BUF_FLUSH_LIST);
1826
buf_pool_invalidate();
1828
mutex_enter(&(log_sys->mutex));
1829
mutex_enter(&(recv_sys->mutex));
1830
ut_d(recv_no_log_write = FALSE);
1832
recv_no_ibuf_operations = FALSE;
1835
recv_sys->apply_log_recs = FALSE;
1836
recv_sys->apply_batch_on = FALSE;
1838
recv_sys_empty_hash();
1841
fprintf(stderr, "InnoDB: Apply batch completed\n");
1844
mutex_exit(&(recv_sys->mutex));
1846
#else /* !UNIV_HOTBACKUP */
1847
/*******************************************************************//**
1848
Applies log records in the hash table to a backup. */
1851
recv_apply_log_recs_for_backup(void)
1852
/*================================*/
1854
recv_addr_t* recv_addr;
1862
recv_sys->apply_log_recs = TRUE;
1863
recv_sys->apply_batch_on = TRUE;
1865
block = back_block1;
1867
fputs("InnoDB: Starting an apply batch of log records"
1868
" to the database...\n"
1869
"InnoDB: Progress in percents: ", stderr);
1871
n_hash_cells = hash_get_n_cells(recv_sys->addr_hash);
1873
for (i = 0; i < n_hash_cells; i++) {
1874
/* The address hash table is externally chained */
1875
recv_addr = hash_get_nth_cell(recv_sys->addr_hash, i)->node;
1877
while (recv_addr != NULL) {
1880
= fil_space_get_zip_size(recv_addr->space);
1882
if (zip_size == ULINT_UNDEFINED) {
1885
"InnoDB: Warning: cannot apply"
1887
" tablespace %lu page %lu,\n"
1888
"InnoDB: because tablespace with"
1889
" that id does not exist.\n",
1890
recv_addr->space, recv_addr->page_no);
1892
recv_addr->state = RECV_PROCESSED;
1894
ut_a(recv_sys->n_addrs);
1895
recv_sys->n_addrs--;
1897
goto skip_this_recv_addr;
1900
/* We simulate a page read made by the buffer pool, to
1901
make sure the recovery apparatus works ok. We must init
1904
buf_page_init_for_backup_restore(
1905
recv_addr->space, recv_addr->page_no,
1908
/* Extend the tablespace's last file if the page_no
1909
does not fall inside its bounds; we assume the last
1910
file is auto-extending, and ibbackup copied the file
1911
when it still was smaller */
1913
success = fil_extend_space_to_desired_size(
1915
recv_addr->space, recv_addr->page_no + 1);
1918
"InnoDB: Fatal error: cannot extend"
1919
" tablespace %lu to hold %lu pages\n",
1920
recv_addr->space, recv_addr->page_no);
1925
/* Read the page from the tablespace file using the
1926
fil0fil.c routines */
1929
error = fil_io(OS_FILE_READ, TRUE,
1930
recv_addr->space, zip_size,
1931
recv_addr->page_no, 0, zip_size,
1932
block->page.zip.data, NULL);
1933
if (error == DB_SUCCESS
1934
&& !buf_zip_decompress(block, TRUE)) {
1938
error = fil_io(OS_FILE_READ, TRUE,
1939
recv_addr->space, 0,
1940
recv_addr->page_no, 0,
1942
block->frame, NULL);
1945
if (error != DB_SUCCESS) {
1947
"InnoDB: Fatal error: cannot read"
1949
" %lu page number %lu\n",
1950
(ulong) recv_addr->space,
1951
(ulong) recv_addr->page_no);
1956
/* Apply the log records to this page */
1957
recv_recover_page(FALSE, block);
1959
/* Write the page back to the tablespace file using the
1960
fil0fil.c routines */
1962
buf_flush_init_for_writing(
1963
block->frame, buf_block_get_page_zip(block),
1964
mach_read_ull(block->frame + FIL_PAGE_LSN));
1967
error = fil_io(OS_FILE_WRITE, TRUE,
1968
recv_addr->space, zip_size,
1969
recv_addr->page_no, 0,
1971
block->page.zip.data, NULL);
1973
error = fil_io(OS_FILE_WRITE, TRUE,
1974
recv_addr->space, 0,
1975
recv_addr->page_no, 0,
1977
block->frame, NULL);
1979
skip_this_recv_addr:
1980
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
1983
if ((100 * i) / n_hash_cells
1984
!= (100 * (i + 1)) / n_hash_cells) {
1985
fprintf(stderr, "%lu ",
1986
(ulong) ((100 * i) / n_hash_cells));
1991
recv_sys_empty_hash();
1993
#endif /* !UNIV_HOTBACKUP */
1995
/*******************************************************************//**
1996
Tries to parse a single log record and returns its length.
1997
@return length of the record, or 0 if the record was not complete */
2002
byte* ptr, /*!< in: pointer to a buffer */
2003
byte* end_ptr,/*!< in: pointer to the buffer end */
2004
byte* type, /*!< out: type */
2005
ulint* space, /*!< out: space id */
2006
ulint* page_no,/*!< out: page number */
2007
byte** body) /*!< out: log record body start */
2013
if (ptr == end_ptr) {
2018
if (*ptr == MLOG_MULTI_REC_END) {
2025
if (*ptr == MLOG_DUMMY_RECORD) {
2028
*space = ULINT_UNDEFINED - 1; /* For debugging */
2033
new_ptr = mlog_parse_initial_log_record(ptr, end_ptr, type, space,
2037
if (UNIV_UNLIKELY(!new_ptr)) {
2042
#ifdef UNIV_LOG_LSN_DEBUG
2043
if (*type == MLOG_LSN) {
2044
ib_uint64_t lsn = (ib_uint64_t) *space << 32 | *page_no;
2045
# ifdef UNIV_LOG_DEBUG
2046
ut_a(lsn == log_sys->old_lsn);
2047
# else /* UNIV_LOG_DEBUG */
2048
ut_a(lsn == recv_sys->recovered_lsn);
2049
# endif /* UNIV_LOG_DEBUG */
2051
#endif /* UNIV_LOG_LSN_DEBUG */
2053
/* Check that page_no is sensible */
2055
if (UNIV_UNLIKELY(*page_no > 0x8FFFFFFFUL)) {
2057
recv_sys->found_corrupt_log = TRUE;
2062
new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
2064
if (UNIV_UNLIKELY(new_ptr == NULL)) {
2069
if (*page_no > recv_max_parsed_page_no) {
2070
recv_max_parsed_page_no = *page_no;
2073
return(new_ptr - ptr);
2076
/*******************************************************//**
2077
Calculates the new value for lsn when more data is added to the log. */
2080
recv_calc_lsn_on_data_add(
2081
/*======================*/
2082
ib_uint64_t lsn, /*!< in: old lsn */
2083
ib_uint64_t len) /*!< in: this many bytes of data is
2084
added, log block headers not included */
2089
frag_len = (((ulint) lsn) % OS_FILE_LOG_BLOCK_SIZE)
2090
- LOG_BLOCK_HDR_SIZE;
2091
ut_ad(frag_len < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
2092
- LOG_BLOCK_TRL_SIZE);
2093
lsn_len = (ulint) len;
2094
lsn_len += (lsn_len + frag_len)
2095
/ (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
2096
- LOG_BLOCK_TRL_SIZE)
2097
* (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
2099
return(lsn + lsn_len);
2102
#ifdef UNIV_LOG_DEBUG
2103
/*******************************************************//**
2104
Checks that the parser recognizes incomplete initial segments of a log
2105
record as incomplete. */
2108
recv_check_incomplete_log_recs(
2109
/*===========================*/
2110
byte* ptr, /*!< in: pointer to a complete log record */
2111
ulint len) /*!< in: length of the log record */
2119
for (i = 0; i < len; i++) {
2120
ut_a(0 == recv_parse_log_rec(ptr, ptr + i, &type, &space,
2124
#endif /* UNIV_LOG_DEBUG */
2126
/*******************************************************//**
2127
Prints diagnostic info of corrupt log. */
2130
recv_report_corrupt_log(
2131
/*====================*/
2132
byte* ptr, /*!< in: pointer to corrupt log record */
2133
byte type, /*!< in: type of the record */
2134
ulint space, /*!< in: space id, this may also be garbage */
2135
ulint page_no)/*!< in: page number, this may also be garbage */
2138
"InnoDB: ############### CORRUPT LOG RECORD FOUND\n"
2139
"InnoDB: Log record type %lu, space id %lu, page number %lu\n"
2140
"InnoDB: Log parsing proceeded successfully up to %llu\n"
2141
"InnoDB: Previous log record type %lu, is multi %lu\n"
2142
"InnoDB: Recv offset %lu, prev %lu\n",
2143
(ulong) type, (ulong) space, (ulong) page_no,
2144
recv_sys->recovered_lsn,
2145
(ulong) recv_previous_parsed_rec_type,
2146
(ulong) recv_previous_parsed_rec_is_multi,
2147
(ulong) (ptr - recv_sys->buf),
2148
(ulong) recv_previous_parsed_rec_offset);
2150
if ((ulint)(ptr - recv_sys->buf + 100)
2151
> recv_previous_parsed_rec_offset
2152
&& (ulint)(ptr - recv_sys->buf + 100
2153
- recv_previous_parsed_rec_offset)
2155
fputs("InnoDB: Hex dump of corrupt log starting"
2156
" 100 bytes before the start\n"
2157
"InnoDB: of the previous log rec,\n"
2158
"InnoDB: and ending 100 bytes after the start"
2159
" of the corrupt rec:\n",
2162
ut_print_buf(stderr,
2164
+ recv_previous_parsed_rec_offset - 100,
2165
ptr - recv_sys->buf + 200
2166
- recv_previous_parsed_rec_offset);
2170
fputs("InnoDB: WARNING: the log file may have been corrupt and it\n"
2171
"InnoDB: is possible that the log scan did not proceed\n"
2172
"InnoDB: far enough in recovery! Please run CHECK TABLE\n"
2173
"InnoDB: on your InnoDB tables to check that they are ok!\n"
2174
"InnoDB: If mysqld crashes after this recovery, look at\n"
2175
"InnoDB: " REFMAN "forcing-recovery.html\n"
2176
"InnoDB: about forcing recovery.\n", stderr);
2181
/*******************************************************//**
2182
Parses log records from a buffer and stores them in a hash table to await
2183
merging into file pages.
2184
@return currently always returns FALSE */
2187
recv_parse_log_recs(
2188
/*================*/
2189
ibool store_to_hash) /*!< in: TRUE if the records should be stored
2190
to the hash table; this is set to FALSE if just
2191
debug checking is needed */
2198
ib_uint64_t new_recovered_lsn;
2199
ib_uint64_t old_lsn;
2206
ut_ad(mutex_own(&(log_sys->mutex)));
2207
ut_ad(recv_sys->parse_start_lsn != 0);
2209
ptr = recv_sys->buf + recv_sys->recovered_offset;
2211
end_ptr = recv_sys->buf + recv_sys->len;
2213
if (ptr == end_ptr) {
2218
single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG;
2220
if (single_rec || *ptr == MLOG_DUMMY_RECORD) {
2221
/* The mtr only modified a single page, or this is a file op */
2223
old_lsn = recv_sys->recovered_lsn;
2225
/* Try to parse a log record, fetching its type, space id,
2226
page no, and a pointer to the body of the log record */
2228
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
2231
if (len == 0 || recv_sys->found_corrupt_log) {
2232
if (recv_sys->found_corrupt_log) {
2234
recv_report_corrupt_log(ptr,
2235
type, space, page_no);
2241
new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
2243
if (new_recovered_lsn > recv_sys->scanned_lsn) {
2244
/* The log record filled a log block, and we require
2245
that also the next log block should have been scanned
2251
recv_previous_parsed_rec_type = (ulint)type;
2252
recv_previous_parsed_rec_offset = recv_sys->recovered_offset;
2253
recv_previous_parsed_rec_is_multi = 0;
2255
recv_sys->recovered_offset += len;
2256
recv_sys->recovered_lsn = new_recovered_lsn;
2259
if (log_debug_writes) {
2261
"InnoDB: Parsed a single log rec"
2262
" type %lu len %lu space %lu page no %lu\n",
2263
(ulong) type, (ulong) len, (ulong) space,
2266
#endif /* UNIV_DEBUG */
2268
if (type == MLOG_DUMMY_RECORD) {
2271
} else if (!store_to_hash) {
2272
/* In debug checking, update a replicate page
2273
according to the log record, and check that it
2274
becomes identical with the original page */
2275
#ifdef UNIV_LOG_DEBUG
2276
recv_check_incomplete_log_recs(ptr, len);
2277
#endif/* UNIV_LOG_DEBUG */
2279
} else if (type == MLOG_FILE_CREATE
2280
|| type == MLOG_FILE_CREATE2
2281
|| type == MLOG_FILE_RENAME
2282
|| type == MLOG_FILE_DELETE) {
2284
#ifdef UNIV_HOTBACKUP
2285
if (recv_replay_file_ops) {
2287
/* In ibbackup --apply-log, replay an .ibd file
2288
operation, if possible; note that
2289
fil_path_to_mysql_datadir is set in ibbackup to
2290
point to the datadir we should use there */
2292
if (NULL == fil_op_log_parse_or_replay(
2293
body, end_ptr, type,
2296
"InnoDB: Error: file op"
2297
" log record of type %lu"
2298
" space %lu not complete in\n"
2299
"InnoDB: the replay phase."
2308
/* In normal mysqld crash recovery we do not try to
2309
replay file operations */
2310
#ifdef UNIV_LOG_LSN_DEBUG
2311
} else if (type == MLOG_LSN) {
2312
/* Do not add these records to the hash table.
2313
The page number and space id fields are misused
2314
for something else. */
2315
#endif /* UNIV_LOG_LSN_DEBUG */
2317
recv_add_to_hash_table(type, space, page_no, body,
2319
recv_sys->recovered_lsn);
2322
/* Check that all the records associated with the single mtr
2323
are included within the buffer */
2329
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
2331
if (len == 0 || recv_sys->found_corrupt_log) {
2333
if (recv_sys->found_corrupt_log) {
2335
recv_report_corrupt_log(
2336
ptr, type, space, page_no);
2342
recv_previous_parsed_rec_type = (ulint)type;
2343
recv_previous_parsed_rec_offset
2344
= recv_sys->recovered_offset + total_len;
2345
recv_previous_parsed_rec_is_multi = 1;
2347
#ifdef UNIV_LOG_DEBUG
2348
if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
2349
recv_check_incomplete_log_recs(ptr, len);
2351
#endif /* UNIV_LOG_DEBUG */
2354
if (log_debug_writes) {
2356
"InnoDB: Parsed a multi log rec"
2358
" space %lu page no %lu\n",
2359
(ulong) type, (ulong) len,
2360
(ulong) space, (ulong) page_no);
2362
#endif /* UNIV_DEBUG */
2369
if (type == MLOG_MULTI_REC_END) {
2371
/* Found the end mark for the records */
2377
new_recovered_lsn = recv_calc_lsn_on_data_add(
2378
recv_sys->recovered_lsn, total_len);
2380
if (new_recovered_lsn > recv_sys->scanned_lsn) {
2381
/* The log record filled a log block, and we require
2382
that also the next log block should have been scanned
2388
/* Add all the records to the hash table */
2390
ptr = recv_sys->buf + recv_sys->recovered_offset;
2393
old_lsn = recv_sys->recovered_lsn;
2394
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
2396
if (recv_sys->found_corrupt_log) {
2398
recv_report_corrupt_log(ptr,
2399
type, space, page_no);
2403
ut_a(0 == ((ulint)*ptr & MLOG_SINGLE_REC_FLAG));
2405
recv_sys->recovered_offset += len;
2406
recv_sys->recovered_lsn
2407
= recv_calc_lsn_on_data_add(old_lsn, len);
2408
if (type == MLOG_MULTI_REC_END) {
2410
/* Found the end mark for the records */
2416
#ifdef UNIV_LOG_LSN_DEBUG
2418
#endif /* UNIV_LOG_LSN_DEBUG */
2420
recv_add_to_hash_table(type, space, page_no,
2433
/*******************************************************//**
2434
Adds data from a new log block to the parsing buffer of recv_sys if
2435
recv_sys->parse_start_lsn is non-zero.
2436
@return TRUE if more data added */
2439
recv_sys_add_to_parsing_buf(
2440
/*========================*/
2441
const byte* log_block, /*!< in: log block */
2442
ib_uint64_t scanned_lsn) /*!< in: lsn of how far we were able
2443
to find data in this log block */
2450
ut_ad(scanned_lsn >= recv_sys->scanned_lsn);
2452
if (!recv_sys->parse_start_lsn) {
2453
/* Cannot start parsing yet because no start point for
2459
data_len = log_block_get_data_len(log_block);
2461
if (recv_sys->parse_start_lsn >= scanned_lsn) {
2465
} else if (recv_sys->scanned_lsn >= scanned_lsn) {
2469
} else if (recv_sys->parse_start_lsn > recv_sys->scanned_lsn) {
2470
more_len = (ulint) (scanned_lsn - recv_sys->parse_start_lsn);
2472
more_len = (ulint) (scanned_lsn - recv_sys->scanned_lsn);
2475
if (more_len == 0) {
2480
ut_ad(data_len >= more_len);
2482
start_offset = data_len - more_len;
2484
if (start_offset < LOG_BLOCK_HDR_SIZE) {
2485
start_offset = LOG_BLOCK_HDR_SIZE;
2488
end_offset = data_len;
2490
if (end_offset > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
2491
end_offset = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
2494
ut_ad(start_offset <= end_offset);
2496
if (start_offset < end_offset) {
2497
ut_memcpy(recv_sys->buf + recv_sys->len,
2498
log_block + start_offset, end_offset - start_offset);
2500
recv_sys->len += end_offset - start_offset;
2502
ut_a(recv_sys->len <= RECV_PARSING_BUF_SIZE);
2508
/*******************************************************//**
2509
Moves the parsing buffer data left to the buffer start. */
2512
recv_sys_justify_left_parsing_buf(void)
2513
/*===================================*/
2515
ut_memmove(recv_sys->buf, recv_sys->buf + recv_sys->recovered_offset,
2516
recv_sys->len - recv_sys->recovered_offset);
2518
recv_sys->len -= recv_sys->recovered_offset;
2520
recv_sys->recovered_offset = 0;
2523
/*******************************************************//**
2524
Scans log from a buffer and stores new log data to the parsing buffer.
2525
Parses and hashes the log records if new data found. Unless
2526
UNIV_HOTBACKUP is defined, this function will apply log records
2527
automatically when the hash table becomes full.
2528
@return TRUE if limit_lsn has been reached, or if unable to scan any
2529
more in this log group */
2534
ulint available_memory,/*!< in: we let the hash table of recs
2535
to grow to this size, at the maximum */
2536
ibool store_to_hash, /*!< in: TRUE if the records should be
2537
stored to the hash table; this is set
2538
to FALSE if just debug checking is
2540
const byte* buf, /*!< in: buffer containing a log
2541
segment or garbage */
2542
ulint len, /*!< in: buffer length */
2543
ib_uint64_t start_lsn, /*!< in: buffer start lsn */
2544
ib_uint64_t* contiguous_lsn, /*!< in/out: it is known that all log
2545
groups contain contiguous log data up
2547
ib_uint64_t* group_scanned_lsn)/*!< out: scanning succeeded up to
2550
const byte* log_block;
2552
ib_uint64_t scanned_lsn;
2557
ut_ad(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
2558
ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
2560
ut_a(store_to_hash <= TRUE);
2565
scanned_lsn = start_lsn;
2569
no = log_block_get_hdr_no(log_block);
2571
fprintf(stderr, "Log block header no %lu\n", no);
2573
fprintf(stderr, "Scanned lsn no %lu\n",
2574
log_block_convert_lsn_to_no(scanned_lsn));
2576
if (no != log_block_convert_lsn_to_no(scanned_lsn)
2577
|| !log_block_checksum_is_ok_or_old_format(log_block)) {
2579
if (no == log_block_convert_lsn_to_no(scanned_lsn)
2580
&& !log_block_checksum_is_ok_or_old_format(
2583
"InnoDB: Log block no %lu at"
2585
"InnoDB: ok header, but checksum field"
2586
" contains %lu, should be %lu\n",
2589
(ulong) log_block_get_checksum(
2591
(ulong) log_block_calc_checksum(
2595
/* Garbage or an incompletely written log block */
2602
if (log_block_get_flush_bit(log_block)) {
2603
/* This block was a start of a log flush operation:
2604
we know that the previous flush operation must have
2605
been completed for all log groups before this block
2606
can have been flushed to any of the groups. Therefore,
2607
we know that log data is contiguous up to scanned_lsn
2608
in all non-corrupt log groups. */
2610
if (scanned_lsn > *contiguous_lsn) {
2611
*contiguous_lsn = scanned_lsn;
2615
data_len = log_block_get_data_len(log_block);
2617
if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE))
2618
&& scanned_lsn + data_len > recv_sys->scanned_lsn
2619
&& (recv_sys->scanned_checkpoint_no > 0)
2620
&& (log_block_get_checkpoint_no(log_block)
2621
< recv_sys->scanned_checkpoint_no)
2622
&& (recv_sys->scanned_checkpoint_no
2623
- log_block_get_checkpoint_no(log_block)
2626
/* Garbage from a log buffer flush which was made
2627
before the most recent database recovery */
2630
#ifdef UNIV_LOG_DEBUG
2631
/* This is not really an error, but currently
2632
we stop here in the debug version: */
2639
if (!recv_sys->parse_start_lsn
2640
&& (log_block_get_first_rec_group(log_block) > 0)) {
2642
/* We found a point from which to start the parsing
2645
recv_sys->parse_start_lsn = scanned_lsn
2646
+ log_block_get_first_rec_group(log_block);
2647
recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
2648
recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
2651
scanned_lsn += data_len;
2653
if (scanned_lsn > recv_sys->scanned_lsn) {
2655
/* We have found more entries. If this scan is
2656
of startup type, we must initiate crash recovery
2657
environment before parsing these log records. */
2659
#ifndef UNIV_HOTBACKUP
2660
if (recv_log_scan_is_startup_type
2661
&& !recv_needed_recovery) {
2664
"InnoDB: Log scan progressed"
2665
" past the checkpoint lsn %llu\n",
2666
recv_sys->scanned_lsn);
2667
recv_init_crash_recovery();
2669
#endif /* !UNIV_HOTBACKUP */
2671
/* We were able to find more log data: add it to the
2672
parsing buffer if parse_start_lsn is already
2675
if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE
2676
>= RECV_PARSING_BUF_SIZE) {
2678
"InnoDB: Error: log parsing"
2680
" Recovery may have failed!\n");
2682
recv_sys->found_corrupt_log = TRUE;
2684
} else if (!recv_sys->found_corrupt_log) {
2685
more_data = recv_sys_add_to_parsing_buf(
2686
log_block, scanned_lsn);
2689
recv_sys->scanned_lsn = scanned_lsn;
2690
recv_sys->scanned_checkpoint_no
2691
= log_block_get_checkpoint_no(log_block);
2694
if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
2695
/* Log data for this group ends here */
2700
log_block += OS_FILE_LOG_BLOCK_SIZE;
2702
} while (log_block < buf + len && !finished);
2704
*group_scanned_lsn = scanned_lsn;
2706
if (recv_needed_recovery
2707
|| (recv_is_from_backup && !recv_is_making_a_backup)) {
2708
recv_scan_print_counter++;
2710
if (finished || (recv_scan_print_counter % 80 == 0)) {
2713
"InnoDB: Doing recovery: scanned up to"
2714
" log sequence number %llu\n",
2715
*group_scanned_lsn);
2719
if (more_data && !recv_sys->found_corrupt_log) {
2720
/* Try to parse more log records */
2722
recv_parse_log_recs(store_to_hash);
2724
#ifndef UNIV_HOTBACKUP
2725
if (store_to_hash && mem_heap_get_size(recv_sys->heap)
2726
> available_memory) {
2728
/* Hash table of log records has grown too big:
2729
empty it; FALSE means no ibuf operations
2730
allowed, as we cannot add new records to the
2731
log yet: they would be produced by ibuf
2734
recv_apply_hashed_log_recs(FALSE);
2736
#endif /* !UNIV_HOTBACKUP */
2738
if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
2739
/* Move parsing buffer data to the buffer start */
2741
recv_sys_justify_left_parsing_buf();
2748
#ifndef UNIV_HOTBACKUP
2749
/*******************************************************//**
2750
Scans log from a buffer and stores new log data to the parsing buffer. Parses
2751
and hashes the log records if new data found. */
2754
recv_group_scan_log_recs(
2755
/*=====================*/
2756
log_group_t* group, /*!< in: log group */
2757
ib_uint64_t* contiguous_lsn, /*!< in/out: it is known that all log
2758
groups contain contiguous log data up
2760
ib_uint64_t* group_scanned_lsn)/*!< out: scanning succeeded up to
2764
ib_uint64_t start_lsn;
2765
ib_uint64_t end_lsn;
2769
start_lsn = *contiguous_lsn;
2772
end_lsn = start_lsn + RECV_SCAN_SIZE;
2774
log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
2775
group, start_lsn, end_lsn);
2777
finished = recv_scan_log_recs(
2778
(buf_pool->curr_size - recv_n_pool_free_frames)
2779
* UNIV_PAGE_SIZE, TRUE, log_sys->buf, RECV_SCAN_SIZE,
2780
start_lsn, contiguous_lsn, group_scanned_lsn);
2781
start_lsn = end_lsn;
2785
if (log_debug_writes) {
2787
"InnoDB: Scanned group %lu up to"
2788
" log sequence number %llu\n",
2790
*group_scanned_lsn);
2792
#endif /* UNIV_DEBUG */
2795
/*******************************************************//**
2796
Initialize crash recovery environment. Can be called iff
2797
recv_needed_recovery == FALSE. */
2800
recv_init_crash_recovery(void)
2801
/*==========================*/
2803
ut_a(!recv_needed_recovery);
2805
recv_needed_recovery = TRUE;
2807
ut_print_timestamp(stderr);
2810
" InnoDB: Database was not"
2811
" shut down normally!\n"
2812
"InnoDB: Starting crash recovery.\n");
2815
"InnoDB: Reading tablespace information"
2816
" from the .ibd files...\n");
2818
fil_load_single_table_tablespaces();
2820
/* If we are using the doublewrite method, we will
2821
check if there are half-written pages in data files,
2822
and restore them from the doublewrite buffer if
2825
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
2828
"InnoDB: Restoring possible"
2829
" half-written data pages from"
2830
" the doublewrite\n"
2831
"InnoDB: buffer...\n");
2832
trx_sys_doublewrite_init_or_restore_pages(TRUE);
2836
/********************************************************//**
2837
Recovers from a checkpoint. When this function returns, the database is able
2838
to start processing of new user transactions, but the function
2839
recv_recovery_from_checkpoint_finish should be called later to complete
2840
the recovery and free the resources used in it.
2841
NOTE(review): in this view the body below is incomplete (several
declarations, return statements, braces and fprintf arguments are not
visible), so only the visible steps are annotated.
@return error code or DB_SUCCESS */
2844
recv_recovery_from_checkpoint_start_func(
2845
/*=====================================*/
2846
#ifdef UNIV_LOG_ARCHIVE
2847
ulint type, /*!< in: LOG_CHECKPOINT or
2849
ib_uint64_t limit_lsn, /*!< in: recover up to this lsn
2851
#endif /* UNIV_LOG_ARCHIVE */
2852
ib_uint64_t min_flushed_lsn,/*!< in: min flushed lsn from
2854
ib_uint64_t max_flushed_lsn)/*!< in: max flushed lsn from
2858
log_group_t* max_cp_group;
2859
log_group_t* up_to_date_group;
2861
ib_uint64_t checkpoint_lsn;
2862
ib_uint64_t checkpoint_no;
2863
ib_uint64_t old_scanned_lsn;
2864
ib_uint64_t group_scanned_lsn;
2865
ib_uint64_t contiguous_lsn;
2866
ib_uint64_t archived_lsn;
2868
byte log_hdr_buf[LOG_FILE_HDR_SIZE];
2871
#ifdef UNIV_LOG_ARCHIVE
2872
ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX);
2873
/** TRUE when recovering from a checkpoint */
2874
# define TYPE_CHECKPOINT (type == LOG_CHECKPOINT)
2875
/** Recover up to this log sequence number */
2876
# define LIMIT_LSN limit_lsn
2877
#else /* UNIV_LOG_ARCHIVE */
2878
/** TRUE when recovering from a checkpoint */
2879
# define TYPE_CHECKPOINT 1
2880
/** Recover up to this log sequence number */
2881
# define LIMIT_LSN IB_ULONGLONG_MAX
2882
#endif /* UNIV_LOG_ARCHIVE */
2884
/* For a checkpoint-type recovery, call recv_sys_init() with the current
buffer pool size. */
if (TYPE_CHECKPOINT) {
2886
recv_sys_init(buf_pool_get_curr_size());
2889
if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
2891
"InnoDB: The user has set SRV_FORCE_NO_LOG_REDO on\n");
2893
"InnoDB: Skipping log redo\n");
2898
recv_recovery_on = TRUE;
2900
recv_sys->limit_lsn = LIMIT_LSN;
2902
mutex_enter(&(log_sys->mutex));
2904
/* Look for the latest checkpoint from any of the log groups */
2906
err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);
2908
if (err != DB_SUCCESS) {
2910
mutex_exit(&(log_sys->mutex));
2915
log_group_read_checkpoint_info(max_cp_group, max_cp_field);
2917
buf = log_sys->checkpoint_buf;
2919
/* Decode the checkpoint lsn, the checkpoint number and the archived lsn
from the checkpoint buffer. */
checkpoint_lsn = mach_read_ull(buf + LOG_CHECKPOINT_LSN);
2920
checkpoint_no = mach_read_ull(buf + LOG_CHECKPOINT_NO);
2921
archived_lsn = mach_read_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
2923
/* Read the first log file header to print a note if this is
2924
a recovery from a restored InnoDB Hot Backup */
2926
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id, 0,
2927
0, 0, LOG_FILE_HDR_SIZE,
2928
log_hdr_buf, max_cp_group);
2930
if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
2931
(byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
2932
/* This log file was created by ibbackup --restore: print
2933
a note to the user about it */
2936
"InnoDB: The log file was created by"
2937
" ibbackup --apply-log at\n"
2939
log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP);
2941
"InnoDB: NOTE: the following crash recovery"
2942
" is part of a normal restore.\n");
2944
/* Wipe over the label now */
2946
memset(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
2948
/* Write to the log file to wipe over the label */
2949
fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE,
2950
max_cp_group->space_id, 0,
2951
0, 0, OS_FILE_LOG_BLOCK_SIZE,
2952
log_hdr_buf, max_cp_group);
2955
#ifdef UNIV_LOG_ARCHIVE
2956
group = UT_LIST_GET_FIRST(log_sys->log_groups);
2959
log_checkpoint_get_nth_group_info(buf, group->id,
2960
&(group->archived_file_no),
2961
&(group->archived_offset));
2963
group = UT_LIST_GET_NEXT(log_groups, group);
2965
#endif /* UNIV_LOG_ARCHIVE */
2967
if (TYPE_CHECKPOINT) {
2968
/* Start reading the log groups from the checkpoint lsn up. The
2969
variable contiguous_lsn contains an lsn up to which the log is
2970
known to be contiguously written to all log groups. */
2972
recv_sys->parse_start_lsn = checkpoint_lsn;
2973
recv_sys->scanned_lsn = checkpoint_lsn;
2974
recv_sys->scanned_checkpoint_no = 0;
2975
recv_sys->recovered_lsn = checkpoint_lsn;
2977
srv_start_lsn = checkpoint_lsn;
2980
/* Scanning starts from the log block boundary at or below the current
scanned lsn. */
contiguous_lsn = ut_uint64_align_down(recv_sys->scanned_lsn,
2981
OS_FILE_LOG_BLOCK_SIZE);
2982
if (TYPE_CHECKPOINT) {
2983
up_to_date_group = max_cp_group;
2984
#ifdef UNIV_LOG_ARCHIVE
2988
/* Try to recover the remaining part from logs: first from
2989
the logs of the archived group */
2991
group = recv_sys->archive_group;
2992
capacity = log_group_get_capacity(group);
2994
if (recv_sys->scanned_lsn > checkpoint_lsn + capacity
2995
|| checkpoint_lsn > recv_sys->scanned_lsn + capacity) {
2997
mutex_exit(&(log_sys->mutex));
2999
/* The group does not contain enough log: probably
3000
an archived log file was missing or corrupt */
3005
recv_group_scan_log_recs(group, &contiguous_lsn,
3006
&group_scanned_lsn);
3007
if (recv_sys->scanned_lsn < checkpoint_lsn) {
3009
mutex_exit(&(log_sys->mutex));
3011
/* The group did not contain enough log: an archived
3012
log file was missing or invalid, or the log group
3018
group->scanned_lsn = group_scanned_lsn;
3019
up_to_date_group = group;
3020
#endif /* UNIV_LOG_ARCHIVE */
3023
ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
3025
group = UT_LIST_GET_FIRST(log_sys->log_groups);
3027
#ifdef UNIV_LOG_ARCHIVE
3028
if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) {
3029
group = UT_LIST_GET_NEXT(log_groups, group);
3031
#endif /* UNIV_LOG_ARCHIVE */
3033
/* Set the flag to publish that we are doing startup scan. */
3034
recv_log_scan_is_startup_type = TYPE_CHECKPOINT;
3036
old_scanned_lsn = recv_sys->scanned_lsn;
3038
recv_group_scan_log_recs(group, &contiguous_lsn,
3039
&group_scanned_lsn);
3040
group->scanned_lsn = group_scanned_lsn;
3042
if (old_scanned_lsn < group_scanned_lsn) {
3043
/* We found a more up-to-date group */
3045
up_to_date_group = group;
3048
#ifdef UNIV_LOG_ARCHIVE
3049
if ((type == LOG_ARCHIVE)
3050
&& (group == recv_sys->archive_group)) {
3051
group = UT_LIST_GET_NEXT(log_groups, group);
3053
#endif /* UNIV_LOG_ARCHIVE */
3055
group = UT_LIST_GET_NEXT(log_groups, group);
3058
/* Done with startup scan. Clear the flag. */
3059
recv_log_scan_is_startup_type = FALSE;
3060
if (TYPE_CHECKPOINT) {
3061
/* NOTE: we always do a 'recovery' at startup, but only if
3062
there is something wrong we will print a message to the
3063
user about recovery: */
3065
if (checkpoint_lsn != max_flushed_lsn
3066
|| checkpoint_lsn != min_flushed_lsn) {
3068
if (checkpoint_lsn < max_flushed_lsn) {
3070
"InnoDB: #########################"
3071
"#################################\n"
3074
"InnoDB: The log sequence number"
3075
" in ibdata files is higher\n"
3076
"InnoDB: than the log sequence number"
3077
" in the ib_logfiles! Are you sure\n"
3078
"InnoDB: you are using the right"
3079
" ib_logfiles to start up"
3081
"InnoDB: Log sequence number in"
3082
" ib_logfiles is %llu, log\n"
3083
"InnoDB: sequence numbers stamped"
3084
" to ibdata file headers are between\n"
3085
"InnoDB: %llu and %llu.\n"
3086
"InnoDB: #########################"
3087
"#################################\n",
3093
if (!recv_needed_recovery) {
3095
"InnoDB: The log sequence number"
3096
" in ibdata files does not match\n"
3097
"InnoDB: the log sequence number"
3098
" in the ib_logfiles!\n");
3099
recv_init_crash_recovery();
3103
if (!recv_needed_recovery) {
3104
/* Init the doublewrite buffer memory structure */
3105
trx_sys_doublewrite_init_or_restore_pages(FALSE);
3109
/* We currently have only one log group */
3110
if (group_scanned_lsn < checkpoint_lsn) {
3111
ut_print_timestamp(stderr);
3113
" InnoDB: ERROR: We were only able to scan the log"
3115
"InnoDB: %llu, but a checkpoint was at %llu.\n"
3116
"InnoDB: It is possible that"
3117
" the database is now corrupt!\n",
3122
if (group_scanned_lsn < recv_max_page_lsn) {
3123
ut_print_timestamp(stderr);
3125
" InnoDB: ERROR: We were only able to scan the log"
3127
"InnoDB: but a database page a had an lsn %llu."
3128
" It is possible that the\n"
3129
"InnoDB: database is now corrupt!\n",
3134
if (recv_sys->recovered_lsn < checkpoint_lsn) {
3136
mutex_exit(&(log_sys->mutex));
3138
if (recv_sys->recovered_lsn >= LIMIT_LSN) {
3148
/* Synchronize the uncorrupted log groups to the most up-to-date log
3149
group; we also copy checkpoint info to groups */
3151
log_sys->next_checkpoint_lsn = checkpoint_lsn;
3152
log_sys->next_checkpoint_no = checkpoint_no + 1;
3154
#ifdef UNIV_LOG_ARCHIVE
3155
log_sys->archived_lsn = archived_lsn;
3156
#endif /* UNIV_LOG_ARCHIVE */
3158
recv_synchronize_groups(up_to_date_group);
3160
if (!recv_needed_recovery) {
3161
ut_a(checkpoint_lsn == recv_sys->recovered_lsn);
3163
srv_start_lsn = recv_sys->recovered_lsn;
3166
/* Continue logging from the recovered lsn: copy recv_sys->last_block to
the start of the log buffer and set both the free position and the next
write position to the offset of that lsn within its log block. */
log_sys->lsn = recv_sys->recovered_lsn;
3168
ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE);
3170
log_sys->buf_free = (ulint) log_sys->lsn % OS_FILE_LOG_BLOCK_SIZE;
3171
log_sys->buf_next_to_write = log_sys->buf_free;
3172
log_sys->written_to_some_lsn = log_sys->lsn;
3173
log_sys->written_to_all_lsn = log_sys->lsn;
3175
log_sys->last_checkpoint_lsn = checkpoint_lsn;
3177
log_sys->next_checkpoint_no = checkpoint_no + 1;
3179
#ifdef UNIV_LOG_ARCHIVE
3180
if (archived_lsn == IB_ULONGLONG_MAX) {
3182
log_sys->archiving_state = LOG_ARCH_OFF;
3184
#endif /* UNIV_LOG_ARCHIVE */
3186
/* Set the apply_log_recs flag while holding recv_sys->mutex. */
mutex_enter(&(recv_sys->mutex));
3188
recv_sys->apply_log_recs = TRUE;
3190
mutex_exit(&(recv_sys->mutex));
3192
mutex_exit(&(log_sys->mutex));
3194
recv_lsn_checks_on = TRUE;
3196
/* The database is now ready to start almost normal processing of user
3197
transactions: transaction rollbacks and the application of the log
3198
records in the hash table can be run in background. */
3202
#undef TYPE_CHECKPOINT
3206
/********************************************************//**
3207
Completes recovery from a checkpoint. In the visible code this applies the
hashed log records (unless srv_force_recovery >= SRV_FORCE_NO_LOG_REDO),
clears recv_recovery_on, calls trx_rollback_or_clean_recovered(FALSE) and
row_merge_drop_temp_indexes().
NOTE(review): parts of the body (fprintf openers, string fragments, braces
and the arguments of os_thread_create) are not visible in this view. */
3210
recv_recovery_from_checkpoint_finish(void)
3211
/*======================================*/
3215
/* Apply the hashed log records to the respective file pages */
3217
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
3219
recv_apply_hashed_log_recs(TRUE);
3223
if (log_debug_writes) {
3225
"InnoDB: Log records applied to the database\n");
3227
#endif /* UNIV_DEBUG */
3229
/* Only when crash recovery was actually needed: call the helpers that
report the MySQL master log position and the binlog offset. */
if (recv_needed_recovery) {
3230
trx_sys_print_mysql_master_log_pos();
3231
trx_sys_print_mysql_binlog_offset();
3234
/* Corrupt log data was flagged during scanning or parsing: warn the
user. */
if (recv_sys->found_corrupt_log) {
3237
"InnoDB: WARNING: the log file may have been"
3239
"InnoDB: is possible that the log scan or parsing"
3240
" did not proceed\n"
3241
"InnoDB: far enough in recovery. Please run"
3243
"InnoDB: on your InnoDB tables to check that"
3245
"InnoDB: It may be safest to recover your"
3246
" InnoDB database from\n"
3247
"InnoDB: a backup!\n");
3250
/* Free the resources of the recovery system */
3252
recv_recovery_on = FALSE;
3254
#ifndef UNIV_LOG_DEBUG
3255
recv_sys_debug_free();
3257
/* Roll back any recovered data dictionary transactions, so
3258
that the data dictionary tables will be free of any locks.
3259
The data dictionary latch should guarantee that there is at
3260
most one data dictionary transaction active at a time. */
3261
trx_rollback_or_clean_recovered(FALSE);
3263
/* Drop partially created indexes. */
3264
row_merge_drop_temp_indexes();
3266
#ifdef UNIV_SYNC_DEBUG
3267
/* Wait for a while so that created threads have time to suspend
3268
themselves before we switch the latching order checks on */
3269
os_thread_sleep(1000000);
3271
/* Switch latching order checks on in sync0sync.c */
3272
sync_order_checks_on = TRUE;
3274
if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
3275
/* Rollback the uncommitted transactions which have no user
3278
os_thread_create(trx_rollback_or_clean_all_recovered,
3283
/******************************************************//**
3284
Resets the logs. The contents of log files will be lost! */
3289
ib_uint64_t lsn, /*!< in: reset to this lsn
3290
rounded up to be divisible by
3291
OS_FILE_LOG_BLOCK_SIZE, after
3293
LOG_BLOCK_HDR_SIZE */
3294
#ifdef UNIV_LOG_ARCHIVE
3295
ulint arch_log_no, /*!< in: next archived log file number */
3296
#endif /* UNIV_LOG_ARCHIVE */
3297
ibool new_logs_created)/*!< in: TRUE if resetting logs
3298
is done at the log creation;
3299
FALSE if it is done after
3304
ut_ad(mutex_own(&(log_sys->mutex)));
3306
log_sys->lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
3308
group = UT_LIST_GET_FIRST(log_sys->log_groups);
3311
group->lsn = log_sys->lsn;
3312
group->lsn_offset = LOG_FILE_HDR_SIZE;
3313
#ifdef UNIV_LOG_ARCHIVE
3314
group->archived_file_no = arch_log_no;
3315
group->archived_offset = 0;
3316
#endif /* UNIV_LOG_ARCHIVE */
3318
if (!new_logs_created) {
3319
recv_truncate_group(group, group->lsn, group->lsn,
3320
group->lsn, group->lsn);
3323
group = UT_LIST_GET_NEXT(log_groups, group);
3326
log_sys->buf_next_to_write = 0;
3327
log_sys->written_to_some_lsn = log_sys->lsn;
3328
log_sys->written_to_all_lsn = log_sys->lsn;
3330
log_sys->next_checkpoint_no = 0;
3331
log_sys->last_checkpoint_lsn = 0;
3333
#ifdef UNIV_LOG_ARCHIVE
3334
log_sys->archived_lsn = log_sys->lsn;
3335
#endif /* UNIV_LOG_ARCHIVE */
3337
log_block_init(log_sys->buf, log_sys->lsn);
3338
log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
3340
log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
3341
log_sys->lsn += LOG_BLOCK_HDR_SIZE;
3343
mutex_exit(&(log_sys->mutex));
3345
/* Reset the checkpoint fields in logs */
3347
log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
3348
log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
3350
mutex_enter(&(log_sys->mutex));
3352
#endif /* !UNIV_HOTBACKUP */
3354
#ifdef UNIV_HOTBACKUP
3355
/******************************************************//**
3356
Creates new log files after a backup has been restored.
In the visible code, n_log_files files named log_dir + "ib_logfile" + number
are created and sized to log_file_size; then the first file is reopened and
a header plus one log block, prepared in a zero-filled buffer, is written to
it.
NOTE(review): declarations, error branches and fprintf openers are not
visible in this view. */
3359
recv_reset_log_files_for_backup(
3360
/*============================*/
3361
const char* log_dir, /*!< in: log file directory path */
3362
ulint n_log_files, /*!< in: number of log files */
3363
ulint log_file_size, /*!< in: log file size */
3364
ib_uint64_t lsn) /*!< in: new start lsn, must be
3365
divisible by OS_FILE_LOG_BLOCK_SIZE */
3373
static const char ib_logfile_basename[] = "ib_logfile";
3375
/* Length of the directory prefix; used below to assert that the
generated file names fit in the name buffer. */
log_dir_len = strlen(log_dir);
3376
/* full path name of ib_logfile consists of log dir path + basename
3377
+ number. This must fit in the name buffer.
3379
ut_a(log_dir_len + strlen(ib_logfile_basename) + 11 < sizeof(name));
3381
buf = ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
3382
memset(buf, '\0', LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
3384
for (i = 0; i < n_log_files; i++) {
3386
sprintf(name, "%s%s%lu", log_dir,
3387
ib_logfile_basename, (ulong)i);
3389
log_file = os_file_create_simple(name, OS_FILE_CREATE,
3390
OS_FILE_READ_WRITE, &success);
3393
"InnoDB: Cannot create %s. Check that"
3394
" the file does not exist yet.\n", name);
3400
"Setting log file size to %lu %lu\n",
3401
(ulong) ut_get_high32(log_file_size),
3402
(ulong) log_file_size & 0xFFFFFFFFUL);
3404
success = os_file_set_size(name, log_file,
3405
log_file_size & 0xFFFFFFFFUL,
3406
ut_get_high32(log_file_size));
3410
"InnoDB: Cannot set %s size to %lu %lu\n",
3411
name, (ulong) ut_get_high32(log_file_size),
3412
(ulong) (log_file_size & 0xFFFFFFFFUL));
3416
os_file_flush(log_file);
3417
os_file_close(log_file);
3420
/* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */
3422
log_reset_first_header_and_checkpoint(buf, lsn);
3424
log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn);
3425
log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
3426
LOG_BLOCK_HDR_SIZE);
3427
/* Build the name of log file number 0 and open the already created
file for reading and writing. */
sprintf(name, "%s%s%lu", log_dir, ib_logfile_basename, (ulong)0);
3429
log_file = os_file_create_simple(name, OS_FILE_OPEN,
3430
OS_FILE_READ_WRITE, &success);
3432
fprintf(stderr, "InnoDB: Cannot open %s.\n", name);
3437
/* Write the prepared file header and the first log block, then flush
and close the file. */
os_file_write(name, log_file, buf, 0, 0,
3438
LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
3439
os_file_flush(log_file);
3440
os_file_close(log_file);
3444
#endif /* UNIV_HOTBACKUP */
3446
#ifdef UNIV_LOG_ARCHIVE
3447
/******************************************************//**
3448
Reads from the archive of a log group and performs recovery.
NOTE(review): labels, loop headers, return statements and parts of the
messages are not visible in this view; only the visible steps are annotated.
3449
@return TRUE if no more complete consistent archive files */
3452
log_group_recover_from_archive_file(
3453
/*================================*/
3454
log_group_t* group) /*!< in: log group */
3456
os_file_t file_handle;
3457
ib_uint64_t start_lsn;
3458
ib_uint64_t file_end_lsn;
3459
ib_uint64_t dummy_lsn;
3460
ib_uint64_t scanned_lsn;
3466
ulint file_size_high;
3475
/* Add the file to the archive file space; open the file */
3477
log_archived_file_name_gen(name, group->id, group->archived_file_no);
3479
file_handle = os_file_create(name, OS_FILE_OPEN,
3480
OS_FILE_LOG, OS_FILE_AIO, &ret);
3485
"InnoDB: Do you want to copy additional"
3486
" archived log files\n"
3487
"InnoDB: to the directory\n");
3489
"InnoDB: or were these all the files needed"
3492
"InnoDB: (Y == copy more files; N == this is all)?");
3494
input_char = getchar();
3496
if (input_char == (int) 'N') {
3499
} else if (input_char == (int) 'Y') {
3501
goto try_open_again;
3507
/* Query the file size; the high word of the size is asserted to be
zero below. */
ret = os_file_get_size(file_handle, &file_size, &file_size_high);
3510
ut_a(file_size_high == 0);
3512
fprintf(stderr, "InnoDB: Opened archived log file %s\n", name);
3514
ret = os_file_close(file_handle);
3516
if (file_size < LOG_FILE_HDR_SIZE) {
3518
"InnoDB: Archive file header incomplete %s\n", name);
3525
/* Add the archive file as a node to the space */
3527
fil_node_create(name, 1 + file_size / UNIV_PAGE_SIZE,
3528
group->archive_space_id, FALSE);
3529
#if RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE
3530
# error "RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE"
3533
/* Read the archive file header */
3534
/* NOTE(review): this fil_io() call passes 8 arguments, whereas the
fil_io() calls in recv_recovery_from_checkpoint_start_func() pass 9;
verify against the current fil_io() prototype. */
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->archive_space_id, 0, 0,
3535
LOG_FILE_HDR_SIZE, buf, NULL);
3537
/* Check if the archive file header is consistent */
3539
if (mach_read_from_4(buf + LOG_GROUP_ID) != group->id
3540
|| mach_read_from_4(buf + LOG_FILE_NO)
3541
!= group->archived_file_no) {
3543
"InnoDB: Archive file header inconsistent %s\n", name);
3548
if (!mach_read_from_4(buf + LOG_FILE_ARCH_COMPLETED)) {
3550
"InnoDB: Archive file not completely written %s\n",
3556
/* Read the start and end lsn of this archive file from its header. */
start_lsn = mach_read_ull(buf + LOG_FILE_START_LSN);
3557
file_end_lsn = mach_read_ull(buf + LOG_FILE_END_LSN);
3559
if (!recv_sys->scanned_lsn) {
3561
if (recv_sys->parse_start_lsn < start_lsn) {
3563
"InnoDB: Archive log file %s"
3564
" starts from too big a lsn\n",
3569
recv_sys->scanned_lsn = start_lsn;
3572
if (recv_sys->scanned_lsn != start_lsn) {
3575
"InnoDB: Archive log file %s starts from"
3581
read_offset = LOG_FILE_HDR_SIZE;
3584
/* Read at most RECV_SCAN_SIZE bytes at a time; if that would go past
the end of the file, shrink the length to the remaining bytes aligned
down to a multiple of the log block size. */
len = RECV_SCAN_SIZE;
3586
if (read_offset + len > file_size) {
3587
len = ut_calc_align_down(file_size - read_offset,
3588
OS_FILE_LOG_BLOCK_SIZE);
3597
if (log_debug_writes) {
3599
"InnoDB: Archive read starting at"
3600
" lsn %llu, len %lu from file %s\n",
3604
#endif /* UNIV_DEBUG */
3606
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE,
3607
group->archive_space_id, read_offset / UNIV_PAGE_SIZE,
3608
read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
3610
/* NOTE(review): recv_group_scan_log_recs() computes this memory limit
from buf_pool->curr_size, while this call uses buf_pool->n_frames;
confirm that the field exists when UNIV_LOG_ARCHIVE is defined. */
ret = recv_scan_log_recs(
3611
(buf_pool->n_frames - recv_n_pool_free_frames)
3612
* UNIV_PAGE_SIZE, TRUE, buf, len, start_lsn,
3613
&dummy_lsn, &scanned_lsn);
3615
if (scanned_lsn == file_end_lsn) {
3622
"InnoDB: Archive log file %s"
3623
" does not scan right\n",
3631
ut_ad(start_lsn == scanned_lsn);
3637
/********************************************************//**
3638
Recovers from archived log files, and also from log files, if they exist.
NOTE(review): declarations, loop headers, return statements and some call
arguments are not visible in this view; only the visible steps are annotated.
3639
@return error code or DB_SUCCESS */
3642
recv_recovery_from_archive_start(
3643
/*=============================*/
3644
ib_uint64_t min_flushed_lsn,/*!< in: min flushed lsn field from the
3646
ib_uint64_t limit_lsn, /*!< in: recover up to this lsn if
3648
ulint first_log_no) /*!< in: number of the first archived
3649
log file to use in the recovery; the
3650
file will be searched from
3651
INNOBASE_LOG_ARCH_DIR specified in
3652
server config file */
3663
recv_sys_init(buf_pool_get_curr_size());
3665
recv_recovery_on = TRUE;
3666
recv_recovery_from_backup_on = TRUE;
3668
recv_sys->limit_lsn = limit_lsn;
3672
/* Walk the list of log groups looking for the one whose id equals
group_id. */
group = UT_LIST_GET_FIRST(log_sys->log_groups);
3675
if (group->id == group_id) {
3680
group = UT_LIST_GET_NEXT(log_groups, group);
3685
"InnoDB: There is no log group defined with id %lu!\n",
3690
group->archived_file_no = first_log_no;
3692
/* Initial scan state: parsing and recovery start from min_flushed_lsn,
and nothing has been scanned yet. */
recv_sys->parse_start_lsn = min_flushed_lsn;
3694
recv_sys->scanned_lsn = 0;
3695
recv_sys->scanned_checkpoint_no = 0;
3696
recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
3698
recv_sys->archive_group = group;
3702
mutex_enter(&(log_sys->mutex));
3705
ret = log_group_recover_from_archive_file(group);
3707
/* Close and truncate a possible processed archive file
3708
from the file space */
3710
trunc_len = UNIV_PAGE_SIZE
3711
* fil_space_get_size(group->archive_space_id);
3712
if (trunc_len > 0) {
3713
fil_space_truncate_start(group->archive_space_id,
3717
group->archived_file_no++;
3720
/* The archived files did not reach limit_lsn: continue with
recv_recovery_from_checkpoint_start() in LOG_ARCHIVE mode. */
if (recv_sys->recovered_lsn < limit_lsn) {
3722
if (!recv_sys->scanned_lsn) {
3724
recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
3727
mutex_exit(&(log_sys->mutex));
3729
err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE,
3733
if (err != DB_SUCCESS) {
3738
mutex_enter(&(log_sys->mutex));
3741
if (limit_lsn != IB_ULONGLONG_MAX) {
3743
recv_apply_hashed_log_recs(FALSE);
3745
recv_reset_logs(recv_sys->recovered_lsn, 0, FALSE);
3748
mutex_exit(&(log_sys->mutex));
3753
/********************************************************//**
3754
Completes recovery from archive. */
3757
recv_recovery_from_archive_finish(void)
3758
/*===================================*/
3760
recv_recovery_from_checkpoint_finish();
3762
recv_recovery_from_backup_on = FALSE;
3764
#endif /* UNIV_LOG_ARCHIVE */