~ubuntu-branches/ubuntu/precise/mysql-5.1/precise

« back to all changes in this revision

Viewing changes to storage/innodb_plugin/log/log0recv.c

  • Committer: Bazaar Package Importer
  • Author(s): Norbert Tretkowski
  • Date: 2010-03-17 14:56:02 UTC
  • Revision ID: james.westby@ubuntu.com-20100317145602-x7e30l1b2sb5s6w6
Tags: upstream-5.1.45
Import upstream version 5.1.45

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*****************************************************************************
 
2
 
 
3
Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
 
4
 
 
5
This program is free software; you can redistribute it and/or modify it under
 
6
the terms of the GNU General Public License as published by the Free Software
 
7
Foundation; version 2 of the License.
 
8
 
 
9
This program is distributed in the hope that it will be useful, but WITHOUT
 
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 
12
 
 
13
You should have received a copy of the GNU General Public License along with
 
14
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 
15
Place, Suite 330, Boston, MA 02111-1307 USA
 
16
 
 
17
*****************************************************************************/
 
18
 
 
19
/**************************************************//**
 
20
@file log/log0recv.c
 
21
Recovery
 
22
 
 
23
Created 9/20/1997 Heikki Tuuri
 
24
*******************************************************/
 
25
 
 
26
#include "log0recv.h"
 
27
 
 
28
#ifdef UNIV_NONINL
 
29
#include "log0recv.ic"
 
30
#endif
 
31
 
 
32
#include "mem0mem.h"
 
33
#include "buf0buf.h"
 
34
#include "buf0flu.h"
 
35
#include "mtr0mtr.h"
 
36
#include "mtr0log.h"
 
37
#include "page0cur.h"
 
38
#include "page0zip.h"
 
39
#include "btr0btr.h"
 
40
#include "btr0cur.h"
 
41
#include "ibuf0ibuf.h"
 
42
#include "trx0undo.h"
 
43
#include "trx0rec.h"
 
44
#include "fil0fil.h"
 
45
#ifndef UNIV_HOTBACKUP
 
46
# include "buf0rea.h"
 
47
# include "srv0srv.h"
 
48
# include "srv0start.h"
 
49
# include "trx0roll.h"
 
50
# include "row0merge.h"
 
51
# include "sync0sync.h"
 
52
#else /* !UNIV_HOTBACKUP */
 
53
 
 
54
/** This is set to FALSE if the backup was originally taken with the
 
55
ibbackup --include regexp option: then we do not want to create tables in
 
56
directories which were not included */
 
57
UNIV_INTERN ibool       recv_replay_file_ops    = TRUE;
 
58
#endif /* !UNIV_HOTBACKUP */
 
59
 
 
60
/** Log records are stored in the hash table in chunks at most of this size;
 
61
this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
 
62
#define RECV_DATA_BLOCK_SIZE    (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
 
63
 
 
64
/** Read-ahead area in applying log records to file pages */
 
65
#define RECV_READ_AHEAD_AREA    32
 
66
 
 
67
/** The recovery system */
 
68
UNIV_INTERN recv_sys_t* recv_sys = NULL;
 
69
/** TRUE when applying redo log records during crash recovery; FALSE
 
70
otherwise.  Note that this is FALSE while a background thread is
 
71
rolling back incomplete transactions. */
 
72
UNIV_INTERN ibool       recv_recovery_on;
 
73
#ifdef UNIV_LOG_ARCHIVE
 
74
/** TRUE when applying redo log records from an archived log file */
 
75
UNIV_INTERN ibool       recv_recovery_from_backup_on;
 
76
#endif /* UNIV_LOG_ARCHIVE */
 
77
 
 
78
#ifndef UNIV_HOTBACKUP
 
79
/** TRUE when recv_init_crash_recovery() has been called. */
 
80
UNIV_INTERN ibool       recv_needed_recovery;
 
81
# ifdef UNIV_DEBUG
 
82
/** TRUE if writing to the redo log (mtr_commit) is forbidden.
 
83
Protected by log_sys->mutex. */
 
84
UNIV_INTERN ibool       recv_no_log_write = FALSE;
 
85
# endif /* UNIV_DEBUG */
 
86
 
 
87
/** TRUE if buf_page_is_corrupted() should check if the log sequence
 
88
number (FIL_PAGE_LSN) is in the future.  Initially FALSE, and set by
 
89
recv_recovery_from_checkpoint_start_func(). */
 
90
UNIV_INTERN ibool       recv_lsn_checks_on;
 
91
 
 
92
/** There are two conditions under which we scan the logs, the first
 
93
is normal startup and the second is when we do a recovery from an
 
94
archive.
 
95
This flag is set if we are doing a scan from the last checkpoint during
 
96
startup. If we find log entries that were written after the last checkpoint
 
97
we know that the server was not cleanly shutdown. We must then initialize
 
98
the crash recovery environment before attempting to store these entries in
 
99
the log hash table. */
 
100
static ibool            recv_log_scan_is_startup_type;
 
101
 
 
102
/** If the following is TRUE, the buffer pool file pages must be invalidated
 
103
after recovery and no ibuf operations are allowed; this becomes TRUE if
 
104
the log record hash table becomes too full, and log records must be merged
 
105
to file pages already before the recovery is finished: in this case no
 
106
ibuf operations are allowed, as they could modify the pages read in the
 
107
buffer pool before the pages have been recovered to the up-to-date state.
 
108
 
 
109
TRUE means that recovery is running and no operations on the log files
 
110
are allowed yet: the variable name is misleading. */
 
111
UNIV_INTERN ibool       recv_no_ibuf_operations;
 
112
/** TRUE when the redo log is being backed up */
 
113
# define recv_is_making_a_backup                FALSE
 
114
/** TRUE when recovering from a backed up redo log file */
 
115
# define recv_is_from_backup                    FALSE
 
116
#else /* !UNIV_HOTBACKUP */
 
117
# define recv_needed_recovery                   FALSE
 
118
/** TRUE when the redo log is being backed up */
 
119
UNIV_INTERN ibool       recv_is_making_a_backup = FALSE;
 
120
/** TRUE when recovering from a backed up redo log file */
 
121
UNIV_INTERN ibool       recv_is_from_backup     = FALSE;
 
122
# define buf_pool_get_curr_size() (5 * 1024 * 1024)
 
123
#endif /* !UNIV_HOTBACKUP */
 
124
/** The following counter is used to decide when to print info on
 
125
log scan */
 
126
static ulint    recv_scan_print_counter;
 
127
 
 
128
/** The type of the previous parsed redo log record */
 
129
static ulint    recv_previous_parsed_rec_type;
 
130
/** The offset of the previous parsed redo log record */
 
131
static ulint    recv_previous_parsed_rec_offset;
 
132
/** The 'multi' flag of the previous parsed redo log record */
 
133
static ulint    recv_previous_parsed_rec_is_multi;
 
134
 
 
135
/** Maximum page number encountered in the redo log */
 
136
UNIV_INTERN ulint       recv_max_parsed_page_no;
 
137
 
 
138
/** This many frames must be left free in the buffer pool when we scan
 
139
the log and store the scanned log records in the buffer pool: we will
 
140
use these free frames to read in pages when we start applying the
 
141
log records to the database. */
 
142
UNIV_INTERN ulint       recv_n_pool_free_frames;
 
143
 
 
144
/** The maximum lsn we see for a page during the recovery process. If this
 
145
is bigger than the lsn we are able to scan up to, that is an indication that
 
146
the recovery failed and the database may be corrupt. */
 
147
UNIV_INTERN ib_uint64_t recv_max_page_lsn;
 
148
 
 
149
/* prototypes */
 
150
 
 
151
#ifndef UNIV_HOTBACKUP
 
152
/*******************************************************//**
 
153
Initialize crash recovery environment. Can be called iff
 
154
recv_needed_recovery == FALSE. */
 
155
static
 
156
void
 
157
recv_init_crash_recovery(void);
 
158
/*===========================*/
 
159
#endif /* !UNIV_HOTBACKUP */
 
160
 
 
161
/********************************************************//**
 
162
Creates the recovery system. */
 
163
UNIV_INTERN
 
164
void
 
165
recv_sys_create(void)
 
166
/*=================*/
 
167
{
 
168
        if (recv_sys != NULL) {
 
169
 
 
170
                return;
 
171
        }
 
172
 
 
173
        recv_sys = mem_alloc(sizeof(*recv_sys));
 
174
        memset(recv_sys, 0x0, sizeof(*recv_sys));
 
175
 
 
176
        mutex_create(&recv_sys->mutex, SYNC_RECV);
 
177
 
 
178
        recv_sys->heap = NULL;
 
179
        recv_sys->addr_hash = NULL;
 
180
}
 
181
 
 
182
/********************************************************//**
 
183
Release recovery system mutexes. */
 
184
UNIV_INTERN
 
185
void
 
186
recv_sys_close(void)
 
187
/*================*/
 
188
{
 
189
        if (recv_sys != NULL) {
 
190
                if (recv_sys->addr_hash != NULL) {
 
191
                        hash_table_free(recv_sys->addr_hash);
 
192
                }
 
193
 
 
194
                if (recv_sys->heap != NULL) {
 
195
                        mem_heap_free(recv_sys->heap);
 
196
                }
 
197
 
 
198
                if (recv_sys->buf != NULL) {
 
199
                        ut_free(recv_sys->buf);
 
200
                }
 
201
 
 
202
                if (recv_sys->last_block_buf_start != NULL) {
 
203
                        mem_free(recv_sys->last_block_buf_start);
 
204
                }
 
205
 
 
206
                mutex_free(&recv_sys->mutex);
 
207
 
 
208
                mem_free(recv_sys);
 
209
                recv_sys = NULL;
 
210
        }
 
211
}
 
212
 
 
213
/********************************************************//**
 
214
Frees the recovery system memory. */
 
215
UNIV_INTERN
 
216
void
 
217
recv_sys_mem_free(void)
 
218
/*===================*/
 
219
{
 
220
        if (recv_sys != NULL) {
 
221
                if (recv_sys->addr_hash != NULL) {
 
222
                        hash_table_free(recv_sys->addr_hash);
 
223
                }
 
224
 
 
225
                if (recv_sys->heap != NULL) {
 
226
                        mem_heap_free(recv_sys->heap);
 
227
                }
 
228
 
 
229
                if (recv_sys->buf != NULL) {
 
230
                        ut_free(recv_sys->buf);
 
231
                }
 
232
 
 
233
                if (recv_sys->last_block_buf_start != NULL) {
 
234
                        mem_free(recv_sys->last_block_buf_start);
 
235
                }
 
236
 
 
237
                mem_free(recv_sys);
 
238
                recv_sys = NULL;
 
239
        }
 
240
}
 
241
 
 
242
/************************************************************
 
243
Reset the state of the recovery system variables. */
 
244
UNIV_INTERN
 
245
void
 
246
recv_sys_var_init(void)
 
247
/*===================*/
 
248
{
 
249
        recv_lsn_checks_on = FALSE;
 
250
 
 
251
        recv_n_pool_free_frames = 256;
 
252
 
 
253
        recv_recovery_on = FALSE;
 
254
 
 
255
#ifdef UNIV_LOG_ARCHIVE
 
256
        recv_recovery_from_backup_on = FALSE;
 
257
#endif /* UNIV_LOG_ARCHIVE */
 
258
 
 
259
        recv_needed_recovery = FALSE;
 
260
 
 
261
        recv_lsn_checks_on = FALSE;
 
262
 
 
263
        recv_log_scan_is_startup_type = FALSE;
 
264
 
 
265
        recv_no_ibuf_operations = FALSE;
 
266
 
 
267
        recv_scan_print_counter = 0;
 
268
 
 
269
        recv_previous_parsed_rec_type   = 999999;
 
270
 
 
271
        recv_previous_parsed_rec_offset = 0;
 
272
 
 
273
        recv_previous_parsed_rec_is_multi = 0;
 
274
 
 
275
        recv_max_parsed_page_no = 0;
 
276
 
 
277
        recv_n_pool_free_frames = 256;
 
278
 
 
279
        recv_max_page_lsn = 0;
 
280
}
 
281
 
 
282
/************************************************************
 
283
Inits the recovery system for a recovery operation. */
 
284
UNIV_INTERN
 
285
void
 
286
recv_sys_init(
 
287
/*==========*/
 
288
        ulint   available_memory)       /*!< in: available memory in bytes */
 
289
{
 
290
        if (recv_sys->heap != NULL) {
 
291
 
 
292
                return;
 
293
        }
 
294
 
 
295
        mutex_enter(&(recv_sys->mutex));
 
296
 
 
297
#ifndef UNIV_HOTBACKUP
 
298
        recv_sys->heap = mem_heap_create_in_buffer(256);
 
299
#else /* !UNIV_HOTBACKUP */
 
300
        recv_sys->heap = mem_heap_create(256);
 
301
        recv_is_from_backup = TRUE;
 
302
#endif /* !UNIV_HOTBACKUP */
 
303
 
 
304
        recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
 
305
        recv_sys->len = 0;
 
306
        recv_sys->recovered_offset = 0;
 
307
 
 
308
        recv_sys->addr_hash = hash_create(available_memory / 64);
 
309
        recv_sys->n_addrs = 0;
 
310
 
 
311
        recv_sys->apply_log_recs = FALSE;
 
312
        recv_sys->apply_batch_on = FALSE;
 
313
 
 
314
        recv_sys->last_block_buf_start = mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE);
 
315
 
 
316
        recv_sys->last_block = ut_align(recv_sys->last_block_buf_start,
 
317
                                        OS_FILE_LOG_BLOCK_SIZE);
 
318
        recv_sys->found_corrupt_log = FALSE;
 
319
 
 
320
        recv_max_page_lsn = 0;
 
321
 
 
322
        mutex_exit(&(recv_sys->mutex));
 
323
}
 
324
 
 
325
/********************************************************//**
 
326
Empties the hash table when it has been fully processed. */
 
327
static
 
328
void
 
329
recv_sys_empty_hash(void)
 
330
/*=====================*/
 
331
{
 
332
        ut_ad(mutex_own(&(recv_sys->mutex)));
 
333
 
 
334
        if (recv_sys->n_addrs != 0) {
 
335
                fprintf(stderr,
 
336
                        "InnoDB: Error: %lu pages with log records"
 
337
                        " were left unprocessed!\n"
 
338
                        "InnoDB: Maximum page number with"
 
339
                        " log records on it %lu\n",
 
340
                        (ulong) recv_sys->n_addrs,
 
341
                        (ulong) recv_max_parsed_page_no);
 
342
                ut_error;
 
343
        }
 
344
 
 
345
        hash_table_free(recv_sys->addr_hash);
 
346
        mem_heap_empty(recv_sys->heap);
 
347
 
 
348
        recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 256);
 
349
}
 
350
 
 
351
#ifndef UNIV_HOTBACKUP
 
352
# ifndef UNIV_LOG_DEBUG
 
353
/********************************************************//**
 
354
Frees the recovery system. */
 
355
static
 
356
void
 
357
recv_sys_debug_free(void)
 
358
/*=====================*/
 
359
{
 
360
        mutex_enter(&(recv_sys->mutex));
 
361
 
 
362
        hash_table_free(recv_sys->addr_hash);
 
363
        mem_heap_free(recv_sys->heap);
 
364
        ut_free(recv_sys->buf);
 
365
        mem_free(recv_sys->last_block_buf_start);
 
366
 
 
367
        recv_sys->buf = NULL;
 
368
        recv_sys->heap = NULL;
 
369
        recv_sys->addr_hash = NULL;
 
370
        recv_sys->last_block_buf_start = NULL;
 
371
 
 
372
        mutex_exit(&(recv_sys->mutex));
 
373
}
 
374
# endif /* UNIV_LOG_DEBUG */
 
375
 
 
376
/********************************************************//**
 
377
Truncates possible corrupted or extra records from a log group. */
 
378
static
 
379
void
 
380
recv_truncate_group(
 
381
/*================*/
 
382
        log_group_t*    group,          /*!< in: log group */
 
383
        ib_uint64_t     recovered_lsn,  /*!< in: recovery succeeded up to this
 
384
                                        lsn */
 
385
        ib_uint64_t     limit_lsn,      /*!< in: this was the limit for
 
386
                                        recovery */
 
387
        ib_uint64_t     checkpoint_lsn, /*!< in: recovery was started from this
 
388
                                        checkpoint */
 
389
        ib_uint64_t     archived_lsn)   /*!< in: the log has been archived up to
 
390
                                        this lsn */
 
391
{
 
392
        ib_uint64_t     start_lsn;
 
393
        ib_uint64_t     end_lsn;
 
394
        ib_uint64_t     finish_lsn1;
 
395
        ib_uint64_t     finish_lsn2;
 
396
        ib_uint64_t     finish_lsn;
 
397
        ulint           len;
 
398
        ulint           i;
 
399
 
 
400
        if (archived_lsn == IB_ULONGLONG_MAX) {
 
401
                /* Checkpoint was taken in the NOARCHIVELOG mode */
 
402
                archived_lsn = checkpoint_lsn;
 
403
        }
 
404
 
 
405
        finish_lsn1 = ut_uint64_align_down(archived_lsn,
 
406
                                           OS_FILE_LOG_BLOCK_SIZE)
 
407
                + log_group_get_capacity(group);
 
408
 
 
409
        finish_lsn2 = ut_uint64_align_up(recovered_lsn,
 
410
                                         OS_FILE_LOG_BLOCK_SIZE)
 
411
                + recv_sys->last_log_buf_size;
 
412
 
 
413
        if (limit_lsn != IB_ULONGLONG_MAX) {
 
414
                /* We do not know how far we should erase log records: erase
 
415
                as much as possible */
 
416
 
 
417
                finish_lsn = finish_lsn1;
 
418
        } else {
 
419
                /* It is enough to erase the length of the log buffer */
 
420
                finish_lsn = finish_lsn1 < finish_lsn2
 
421
                        ? finish_lsn1 : finish_lsn2;
 
422
        }
 
423
 
 
424
        ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
 
425
 
 
426
        /* Write the log buffer full of zeros */
 
427
        for (i = 0; i < RECV_SCAN_SIZE; i++) {
 
428
 
 
429
                *(log_sys->buf + i) = '\0';
 
430
        }
 
431
 
 
432
        start_lsn = ut_uint64_align_down(recovered_lsn,
 
433
                                         OS_FILE_LOG_BLOCK_SIZE);
 
434
 
 
435
        if (start_lsn != recovered_lsn) {
 
436
                /* Copy the last incomplete log block to the log buffer and
 
437
                edit its data length: */
 
438
 
 
439
                ut_memcpy(log_sys->buf, recv_sys->last_block,
 
440
                          OS_FILE_LOG_BLOCK_SIZE);
 
441
                log_block_set_data_len(log_sys->buf,
 
442
                                       (ulint) (recovered_lsn - start_lsn));
 
443
        }
 
444
 
 
445
        if (start_lsn >= finish_lsn) {
 
446
 
 
447
                return;
 
448
        }
 
449
 
 
450
        for (;;) {
 
451
                end_lsn = start_lsn + RECV_SCAN_SIZE;
 
452
 
 
453
                if (end_lsn > finish_lsn) {
 
454
 
 
455
                        end_lsn = finish_lsn;
 
456
                }
 
457
 
 
458
                len = (ulint) (end_lsn - start_lsn);
 
459
 
 
460
                log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
 
461
                if (end_lsn >= finish_lsn) {
 
462
 
 
463
                        return;
 
464
                }
 
465
 
 
466
                /* Write the log buffer full of zeros */
 
467
                for (i = 0; i < RECV_SCAN_SIZE; i++) {
 
468
 
 
469
                        *(log_sys->buf + i) = '\0';
 
470
                }
 
471
 
 
472
                start_lsn = end_lsn;
 
473
        }
 
474
}
 
475
 
 
476
/********************************************************//**
 
477
Copies the log segment between group->recovered_lsn and recovered_lsn from the
 
478
most up-to-date log group to group, so that it contains the latest log data. */
 
479
static
 
480
void
 
481
recv_copy_group(
 
482
/*============*/
 
483
        log_group_t*    up_to_date_group,       /*!< in: the most up-to-date log
 
484
                                                group */
 
485
        log_group_t*    group,                  /*!< in: copy to this log
 
486
                                                group */
 
487
        ib_uint64_t     recovered_lsn)          /*!< in: recovery succeeded up
 
488
                                                to this lsn */
 
489
{
 
490
        ib_uint64_t     start_lsn;
 
491
        ib_uint64_t     end_lsn;
 
492
        ulint           len;
 
493
 
 
494
        if (group->scanned_lsn >= recovered_lsn) {
 
495
 
 
496
                return;
 
497
        }
 
498
 
 
499
        ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
 
500
 
 
501
        start_lsn = ut_uint64_align_down(group->scanned_lsn,
 
502
                                         OS_FILE_LOG_BLOCK_SIZE);
 
503
        for (;;) {
 
504
                end_lsn = start_lsn + RECV_SCAN_SIZE;
 
505
 
 
506
                if (end_lsn > recovered_lsn) {
 
507
                        end_lsn = ut_uint64_align_up(recovered_lsn,
 
508
                                                     OS_FILE_LOG_BLOCK_SIZE);
 
509
                }
 
510
 
 
511
                log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
 
512
                                       up_to_date_group, start_lsn, end_lsn);
 
513
 
 
514
                len = (ulint) (end_lsn - start_lsn);
 
515
 
 
516
                log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
 
517
 
 
518
                if (end_lsn >= recovered_lsn) {
 
519
 
 
520
                        return;
 
521
                }
 
522
 
 
523
                start_lsn = end_lsn;
 
524
        }
 
525
}
 
526
 
 
527
/********************************************************//**
 
528
Copies a log segment from the most up-to-date log group to the other log
 
529
groups, so that they all contain the latest log data. Also writes the info
 
530
about the latest checkpoint to the groups, and inits the fields in the group
 
531
memory structs to up-to-date values. */
 
532
static
 
533
void
 
534
recv_synchronize_groups(
 
535
/*====================*/
 
536
        log_group_t*    up_to_date_group)       /*!< in: the most up-to-date
 
537
                                                log group */
 
538
{
 
539
        log_group_t*    group;
 
540
        ib_uint64_t     start_lsn;
 
541
        ib_uint64_t     end_lsn;
 
542
        ib_uint64_t     recovered_lsn;
 
543
        ib_uint64_t     limit_lsn;
 
544
 
 
545
        recovered_lsn = recv_sys->recovered_lsn;
 
546
        limit_lsn = recv_sys->limit_lsn;
 
547
 
 
548
        /* Read the last recovered log block to the recovery system buffer:
 
549
        the block is always incomplete */
 
550
 
 
551
        start_lsn = ut_uint64_align_down(recovered_lsn,
 
552
                                         OS_FILE_LOG_BLOCK_SIZE);
 
553
        end_lsn = ut_uint64_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
 
554
 
 
555
        ut_a(start_lsn != end_lsn);
 
556
 
 
557
        log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
 
558
                               up_to_date_group, start_lsn, end_lsn);
 
559
 
 
560
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
561
 
 
562
        while (group) {
 
563
                if (group != up_to_date_group) {
 
564
 
 
565
                        /* Copy log data if needed */
 
566
 
 
567
                        recv_copy_group(group, up_to_date_group,
 
568
                                        recovered_lsn);
 
569
                }
 
570
 
 
571
                /* Update the fields in the group struct to correspond to
 
572
                recovered_lsn */
 
573
 
 
574
                log_group_set_fields(group, recovered_lsn);
 
575
 
 
576
                group = UT_LIST_GET_NEXT(log_groups, group);
 
577
        }
 
578
 
 
579
        /* Copy the checkpoint info to the groups; remember that we have
 
580
        incremented checkpoint_no by one, and the info will not be written
 
581
        over the max checkpoint info, thus making the preservation of max
 
582
        checkpoint info on disk certain */
 
583
 
 
584
        log_groups_write_checkpoint_info();
 
585
 
 
586
        mutex_exit(&(log_sys->mutex));
 
587
 
 
588
        /* Wait for the checkpoint write to complete */
 
589
        rw_lock_s_lock(&(log_sys->checkpoint_lock));
 
590
        rw_lock_s_unlock(&(log_sys->checkpoint_lock));
 
591
 
 
592
        mutex_enter(&(log_sys->mutex));
 
593
}
 
594
#endif /* !UNIV_HOTBACKUP */
 
595
 
 
596
/***********************************************************************//**
 
597
Checks the consistency of the checkpoint info
 
598
@return TRUE if ok */
 
599
static
 
600
ibool
 
601
recv_check_cp_is_consistent(
 
602
/*========================*/
 
603
        const byte*     buf)    /*!< in: buffer containing checkpoint info */
 
604
{
 
605
        ulint   fold;
 
606
 
 
607
        fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
 
608
 
 
609
        if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
 
610
                    buf + LOG_CHECKPOINT_CHECKSUM_1)) {
 
611
                return(FALSE);
 
612
        }
 
613
 
 
614
        fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
 
615
                              LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
 
616
 
 
617
        if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
 
618
                    buf + LOG_CHECKPOINT_CHECKSUM_2)) {
 
619
                return(FALSE);
 
620
        }
 
621
 
 
622
        return(TRUE);
 
623
}
 
624
 
 
625
#ifndef UNIV_HOTBACKUP
 
626
/********************************************************//**
 
627
Looks for the maximum consistent checkpoint from the log groups.
 
628
@return error code or DB_SUCCESS */
 
629
static
 
630
ulint
 
631
recv_find_max_checkpoint(
 
632
/*=====================*/
 
633
        log_group_t**   max_group,      /*!< out: max group */
 
634
        ulint*          max_field)      /*!< out: LOG_CHECKPOINT_1 or
 
635
                                        LOG_CHECKPOINT_2 */
 
636
{
 
637
        log_group_t*    group;
 
638
        ib_uint64_t     max_no;
 
639
        ib_uint64_t     checkpoint_no;
 
640
        ulint           field;
 
641
        byte*           buf;
 
642
 
 
643
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
644
 
 
645
        max_no = 0;
 
646
        *max_group = NULL;
 
647
        *max_field = 0;
 
648
 
 
649
        buf = log_sys->checkpoint_buf;
 
650
 
 
651
        while (group) {
 
652
                group->state = LOG_GROUP_CORRUPTED;
 
653
 
 
654
                for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
 
655
                     field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
 
656
 
 
657
                        log_group_read_checkpoint_info(group, field);
 
658
 
 
659
                        if (!recv_check_cp_is_consistent(buf)) {
 
660
#ifdef UNIV_DEBUG
 
661
                                if (log_debug_writes) {
 
662
                                        fprintf(stderr,
 
663
                                                "InnoDB: Checkpoint in group"
 
664
                                                " %lu at %lu invalid, %lu\n",
 
665
                                                (ulong) group->id,
 
666
                                                (ulong) field,
 
667
                                                (ulong) mach_read_from_4(
 
668
                                                        buf
 
669
                                                        + LOG_CHECKPOINT_CHECKSUM_1));
 
670
 
 
671
                                }
 
672
#endif /* UNIV_DEBUG */
 
673
                                goto not_consistent;
 
674
                        }
 
675
 
 
676
                        group->state = LOG_GROUP_OK;
 
677
 
 
678
                        group->lsn = mach_read_ull(
 
679
                                buf + LOG_CHECKPOINT_LSN);
 
680
                        group->lsn_offset = mach_read_from_4(
 
681
                                buf + LOG_CHECKPOINT_OFFSET);
 
682
                        checkpoint_no = mach_read_ull(
 
683
                                buf + LOG_CHECKPOINT_NO);
 
684
 
 
685
#ifdef UNIV_DEBUG
 
686
                        if (log_debug_writes) {
 
687
                                fprintf(stderr,
 
688
                                        "InnoDB: Checkpoint number %lu"
 
689
                                        " found in group %lu\n",
 
690
                                        (ulong) checkpoint_no,
 
691
                                        (ulong) group->id);
 
692
                        }
 
693
#endif /* UNIV_DEBUG */
 
694
 
 
695
                        if (checkpoint_no >= max_no) {
 
696
                                *max_group = group;
 
697
                                *max_field = field;
 
698
                                max_no = checkpoint_no;
 
699
                        }
 
700
 
 
701
not_consistent:
 
702
                        ;
 
703
                }
 
704
 
 
705
                group = UT_LIST_GET_NEXT(log_groups, group);
 
706
        }
 
707
 
 
708
        if (*max_group == NULL) {
 
709
 
 
710
                fprintf(stderr,
 
711
                        "InnoDB: No valid checkpoint found.\n"
 
712
                        "InnoDB: If this error appears when you are"
 
713
                        " creating an InnoDB database,\n"
 
714
                        "InnoDB: the problem may be that during"
 
715
                        " an earlier attempt you managed\n"
 
716
                        "InnoDB: to create the InnoDB data files,"
 
717
                        " but log file creation failed.\n"
 
718
                        "InnoDB: If that is the case, please refer to\n"
 
719
                        "InnoDB: " REFMAN "error-creating-innodb.html\n");
 
720
                return(DB_ERROR);
 
721
        }
 
722
 
 
723
        return(DB_SUCCESS);
 
724
}
 
725
#else /* !UNIV_HOTBACKUP */
 
726
/*******************************************************************//**
 
727
Reads the checkpoint info needed in hot backup.
 
728
@return TRUE if success */
 
729
UNIV_INTERN
 
730
ibool
 
731
recv_read_cp_info_for_backup(
 
732
/*=========================*/
 
733
        const byte*     hdr,    /*!< in: buffer containing the log group
 
734
                                header */
 
735
        ib_uint64_t*    lsn,    /*!< out: checkpoint lsn */
 
736
        ulint*          offset, /*!< out: checkpoint offset in the log group */
 
737
        ulint*          fsp_limit,/*!< out: fsp limit of space 0,
 
738
                                1000000000 if the database is running
 
739
                                with < version 3.23.50 of InnoDB */
 
740
        ib_uint64_t*    cp_no,  /*!< out: checkpoint number */
 
741
        ib_uint64_t*    first_header_lsn)
 
742
                                /*!< out: lsn of of the start of the
 
743
                                first log file */
 
744
{
 
745
        ulint           max_cp          = 0;
 
746
        ib_uint64_t     max_cp_no       = 0;
 
747
        const byte*     cp_buf;
 
748
 
 
749
        cp_buf = hdr + LOG_CHECKPOINT_1;
 
750
 
 
751
        if (recv_check_cp_is_consistent(cp_buf)) {
 
752
                max_cp_no = mach_read_ull(cp_buf + LOG_CHECKPOINT_NO);
 
753
                max_cp = LOG_CHECKPOINT_1;
 
754
        }
 
755
 
 
756
        cp_buf = hdr + LOG_CHECKPOINT_2;
 
757
 
 
758
        if (recv_check_cp_is_consistent(cp_buf)) {
 
759
                if (mach_read_ull(cp_buf + LOG_CHECKPOINT_NO) > max_cp_no) {
 
760
                        max_cp = LOG_CHECKPOINT_2;
 
761
                }
 
762
        }
 
763
 
 
764
        if (max_cp == 0) {
 
765
                return(FALSE);
 
766
        }
 
767
 
 
768
        cp_buf = hdr + max_cp;
 
769
 
 
770
        *lsn = mach_read_ull(cp_buf + LOG_CHECKPOINT_LSN);
 
771
        *offset = mach_read_from_4(cp_buf + LOG_CHECKPOINT_OFFSET);
 
772
 
 
773
        /* If the user is running a pre-3.23.50 version of InnoDB, its
 
774
        checkpoint data does not contain the fsp limit info */
 
775
        if (mach_read_from_4(cp_buf + LOG_CHECKPOINT_FSP_MAGIC_N)
 
776
            == LOG_CHECKPOINT_FSP_MAGIC_N_VAL) {
 
777
 
 
778
                *fsp_limit = mach_read_from_4(
 
779
                        cp_buf + LOG_CHECKPOINT_FSP_FREE_LIMIT);
 
780
 
 
781
                if (*fsp_limit == 0) {
 
782
                        *fsp_limit = 1000000000;
 
783
                }
 
784
        } else {
 
785
                *fsp_limit = 1000000000;
 
786
        }
 
787
 
 
788
        /*      fprintf(stderr, "fsp limit %lu MB\n", *fsp_limit); */
 
789
 
 
790
        *cp_no = mach_read_ull(cp_buf + LOG_CHECKPOINT_NO);
 
791
 
 
792
        *first_header_lsn = mach_read_ull(hdr + LOG_FILE_START_LSN);
 
793
 
 
794
        return(TRUE);
 
795
}
 
796
#endif /* !UNIV_HOTBACKUP */
 
797
 
 
798
/******************************************************//**
 
799
Checks the 4-byte checksum to the trailer checksum field of a log
 
800
block.  We also accept a log block in the old format before
 
801
InnoDB-3.23.52 where the checksum field contains the log block number.
 
802
@return TRUE if ok, or if the log block may be in the format of InnoDB
 
803
version predating 3.23.52 */
 
804
static
 
805
ibool
 
806
log_block_checksum_is_ok_or_old_format(
 
807
/*===================================*/
 
808
        const byte*     block)  /*!< in: pointer to a log block */
 
809
{
 
810
#ifdef UNIV_LOG_DEBUG
 
811
        return(TRUE);
 
812
#endif /* UNIV_LOG_DEBUG */
 
813
        if (log_block_calc_checksum(block) == log_block_get_checksum(block)) {
 
814
 
 
815
                return(TRUE);
 
816
        }
 
817
 
 
818
        if (log_block_get_hdr_no(block) == log_block_get_checksum(block)) {
 
819
 
 
820
                /* We assume the log block is in the format of
 
821
                InnoDB version < 3.23.52 and the block is ok */
 
822
#if 0
 
823
                fprintf(stderr,
 
824
                        "InnoDB: Scanned old format < InnoDB-3.23.52"
 
825
                        " log block number %lu\n",
 
826
                        log_block_get_hdr_no(block));
 
827
#endif
 
828
                return(TRUE);
 
829
        }
 
830
 
 
831
        return(FALSE);
 
832
}
 
833
 
 
834
#ifdef UNIV_HOTBACKUP
 
835
/*******************************************************************//**
 
836
Scans the log segment and n_bytes_scanned is set to the length of valid
 
837
log scanned. */
 
838
UNIV_INTERN
 
839
void
 
840
recv_scan_log_seg_for_backup(
 
841
/*=========================*/
 
842
        byte*           buf,            /*!< in: buffer containing log data */
 
843
        ulint           buf_len,        /*!< in: data length in that buffer */
 
844
        ib_uint64_t*    scanned_lsn,    /*!< in/out: lsn of buffer start,
 
845
                                        we return scanned lsn */
 
846
        ulint*          scanned_checkpoint_no,
 
847
                                        /*!< in/out: 4 lowest bytes of the
 
848
                                        highest scanned checkpoint number so
 
849
                                        far */
 
850
        ulint*          n_bytes_scanned)/*!< out: how much we were able to
 
851
                                        scan, smaller than buf_len if log
 
852
                                        data ended here */
 
853
{
 
854
        ulint   data_len;
 
855
        byte*   log_block;
 
856
        ulint   no;
 
857
 
 
858
        *n_bytes_scanned = 0;
 
859
 
 
860
        for (log_block = buf; log_block < buf + buf_len;
 
861
             log_block += OS_FILE_LOG_BLOCK_SIZE) {
 
862
 
 
863
                no = log_block_get_hdr_no(log_block);
 
864
 
 
865
#if 0
 
866
                fprintf(stderr, "Log block header no %lu\n", no);
 
867
#endif
 
868
 
 
869
                if (no != log_block_convert_lsn_to_no(*scanned_lsn)
 
870
                    || !log_block_checksum_is_ok_or_old_format(log_block)) {
 
871
#if 0
 
872
                        fprintf(stderr,
 
873
                                "Log block n:o %lu, scanned lsn n:o %lu\n",
 
874
                                no, log_block_convert_lsn_to_no(*scanned_lsn));
 
875
#endif
 
876
                        /* Garbage or an incompletely written log block */
 
877
 
 
878
                        log_block += OS_FILE_LOG_BLOCK_SIZE;
 
879
#if 0
 
880
                        fprintf(stderr,
 
881
                                "Next log block n:o %lu\n",
 
882
                                log_block_get_hdr_no(log_block));
 
883
#endif
 
884
                        break;
 
885
                }
 
886
 
 
887
                if (*scanned_checkpoint_no > 0
 
888
                    && log_block_get_checkpoint_no(log_block)
 
889
                    < *scanned_checkpoint_no
 
890
                    && *scanned_checkpoint_no
 
891
                    - log_block_get_checkpoint_no(log_block)
 
892
                    > 0x80000000UL) {
 
893
 
 
894
                        /* Garbage from a log buffer flush which was made
 
895
                        before the most recent database recovery */
 
896
#if 0
 
897
                        fprintf(stderr,
 
898
                                "Scanned cp n:o %lu, block cp n:o %lu\n",
 
899
                                *scanned_checkpoint_no,
 
900
                                log_block_get_checkpoint_no(log_block));
 
901
#endif
 
902
                        break;
 
903
                }
 
904
 
 
905
                data_len = log_block_get_data_len(log_block);
 
906
 
 
907
                *scanned_checkpoint_no
 
908
                        = log_block_get_checkpoint_no(log_block);
 
909
                *scanned_lsn += data_len;
 
910
 
 
911
                *n_bytes_scanned += data_len;
 
912
 
 
913
                if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
 
914
                        /* Log data ends here */
 
915
 
 
916
#if 0
 
917
                        fprintf(stderr, "Log block data len %lu\n",
 
918
                                data_len);
 
919
#endif
 
920
                        break;
 
921
                }
 
922
        }
 
923
}
 
924
#endif /* UNIV_HOTBACKUP */
 
925
 
 
926
/*******************************************************************//**
 
927
Tries to parse a single log record body and also applies it to a page if
 
928
specified. File ops are parsed, but not applied in this function.
 
929
@return log record end, NULL if not a complete record */
 
930
static
 
931
byte*
 
932
recv_parse_or_apply_log_rec_body(
 
933
/*=============================*/
 
934
        byte            type,   /*!< in: type */
 
935
        byte*           ptr,    /*!< in: pointer to a buffer */
 
936
        byte*           end_ptr,/*!< in: pointer to the buffer end */
 
937
        buf_block_t*    block,  /*!< in/out: buffer block or NULL; if
 
938
                                not NULL, then the log record is
 
939
                                applied to the page, and the log
 
940
                                record should be complete then */
 
941
        mtr_t*          mtr)    /*!< in: mtr or NULL; should be non-NULL
 
942
                                if and only if block is non-NULL */
 
943
{
 
944
        dict_index_t*   index   = NULL;
 
945
        page_t*         page;
 
946
        page_zip_des_t* page_zip;
 
947
#ifdef UNIV_DEBUG
 
948
        ulint           page_type;
 
949
#endif /* UNIV_DEBUG */
 
950
 
 
951
        ut_ad(!block == !mtr);
 
952
 
 
953
        if (block) {
 
954
                page = block->frame;
 
955
                page_zip = buf_block_get_page_zip(block);
 
956
                ut_d(page_type = fil_page_get_type(page));
 
957
        } else {
 
958
                page = NULL;
 
959
                page_zip = NULL;
 
960
                ut_d(page_type = FIL_PAGE_TYPE_ALLOCATED);
 
961
        }
 
962
 
 
963
        switch (type) {
 
964
#ifdef UNIV_LOG_LSN_DEBUG
 
965
        case MLOG_LSN:
 
966
                /* The LSN is checked in recv_parse_log_rec(). */
 
967
                break;
 
968
#endif /* UNIV_LOG_LSN_DEBUG */
 
969
        case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES:
 
970
#ifdef UNIV_DEBUG
 
971
                if (page && page_type == FIL_PAGE_TYPE_ALLOCATED
 
972
                    && end_ptr >= ptr + 2) {
 
973
                        /* It is OK to set FIL_PAGE_TYPE and certain
 
974
                        list node fields on an empty page.  Any other
 
975
                        write is not OK. */
 
976
 
 
977
                        /* NOTE: There may be bogus assertion failures for
 
978
                        dict_hdr_create(), trx_rseg_header_create(),
 
979
                        trx_sys_create_doublewrite_buf(), and
 
980
                        trx_sysf_create().
 
981
                        These are only called during database creation. */
 
982
                        ulint   offs = mach_read_from_2(ptr);
 
983
 
 
984
                        switch (type) {
 
985
                        default:
 
986
                                ut_error;
 
987
                        case MLOG_2BYTES:
 
988
                                /* Note that this can fail when the
 
989
                                redo log been written with something
 
990
                                older than InnoDB Plugin 1.0.4. */
 
991
                                ut_ad(offs == FIL_PAGE_TYPE
 
992
                                      || offs == IBUF_TREE_SEG_HEADER
 
993
                                      + IBUF_HEADER + FSEG_HDR_OFFSET
 
994
                                      || offs == PAGE_BTR_IBUF_FREE_LIST
 
995
                                      + PAGE_HEADER + FIL_ADDR_BYTE
 
996
                                      || offs == PAGE_BTR_IBUF_FREE_LIST
 
997
                                      + PAGE_HEADER + FIL_ADDR_BYTE
 
998
                                      + FIL_ADDR_SIZE
 
999
                                      || offs == PAGE_BTR_SEG_LEAF
 
1000
                                      + PAGE_HEADER + FSEG_HDR_OFFSET
 
1001
                                      || offs == PAGE_BTR_SEG_TOP
 
1002
                                      + PAGE_HEADER + FSEG_HDR_OFFSET
 
1003
                                      || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
 
1004
                                      + PAGE_HEADER + FIL_ADDR_BYTE
 
1005
                                      + 0 /*FLST_PREV*/
 
1006
                                      || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
 
1007
                                      + PAGE_HEADER + FIL_ADDR_BYTE
 
1008
                                      + FIL_ADDR_SIZE /*FLST_NEXT*/);
 
1009
                                break;
 
1010
                        case MLOG_4BYTES:
 
1011
                                /* Note that this can fail when the
 
1012
                                redo log been written with something
 
1013
                                older than InnoDB Plugin 1.0.4. */
 
1014
                                ut_ad(0
 
1015
                                      || offs == IBUF_TREE_SEG_HEADER
 
1016
                                      + IBUF_HEADER + FSEG_HDR_SPACE
 
1017
                                      || offs == IBUF_TREE_SEG_HEADER
 
1018
                                      + IBUF_HEADER + FSEG_HDR_PAGE_NO
 
1019
                                      || offs == PAGE_BTR_IBUF_FREE_LIST
 
1020
                                      + PAGE_HEADER/* flst_init */
 
1021
                                      || offs == PAGE_BTR_IBUF_FREE_LIST
 
1022
                                      + PAGE_HEADER + FIL_ADDR_PAGE
 
1023
                                      || offs == PAGE_BTR_IBUF_FREE_LIST
 
1024
                                      + PAGE_HEADER + FIL_ADDR_PAGE
 
1025
                                      + FIL_ADDR_SIZE
 
1026
                                      || offs == PAGE_BTR_SEG_LEAF
 
1027
                                      + PAGE_HEADER + FSEG_HDR_PAGE_NO
 
1028
                                      || offs == PAGE_BTR_SEG_LEAF
 
1029
                                      + PAGE_HEADER + FSEG_HDR_SPACE
 
1030
                                      || offs == PAGE_BTR_SEG_TOP
 
1031
                                      + PAGE_HEADER + FSEG_HDR_PAGE_NO
 
1032
                                      || offs == PAGE_BTR_SEG_TOP
 
1033
                                      + PAGE_HEADER + FSEG_HDR_SPACE
 
1034
                                      || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
 
1035
                                      + PAGE_HEADER + FIL_ADDR_PAGE
 
1036
                                      + 0 /*FLST_PREV*/
 
1037
                                      || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
 
1038
                                      + PAGE_HEADER + FIL_ADDR_PAGE
 
1039
                                      + FIL_ADDR_SIZE /*FLST_NEXT*/);
 
1040
                                break;
 
1041
                        }
 
1042
                }
 
1043
#endif /* UNIV_DEBUG */
 
1044
                ptr = mlog_parse_nbytes(type, ptr, end_ptr, page, page_zip);
 
1045
                break;
 
1046
        case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT:
 
1047
                ut_ad(!page || page_type == FIL_PAGE_INDEX);
 
1048
 
 
1049
                if (NULL != (ptr = mlog_parse_index(
 
1050
                                     ptr, end_ptr,
 
1051
                                     type == MLOG_COMP_REC_INSERT,
 
1052
                                     &index))) {
 
1053
                        ut_a(!page
 
1054
                             || (ibool)!!page_is_comp(page)
 
1055
                             == dict_table_is_comp(index->table));
 
1056
                        ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
 
1057
                                                        block, index, mtr);
 
1058
                }
 
1059
                break;
 
1060
        case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
 
1061
                ut_ad(!page || page_type == FIL_PAGE_INDEX);
 
1062
 
 
1063
                if (NULL != (ptr = mlog_parse_index(
 
1064
                                     ptr, end_ptr,
 
1065
                                     type == MLOG_COMP_REC_CLUST_DELETE_MARK,
 
1066
                                     &index))) {
 
1067
                        ut_a(!page
 
1068
                             || (ibool)!!page_is_comp(page)
 
1069
                             == dict_table_is_comp(index->table));
 
1070
                        ptr = btr_cur_parse_del_mark_set_clust_rec(
 
1071
                                ptr, end_ptr, page, page_zip, index);
 
1072
                }
 
1073
                break;
 
1074
        case MLOG_COMP_REC_SEC_DELETE_MARK:
 
1075
                ut_ad(!page || page_type == FIL_PAGE_INDEX);
 
1076
                /* This log record type is obsolete, but we process it for
 
1077
                backward compatibility with MySQL 5.0.3 and 5.0.4. */
 
1078
                ut_a(!page || page_is_comp(page));
 
1079
                ut_a(!page_zip);
 
1080
                ptr = mlog_parse_index(ptr, end_ptr, TRUE, &index);
 
1081
                if (!ptr) {
 
1082
                        break;
 
1083
                }
 
1084
                /* Fall through */
 
1085
        case MLOG_REC_SEC_DELETE_MARK:
 
1086
                ut_ad(!page || page_type == FIL_PAGE_INDEX);
 
1087
                ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr,
 
1088
                                                         page, page_zip);
 
1089
                break;
 
1090
        case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
 
1091
                ut_ad(!page || page_type == FIL_PAGE_INDEX);
 
1092
 
 
1093
                if (NULL != (ptr = mlog_parse_index(
 
1094
                                     ptr, end_ptr,
 
1095
                                     type == MLOG_COMP_REC_UPDATE_IN_PLACE,
 
1096
                                     &index))) {
 
1097
                        ut_a(!page
 
1098
                             || (ibool)!!page_is_comp(page)
 
1099
                             == dict_table_is_comp(index->table));
 
1100
                        ptr = btr_cur_parse_update_in_place(ptr, end_ptr, page,
 
1101
                                                            page_zip, index);
 
1102
                }
 
1103
                break;
 
1104
        case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE:
 
1105
        case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE:
 
1106
                ut_ad(!page || page_type == FIL_PAGE_INDEX);
 
1107
 
 
1108
                if (NULL != (ptr = mlog_parse_index(
 
1109
                                     ptr, end_ptr,
 
1110
                                     type == MLOG_COMP_LIST_END_DELETE
 
1111
                                     || type == MLOG_COMP_LIST_START_DELETE,
 
1112
                                     &index))) {
 
1113
                        ut_a(!page
 
1114
                             || (ibool)!!page_is_comp(page)
 
1115
                             == dict_table_is_comp(index->table));
 
1116
                        ptr = page_parse_delete_rec_list(type, ptr, end_ptr,
 
1117
                                                         block, index, mtr);
 
1118
                }
 
1119
                break;
 
1120
        case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED:
 
1121
                ut_ad(!page || page_type == FIL_PAGE_INDEX);
 
1122
 
 
1123
                if (NULL != (ptr = mlog_parse_index(
 
1124
                                     ptr, end_ptr,
 
1125
                                     type == MLOG_COMP_LIST_END_COPY_CREATED,
 
1126
                                     &index))) {
 
1127
                        ut_a(!page
 
1128
                             || (ibool)!!page_is_comp(page)
 
1129
                             == dict_table_is_comp(index->table));
 
1130
                        ptr = page_parse_copy_rec_list_to_created_page(
 
1131
                                ptr, end_ptr, block, index, mtr);
 
1132
                }
 
1133
                break;
 
1134
        case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE:
 
1135
                ut_ad(!page || page_type == FIL_PAGE_INDEX);
 
1136
 
 
1137
                if (NULL != (ptr = mlog_parse_index(
 
1138
                                     ptr, end_ptr,
 
1139
                                     type == MLOG_COMP_PAGE_REORGANIZE,
 
1140
                                     &index))) {
 
1141
                        ut_a(!page
 
1142
                             || (ibool)!!page_is_comp(page)
 
1143
                             == dict_table_is_comp(index->table));
 
1144
                        ptr = btr_parse_page_reorganize(ptr, end_ptr, index,
 
1145
                                                        block, mtr);
 
1146
                }
 
1147
                break;
 
1148
        case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
 
1149
                /* Allow anything in page_type when creating a page. */
 
1150
                ut_a(!page_zip);
 
1151
                ptr = page_parse_create(ptr, end_ptr,
 
1152
                                        type == MLOG_COMP_PAGE_CREATE,
 
1153
                                        block, mtr);
 
1154
                break;
 
1155
        case MLOG_UNDO_INSERT:
 
1156
                ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
 
1157
                ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
 
1158
                break;
 
1159
        case MLOG_UNDO_ERASE_END:
 
1160
                ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
 
1161
                ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr);
 
1162
                break;
 
1163
        case MLOG_UNDO_INIT:
 
1164
                /* Allow anything in page_type when creating a page. */
 
1165
                ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
 
1166
                break;
 
1167
        case MLOG_UNDO_HDR_DISCARD:
 
1168
                ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
 
1169
                ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr);
 
1170
                break;
 
1171
        case MLOG_UNDO_HDR_CREATE:
 
1172
        case MLOG_UNDO_HDR_REUSE:
 
1173
                ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
 
1174
                ptr = trx_undo_parse_page_header(type, ptr, end_ptr,
 
1175
                                                 page, mtr);
 
1176
                break;
 
1177
        case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK:
 
1178
                ut_ad(!page || page_type == FIL_PAGE_INDEX);
 
1179
                /* On a compressed page, MLOG_COMP_REC_MIN_MARK
 
1180
                will be followed by MLOG_COMP_REC_DELETE
 
1181
                or MLOG_ZIP_WRITE_HEADER(FIL_PAGE_PREV, FIL_NULL)
 
1182
                in the same mini-transaction. */
 
1183
                ut_a(type == MLOG_COMP_REC_MIN_MARK || !page_zip);
 
1184
                ptr = btr_parse_set_min_rec_mark(
 
1185
                        ptr, end_ptr, type == MLOG_COMP_REC_MIN_MARK,
 
1186
                        page, mtr);
 
1187
                break;
 
1188
        case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE:
 
1189
                ut_ad(!page || page_type == FIL_PAGE_INDEX);
 
1190
 
 
1191
                if (NULL != (ptr = mlog_parse_index(
 
1192
                                     ptr, end_ptr,
 
1193
                                     type == MLOG_COMP_REC_DELETE,
 
1194
                                     &index))) {
 
1195
                        ut_a(!page
 
1196
                             || (ibool)!!page_is_comp(page)
 
1197
                             == dict_table_is_comp(index->table));
 
1198
                        ptr = page_cur_parse_delete_rec(ptr, end_ptr,
 
1199
                                                        block, index, mtr);
 
1200
                }
 
1201
                break;
 
1202
        case MLOG_IBUF_BITMAP_INIT:
 
1203
                /* Allow anything in page_type when creating a page. */
 
1204
                ptr = ibuf_parse_bitmap_init(ptr, end_ptr, block, mtr);
 
1205
                break;
 
1206
        case MLOG_INIT_FILE_PAGE:
 
1207
                /* Allow anything in page_type when creating a page. */
 
1208
                ptr = fsp_parse_init_file_page(ptr, end_ptr, block);
 
1209
                break;
 
1210
        case MLOG_WRITE_STRING:
 
1211
                ut_ad(!page || page_type != FIL_PAGE_TYPE_ALLOCATED);
 
1212
                ptr = mlog_parse_string(ptr, end_ptr, page, page_zip);
 
1213
                break;
 
1214
        case MLOG_FILE_CREATE:
 
1215
        case MLOG_FILE_RENAME:
 
1216
        case MLOG_FILE_DELETE:
 
1217
        case MLOG_FILE_CREATE2:
 
1218
                ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, 0, 0);
 
1219
                break;
 
1220
        case MLOG_ZIP_WRITE_NODE_PTR:
 
1221
                ut_ad(!page || page_type == FIL_PAGE_INDEX);
 
1222
                ptr = page_zip_parse_write_node_ptr(ptr, end_ptr,
 
1223
                                                    page, page_zip);
 
1224
                break;
 
1225
        case MLOG_ZIP_WRITE_BLOB_PTR:
 
1226
                ut_ad(!page || page_type == FIL_PAGE_INDEX);
 
1227
                ptr = page_zip_parse_write_blob_ptr(ptr, end_ptr,
 
1228
                                                    page, page_zip);
 
1229
                break;
 
1230
        case MLOG_ZIP_WRITE_HEADER:
 
1231
                ut_ad(!page || page_type == FIL_PAGE_INDEX);
 
1232
                ptr = page_zip_parse_write_header(ptr, end_ptr,
 
1233
                                                  page, page_zip);
 
1234
                break;
 
1235
        case MLOG_ZIP_PAGE_COMPRESS:
 
1236
                /* Allow anything in page_type when creating a page. */
 
1237
                ptr = page_zip_parse_compress(ptr, end_ptr,
 
1238
                                              page, page_zip);
 
1239
                break;
 
1240
        default:
 
1241
                ptr = NULL;
 
1242
                recv_sys->found_corrupt_log = TRUE;
 
1243
        }
 
1244
 
 
1245
        if (index) {
 
1246
                dict_table_t*   table = index->table;
 
1247
 
 
1248
                dict_mem_index_free(index);
 
1249
                dict_mem_table_free(table);
 
1250
        }
 
1251
 
 
1252
        return(ptr);
 
1253
}
 
1254
 
 
1255
/*********************************************************************//**
 
1256
Calculates the fold value of a page file address: used in inserting or
 
1257
searching for a log record in the hash table.
 
1258
@return folded value */
 
1259
UNIV_INLINE
 
1260
ulint
 
1261
recv_fold(
 
1262
/*======*/
 
1263
        ulint   space,  /*!< in: space */
 
1264
        ulint   page_no)/*!< in: page number */
 
1265
{
 
1266
        return(ut_fold_ulint_pair(space, page_no));
 
1267
}
 
1268
 
 
1269
/*********************************************************************//**
 
1270
Calculates the hash value of a page file address: used in inserting or
 
1271
searching for a log record in the hash table.
 
1272
@return folded value */
 
1273
UNIV_INLINE
 
1274
ulint
 
1275
recv_hash(
 
1276
/*======*/
 
1277
        ulint   space,  /*!< in: space */
 
1278
        ulint   page_no)/*!< in: page number */
 
1279
{
 
1280
        return(hash_calc_hash(recv_fold(space, page_no), recv_sys->addr_hash));
 
1281
}
 
1282
 
 
1283
/*********************************************************************//**
 
1284
Gets the hashed file address struct for a page.
 
1285
@return file address struct, NULL if not found from the hash table */
 
1286
static
 
1287
recv_addr_t*
 
1288
recv_get_fil_addr_struct(
 
1289
/*=====================*/
 
1290
        ulint   space,  /*!< in: space id */
 
1291
        ulint   page_no)/*!< in: page number */
 
1292
{
 
1293
        recv_addr_t*    recv_addr;
 
1294
 
 
1295
        recv_addr = HASH_GET_FIRST(recv_sys->addr_hash,
 
1296
                                   recv_hash(space, page_no));
 
1297
        while (recv_addr) {
 
1298
                if ((recv_addr->space == space)
 
1299
                    && (recv_addr->page_no == page_no)) {
 
1300
 
 
1301
                        break;
 
1302
                }
 
1303
 
 
1304
                recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
 
1305
        }
 
1306
 
 
1307
        return(recv_addr);
 
1308
}
 
1309
 
 
1310
/*******************************************************************//**
 
1311
Adds a new log record to the hash table of log records. */
 
1312
static
 
1313
void
 
1314
recv_add_to_hash_table(
 
1315
/*===================*/
 
1316
        byte            type,           /*!< in: log record type */
 
1317
        ulint           space,          /*!< in: space id */
 
1318
        ulint           page_no,        /*!< in: page number */
 
1319
        byte*           body,           /*!< in: log record body */
 
1320
        byte*           rec_end,        /*!< in: log record end */
 
1321
        ib_uint64_t     start_lsn,      /*!< in: start lsn of the mtr */
 
1322
        ib_uint64_t     end_lsn)        /*!< in: end lsn of the mtr */
 
1323
{
 
1324
        recv_t*         recv;
 
1325
        ulint           len;
 
1326
        recv_data_t*    recv_data;
 
1327
        recv_data_t**   prev_field;
 
1328
        recv_addr_t*    recv_addr;
 
1329
 
 
1330
        if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
 
1331
                /* The tablespace does not exist any more: do not store the
 
1332
                log record */
 
1333
 
 
1334
                return;
 
1335
        }
 
1336
 
 
1337
        len = rec_end - body;
 
1338
 
 
1339
        recv = mem_heap_alloc(recv_sys->heap, sizeof(recv_t));
 
1340
        recv->type = type;
 
1341
        recv->len = rec_end - body;
 
1342
        recv->start_lsn = start_lsn;
 
1343
        recv->end_lsn = end_lsn;
 
1344
 
 
1345
        recv_addr = recv_get_fil_addr_struct(space, page_no);
 
1346
 
 
1347
        if (recv_addr == NULL) {
 
1348
                recv_addr = mem_heap_alloc(recv_sys->heap,
 
1349
                                           sizeof(recv_addr_t));
 
1350
                recv_addr->space = space;
 
1351
                recv_addr->page_no = page_no;
 
1352
                recv_addr->state = RECV_NOT_PROCESSED;
 
1353
 
 
1354
                UT_LIST_INIT(recv_addr->rec_list);
 
1355
 
 
1356
                HASH_INSERT(recv_addr_t, addr_hash, recv_sys->addr_hash,
 
1357
                            recv_fold(space, page_no), recv_addr);
 
1358
                recv_sys->n_addrs++;
 
1359
#if 0
 
1360
                fprintf(stderr, "Inserting log rec for space %lu, page %lu\n",
 
1361
                        space, page_no);
 
1362
#endif
 
1363
        }
 
1364
 
 
1365
        UT_LIST_ADD_LAST(rec_list, recv_addr->rec_list, recv);
 
1366
 
 
1367
        prev_field = &(recv->data);
 
1368
 
 
1369
        /* Store the log record body in chunks of less than UNIV_PAGE_SIZE:
 
1370
        recv_sys->heap grows into the buffer pool, and bigger chunks could not
 
1371
        be allocated */
 
1372
 
 
1373
        while (rec_end > body) {
 
1374
 
 
1375
                len = rec_end - body;
 
1376
 
 
1377
                if (len > RECV_DATA_BLOCK_SIZE) {
 
1378
                        len = RECV_DATA_BLOCK_SIZE;
 
1379
                }
 
1380
 
 
1381
                recv_data = mem_heap_alloc(recv_sys->heap,
 
1382
                                           sizeof(recv_data_t) + len);
 
1383
                *prev_field = recv_data;
 
1384
 
 
1385
                memcpy(recv_data + 1, body, len);
 
1386
 
 
1387
                prev_field = &(recv_data->next);
 
1388
 
 
1389
                body += len;
 
1390
        }
 
1391
 
 
1392
        *prev_field = NULL;
 
1393
}
 
1394
 
 
1395
/*********************************************************************//**
 
1396
Copies the log record body from recv to buf. */
 
1397
static
 
1398
void
 
1399
recv_data_copy_to_buf(
 
1400
/*==================*/
 
1401
        byte*   buf,    /*!< in: buffer of length at least recv->len */
 
1402
        recv_t* recv)   /*!< in: log record */
 
1403
{
 
1404
        recv_data_t*    recv_data;
 
1405
        ulint           part_len;
 
1406
        ulint           len;
 
1407
 
 
1408
        len = recv->len;
 
1409
        recv_data = recv->data;
 
1410
 
 
1411
        while (len > 0) {
 
1412
                if (len > RECV_DATA_BLOCK_SIZE) {
 
1413
                        part_len = RECV_DATA_BLOCK_SIZE;
 
1414
                } else {
 
1415
                        part_len = len;
 
1416
                }
 
1417
 
 
1418
                ut_memcpy(buf, ((byte*)recv_data) + sizeof(recv_data_t),
 
1419
                          part_len);
 
1420
                buf += part_len;
 
1421
                len -= part_len;
 
1422
 
 
1423
                recv_data = recv_data->next;
 
1424
        }
 
1425
}
 
1426
 
 
1427
/************************************************************************//**
 
1428
Applies the hashed log records to the page, if the page lsn is less than the
 
1429
lsn of a log record. This can be called when a buffer page has just been
 
1430
read in, or also for a page already in the buffer pool. */
 
1431
UNIV_INTERN
 
1432
void
 
1433
recv_recover_page_func(
 
1434
/*===================*/
 
1435
#ifndef UNIV_HOTBACKUP
 
1436
        ibool           just_read_in,
 
1437
                                /*!< in: TRUE if the i/o handler calls
 
1438
                                this for a freshly read page */
 
1439
#endif /* !UNIV_HOTBACKUP */
 
1440
        buf_block_t*    block)  /*!< in/out: buffer block */
 
1441
{
 
1442
        page_t*         page;
 
1443
        page_zip_des_t* page_zip;
 
1444
        recv_addr_t*    recv_addr;
 
1445
        recv_t*         recv;
 
1446
        byte*           buf;
 
1447
        ib_uint64_t     start_lsn;
 
1448
        ib_uint64_t     end_lsn;
 
1449
        ib_uint64_t     page_lsn;
 
1450
        ib_uint64_t     page_newest_lsn;
 
1451
        ibool           modification_to_page;
 
1452
#ifndef UNIV_HOTBACKUP
 
1453
        ibool           success;
 
1454
#endif /* !UNIV_HOTBACKUP */
 
1455
        mtr_t           mtr;
 
1456
 
 
1457
        mutex_enter(&(recv_sys->mutex));
 
1458
 
 
1459
        if (recv_sys->apply_log_recs == FALSE) {
 
1460
 
 
1461
                /* Log records should not be applied now */
 
1462
 
 
1463
                mutex_exit(&(recv_sys->mutex));
 
1464
 
 
1465
                return;
 
1466
        }
 
1467
 
 
1468
        recv_addr = recv_get_fil_addr_struct(buf_block_get_space(block),
 
1469
                                             buf_block_get_page_no(block));
 
1470
 
 
1471
        if ((recv_addr == NULL)
 
1472
            || (recv_addr->state == RECV_BEING_PROCESSED)
 
1473
            || (recv_addr->state == RECV_PROCESSED)) {
 
1474
 
 
1475
                mutex_exit(&(recv_sys->mutex));
 
1476
 
 
1477
                return;
 
1478
        }
 
1479
 
 
1480
#if 0
 
1481
        fprintf(stderr, "Recovering space %lu, page %lu\n",
 
1482
                buf_block_get_space(block), buf_block_get_page_no(block));
 
1483
#endif
 
1484
 
 
1485
        recv_addr->state = RECV_BEING_PROCESSED;
 
1486
 
 
1487
        mutex_exit(&(recv_sys->mutex));
 
1488
 
 
1489
        mtr_start(&mtr);
 
1490
        mtr_set_log_mode(&mtr, MTR_LOG_NONE);
 
1491
 
 
1492
        page = block->frame;
 
1493
        page_zip = buf_block_get_page_zip(block);
 
1494
 
 
1495
#ifndef UNIV_HOTBACKUP
 
1496
        if (just_read_in) {
 
1497
                /* Move the ownership of the x-latch on the page to
 
1498
                this OS thread, so that we can acquire a second
 
1499
                x-latch on it.  This is needed for the operations to
 
1500
                the page to pass the debug checks. */
 
1501
 
 
1502
                rw_lock_x_lock_move_ownership(&block->lock);
 
1503
        }
 
1504
 
 
1505
        success = buf_page_get_known_nowait(RW_X_LATCH, block,
 
1506
                                            BUF_KEEP_OLD,
 
1507
                                            __FILE__, __LINE__,
 
1508
                                            &mtr);
 
1509
        ut_a(success);
 
1510
 
 
1511
        buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
 
1512
#endif /* !UNIV_HOTBACKUP */
 
1513
 
 
1514
        /* Read the newest modification lsn from the page */
 
1515
        page_lsn = mach_read_ull(page + FIL_PAGE_LSN);
 
1516
 
 
1517
#ifndef UNIV_HOTBACKUP
 
1518
        /* It may be that the page has been modified in the buffer
 
1519
        pool: read the newest modification lsn there */
 
1520
 
 
1521
        page_newest_lsn = buf_page_get_newest_modification(&block->page);
 
1522
 
 
1523
        if (page_newest_lsn) {
 
1524
 
 
1525
                page_lsn = page_newest_lsn;
 
1526
        }
 
1527
#else /* !UNIV_HOTBACKUP */
 
1528
        /* In recovery from a backup we do not really use the buffer pool */
 
1529
        page_newest_lsn = 0;
 
1530
#endif /* !UNIV_HOTBACKUP */
 
1531
 
 
1532
        modification_to_page = FALSE;
 
1533
        start_lsn = end_lsn = 0;
 
1534
 
 
1535
        recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
 
1536
 
 
1537
        while (recv) {
 
1538
                end_lsn = recv->end_lsn;
 
1539
 
 
1540
                if (recv->len > RECV_DATA_BLOCK_SIZE) {
 
1541
                        /* We have to copy the record body to a separate
 
1542
                        buffer */
 
1543
 
 
1544
                        buf = mem_alloc(recv->len);
 
1545
 
 
1546
                        recv_data_copy_to_buf(buf, recv);
 
1547
                } else {
 
1548
                        buf = ((byte*)(recv->data)) + sizeof(recv_data_t);
 
1549
                }
 
1550
 
 
1551
                if (recv->type == MLOG_INIT_FILE_PAGE) {
 
1552
                        page_lsn = page_newest_lsn;
 
1553
 
 
1554
                        memset(FIL_PAGE_LSN + page, 0, 8);
 
1555
                        memset(UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM
 
1556
                               + page, 0, 8);
 
1557
 
 
1558
                        if (page_zip) {
 
1559
                                memset(FIL_PAGE_LSN + page_zip->data, 0, 8);
 
1560
                        }
 
1561
                }
 
1562
 
 
1563
                if (recv->start_lsn >= page_lsn) {
 
1564
 
 
1565
                        ib_uint64_t     end_lsn;
 
1566
 
 
1567
                        if (!modification_to_page) {
 
1568
 
 
1569
                                modification_to_page = TRUE;
 
1570
                                start_lsn = recv->start_lsn;
 
1571
                        }
 
1572
 
 
1573
#ifdef UNIV_DEBUG
 
1574
                        if (log_debug_writes) {
 
1575
                                fprintf(stderr,
 
1576
                                        "InnoDB: Applying log rec"
 
1577
                                        " type %lu len %lu"
 
1578
                                        " to space %lu page no %lu\n",
 
1579
                                        (ulong) recv->type, (ulong) recv->len,
 
1580
                                        (ulong) recv_addr->space,
 
1581
                                        (ulong) recv_addr->page_no);
 
1582
                        }
 
1583
#endif /* UNIV_DEBUG */
 
1584
 
 
1585
                        recv_parse_or_apply_log_rec_body(recv->type, buf,
 
1586
                                                         buf + recv->len,
 
1587
                                                         block, &mtr);
 
1588
 
 
1589
                        end_lsn = recv->start_lsn + recv->len;
 
1590
                        mach_write_ull(FIL_PAGE_LSN + page, end_lsn);
 
1591
                        mach_write_ull(UNIV_PAGE_SIZE
 
1592
                                       - FIL_PAGE_END_LSN_OLD_CHKSUM
 
1593
                                       + page, end_lsn);
 
1594
 
 
1595
                        if (page_zip) {
 
1596
                                mach_write_ull(FIL_PAGE_LSN
 
1597
                                               + page_zip->data, end_lsn);
 
1598
                        }
 
1599
                }
 
1600
 
 
1601
                if (recv->len > RECV_DATA_BLOCK_SIZE) {
 
1602
                        mem_free(buf);
 
1603
                }
 
1604
 
 
1605
                recv = UT_LIST_GET_NEXT(rec_list, recv);
 
1606
        }
 
1607
 
 
1608
#ifdef UNIV_ZIP_DEBUG
 
1609
        if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
 
1610
                page_zip_des_t* page_zip = buf_block_get_page_zip(block);
 
1611
 
 
1612
                if (page_zip) {
 
1613
                        ut_a(page_zip_validate_low(page_zip, page, FALSE));
 
1614
                }
 
1615
        }
 
1616
#endif /* UNIV_ZIP_DEBUG */
 
1617
 
 
1618
        mutex_enter(&(recv_sys->mutex));
 
1619
 
 
1620
        if (recv_max_page_lsn < page_lsn) {
 
1621
                recv_max_page_lsn = page_lsn;
 
1622
        }
 
1623
 
 
1624
        recv_addr->state = RECV_PROCESSED;
 
1625
 
 
1626
        ut_a(recv_sys->n_addrs);
 
1627
        recv_sys->n_addrs--;
 
1628
 
 
1629
        mutex_exit(&(recv_sys->mutex));
 
1630
 
 
1631
#ifndef UNIV_HOTBACKUP
 
1632
        if (modification_to_page) {
 
1633
                ut_a(block);
 
1634
 
 
1635
                buf_flush_recv_note_modification(block, start_lsn, end_lsn);
 
1636
        }
 
1637
#endif /* !UNIV_HOTBACKUP */
 
1638
 
 
1639
        /* Make sure that committing mtr does not change the modification
 
1640
        lsn values of page */
 
1641
 
 
1642
        mtr.modifications = FALSE;
 
1643
 
 
1644
        mtr_commit(&mtr);
 
1645
}
 
1646
 
 
1647
#ifndef UNIV_HOTBACKUP
 
1648
/*******************************************************************//**
 
1649
Reads in pages which have hashed log records, from an area around a given
 
1650
page number.
 
1651
@return number of pages found */
 
1652
static
 
1653
ulint
 
1654
recv_read_in_area(
 
1655
/*==============*/
 
1656
        ulint   space,  /*!< in: space */
 
1657
        ulint   zip_size,/*!< in: compressed page size in bytes, or 0 */
 
1658
        ulint   page_no)/*!< in: page number */
 
1659
{
 
1660
        recv_addr_t* recv_addr;
 
1661
        ulint   page_nos[RECV_READ_AHEAD_AREA];
 
1662
        ulint   low_limit;
 
1663
        ulint   n;
 
1664
 
 
1665
        low_limit = page_no - (page_no % RECV_READ_AHEAD_AREA);
 
1666
 
 
1667
        n = 0;
 
1668
 
 
1669
        for (page_no = low_limit; page_no < low_limit + RECV_READ_AHEAD_AREA;
 
1670
             page_no++) {
 
1671
                recv_addr = recv_get_fil_addr_struct(space, page_no);
 
1672
 
 
1673
                if (recv_addr && !buf_page_peek(space, page_no)) {
 
1674
 
 
1675
                        mutex_enter(&(recv_sys->mutex));
 
1676
 
 
1677
                        if (recv_addr->state == RECV_NOT_PROCESSED) {
 
1678
                                recv_addr->state = RECV_BEING_READ;
 
1679
 
 
1680
                                page_nos[n] = page_no;
 
1681
 
 
1682
                                n++;
 
1683
                        }
 
1684
 
 
1685
                        mutex_exit(&(recv_sys->mutex));
 
1686
                }
 
1687
        }
 
1688
 
 
1689
        buf_read_recv_pages(FALSE, space, zip_size, page_nos, n);
 
1690
        /*
 
1691
        fprintf(stderr, "Recv pages at %lu n %lu\n", page_nos[0], n);
 
1692
        */
 
1693
        return(n);
 
1694
}
 
1695
 
 
1696
/*******************************************************************//**
 
1697
Empties the hash table of stored log records, applying them to appropriate
 
1698
pages. */
 
1699
UNIV_INTERN
 
1700
void
 
1701
recv_apply_hashed_log_recs(
 
1702
/*=======================*/
 
1703
        ibool   allow_ibuf)     /*!< in: if TRUE, also ibuf operations are
 
1704
                                allowed during the application; if FALSE,
 
1705
                                no ibuf operations are allowed, and after
 
1706
                                the application all file pages are flushed to
 
1707
                                disk and invalidated in buffer pool: this
 
1708
                                alternative means that no new log records
 
1709
                                can be generated during the application;
 
1710
                                the caller must in this case own the log
 
1711
                                mutex */
 
1712
{
 
1713
        recv_addr_t* recv_addr;
 
1714
        ulint   i;
 
1715
        ulint   n_pages;
 
1716
        ibool   has_printed     = FALSE;
 
1717
        mtr_t   mtr;
 
1718
loop:
 
1719
        mutex_enter(&(recv_sys->mutex));
 
1720
 
 
1721
        if (recv_sys->apply_batch_on) {
 
1722
 
 
1723
                mutex_exit(&(recv_sys->mutex));
 
1724
 
 
1725
                os_thread_sleep(500000);
 
1726
 
 
1727
                goto loop;
 
1728
        }
 
1729
 
 
1730
        ut_ad(!allow_ibuf == mutex_own(&log_sys->mutex));
 
1731
 
 
1732
        if (!allow_ibuf) {
 
1733
                recv_no_ibuf_operations = TRUE;
 
1734
        }
 
1735
 
 
1736
        recv_sys->apply_log_recs = TRUE;
 
1737
        recv_sys->apply_batch_on = TRUE;
 
1738
 
 
1739
        for (i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
 
1740
 
 
1741
                recv_addr = HASH_GET_FIRST(recv_sys->addr_hash, i);
 
1742
 
 
1743
                while (recv_addr) {
 
1744
                        ulint   space = recv_addr->space;
 
1745
                        ulint   zip_size = fil_space_get_zip_size(space);
 
1746
                        ulint   page_no = recv_addr->page_no;
 
1747
 
 
1748
                        if (recv_addr->state == RECV_NOT_PROCESSED) {
 
1749
                                if (!has_printed) {
 
1750
                                        ut_print_timestamp(stderr);
 
1751
                                        fputs("  InnoDB: Starting an"
 
1752
                                              " apply batch of log records"
 
1753
                                              " to the database...\n"
 
1754
                                              "InnoDB: Progress in percents: ",
 
1755
                                              stderr);
 
1756
                                        has_printed = TRUE;
 
1757
                                }
 
1758
 
 
1759
                                mutex_exit(&(recv_sys->mutex));
 
1760
 
 
1761
                                if (buf_page_peek(space, page_no)) {
 
1762
                                        buf_block_t*    block;
 
1763
 
 
1764
                                        mtr_start(&mtr);
 
1765
 
 
1766
                                        block = buf_page_get(
 
1767
                                                space, zip_size, page_no,
 
1768
                                                RW_X_LATCH, &mtr);
 
1769
                                        buf_block_dbg_add_level(
 
1770
                                                block, SYNC_NO_ORDER_CHECK);
 
1771
 
 
1772
                                        recv_recover_page(FALSE, block);
 
1773
                                        mtr_commit(&mtr);
 
1774
                                } else {
 
1775
                                        recv_read_in_area(space, zip_size,
 
1776
                                                          page_no);
 
1777
                                }
 
1778
 
 
1779
                                mutex_enter(&(recv_sys->mutex));
 
1780
                        }
 
1781
 
 
1782
                        recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
 
1783
                }
 
1784
 
 
1785
                if (has_printed
 
1786
                    && (i * 100) / hash_get_n_cells(recv_sys->addr_hash)
 
1787
                    != ((i + 1) * 100)
 
1788
                    / hash_get_n_cells(recv_sys->addr_hash)) {
 
1789
 
 
1790
                        fprintf(stderr, "%lu ", (ulong)
 
1791
                                ((i * 100)
 
1792
                                 / hash_get_n_cells(recv_sys->addr_hash)));
 
1793
                }
 
1794
        }
 
1795
 
 
1796
        /* Wait until all the pages have been processed */
 
1797
 
 
1798
        while (recv_sys->n_addrs != 0) {
 
1799
 
 
1800
                mutex_exit(&(recv_sys->mutex));
 
1801
 
 
1802
                os_thread_sleep(500000);
 
1803
 
 
1804
                mutex_enter(&(recv_sys->mutex));
 
1805
        }
 
1806
 
 
1807
        if (has_printed) {
 
1808
 
 
1809
                fprintf(stderr, "\n");
 
1810
        }
 
1811
 
 
1812
        if (!allow_ibuf) {
 
1813
                /* Flush all the file pages to disk and invalidate them in
 
1814
                the buffer pool */
 
1815
 
 
1816
                ut_d(recv_no_log_write = TRUE);
 
1817
                mutex_exit(&(recv_sys->mutex));
 
1818
                mutex_exit(&(log_sys->mutex));
 
1819
 
 
1820
                n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX,
 
1821
                                          IB_ULONGLONG_MAX);
 
1822
                ut_a(n_pages != ULINT_UNDEFINED);
 
1823
 
 
1824
                buf_flush_wait_batch_end(BUF_FLUSH_LIST);
 
1825
 
 
1826
                buf_pool_invalidate();
 
1827
 
 
1828
                mutex_enter(&(log_sys->mutex));
 
1829
                mutex_enter(&(recv_sys->mutex));
 
1830
                ut_d(recv_no_log_write = FALSE);
 
1831
 
 
1832
                recv_no_ibuf_operations = FALSE;
 
1833
        }
 
1834
 
 
1835
        recv_sys->apply_log_recs = FALSE;
 
1836
        recv_sys->apply_batch_on = FALSE;
 
1837
 
 
1838
        recv_sys_empty_hash();
 
1839
 
 
1840
        if (has_printed) {
 
1841
                fprintf(stderr, "InnoDB: Apply batch completed\n");
 
1842
        }
 
1843
 
 
1844
        mutex_exit(&(recv_sys->mutex));
 
1845
}
 
1846
#else /* !UNIV_HOTBACKUP */
 
1847
/*******************************************************************//**
 
1848
Applies log records in the hash table to a backup. */
 
1849
UNIV_INTERN
 
1850
void
 
1851
recv_apply_log_recs_for_backup(void)
 
1852
/*================================*/
 
1853
{
 
1854
        recv_addr_t*    recv_addr;
 
1855
        ulint           n_hash_cells;
 
1856
        buf_block_t*    block;
 
1857
        ulint           actual_size;
 
1858
        ibool           success;
 
1859
        ulint           error;
 
1860
        ulint           i;
 
1861
 
 
1862
        recv_sys->apply_log_recs = TRUE;
 
1863
        recv_sys->apply_batch_on = TRUE;
 
1864
 
 
1865
        block = back_block1;
 
1866
 
 
1867
        fputs("InnoDB: Starting an apply batch of log records"
 
1868
              " to the database...\n"
 
1869
              "InnoDB: Progress in percents: ", stderr);
 
1870
 
 
1871
        n_hash_cells = hash_get_n_cells(recv_sys->addr_hash);
 
1872
 
 
1873
        for (i = 0; i < n_hash_cells; i++) {
 
1874
                /* The address hash table is externally chained */
 
1875
                recv_addr = hash_get_nth_cell(recv_sys->addr_hash, i)->node;
 
1876
 
 
1877
                while (recv_addr != NULL) {
 
1878
 
 
1879
                        ulint   zip_size
 
1880
                                = fil_space_get_zip_size(recv_addr->space);
 
1881
 
 
1882
                        if (zip_size == ULINT_UNDEFINED) {
 
1883
#if 0
 
1884
                                fprintf(stderr,
 
1885
                                        "InnoDB: Warning: cannot apply"
 
1886
                                        " log record to"
 
1887
                                        " tablespace %lu page %lu,\n"
 
1888
                                        "InnoDB: because tablespace with"
 
1889
                                        " that id does not exist.\n",
 
1890
                                        recv_addr->space, recv_addr->page_no);
 
1891
#endif
 
1892
                                recv_addr->state = RECV_PROCESSED;
 
1893
 
 
1894
                                ut_a(recv_sys->n_addrs);
 
1895
                                recv_sys->n_addrs--;
 
1896
 
 
1897
                                goto skip_this_recv_addr;
 
1898
                        }
 
1899
 
 
1900
                        /* We simulate a page read made by the buffer pool, to
 
1901
                        make sure the recovery apparatus works ok. We must init
 
1902
                        the block. */
 
1903
 
 
1904
                        buf_page_init_for_backup_restore(
 
1905
                                recv_addr->space, recv_addr->page_no,
 
1906
                                zip_size, block);
 
1907
 
 
1908
                        /* Extend the tablespace's last file if the page_no
 
1909
                        does not fall inside its bounds; we assume the last
 
1910
                        file is auto-extending, and ibbackup copied the file
 
1911
                        when it still was smaller */
 
1912
 
 
1913
                        success = fil_extend_space_to_desired_size(
 
1914
                                &actual_size,
 
1915
                                recv_addr->space, recv_addr->page_no + 1);
 
1916
                        if (!success) {
 
1917
                                fprintf(stderr,
 
1918
                                        "InnoDB: Fatal error: cannot extend"
 
1919
                                        " tablespace %lu to hold %lu pages\n",
 
1920
                                        recv_addr->space, recv_addr->page_no);
 
1921
 
 
1922
                                exit(1);
 
1923
                        }
 
1924
 
 
1925
                        /* Read the page from the tablespace file using the
 
1926
                        fil0fil.c routines */
 
1927
 
 
1928
                        if (zip_size) {
 
1929
                                error = fil_io(OS_FILE_READ, TRUE,
 
1930
                                               recv_addr->space, zip_size,
 
1931
                                               recv_addr->page_no, 0, zip_size,
 
1932
                                               block->page.zip.data, NULL);
 
1933
                                if (error == DB_SUCCESS
 
1934
                                    && !buf_zip_decompress(block, TRUE)) {
 
1935
                                        exit(1);
 
1936
                                }
 
1937
                        } else {
 
1938
                                error = fil_io(OS_FILE_READ, TRUE,
 
1939
                                               recv_addr->space, 0,
 
1940
                                               recv_addr->page_no, 0,
 
1941
                                               UNIV_PAGE_SIZE,
 
1942
                                               block->frame, NULL);
 
1943
                        }
 
1944
 
 
1945
                        if (error != DB_SUCCESS) {
 
1946
                                fprintf(stderr,
 
1947
                                        "InnoDB: Fatal error: cannot read"
 
1948
                                        " from tablespace"
 
1949
                                        " %lu page number %lu\n",
 
1950
                                        (ulong) recv_addr->space,
 
1951
                                        (ulong) recv_addr->page_no);
 
1952
 
 
1953
                                exit(1);
 
1954
                        }
 
1955
 
 
1956
                        /* Apply the log records to this page */
 
1957
                        recv_recover_page(FALSE, block);
 
1958
 
 
1959
                        /* Write the page back to the tablespace file using the
 
1960
                        fil0fil.c routines */
 
1961
 
 
1962
                        buf_flush_init_for_writing(
 
1963
                                block->frame, buf_block_get_page_zip(block),
 
1964
                                mach_read_ull(block->frame + FIL_PAGE_LSN));
 
1965
 
 
1966
                        if (zip_size) {
 
1967
                                error = fil_io(OS_FILE_WRITE, TRUE,
 
1968
                                               recv_addr->space, zip_size,
 
1969
                                               recv_addr->page_no, 0,
 
1970
                                               zip_size,
 
1971
                                               block->page.zip.data, NULL);
 
1972
                        } else {
 
1973
                                error = fil_io(OS_FILE_WRITE, TRUE,
 
1974
                                               recv_addr->space, 0,
 
1975
                                               recv_addr->page_no, 0,
 
1976
                                               UNIV_PAGE_SIZE,
 
1977
                                               block->frame, NULL);
 
1978
                        }
 
1979
skip_this_recv_addr:
 
1980
                        recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
 
1981
                }
 
1982
 
 
1983
                if ((100 * i) / n_hash_cells
 
1984
                    != (100 * (i + 1)) / n_hash_cells) {
 
1985
                        fprintf(stderr, "%lu ",
 
1986
                                (ulong) ((100 * i) / n_hash_cells));
 
1987
                        fflush(stderr);
 
1988
                }
 
1989
        }
 
1990
 
 
1991
        recv_sys_empty_hash();
 
1992
}
 
1993
#endif /* !UNIV_HOTBACKUP */
 
1994
 
 
1995
/*******************************************************************//**
 
1996
Tries to parse a single log record and returns its length.
 
1997
@return length of the record, or 0 if the record was not complete */
 
1998
static
 
1999
ulint
 
2000
recv_parse_log_rec(
 
2001
/*===============*/
 
2002
        byte*   ptr,    /*!< in: pointer to a buffer */
 
2003
        byte*   end_ptr,/*!< in: pointer to the buffer end */
 
2004
        byte*   type,   /*!< out: type */
 
2005
        ulint*  space,  /*!< out: space id */
 
2006
        ulint*  page_no,/*!< out: page number */
 
2007
        byte**  body)   /*!< out: log record body start */
 
2008
{
 
2009
        byte*   new_ptr;
 
2010
 
 
2011
        *body = NULL;
 
2012
 
 
2013
        if (ptr == end_ptr) {
 
2014
 
 
2015
                return(0);
 
2016
        }
 
2017
 
 
2018
        if (*ptr == MLOG_MULTI_REC_END) {
 
2019
 
 
2020
                *type = *ptr;
 
2021
 
 
2022
                return(1);
 
2023
        }
 
2024
 
 
2025
        if (*ptr == MLOG_DUMMY_RECORD) {
 
2026
                *type = *ptr;
 
2027
 
 
2028
                *space = ULINT_UNDEFINED - 1; /* For debugging */
 
2029
 
 
2030
                return(1);
 
2031
        }
 
2032
 
 
2033
        new_ptr = mlog_parse_initial_log_record(ptr, end_ptr, type, space,
 
2034
                                                page_no);
 
2035
        *body = new_ptr;
 
2036
 
 
2037
        if (UNIV_UNLIKELY(!new_ptr)) {
 
2038
 
 
2039
                return(0);
 
2040
        }
 
2041
 
 
2042
#ifdef UNIV_LOG_LSN_DEBUG
 
2043
        if (*type == MLOG_LSN) {
 
2044
                ib_uint64_t     lsn = (ib_uint64_t) *space << 32 | *page_no;
 
2045
# ifdef UNIV_LOG_DEBUG
 
2046
                ut_a(lsn == log_sys->old_lsn);
 
2047
# else /* UNIV_LOG_DEBUG */
 
2048
                ut_a(lsn == recv_sys->recovered_lsn);
 
2049
# endif /* UNIV_LOG_DEBUG */
 
2050
        }
 
2051
#endif /* UNIV_LOG_LSN_DEBUG */
 
2052
 
 
2053
        /* Check that page_no is sensible */
 
2054
 
 
2055
        if (UNIV_UNLIKELY(*page_no > 0x8FFFFFFFUL)) {
 
2056
 
 
2057
                recv_sys->found_corrupt_log = TRUE;
 
2058
 
 
2059
                return(0);
 
2060
        }
 
2061
 
 
2062
        new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
 
2063
                                                   NULL, NULL);
 
2064
        if (UNIV_UNLIKELY(new_ptr == NULL)) {
 
2065
 
 
2066
                return(0);
 
2067
        }
 
2068
 
 
2069
        if (*page_no > recv_max_parsed_page_no) {
 
2070
                recv_max_parsed_page_no = *page_no;
 
2071
        }
 
2072
 
 
2073
        return(new_ptr - ptr);
 
2074
}
 
2075
 
 
2076
/*******************************************************//**
 
2077
Calculates the new value for lsn when more data is added to the log. */
 
2078
static
 
2079
ib_uint64_t
 
2080
recv_calc_lsn_on_data_add(
 
2081
/*======================*/
 
2082
        ib_uint64_t     lsn,    /*!< in: old lsn */
 
2083
        ib_uint64_t     len)    /*!< in: this many bytes of data is
 
2084
                                added, log block headers not included */
 
2085
{
 
2086
        ulint   frag_len;
 
2087
        ulint   lsn_len;
 
2088
 
 
2089
        frag_len = (((ulint) lsn) % OS_FILE_LOG_BLOCK_SIZE)
 
2090
                - LOG_BLOCK_HDR_SIZE;
 
2091
        ut_ad(frag_len < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
 
2092
              - LOG_BLOCK_TRL_SIZE);
 
2093
        lsn_len = (ulint) len;
 
2094
        lsn_len += (lsn_len + frag_len)
 
2095
                / (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
 
2096
                   - LOG_BLOCK_TRL_SIZE)
 
2097
                * (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
 
2098
 
 
2099
        return(lsn + lsn_len);
 
2100
}
 
2101
 
 
2102
#ifdef UNIV_LOG_DEBUG
 
2103
/*******************************************************//**
 
2104
Checks that the parser recognizes incomplete initial segments of a log
 
2105
record as incomplete. */
 
2106
static
 
2107
void
 
2108
recv_check_incomplete_log_recs(
 
2109
/*===========================*/
 
2110
        byte*   ptr,    /*!< in: pointer to a complete log record */
 
2111
        ulint   len)    /*!< in: length of the log record */
 
2112
{
 
2113
        ulint   i;
 
2114
        byte    type;
 
2115
        ulint   space;
 
2116
        ulint   page_no;
 
2117
        byte*   body;
 
2118
 
 
2119
        for (i = 0; i < len; i++) {
 
2120
                ut_a(0 == recv_parse_log_rec(ptr, ptr + i, &type, &space,
 
2121
                                             &page_no, &body));
 
2122
        }
 
2123
}
 
2124
#endif /* UNIV_LOG_DEBUG */
 
2125
 
 
2126
/*******************************************************//**
 
2127
Prints diagnostic info of corrupt log. */
 
2128
static
 
2129
void
 
2130
recv_report_corrupt_log(
 
2131
/*====================*/
 
2132
        byte*   ptr,    /*!< in: pointer to corrupt log record */
 
2133
        byte    type,   /*!< in: type of the record */
 
2134
        ulint   space,  /*!< in: space id, this may also be garbage */
 
2135
        ulint   page_no)/*!< in: page number, this may also be garbage */
 
2136
{
 
2137
        fprintf(stderr,
 
2138
                "InnoDB: ############### CORRUPT LOG RECORD FOUND\n"
 
2139
                "InnoDB: Log record type %lu, space id %lu, page number %lu\n"
 
2140
                "InnoDB: Log parsing proceeded successfully up to %llu\n"
 
2141
                "InnoDB: Previous log record type %lu, is multi %lu\n"
 
2142
                "InnoDB: Recv offset %lu, prev %lu\n",
 
2143
                (ulong) type, (ulong) space, (ulong) page_no,
 
2144
                recv_sys->recovered_lsn,
 
2145
                (ulong) recv_previous_parsed_rec_type,
 
2146
                (ulong) recv_previous_parsed_rec_is_multi,
 
2147
                (ulong) (ptr - recv_sys->buf),
 
2148
                (ulong) recv_previous_parsed_rec_offset);
 
2149
 
 
2150
        if ((ulint)(ptr - recv_sys->buf + 100)
 
2151
            > recv_previous_parsed_rec_offset
 
2152
            && (ulint)(ptr - recv_sys->buf + 100
 
2153
                       - recv_previous_parsed_rec_offset)
 
2154
            < 200000) {
 
2155
                fputs("InnoDB: Hex dump of corrupt log starting"
 
2156
                      " 100 bytes before the start\n"
 
2157
                      "InnoDB: of the previous log rec,\n"
 
2158
                      "InnoDB: and ending 100 bytes after the start"
 
2159
                      " of the corrupt rec:\n",
 
2160
                      stderr);
 
2161
 
 
2162
                ut_print_buf(stderr,
 
2163
                             recv_sys->buf
 
2164
                             + recv_previous_parsed_rec_offset - 100,
 
2165
                             ptr - recv_sys->buf + 200
 
2166
                             - recv_previous_parsed_rec_offset);
 
2167
                putc('\n', stderr);
 
2168
        }
 
2169
 
 
2170
        fputs("InnoDB: WARNING: the log file may have been corrupt and it\n"
 
2171
              "InnoDB: is possible that the log scan did not proceed\n"
 
2172
              "InnoDB: far enough in recovery! Please run CHECK TABLE\n"
 
2173
              "InnoDB: on your InnoDB tables to check that they are ok!\n"
 
2174
              "InnoDB: If mysqld crashes after this recovery, look at\n"
 
2175
              "InnoDB: " REFMAN "forcing-recovery.html\n"
 
2176
              "InnoDB: about forcing recovery.\n", stderr);
 
2177
 
 
2178
        fflush(stderr);
 
2179
}
 
2180
 
 
2181
/*******************************************************//**
 
2182
Parses log records from a buffer and stores them to a hash table to wait
 
2183
merging to file pages.
 
2184
@return currently always returns FALSE */
 
2185
static
 
2186
ibool
 
2187
recv_parse_log_recs(
 
2188
/*================*/
 
2189
        ibool   store_to_hash)  /*!< in: TRUE if the records should be stored
 
2190
                                to the hash table; this is set to FALSE if just
 
2191
                                debug checking is needed */
 
2192
{
 
2193
        byte*           ptr;
 
2194
        byte*           end_ptr;
 
2195
        ulint           single_rec;
 
2196
        ulint           len;
 
2197
        ulint           total_len;
 
2198
        ib_uint64_t     new_recovered_lsn;
 
2199
        ib_uint64_t     old_lsn;
 
2200
        byte            type;
 
2201
        ulint           space;
 
2202
        ulint           page_no;
 
2203
        byte*           body;
 
2204
        ulint           n_recs;
 
2205
 
 
2206
        ut_ad(mutex_own(&(log_sys->mutex)));
 
2207
        ut_ad(recv_sys->parse_start_lsn != 0);
 
2208
loop:
 
2209
        ptr = recv_sys->buf + recv_sys->recovered_offset;
 
2210
 
 
2211
        end_ptr = recv_sys->buf + recv_sys->len;
 
2212
 
 
2213
        if (ptr == end_ptr) {
 
2214
 
 
2215
                return(FALSE);
 
2216
        }
 
2217
 
 
2218
        single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG;
 
2219
 
 
2220
        if (single_rec || *ptr == MLOG_DUMMY_RECORD) {
 
2221
                /* The mtr only modified a single page, or this is a file op */
 
2222
 
 
2223
                old_lsn = recv_sys->recovered_lsn;
 
2224
 
 
2225
                /* Try to parse a log record, fetching its type, space id,
 
2226
                page no, and a pointer to the body of the log record */
 
2227
 
 
2228
                len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
 
2229
                                         &page_no, &body);
 
2230
 
 
2231
                if (len == 0 || recv_sys->found_corrupt_log) {
 
2232
                        if (recv_sys->found_corrupt_log) {
 
2233
 
 
2234
                                recv_report_corrupt_log(ptr,
 
2235
                                                        type, space, page_no);
 
2236
                        }
 
2237
 
 
2238
                        return(FALSE);
 
2239
                }
 
2240
 
 
2241
                new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
 
2242
 
 
2243
                if (new_recovered_lsn > recv_sys->scanned_lsn) {
 
2244
                        /* The log record filled a log block, and we require
 
2245
                        that also the next log block should have been scanned
 
2246
                        in */
 
2247
 
 
2248
                        return(FALSE);
 
2249
                }
 
2250
 
 
2251
                recv_previous_parsed_rec_type = (ulint)type;
 
2252
                recv_previous_parsed_rec_offset = recv_sys->recovered_offset;
 
2253
                recv_previous_parsed_rec_is_multi = 0;
 
2254
 
 
2255
                recv_sys->recovered_offset += len;
 
2256
                recv_sys->recovered_lsn = new_recovered_lsn;
 
2257
 
 
2258
#ifdef UNIV_DEBUG
 
2259
                if (log_debug_writes) {
 
2260
                        fprintf(stderr,
 
2261
                                "InnoDB: Parsed a single log rec"
 
2262
                                " type %lu len %lu space %lu page no %lu\n",
 
2263
                                (ulong) type, (ulong) len, (ulong) space,
 
2264
                                (ulong) page_no);
 
2265
                }
 
2266
#endif /* UNIV_DEBUG */
 
2267
 
 
2268
                if (type == MLOG_DUMMY_RECORD) {
 
2269
                        /* Do nothing */
 
2270
 
 
2271
                } else if (!store_to_hash) {
 
2272
                        /* In debug checking, update a replicate page
 
2273
                        according to the log record, and check that it
 
2274
                        becomes identical with the original page */
 
2275
#ifdef UNIV_LOG_DEBUG
 
2276
                        recv_check_incomplete_log_recs(ptr, len);
 
2277
#endif/* UNIV_LOG_DEBUG */
 
2278
 
 
2279
                } else if (type == MLOG_FILE_CREATE
 
2280
                           || type == MLOG_FILE_CREATE2
 
2281
                           || type == MLOG_FILE_RENAME
 
2282
                           || type == MLOG_FILE_DELETE) {
 
2283
                        ut_a(space);
 
2284
#ifdef UNIV_HOTBACKUP
 
2285
                        if (recv_replay_file_ops) {
 
2286
 
 
2287
                                /* In ibbackup --apply-log, replay an .ibd file
 
2288
                                operation, if possible; note that
 
2289
                                fil_path_to_mysql_datadir is set in ibbackup to
 
2290
                                point to the datadir we should use there */
 
2291
 
 
2292
                                if (NULL == fil_op_log_parse_or_replay(
 
2293
                                            body, end_ptr, type,
 
2294
                                            space, page_no)) {
 
2295
                                        fprintf(stderr,
 
2296
                                                "InnoDB: Error: file op"
 
2297
                                                " log record of type %lu"
 
2298
                                                " space %lu not complete in\n"
 
2299
                                                "InnoDB: the replay phase."
 
2300
                                                " Path %s\n",
 
2301
                                                (ulint)type, space,
 
2302
                                                (char*)(body + 2));
 
2303
 
 
2304
                                        ut_error;
 
2305
                                }
 
2306
                        }
 
2307
#endif
 
2308
                        /* In normal mysqld crash recovery we do not try to
 
2309
                        replay file operations */
 
2310
#ifdef UNIV_LOG_LSN_DEBUG
 
2311
                } else if (type == MLOG_LSN) {
 
2312
                        /* Do not add these records to the hash table.
 
2313
                        The page number and space id fields are misused
 
2314
                        for something else. */
 
2315
#endif /* UNIV_LOG_LSN_DEBUG */
 
2316
                } else {
 
2317
                        recv_add_to_hash_table(type, space, page_no, body,
 
2318
                                               ptr + len, old_lsn,
 
2319
                                               recv_sys->recovered_lsn);
 
2320
                }
 
2321
        } else {
 
2322
                /* Check that all the records associated with the single mtr
 
2323
                are included within the buffer */
 
2324
 
 
2325
                total_len = 0;
 
2326
                n_recs = 0;
 
2327
 
 
2328
                for (;;) {
 
2329
                        len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
 
2330
                                                 &page_no, &body);
 
2331
                        if (len == 0 || recv_sys->found_corrupt_log) {
 
2332
 
 
2333
                                if (recv_sys->found_corrupt_log) {
 
2334
 
 
2335
                                        recv_report_corrupt_log(
 
2336
                                                ptr, type, space, page_no);
 
2337
                                }
 
2338
 
 
2339
                                return(FALSE);
 
2340
                        }
 
2341
 
 
2342
                        recv_previous_parsed_rec_type = (ulint)type;
 
2343
                        recv_previous_parsed_rec_offset
 
2344
                                = recv_sys->recovered_offset + total_len;
 
2345
                        recv_previous_parsed_rec_is_multi = 1;
 
2346
 
 
2347
#ifdef UNIV_LOG_DEBUG
 
2348
                        if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
 
2349
                                recv_check_incomplete_log_recs(ptr, len);
 
2350
                        }
 
2351
#endif /* UNIV_LOG_DEBUG */
 
2352
 
 
2353
#ifdef UNIV_DEBUG
 
2354
                        if (log_debug_writes) {
 
2355
                                fprintf(stderr,
 
2356
                                        "InnoDB: Parsed a multi log rec"
 
2357
                                        " type %lu len %lu"
 
2358
                                        " space %lu page no %lu\n",
 
2359
                                        (ulong) type, (ulong) len,
 
2360
                                        (ulong) space, (ulong) page_no);
 
2361
                        }
 
2362
#endif /* UNIV_DEBUG */
 
2363
 
 
2364
                        total_len += len;
 
2365
                        n_recs++;
 
2366
 
 
2367
                        ptr += len;
 
2368
 
 
2369
                        if (type == MLOG_MULTI_REC_END) {
 
2370
 
 
2371
                                /* Found the end mark for the records */
 
2372
 
 
2373
                                break;
 
2374
                        }
 
2375
                }
 
2376
 
 
2377
                new_recovered_lsn = recv_calc_lsn_on_data_add(
 
2378
                        recv_sys->recovered_lsn, total_len);
 
2379
 
 
2380
                if (new_recovered_lsn > recv_sys->scanned_lsn) {
 
2381
                        /* The log record filled a log block, and we require
 
2382
                        that also the next log block should have been scanned
 
2383
                        in */
 
2384
 
 
2385
                        return(FALSE);
 
2386
                }
 
2387
 
 
2388
                /* Add all the records to the hash table */
 
2389
 
 
2390
                ptr = recv_sys->buf + recv_sys->recovered_offset;
 
2391
 
 
2392
                for (;;) {
 
2393
                        old_lsn = recv_sys->recovered_lsn;
 
2394
                        len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
 
2395
                                                 &page_no, &body);
 
2396
                        if (recv_sys->found_corrupt_log) {
 
2397
 
 
2398
                                recv_report_corrupt_log(ptr,
 
2399
                                                        type, space, page_no);
 
2400
                        }
 
2401
 
 
2402
                        ut_a(len != 0);
 
2403
                        ut_a(0 == ((ulint)*ptr & MLOG_SINGLE_REC_FLAG));
 
2404
 
 
2405
                        recv_sys->recovered_offset += len;
 
2406
                        recv_sys->recovered_lsn
 
2407
                                = recv_calc_lsn_on_data_add(old_lsn, len);
 
2408
                        if (type == MLOG_MULTI_REC_END) {
 
2409
 
 
2410
                                /* Found the end mark for the records */
 
2411
 
 
2412
                                break;
 
2413
                        }
 
2414
 
 
2415
                        if (store_to_hash
 
2416
#ifdef UNIV_LOG_LSN_DEBUG
 
2417
                            && type != MLOG_LSN
 
2418
#endif /* UNIV_LOG_LSN_DEBUG */
 
2419
                            ) {
 
2420
                                recv_add_to_hash_table(type, space, page_no,
 
2421
                                                       body, ptr + len,
 
2422
                                                       old_lsn,
 
2423
                                                       new_recovered_lsn);
 
2424
                        }
 
2425
 
 
2426
                        ptr += len;
 
2427
                }
 
2428
        }
 
2429
 
 
2430
        goto loop;
 
2431
}
 
2432
 
 
2433
/*******************************************************//**
 
2434
Adds data from a new log block to the parsing buffer of recv_sys if
 
2435
recv_sys->parse_start_lsn is non-zero.
 
2436
@return TRUE if more data added */
 
2437
static
 
2438
ibool
 
2439
recv_sys_add_to_parsing_buf(
 
2440
/*========================*/
 
2441
        const byte*     log_block,      /*!< in: log block */
 
2442
        ib_uint64_t     scanned_lsn)    /*!< in: lsn of how far we were able
 
2443
                                        to find data in this log block */
 
2444
{
 
2445
        ulint   more_len;
 
2446
        ulint   data_len;
 
2447
        ulint   start_offset;
 
2448
        ulint   end_offset;
 
2449
 
 
2450
        ut_ad(scanned_lsn >= recv_sys->scanned_lsn);
 
2451
 
 
2452
        if (!recv_sys->parse_start_lsn) {
 
2453
                /* Cannot start parsing yet because no start point for
 
2454
                it found */
 
2455
 
 
2456
                return(FALSE);
 
2457
        }
 
2458
 
 
2459
        data_len = log_block_get_data_len(log_block);
 
2460
 
 
2461
        if (recv_sys->parse_start_lsn >= scanned_lsn) {
 
2462
 
 
2463
                return(FALSE);
 
2464
 
 
2465
        } else if (recv_sys->scanned_lsn >= scanned_lsn) {
 
2466
 
 
2467
                return(FALSE);
 
2468
 
 
2469
        } else if (recv_sys->parse_start_lsn > recv_sys->scanned_lsn) {
 
2470
                more_len = (ulint) (scanned_lsn - recv_sys->parse_start_lsn);
 
2471
        } else {
 
2472
                more_len = (ulint) (scanned_lsn - recv_sys->scanned_lsn);
 
2473
        }
 
2474
 
 
2475
        if (more_len == 0) {
 
2476
 
 
2477
                return(FALSE);
 
2478
        }
 
2479
 
 
2480
        ut_ad(data_len >= more_len);
 
2481
 
 
2482
        start_offset = data_len - more_len;
 
2483
 
 
2484
        if (start_offset < LOG_BLOCK_HDR_SIZE) {
 
2485
                start_offset = LOG_BLOCK_HDR_SIZE;
 
2486
        }
 
2487
 
 
2488
        end_offset = data_len;
 
2489
 
 
2490
        if (end_offset > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
 
2491
                end_offset = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
 
2492
        }
 
2493
 
 
2494
        ut_ad(start_offset <= end_offset);
 
2495
 
 
2496
        if (start_offset < end_offset) {
 
2497
                ut_memcpy(recv_sys->buf + recv_sys->len,
 
2498
                          log_block + start_offset, end_offset - start_offset);
 
2499
 
 
2500
                recv_sys->len += end_offset - start_offset;
 
2501
 
 
2502
                ut_a(recv_sys->len <= RECV_PARSING_BUF_SIZE);
 
2503
        }
 
2504
 
 
2505
        return(TRUE);
 
2506
}
 
2507
 
 
2508
/*******************************************************//**
 
2509
Moves the parsing buffer data left to the buffer start. */
 
2510
static
 
2511
void
 
2512
recv_sys_justify_left_parsing_buf(void)
 
2513
/*===================================*/
 
2514
{
 
2515
        ut_memmove(recv_sys->buf, recv_sys->buf + recv_sys->recovered_offset,
 
2516
                   recv_sys->len - recv_sys->recovered_offset);
 
2517
 
 
2518
        recv_sys->len -= recv_sys->recovered_offset;
 
2519
 
 
2520
        recv_sys->recovered_offset = 0;
 
2521
}
 
2522
 
 
2523
/*******************************************************//**
 
2524
Scans log from a buffer and stores new log data to the parsing buffer.
 
2525
Parses and hashes the log records if new data found.  Unless
 
2526
UNIV_HOTBACKUP is defined, this function will apply log records
 
2527
automatically when the hash table becomes full.
 
2528
@return TRUE if limit_lsn has been reached, or not able to scan any
 
2529
more in this log group */
 
2530
UNIV_INTERN
 
2531
ibool
 
2532
recv_scan_log_recs(
 
2533
/*===============*/
 
2534
        ulint           available_memory,/*!< in: we let the hash table of recs
 
2535
                                        to grow to this size, at the maximum */
 
2536
        ibool           store_to_hash,  /*!< in: TRUE if the records should be
 
2537
                                        stored to the hash table; this is set
 
2538
                                        to FALSE if just debug checking is
 
2539
                                        needed */
 
2540
        const byte*     buf,            /*!< in: buffer containing a log
 
2541
                                        segment or garbage */
 
2542
        ulint           len,            /*!< in: buffer length */
 
2543
        ib_uint64_t     start_lsn,      /*!< in: buffer start lsn */
 
2544
        ib_uint64_t*    contiguous_lsn, /*!< in/out: it is known that all log
 
2545
                                        groups contain contiguous log data up
 
2546
                                        to this lsn */
 
2547
        ib_uint64_t*    group_scanned_lsn)/*!< out: scanning succeeded up to
 
2548
                                        this lsn */
 
2549
{
 
2550
        const byte*     log_block;
 
2551
        ulint           no;
 
2552
        ib_uint64_t     scanned_lsn;
 
2553
        ibool           finished;
 
2554
        ulint           data_len;
 
2555
        ibool           more_data;
 
2556
 
 
2557
        ut_ad(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
 
2558
        ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
 
2559
        ut_ad(len > 0);
 
2560
        ut_a(store_to_hash <= TRUE);
 
2561
 
 
2562
        finished = FALSE;
 
2563
 
 
2564
        log_block = buf;
 
2565
        scanned_lsn = start_lsn;
 
2566
        more_data = FALSE;
 
2567
 
 
2568
        do {
 
2569
                no = log_block_get_hdr_no(log_block);
 
2570
                /*
 
2571
                fprintf(stderr, "Log block header no %lu\n", no);
 
2572
 
 
2573
                fprintf(stderr, "Scanned lsn no %lu\n",
 
2574
                log_block_convert_lsn_to_no(scanned_lsn));
 
2575
                */
 
2576
                if (no != log_block_convert_lsn_to_no(scanned_lsn)
 
2577
                    || !log_block_checksum_is_ok_or_old_format(log_block)) {
 
2578
 
 
2579
                        if (no == log_block_convert_lsn_to_no(scanned_lsn)
 
2580
                            && !log_block_checksum_is_ok_or_old_format(
 
2581
                                    log_block)) {
 
2582
                                fprintf(stderr,
 
2583
                                        "InnoDB: Log block no %lu at"
 
2584
                                        " lsn %llu has\n"
 
2585
                                        "InnoDB: ok header, but checksum field"
 
2586
                                        " contains %lu, should be %lu\n",
 
2587
                                        (ulong) no,
 
2588
                                        scanned_lsn,
 
2589
                                        (ulong) log_block_get_checksum(
 
2590
                                                log_block),
 
2591
                                        (ulong) log_block_calc_checksum(
 
2592
                                                log_block));
 
2593
                        }
 
2594
 
 
2595
                        /* Garbage or an incompletely written log block */
 
2596
 
 
2597
                        finished = TRUE;
 
2598
 
 
2599
                        break;
 
2600
                }
 
2601
 
 
2602
                if (log_block_get_flush_bit(log_block)) {
 
2603
                        /* This block was a start of a log flush operation:
 
2604
                        we know that the previous flush operation must have
 
2605
                        been completed for all log groups before this block
 
2606
                        can have been flushed to any of the groups. Therefore,
 
2607
                        we know that log data is contiguous up to scanned_lsn
 
2608
                        in all non-corrupt log groups. */
 
2609
 
 
2610
                        if (scanned_lsn > *contiguous_lsn) {
 
2611
                                *contiguous_lsn = scanned_lsn;
 
2612
                        }
 
2613
                }
 
2614
 
 
2615
                data_len = log_block_get_data_len(log_block);
 
2616
 
 
2617
                if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE))
 
2618
                    && scanned_lsn + data_len > recv_sys->scanned_lsn
 
2619
                    && (recv_sys->scanned_checkpoint_no > 0)
 
2620
                    && (log_block_get_checkpoint_no(log_block)
 
2621
                        < recv_sys->scanned_checkpoint_no)
 
2622
                    && (recv_sys->scanned_checkpoint_no
 
2623
                        - log_block_get_checkpoint_no(log_block)
 
2624
                        > 0x80000000UL)) {
 
2625
 
 
2626
                        /* Garbage from a log buffer flush which was made
 
2627
                        before the most recent database recovery */
 
2628
 
 
2629
                        finished = TRUE;
 
2630
#ifdef UNIV_LOG_DEBUG
 
2631
                        /* This is not really an error, but currently
 
2632
                        we stop here in the debug version: */
 
2633
 
 
2634
                        ut_error;
 
2635
#endif
 
2636
                        break;
 
2637
                }
 
2638
 
 
2639
                if (!recv_sys->parse_start_lsn
 
2640
                    && (log_block_get_first_rec_group(log_block) > 0)) {
 
2641
 
 
2642
                        /* We found a point from which to start the parsing
 
2643
                        of log records */
 
2644
 
 
2645
                        recv_sys->parse_start_lsn = scanned_lsn
 
2646
                                + log_block_get_first_rec_group(log_block);
 
2647
                        recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
 
2648
                        recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
 
2649
                }
 
2650
 
 
2651
                scanned_lsn += data_len;
 
2652
 
 
2653
                if (scanned_lsn > recv_sys->scanned_lsn) {
 
2654
 
 
2655
                        /* We have found more entries. If this scan is
 
2656
                        of startup type, we must initiate crash recovery
 
2657
                        environment before parsing these log records. */
 
2658
 
 
2659
#ifndef UNIV_HOTBACKUP
 
2660
                        if (recv_log_scan_is_startup_type
 
2661
                            && !recv_needed_recovery) {
 
2662
 
 
2663
                                fprintf(stderr,
 
2664
                                        "InnoDB: Log scan progressed"
 
2665
                                        " past the checkpoint lsn %llu\n",
 
2666
                                        recv_sys->scanned_lsn);
 
2667
                                recv_init_crash_recovery();
 
2668
                        }
 
2669
#endif /* !UNIV_HOTBACKUP */
 
2670
 
 
2671
                        /* We were able to find more log data: add it to the
 
2672
                        parsing buffer if parse_start_lsn is already
 
2673
                        non-zero */
 
2674
 
 
2675
                        if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE
 
2676
                            >= RECV_PARSING_BUF_SIZE) {
 
2677
                                fprintf(stderr,
 
2678
                                        "InnoDB: Error: log parsing"
 
2679
                                        " buffer overflow."
 
2680
                                        " Recovery may have failed!\n");
 
2681
 
 
2682
                                recv_sys->found_corrupt_log = TRUE;
 
2683
 
 
2684
                        } else if (!recv_sys->found_corrupt_log) {
 
2685
                                more_data = recv_sys_add_to_parsing_buf(
 
2686
                                        log_block, scanned_lsn);
 
2687
                        }
 
2688
 
 
2689
                        recv_sys->scanned_lsn = scanned_lsn;
 
2690
                        recv_sys->scanned_checkpoint_no
 
2691
                                = log_block_get_checkpoint_no(log_block);
 
2692
                }
 
2693
 
 
2694
                if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
 
2695
                        /* Log data for this group ends here */
 
2696
 
 
2697
                        finished = TRUE;
 
2698
                        break;
 
2699
                } else {
 
2700
                        log_block += OS_FILE_LOG_BLOCK_SIZE;
 
2701
                }
 
2702
        } while (log_block < buf + len && !finished);
 
2703
 
 
2704
        *group_scanned_lsn = scanned_lsn;
 
2705
 
 
2706
        if (recv_needed_recovery
 
2707
            || (recv_is_from_backup && !recv_is_making_a_backup)) {
 
2708
                recv_scan_print_counter++;
 
2709
 
 
2710
                if (finished || (recv_scan_print_counter % 80 == 0)) {
 
2711
 
 
2712
                        fprintf(stderr,
 
2713
                                "InnoDB: Doing recovery: scanned up to"
 
2714
                                " log sequence number %llu\n",
 
2715
                                *group_scanned_lsn);
 
2716
                }
 
2717
        }
 
2718
 
 
2719
        if (more_data && !recv_sys->found_corrupt_log) {
 
2720
                /* Try to parse more log records */
 
2721
 
 
2722
                recv_parse_log_recs(store_to_hash);
 
2723
 
 
2724
#ifndef UNIV_HOTBACKUP
 
2725
                if (store_to_hash && mem_heap_get_size(recv_sys->heap)
 
2726
                    > available_memory) {
 
2727
 
 
2728
                        /* Hash table of log records has grown too big:
 
2729
                        empty it; FALSE means no ibuf operations
 
2730
                        allowed, as we cannot add new records to the
 
2731
                        log yet: they would be produced by ibuf
 
2732
                        operations */
 
2733
 
 
2734
                        recv_apply_hashed_log_recs(FALSE);
 
2735
                }
 
2736
#endif /* !UNIV_HOTBACKUP */
 
2737
 
 
2738
                if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
 
2739
                        /* Move parsing buffer data to the buffer start */
 
2740
 
 
2741
                        recv_sys_justify_left_parsing_buf();
 
2742
                }
 
2743
        }
 
2744
 
 
2745
        return(finished);
 
2746
}
 
2747
 
 
2748
#ifndef UNIV_HOTBACKUP
 
2749
/*******************************************************//**
 
2750
Scans log from a buffer and stores new log data to the parsing buffer. Parses
 
2751
and hashes the log records if new data found. */
 
2752
static
 
2753
void
 
2754
recv_group_scan_log_recs(
 
2755
/*=====================*/
 
2756
        log_group_t*    group,          /*!< in: log group */
 
2757
        ib_uint64_t*    contiguous_lsn, /*!< in/out: it is known that all log
 
2758
                                        groups contain contiguous log data up
 
2759
                                        to this lsn */
 
2760
        ib_uint64_t*    group_scanned_lsn)/*!< out: scanning succeeded up to
 
2761
                                        this lsn */
 
2762
{
 
2763
        ibool           finished;
 
2764
        ib_uint64_t     start_lsn;
 
2765
        ib_uint64_t     end_lsn;
 
2766
 
 
2767
        finished = FALSE;
 
2768
 
 
2769
        start_lsn = *contiguous_lsn;
 
2770
 
 
2771
        while (!finished) {
 
2772
                end_lsn = start_lsn + RECV_SCAN_SIZE;
 
2773
 
 
2774
                log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
 
2775
                                       group, start_lsn, end_lsn);
 
2776
 
 
2777
                finished = recv_scan_log_recs(
 
2778
                        (buf_pool->curr_size - recv_n_pool_free_frames)
 
2779
                        * UNIV_PAGE_SIZE, TRUE, log_sys->buf, RECV_SCAN_SIZE,
 
2780
                        start_lsn, contiguous_lsn, group_scanned_lsn);
 
2781
                start_lsn = end_lsn;
 
2782
        }
 
2783
 
 
2784
#ifdef UNIV_DEBUG
 
2785
        if (log_debug_writes) {
 
2786
                fprintf(stderr,
 
2787
                        "InnoDB: Scanned group %lu up to"
 
2788
                        " log sequence number %llu\n",
 
2789
                        (ulong) group->id,
 
2790
                        *group_scanned_lsn);
 
2791
        }
 
2792
#endif /* UNIV_DEBUG */
 
2793
}
 
2794
 
 
2795
/*******************************************************//**
 
2796
Initialize crash recovery environment. Can be called iff
 
2797
recv_needed_recovery == FALSE. */
 
2798
static
 
2799
void
 
2800
recv_init_crash_recovery(void)
 
2801
/*==========================*/
 
2802
{
 
2803
        ut_a(!recv_needed_recovery);
 
2804
 
 
2805
        recv_needed_recovery = TRUE;
 
2806
 
 
2807
        ut_print_timestamp(stderr);
 
2808
 
 
2809
        fprintf(stderr,
 
2810
                "  InnoDB: Database was not"
 
2811
                " shut down normally!\n"
 
2812
                "InnoDB: Starting crash recovery.\n");
 
2813
 
 
2814
        fprintf(stderr,
 
2815
                "InnoDB: Reading tablespace information"
 
2816
                " from the .ibd files...\n");
 
2817
 
 
2818
        fil_load_single_table_tablespaces();
 
2819
 
 
2820
        /* If we are using the doublewrite method, we will
 
2821
        check if there are half-written pages in data files,
 
2822
        and restore them from the doublewrite buffer if
 
2823
        possible */
 
2824
 
 
2825
        if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
 
2826
 
 
2827
                fprintf(stderr,
 
2828
                        "InnoDB: Restoring possible"
 
2829
                        " half-written data pages from"
 
2830
                        " the doublewrite\n"
 
2831
                        "InnoDB: buffer...\n");
 
2832
                trx_sys_doublewrite_init_or_restore_pages(TRUE);
 
2833
        }
 
2834
}
 
2835
 
 
2836
/********************************************************//**
 
2837
Recovers from a checkpoint. When this function returns, the database is able
 
2838
to start processing of new user transactions, but the function
 
2839
recv_recovery_from_checkpoint_finish should be called later to complete
 
2840
the recovery and free the resources used in it.
 
2841
@return error code or DB_SUCCESS */
 
2842
UNIV_INTERN
 
2843
ulint
 
2844
recv_recovery_from_checkpoint_start_func(
 
2845
/*=====================================*/
 
2846
#ifdef UNIV_LOG_ARCHIVE
 
2847
        ulint           type,           /*!< in: LOG_CHECKPOINT or
 
2848
                                        LOG_ARCHIVE */
 
2849
        ib_uint64_t     limit_lsn,      /*!< in: recover up to this lsn
 
2850
                                        if possible */
 
2851
#endif /* UNIV_LOG_ARCHIVE */
 
2852
        ib_uint64_t     min_flushed_lsn,/*!< in: min flushed lsn from
 
2853
                                        data files */
 
2854
        ib_uint64_t     max_flushed_lsn)/*!< in: max flushed lsn from
 
2855
                                        data files */
 
2856
{
 
2857
        log_group_t*    group;
 
2858
        log_group_t*    max_cp_group;
 
2859
        log_group_t*    up_to_date_group;
 
2860
        ulint           max_cp_field;
 
2861
        ib_uint64_t     checkpoint_lsn;
 
2862
        ib_uint64_t     checkpoint_no;
 
2863
        ib_uint64_t     old_scanned_lsn;
 
2864
        ib_uint64_t     group_scanned_lsn;
 
2865
        ib_uint64_t     contiguous_lsn;
 
2866
        ib_uint64_t     archived_lsn;
 
2867
        byte*           buf;
 
2868
        byte            log_hdr_buf[LOG_FILE_HDR_SIZE];
 
2869
        ulint           err;
 
2870
 
 
2871
#ifdef UNIV_LOG_ARCHIVE
 
2872
        ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX);
 
2873
/** TRUE when recovering from a checkpoint */
 
2874
# define TYPE_CHECKPOINT        (type == LOG_CHECKPOINT)
 
2875
/** Recover up to this log sequence number */
 
2876
# define LIMIT_LSN              limit_lsn
 
2877
#else /* UNIV_LOG_ARCHIVE */
 
2878
/** TRUE when recovering from a checkpoint */
 
2879
# define TYPE_CHECKPOINT        1
 
2880
/** Recover up to this log sequence number */
 
2881
# define LIMIT_LSN              IB_ULONGLONG_MAX
 
2882
#endif /* UNIV_LOG_ARCHIVE */
 
2883
 
 
2884
        if (TYPE_CHECKPOINT) {
 
2885
                recv_sys_create();
 
2886
                recv_sys_init(buf_pool_get_curr_size());
 
2887
        }
 
2888
 
 
2889
        if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
 
2890
                fprintf(stderr,
 
2891
                        "InnoDB: The user has set SRV_FORCE_NO_LOG_REDO on\n");
 
2892
                fprintf(stderr,
 
2893
                        "InnoDB: Skipping log redo\n");
 
2894
 
 
2895
                return(DB_SUCCESS);
 
2896
        }
 
2897
 
 
2898
        recv_recovery_on = TRUE;
 
2899
 
 
2900
        recv_sys->limit_lsn = LIMIT_LSN;
 
2901
 
 
2902
        mutex_enter(&(log_sys->mutex));
 
2903
 
 
2904
        /* Look for the latest checkpoint from any of the log groups */
 
2905
 
 
2906
        err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);
 
2907
 
 
2908
        if (err != DB_SUCCESS) {
 
2909
 
 
2910
                mutex_exit(&(log_sys->mutex));
 
2911
 
 
2912
                return(err);
 
2913
        }
 
2914
 
 
2915
        log_group_read_checkpoint_info(max_cp_group, max_cp_field);
 
2916
 
 
2917
        buf = log_sys->checkpoint_buf;
 
2918
 
 
2919
        checkpoint_lsn = mach_read_ull(buf + LOG_CHECKPOINT_LSN);
 
2920
        checkpoint_no = mach_read_ull(buf + LOG_CHECKPOINT_NO);
 
2921
        archived_lsn = mach_read_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
 
2922
 
 
2923
        /* Read the first log file header to print a note if this is
 
2924
        a recovery from a restored InnoDB Hot Backup */
 
2925
 
 
2926
        fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id, 0,
 
2927
               0, 0, LOG_FILE_HDR_SIZE,
 
2928
               log_hdr_buf, max_cp_group);
 
2929
 
 
2930
        if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
 
2931
                           (byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
 
2932
                /* This log file was created by ibbackup --restore: print
 
2933
                a note to the user about it */
 
2934
 
 
2935
                fprintf(stderr,
 
2936
                        "InnoDB: The log file was created by"
 
2937
                        " ibbackup --apply-log at\n"
 
2938
                        "InnoDB: %s\n",
 
2939
                        log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP);
 
2940
                fprintf(stderr,
 
2941
                        "InnoDB: NOTE: the following crash recovery"
 
2942
                        " is part of a normal restore.\n");
 
2943
 
 
2944
                /* Wipe over the label now */
 
2945
 
 
2946
                memset(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
 
2947
                       ' ', 4);
 
2948
                /* Write to the log file to wipe over the label */
 
2949
                fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE,
 
2950
                       max_cp_group->space_id, 0,
 
2951
                       0, 0, OS_FILE_LOG_BLOCK_SIZE,
 
2952
                       log_hdr_buf, max_cp_group);
 
2953
        }
 
2954
 
 
2955
#ifdef UNIV_LOG_ARCHIVE
 
2956
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
2957
 
 
2958
        while (group) {
 
2959
                log_checkpoint_get_nth_group_info(buf, group->id,
 
2960
                                                  &(group->archived_file_no),
 
2961
                                                  &(group->archived_offset));
 
2962
 
 
2963
                group = UT_LIST_GET_NEXT(log_groups, group);
 
2964
        }
 
2965
#endif /* UNIV_LOG_ARCHIVE */
 
2966
 
 
2967
        if (TYPE_CHECKPOINT) {
 
2968
                /* Start reading the log groups from the checkpoint lsn up. The
 
2969
                variable contiguous_lsn contains an lsn up to which the log is
 
2970
                known to be contiguously written to all log groups. */
 
2971
 
 
2972
                recv_sys->parse_start_lsn = checkpoint_lsn;
 
2973
                recv_sys->scanned_lsn = checkpoint_lsn;
 
2974
                recv_sys->scanned_checkpoint_no = 0;
 
2975
                recv_sys->recovered_lsn = checkpoint_lsn;
 
2976
 
 
2977
                srv_start_lsn = checkpoint_lsn;
 
2978
        }
 
2979
 
 
2980
        contiguous_lsn = ut_uint64_align_down(recv_sys->scanned_lsn,
 
2981
                                              OS_FILE_LOG_BLOCK_SIZE);
 
2982
        if (TYPE_CHECKPOINT) {
 
2983
                up_to_date_group = max_cp_group;
 
2984
#ifdef UNIV_LOG_ARCHIVE
 
2985
        } else {
 
2986
                ulint   capacity;
 
2987
 
 
2988
                /* Try to recover the remaining part from logs: first from
 
2989
                the logs of the archived group */
 
2990
 
 
2991
                group = recv_sys->archive_group;
 
2992
                capacity = log_group_get_capacity(group);
 
2993
 
 
2994
                if (recv_sys->scanned_lsn > checkpoint_lsn + capacity
 
2995
                    || checkpoint_lsn > recv_sys->scanned_lsn + capacity) {
 
2996
 
 
2997
                        mutex_exit(&(log_sys->mutex));
 
2998
 
 
2999
                        /* The group does not contain enough log: probably
 
3000
                        an archived log file was missing or corrupt */
 
3001
 
 
3002
                        return(DB_ERROR);
 
3003
                }
 
3004
 
 
3005
                recv_group_scan_log_recs(group, &contiguous_lsn,
 
3006
                                         &group_scanned_lsn);
 
3007
                if (recv_sys->scanned_lsn < checkpoint_lsn) {
 
3008
 
 
3009
                        mutex_exit(&(log_sys->mutex));
 
3010
 
 
3011
                        /* The group did not contain enough log: an archived
 
3012
                        log file was missing or invalid, or the log group
 
3013
                        was corrupt */
 
3014
 
 
3015
                        return(DB_ERROR);
 
3016
                }
 
3017
 
 
3018
                group->scanned_lsn = group_scanned_lsn;
 
3019
                up_to_date_group = group;
 
3020
#endif /* UNIV_LOG_ARCHIVE */
 
3021
        }
 
3022
 
 
3023
        ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
 
3024
 
 
3025
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
3026
 
 
3027
#ifdef UNIV_LOG_ARCHIVE
 
3028
        if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) {
 
3029
                group = UT_LIST_GET_NEXT(log_groups, group);
 
3030
        }
 
3031
#endif /* UNIV_LOG_ARCHIVE */
 
3032
 
 
3033
        /* Set the flag to publish that we are doing startup scan. */
 
3034
        recv_log_scan_is_startup_type = TYPE_CHECKPOINT;
 
3035
        while (group) {
 
3036
                old_scanned_lsn = recv_sys->scanned_lsn;
 
3037
 
 
3038
                recv_group_scan_log_recs(group, &contiguous_lsn,
 
3039
                                         &group_scanned_lsn);
 
3040
                group->scanned_lsn = group_scanned_lsn;
 
3041
 
 
3042
                if (old_scanned_lsn < group_scanned_lsn) {
 
3043
                        /* We found a more up-to-date group */
 
3044
 
 
3045
                        up_to_date_group = group;
 
3046
                }
 
3047
 
 
3048
#ifdef UNIV_LOG_ARCHIVE
 
3049
                if ((type == LOG_ARCHIVE)
 
3050
                    && (group == recv_sys->archive_group)) {
 
3051
                        group = UT_LIST_GET_NEXT(log_groups, group);
 
3052
                }
 
3053
#endif /* UNIV_LOG_ARCHIVE */
 
3054
 
 
3055
                group = UT_LIST_GET_NEXT(log_groups, group);
 
3056
        }
 
3057
 
 
3058
        /* Done with startup scan. Clear the flag. */
 
3059
        recv_log_scan_is_startup_type = FALSE;
 
3060
        if (TYPE_CHECKPOINT) {
 
3061
                /* NOTE: we always do a 'recovery' at startup, but only if
 
3062
                there is something wrong we will print a message to the
 
3063
                user about recovery: */
 
3064
 
 
3065
                if (checkpoint_lsn != max_flushed_lsn
 
3066
                    || checkpoint_lsn != min_flushed_lsn) {
 
3067
 
 
3068
                        if (checkpoint_lsn < max_flushed_lsn) {
 
3069
                                fprintf(stderr,
 
3070
                                        "InnoDB: #########################"
 
3071
                                        "#################################\n"
 
3072
                                        "InnoDB:                          "
 
3073
                                        "WARNING!\n"
 
3074
                                        "InnoDB: The log sequence number"
 
3075
                                        " in ibdata files is higher\n"
 
3076
                                        "InnoDB: than the log sequence number"
 
3077
                                        " in the ib_logfiles! Are you sure\n"
 
3078
                                        "InnoDB: you are using the right"
 
3079
                                        " ib_logfiles to start up"
 
3080
                                        " the database?\n"
 
3081
                                        "InnoDB: Log sequence number in"
 
3082
                                        " ib_logfiles is %llu, log\n"
 
3083
                                        "InnoDB: sequence numbers stamped"
 
3084
                                        " to ibdata file headers are between\n"
 
3085
                                        "InnoDB: %llu and %llu.\n"
 
3086
                                        "InnoDB: #########################"
 
3087
                                        "#################################\n",
 
3088
                                        checkpoint_lsn,
 
3089
                                        min_flushed_lsn,
 
3090
                                        max_flushed_lsn);
 
3091
                        }
 
3092
 
 
3093
                        if (!recv_needed_recovery) {
 
3094
                                fprintf(stderr,
 
3095
                                        "InnoDB: The log sequence number"
 
3096
                                        " in ibdata files does not match\n"
 
3097
                                        "InnoDB: the log sequence number"
 
3098
                                        " in the ib_logfiles!\n");
 
3099
                                recv_init_crash_recovery();
 
3100
                        }
 
3101
                }
 
3102
 
 
3103
                if (!recv_needed_recovery) {
 
3104
                        /* Init the doublewrite buffer memory structure */
 
3105
                        trx_sys_doublewrite_init_or_restore_pages(FALSE);
 
3106
                }
 
3107
        }
 
3108
 
 
3109
        /* We currently have only one log group */
 
3110
        if (group_scanned_lsn < checkpoint_lsn) {
 
3111
                ut_print_timestamp(stderr);
 
3112
                fprintf(stderr,
 
3113
                        "  InnoDB: ERROR: We were only able to scan the log"
 
3114
                        " up to\n"
 
3115
                        "InnoDB: %llu, but a checkpoint was at %llu.\n"
 
3116
                        "InnoDB: It is possible that"
 
3117
                        " the database is now corrupt!\n",
 
3118
                        group_scanned_lsn,
 
3119
                        checkpoint_lsn);
 
3120
        }
 
3121
 
 
3122
        if (group_scanned_lsn < recv_max_page_lsn) {
 
3123
                ut_print_timestamp(stderr);
 
3124
                fprintf(stderr,
 
3125
                        "  InnoDB: ERROR: We were only able to scan the log"
 
3126
                        " up to %llu\n"
 
3127
                        "InnoDB: but a database page a had an lsn %llu."
 
3128
                        " It is possible that the\n"
 
3129
                        "InnoDB: database is now corrupt!\n",
 
3130
                        group_scanned_lsn,
 
3131
                        recv_max_page_lsn);
 
3132
        }
 
3133
 
 
3134
        if (recv_sys->recovered_lsn < checkpoint_lsn) {
 
3135
 
 
3136
                mutex_exit(&(log_sys->mutex));
 
3137
 
 
3138
                if (recv_sys->recovered_lsn >= LIMIT_LSN) {
 
3139
 
 
3140
                        return(DB_SUCCESS);
 
3141
                }
 
3142
 
 
3143
                ut_error;
 
3144
 
 
3145
                return(DB_ERROR);
 
3146
        }
 
3147
 
 
3148
        /* Synchronize the uncorrupted log groups to the most up-to-date log
 
3149
        group; we also copy checkpoint info to groups */
 
3150
 
 
3151
        log_sys->next_checkpoint_lsn = checkpoint_lsn;
 
3152
        log_sys->next_checkpoint_no = checkpoint_no + 1;
 
3153
 
 
3154
#ifdef UNIV_LOG_ARCHIVE
 
3155
        log_sys->archived_lsn = archived_lsn;
 
3156
#endif /* UNIV_LOG_ARCHIVE */
 
3157
 
 
3158
        recv_synchronize_groups(up_to_date_group);
 
3159
 
 
3160
        if (!recv_needed_recovery) {
 
3161
                ut_a(checkpoint_lsn == recv_sys->recovered_lsn);
 
3162
        } else {
 
3163
                srv_start_lsn = recv_sys->recovered_lsn;
 
3164
        }
 
3165
 
 
3166
        log_sys->lsn = recv_sys->recovered_lsn;
 
3167
 
 
3168
        ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE);
 
3169
 
 
3170
        log_sys->buf_free = (ulint) log_sys->lsn % OS_FILE_LOG_BLOCK_SIZE;
 
3171
        log_sys->buf_next_to_write = log_sys->buf_free;
 
3172
        log_sys->written_to_some_lsn = log_sys->lsn;
 
3173
        log_sys->written_to_all_lsn = log_sys->lsn;
 
3174
 
 
3175
        log_sys->last_checkpoint_lsn = checkpoint_lsn;
 
3176
 
 
3177
        log_sys->next_checkpoint_no = checkpoint_no + 1;
 
3178
 
 
3179
#ifdef UNIV_LOG_ARCHIVE
 
3180
        if (archived_lsn == IB_ULONGLONG_MAX) {
 
3181
 
 
3182
                log_sys->archiving_state = LOG_ARCH_OFF;
 
3183
        }
 
3184
#endif /* UNIV_LOG_ARCHIVE */
 
3185
 
 
3186
        mutex_enter(&(recv_sys->mutex));
 
3187
 
 
3188
        recv_sys->apply_log_recs = TRUE;
 
3189
 
 
3190
        mutex_exit(&(recv_sys->mutex));
 
3191
 
 
3192
        mutex_exit(&(log_sys->mutex));
 
3193
 
 
3194
        recv_lsn_checks_on = TRUE;
 
3195
 
 
3196
        /* The database is now ready to start almost normal processing of user
 
3197
        transactions: transaction rollbacks and the application of the log
 
3198
        records in the hash table can be run in background. */
 
3199
 
 
3200
        return(DB_SUCCESS);
 
3201
 
 
3202
#undef TYPE_CHECKPOINT
 
3203
#undef LIMIT_LSN
 
3204
}
 
3205
 
 
3206
/********************************************************//**
 
3207
Completes recovery from a checkpoint. */
 
3208
UNIV_INTERN
 
3209
void
 
3210
recv_recovery_from_checkpoint_finish(void)
 
3211
/*======================================*/
 
3212
{
 
3213
        int             i;
 
3214
 
 
3215
        /* Apply the hashed log records to the respective file pages */
 
3216
 
 
3217
        if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
 
3218
 
 
3219
                recv_apply_hashed_log_recs(TRUE);
 
3220
        }
 
3221
 
 
3222
#ifdef UNIV_DEBUG
 
3223
        if (log_debug_writes) {
 
3224
                fprintf(stderr,
 
3225
                        "InnoDB: Log records applied to the database\n");
 
3226
        }
 
3227
#endif /* UNIV_DEBUG */
 
3228
 
 
3229
        if (recv_needed_recovery) {
 
3230
                trx_sys_print_mysql_master_log_pos();
 
3231
                trx_sys_print_mysql_binlog_offset();
 
3232
        }
 
3233
 
 
3234
        if (recv_sys->found_corrupt_log) {
 
3235
 
 
3236
                fprintf(stderr,
 
3237
                        "InnoDB: WARNING: the log file may have been"
 
3238
                        " corrupt and it\n"
 
3239
                        "InnoDB: is possible that the log scan or parsing"
 
3240
                        " did not proceed\n"
 
3241
                        "InnoDB: far enough in recovery. Please run"
 
3242
                        " CHECK TABLE\n"
 
3243
                        "InnoDB: on your InnoDB tables to check that"
 
3244
                        " they are ok!\n"
 
3245
                        "InnoDB: It may be safest to recover your"
 
3246
                        " InnoDB database from\n"
 
3247
                        "InnoDB: a backup!\n");
 
3248
        }
 
3249
 
 
3250
        /* Free the resources of the recovery system */
 
3251
 
 
3252
        recv_recovery_on = FALSE;
 
3253
 
 
3254
#ifndef UNIV_LOG_DEBUG
 
3255
        recv_sys_debug_free();
 
3256
#endif
 
3257
        /* Roll back any recovered data dictionary transactions, so
 
3258
        that the data dictionary tables will be free of any locks.
 
3259
        The data dictionary latch should guarantee that there is at
 
3260
        most one data dictionary transaction active at a time. */
 
3261
        trx_rollback_or_clean_recovered(FALSE);
 
3262
 
 
3263
        /* Drop partially created indexes. */
 
3264
        row_merge_drop_temp_indexes();
 
3265
 
 
3266
#ifdef UNIV_SYNC_DEBUG
 
3267
        /* Wait for a while so that created threads have time to suspend
 
3268
        themselves before we switch the latching order checks on */
 
3269
        os_thread_sleep(1000000);
 
3270
 
 
3271
        /* Switch latching order checks on in sync0sync.c */
 
3272
        sync_order_checks_on = TRUE;
 
3273
#endif
 
3274
        if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
 
3275
                /* Rollback the uncommitted transactions which have no user
 
3276
                session */
 
3277
 
 
3278
                os_thread_create(trx_rollback_or_clean_all_recovered,
 
3279
                                 (void *)&i, NULL);
 
3280
        }
 
3281
}
 
3282
 
 
3283
/******************************************************//**
 
3284
Resets the logs. The contents of log files will be lost! */
 
3285
UNIV_INTERN
 
3286
void
 
3287
recv_reset_logs(
 
3288
/*============*/
 
3289
        ib_uint64_t     lsn,            /*!< in: reset to this lsn
 
3290
                                        rounded up to be divisible by
 
3291
                                        OS_FILE_LOG_BLOCK_SIZE, after
 
3292
                                        which we add
 
3293
                                        LOG_BLOCK_HDR_SIZE */
 
3294
#ifdef UNIV_LOG_ARCHIVE
 
3295
        ulint           arch_log_no,    /*!< in: next archived log file number */
 
3296
#endif /* UNIV_LOG_ARCHIVE */
 
3297
        ibool           new_logs_created)/*!< in: TRUE if resetting logs
 
3298
                                        is done at the log creation;
 
3299
                                        FALSE if it is done after
 
3300
                                        archive recovery */
 
3301
{
 
3302
        log_group_t*    group;
 
3303
 
 
3304
        ut_ad(mutex_own(&(log_sys->mutex)));
 
3305
 
 
3306
        log_sys->lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
 
3307
 
 
3308
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
3309
 
 
3310
        while (group) {
 
3311
                group->lsn = log_sys->lsn;
 
3312
                group->lsn_offset = LOG_FILE_HDR_SIZE;
 
3313
#ifdef UNIV_LOG_ARCHIVE
 
3314
                group->archived_file_no = arch_log_no;
 
3315
                group->archived_offset = 0;
 
3316
#endif /* UNIV_LOG_ARCHIVE */
 
3317
 
 
3318
                if (!new_logs_created) {
 
3319
                        recv_truncate_group(group, group->lsn, group->lsn,
 
3320
                                            group->lsn, group->lsn);
 
3321
                }
 
3322
 
 
3323
                group = UT_LIST_GET_NEXT(log_groups, group);
 
3324
        }
 
3325
 
 
3326
        log_sys->buf_next_to_write = 0;
 
3327
        log_sys->written_to_some_lsn = log_sys->lsn;
 
3328
        log_sys->written_to_all_lsn = log_sys->lsn;
 
3329
 
 
3330
        log_sys->next_checkpoint_no = 0;
 
3331
        log_sys->last_checkpoint_lsn = 0;
 
3332
 
 
3333
#ifdef UNIV_LOG_ARCHIVE
 
3334
        log_sys->archived_lsn = log_sys->lsn;
 
3335
#endif /* UNIV_LOG_ARCHIVE */
 
3336
 
 
3337
        log_block_init(log_sys->buf, log_sys->lsn);
 
3338
        log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
 
3339
 
 
3340
        log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
 
3341
        log_sys->lsn += LOG_BLOCK_HDR_SIZE;
 
3342
 
 
3343
        mutex_exit(&(log_sys->mutex));
 
3344
 
 
3345
        /* Reset the checkpoint fields in logs */
 
3346
 
 
3347
        log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
 
3348
        log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
 
3349
 
 
3350
        mutex_enter(&(log_sys->mutex));
 
3351
}
 
3352
#endif /* !UNIV_HOTBACKUP */
 
3353
 
 
3354
#ifdef UNIV_HOTBACKUP
 
3355
/******************************************************//**
 
3356
Creates new log files after a backup has been restored. */
 
3357
UNIV_INTERN
 
3358
void
 
3359
recv_reset_log_files_for_backup(
 
3360
/*============================*/
 
3361
        const char*     log_dir,        /*!< in: log file directory path */
 
3362
        ulint           n_log_files,    /*!< in: number of log files */
 
3363
        ulint           log_file_size,  /*!< in: log file size */
 
3364
        ib_uint64_t     lsn)            /*!< in: new start lsn, must be
 
3365
                                        divisible by OS_FILE_LOG_BLOCK_SIZE */
 
3366
{
 
3367
        os_file_t       log_file;
 
3368
        ibool           success;
 
3369
        byte*           buf;
 
3370
        ulint           i;
 
3371
        ulint           log_dir_len;
 
3372
        char            name[5000];
 
3373
        static const char ib_logfile_basename[] = "ib_logfile";
 
3374
 
 
3375
        log_dir_len = strlen(log_dir);
 
3376
        /* full path name of ib_logfile consists of log dir path + basename
 
3377
        + number. This must fit in the name buffer.
 
3378
        */
 
3379
        ut_a(log_dir_len + strlen(ib_logfile_basename) + 11  < sizeof(name));
 
3380
 
 
3381
        buf = ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
 
3382
        memset(buf, '\0', LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
 
3383
 
 
3384
        for (i = 0; i < n_log_files; i++) {
 
3385
 
 
3386
                sprintf(name, "%s%s%lu", log_dir,
 
3387
                        ib_logfile_basename, (ulong)i);
 
3388
 
 
3389
                log_file = os_file_create_simple(name, OS_FILE_CREATE,
 
3390
                                                 OS_FILE_READ_WRITE, &success);
 
3391
                if (!success) {
 
3392
                        fprintf(stderr,
 
3393
                                "InnoDB: Cannot create %s. Check that"
 
3394
                                " the file does not exist yet.\n", name);
 
3395
 
 
3396
                        exit(1);
 
3397
                }
 
3398
 
 
3399
                fprintf(stderr,
 
3400
                        "Setting log file size to %lu %lu\n",
 
3401
                        (ulong) ut_get_high32(log_file_size),
 
3402
                        (ulong) log_file_size & 0xFFFFFFFFUL);
 
3403
 
 
3404
                success = os_file_set_size(name, log_file,
 
3405
                                           log_file_size & 0xFFFFFFFFUL,
 
3406
                                           ut_get_high32(log_file_size));
 
3407
 
 
3408
                if (!success) {
 
3409
                        fprintf(stderr,
 
3410
                                "InnoDB: Cannot set %s size to %lu %lu\n",
 
3411
                                name, (ulong) ut_get_high32(log_file_size),
 
3412
                                (ulong) (log_file_size & 0xFFFFFFFFUL));
 
3413
                        exit(1);
 
3414
                }
 
3415
 
 
3416
                os_file_flush(log_file);
 
3417
                os_file_close(log_file);
 
3418
        }
 
3419
 
 
3420
        /* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */
 
3421
 
 
3422
        log_reset_first_header_and_checkpoint(buf, lsn);
 
3423
 
 
3424
        log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn);
 
3425
        log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
 
3426
                                      LOG_BLOCK_HDR_SIZE);
 
3427
        sprintf(name, "%s%s%lu", log_dir, ib_logfile_basename, (ulong)0);
 
3428
 
 
3429
        log_file = os_file_create_simple(name, OS_FILE_OPEN,
 
3430
                                         OS_FILE_READ_WRITE, &success);
 
3431
        if (!success) {
 
3432
                fprintf(stderr, "InnoDB: Cannot open %s.\n", name);
 
3433
 
 
3434
                exit(1);
 
3435
        }
 
3436
 
 
3437
        os_file_write(name, log_file, buf, 0, 0,
 
3438
                      LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
 
3439
        os_file_flush(log_file);
 
3440
        os_file_close(log_file);
 
3441
 
 
3442
        ut_free(buf);
 
3443
}
 
3444
#endif /* UNIV_HOTBACKUP */
 
3445
 
 
3446
#ifdef UNIV_LOG_ARCHIVE
 
3447
/******************************************************//**
 
3448
Reads from the archive of a log group and performs recovery.
 
3449
@return TRUE if no more complete consistent archive files */
 
3450
static
 
3451
ibool
 
3452
log_group_recover_from_archive_file(
 
3453
/*================================*/
 
3454
        log_group_t*    group)          /*!< in: log group */
 
3455
{
 
3456
        os_file_t       file_handle;
 
3457
        ib_uint64_t     start_lsn;
 
3458
        ib_uint64_t     file_end_lsn;
 
3459
        ib_uint64_t     dummy_lsn;
 
3460
        ib_uint64_t     scanned_lsn;
 
3461
        ulint           len;
 
3462
        ibool           ret;
 
3463
        byte*           buf;
 
3464
        ulint           read_offset;
 
3465
        ulint           file_size;
 
3466
        ulint           file_size_high;
 
3467
        int             input_char;
 
3468
        char            name[10000];
 
3469
 
 
3470
        ut_a(0);
 
3471
 
 
3472
try_open_again:
 
3473
        buf = log_sys->buf;
 
3474
 
 
3475
        /* Add the file to the archive file space; open the file */
 
3476
 
 
3477
        log_archived_file_name_gen(name, group->id, group->archived_file_no);
 
3478
 
 
3479
        file_handle = os_file_create(name, OS_FILE_OPEN,
 
3480
                                     OS_FILE_LOG, OS_FILE_AIO, &ret);
 
3481
 
 
3482
        if (ret == FALSE) {
 
3483
ask_again:
 
3484
                fprintf(stderr,
 
3485
                        "InnoDB: Do you want to copy additional"
 
3486
                        " archived log files\n"
 
3487
                        "InnoDB: to the directory\n");
 
3488
                fprintf(stderr,
 
3489
                        "InnoDB: or were these all the files needed"
 
3490
                        " in recovery?\n");
 
3491
                fprintf(stderr,
 
3492
                        "InnoDB: (Y == copy more files; N == this is all)?");
 
3493
 
 
3494
                input_char = getchar();
 
3495
 
 
3496
                if (input_char == (int) 'N') {
 
3497
 
 
3498
                        return(TRUE);
 
3499
                } else if (input_char == (int) 'Y') {
 
3500
 
 
3501
                        goto try_open_again;
 
3502
                } else {
 
3503
                        goto ask_again;
 
3504
                }
 
3505
        }
 
3506
 
 
3507
        ret = os_file_get_size(file_handle, &file_size, &file_size_high);
 
3508
        ut_a(ret);
 
3509
 
 
3510
        ut_a(file_size_high == 0);
 
3511
 
 
3512
        fprintf(stderr, "InnoDB: Opened archived log file %s\n", name);
 
3513
 
 
3514
        ret = os_file_close(file_handle);
 
3515
 
 
3516
        if (file_size < LOG_FILE_HDR_SIZE) {
 
3517
                fprintf(stderr,
 
3518
                        "InnoDB: Archive file header incomplete %s\n", name);
 
3519
 
 
3520
                return(TRUE);
 
3521
        }
 
3522
 
 
3523
        ut_a(ret);
 
3524
 
 
3525
        /* Add the archive file as a node to the space */
 
3526
 
 
3527
        fil_node_create(name, 1 + file_size / UNIV_PAGE_SIZE,
 
3528
                        group->archive_space_id, FALSE);
 
3529
#if RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE
 
3530
# error "RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE"
 
3531
#endif
 
3532
 
 
3533
        /* Read the archive file header */
 
3534
        fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->archive_space_id, 0, 0,
 
3535
               LOG_FILE_HDR_SIZE, buf, NULL);
 
3536
 
 
3537
        /* Check if the archive file header is consistent */
 
3538
 
 
3539
        if (mach_read_from_4(buf + LOG_GROUP_ID) != group->id
 
3540
            || mach_read_from_4(buf + LOG_FILE_NO)
 
3541
            != group->archived_file_no) {
 
3542
                fprintf(stderr,
 
3543
                        "InnoDB: Archive file header inconsistent %s\n", name);
 
3544
 
 
3545
                return(TRUE);
 
3546
        }
 
3547
 
 
3548
        if (!mach_read_from_4(buf + LOG_FILE_ARCH_COMPLETED)) {
 
3549
                fprintf(stderr,
 
3550
                        "InnoDB: Archive file not completely written %s\n",
 
3551
                        name);
 
3552
 
 
3553
                return(TRUE);
 
3554
        }
 
3555
 
 
3556
        start_lsn = mach_read_ull(buf + LOG_FILE_START_LSN);
 
3557
        file_end_lsn = mach_read_ull(buf + LOG_FILE_END_LSN);
 
3558
 
 
3559
        if (!recv_sys->scanned_lsn) {
 
3560
 
 
3561
                if (recv_sys->parse_start_lsn < start_lsn) {
 
3562
                        fprintf(stderr,
 
3563
                                "InnoDB: Archive log file %s"
 
3564
                                " starts from too big a lsn\n",
 
3565
                                name);
 
3566
                        return(TRUE);
 
3567
                }
 
3568
 
 
3569
                recv_sys->scanned_lsn = start_lsn;
 
3570
        }
 
3571
 
 
3572
        if (recv_sys->scanned_lsn != start_lsn) {
 
3573
 
 
3574
                fprintf(stderr,
 
3575
                        "InnoDB: Archive log file %s starts from"
 
3576
                        " a wrong lsn\n",
 
3577
                        name);
 
3578
                return(TRUE);
 
3579
        }
 
3580
 
 
3581
        read_offset = LOG_FILE_HDR_SIZE;
 
3582
 
 
3583
        for (;;) {
 
3584
                len = RECV_SCAN_SIZE;
 
3585
 
 
3586
                if (read_offset + len > file_size) {
 
3587
                        len = ut_calc_align_down(file_size - read_offset,
 
3588
                                                 OS_FILE_LOG_BLOCK_SIZE);
 
3589
                }
 
3590
 
 
3591
                if (len == 0) {
 
3592
 
 
3593
                        break;
 
3594
                }
 
3595
 
 
3596
#ifdef UNIV_DEBUG
 
3597
                if (log_debug_writes) {
 
3598
                        fprintf(stderr,
 
3599
                                "InnoDB: Archive read starting at"
 
3600
                                " lsn %llu, len %lu from file %s\n",
 
3601
                                start_lsn,
 
3602
                                (ulong) len, name);
 
3603
                }
 
3604
#endif /* UNIV_DEBUG */
 
3605
 
 
3606
                fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE,
 
3607
                       group->archive_space_id, read_offset / UNIV_PAGE_SIZE,
 
3608
                       read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
 
3609
 
 
3610
                ret = recv_scan_log_recs(
 
3611
                        (buf_pool->n_frames - recv_n_pool_free_frames)
 
3612
                        * UNIV_PAGE_SIZE, TRUE, buf, len, start_lsn,
 
3613
                        &dummy_lsn, &scanned_lsn);
 
3614
 
 
3615
                if (scanned_lsn == file_end_lsn) {
 
3616
 
 
3617
                        return(FALSE);
 
3618
                }
 
3619
 
 
3620
                if (ret) {
 
3621
                        fprintf(stderr,
 
3622
                                "InnoDB: Archive log file %s"
 
3623
                                " does not scan right\n",
 
3624
                                name);
 
3625
                        return(TRUE);
 
3626
                }
 
3627
 
 
3628
                read_offset += len;
 
3629
                start_lsn += len;
 
3630
 
 
3631
                ut_ad(start_lsn == scanned_lsn);
 
3632
        }
 
3633
 
 
3634
        return(FALSE);
 
3635
}
 
3636
 
 
3637
/********************************************************//**
 
3638
Recovers from archived log files, and also from log files, if they exist.
 
3639
@return error code or DB_SUCCESS */
 
3640
UNIV_INTERN
 
3641
ulint
 
3642
recv_recovery_from_archive_start(
 
3643
/*=============================*/
 
3644
        ib_uint64_t     min_flushed_lsn,/*!< in: min flushed lsn field from the
 
3645
                                        data files */
 
3646
        ib_uint64_t     limit_lsn,      /*!< in: recover up to this lsn if
 
3647
                                        possible */
 
3648
        ulint           first_log_no)   /*!< in: number of the first archived
 
3649
                                        log file to use in the recovery; the
 
3650
                                        file will be searched from
 
3651
                                        INNOBASE_LOG_ARCH_DIR specified in
 
3652
                                        server config file */
 
3653
{
 
3654
        log_group_t*    group;
 
3655
        ulint           group_id;
 
3656
        ulint           trunc_len;
 
3657
        ibool           ret;
 
3658
        ulint           err;
 
3659
 
 
3660
        ut_a(0);
 
3661
 
 
3662
        recv_sys_create();
 
3663
        recv_sys_init(buf_pool_get_curr_size());
 
3664
 
 
3665
        recv_recovery_on = TRUE;
 
3666
        recv_recovery_from_backup_on = TRUE;
 
3667
 
 
3668
        recv_sys->limit_lsn = limit_lsn;
 
3669
 
 
3670
        group_id = 0;
 
3671
 
 
3672
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
3673
 
 
3674
        while (group) {
 
3675
                if (group->id == group_id) {
 
3676
 
 
3677
                        break;
 
3678
                }
 
3679
 
 
3680
                group = UT_LIST_GET_NEXT(log_groups, group);
 
3681
        }
 
3682
 
 
3683
        if (!group) {
 
3684
                fprintf(stderr,
 
3685
                        "InnoDB: There is no log group defined with id %lu!\n",
 
3686
                        (ulong) group_id);
 
3687
                return(DB_ERROR);
 
3688
        }
 
3689
 
 
3690
        group->archived_file_no = first_log_no;
 
3691
 
 
3692
        recv_sys->parse_start_lsn = min_flushed_lsn;
 
3693
 
 
3694
        recv_sys->scanned_lsn = 0;
 
3695
        recv_sys->scanned_checkpoint_no = 0;
 
3696
        recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
 
3697
 
 
3698
        recv_sys->archive_group = group;
 
3699
 
 
3700
        ret = FALSE;
 
3701
 
 
3702
        mutex_enter(&(log_sys->mutex));
 
3703
 
 
3704
        while (!ret) {
 
3705
                ret = log_group_recover_from_archive_file(group);
 
3706
 
 
3707
                /* Close and truncate a possible processed archive file
 
3708
                from the file space */
 
3709
 
 
3710
                trunc_len = UNIV_PAGE_SIZE
 
3711
                        * fil_space_get_size(group->archive_space_id);
 
3712
                if (trunc_len > 0) {
 
3713
                        fil_space_truncate_start(group->archive_space_id,
 
3714
                                                 trunc_len);
 
3715
                }
 
3716
 
 
3717
                group->archived_file_no++;
 
3718
        }
 
3719
 
 
3720
        if (recv_sys->recovered_lsn < limit_lsn) {
 
3721
 
 
3722
                if (!recv_sys->scanned_lsn) {
 
3723
 
 
3724
                        recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
 
3725
                }
 
3726
 
 
3727
                mutex_exit(&(log_sys->mutex));
 
3728
 
 
3729
                err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE,
 
3730
                                                          limit_lsn,
 
3731
                                                          IB_ULONGLONG_MAX,
 
3732
                                                          IB_ULONGLONG_MAX);
 
3733
                if (err != DB_SUCCESS) {
 
3734
 
 
3735
                        return(err);
 
3736
                }
 
3737
 
 
3738
                mutex_enter(&(log_sys->mutex));
 
3739
        }
 
3740
 
 
3741
        if (limit_lsn != IB_ULONGLONG_MAX) {
 
3742
 
 
3743
                recv_apply_hashed_log_recs(FALSE);
 
3744
 
 
3745
                recv_reset_logs(recv_sys->recovered_lsn, 0, FALSE);
 
3746
        }
 
3747
 
 
3748
        mutex_exit(&(log_sys->mutex));
 
3749
 
 
3750
        return(DB_SUCCESS);
 
3751
}
 
3752
 
 
3753
/********************************************************//**
 
3754
Completes recovery from archive. */
 
3755
UNIV_INTERN
 
3756
void
 
3757
recv_recovery_from_archive_finish(void)
 
3758
/*===================================*/
 
3759
{
 
3760
        recv_recovery_from_checkpoint_finish();
 
3761
 
 
3762
        recv_recovery_from_backup_on = FALSE;
 
3763
}
 
3764
#endif /* UNIV_LOG_ARCHIVE */