~ubuntu-branches/ubuntu/precise/mysql-5.1/precise

« back to all changes in this revision

Viewing changes to storage/innodb_plugin/trx/trx0sys.c

  • Committer: Bazaar Package Importer
  • Author(s): Norbert Tretkowski
  • Date: 2010-03-17 14:56:02 UTC
  • Revision ID: james.westby@ubuntu.com-20100317145602-x7e30l1b2sb5s6w6
Tags: upstream-5.1.45
Import upstream version 5.1.45

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*****************************************************************************
 
2
 
 
3
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
 
4
 
 
5
This program is free software; you can redistribute it and/or modify it under
 
6
the terms of the GNU General Public License as published by the Free Software
 
7
Foundation; version 2 of the License.
 
8
 
 
9
This program is distributed in the hope that it will be useful, but WITHOUT
 
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 
12
 
 
13
You should have received a copy of the GNU General Public License along with
 
14
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 
15
Place, Suite 330, Boston, MA 02111-1307 USA
 
16
 
 
17
*****************************************************************************/
 
18
 
 
19
/**************************************************//**
 
20
@file trx/trx0sys.c
 
21
Transaction system
 
22
 
 
23
Created 3/26/1996 Heikki Tuuri
 
24
*******************************************************/
 
25
 
 
26
#include "trx0sys.h"
 
27
 
 
28
#ifdef UNIV_NONINL
 
29
#include "trx0sys.ic"
 
30
#endif
 
31
 
 
32
#ifndef UNIV_HOTBACKUP
 
33
#include "fsp0fsp.h"
 
34
#include "mtr0log.h"
 
35
#include "mtr0log.h"
 
36
#include "trx0trx.h"
 
37
#include "trx0rseg.h"
 
38
#include "trx0undo.h"
 
39
#include "srv0srv.h"
 
40
#include "trx0purge.h"
 
41
#include "log0log.h"
 
42
#include "os0file.h"
 
43
#include "read0read.h"
 
44
 
 
45
/** The file format tag structure with id and name. */
 
46
struct file_format_struct {
 
47
        ulint           id;             /*!< id of the file format */
 
48
        const char*     name;           /*!< text representation of the
 
49
                                        file format */
 
50
        mutex_t         mutex;          /*!< covers changes to the above
 
51
                                        fields */
 
52
};
 
53
 
 
54
/** The file format tag */
 
55
typedef struct file_format_struct       file_format_t;
 
56
 
 
57
/** The transaction system */
 
58
UNIV_INTERN trx_sys_t*          trx_sys         = NULL;
 
59
/** The doublewrite buffer */
 
60
UNIV_INTERN trx_doublewrite_t*  trx_doublewrite = NULL;
 
61
 
 
62
/** The following is set to TRUE when we are upgrading from pre-4.1
 
63
format data files to the multiple tablespaces format data files */
 
64
UNIV_INTERN ibool       trx_doublewrite_must_reset_space_ids    = FALSE;
 
65
/** Set to TRUE when the doublewrite buffer is being created */
 
66
UNIV_INTERN ibool       trx_doublewrite_buf_is_being_created = FALSE;
 
67
 
 
68
/** The following is TRUE when we are using the database in the
 
69
post-4.1 format, i.e., we have successfully upgraded, or have created
 
70
a new database installation */
 
71
UNIV_INTERN ibool       trx_sys_multiple_tablespace_format      = FALSE;
 
72
 
 
73
/** In a MySQL replication slave, in crash recovery we store the master log
 
74
file name and position here. */
 
75
/* @{ */
 
76
/** Master binlog file name */
 
77
UNIV_INTERN char        trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
 
78
/** Master binlog file position.  We have successfully got the updates
 
79
up to this position.  -1 means that no crash recovery was needed, or
 
80
there was no master log position info inside InnoDB.*/
 
81
UNIV_INTERN ib_int64_t  trx_sys_mysql_master_log_pos    = -1;
 
82
/* @} */
 
83
 
 
84
/** If this MySQL server uses binary logging, after InnoDB has been inited
 
85
and if it has done a crash recovery, we store the binlog file name and position
 
86
here. */
 
87
/* @{ */
 
88
/** Binlog file name */
 
89
UNIV_INTERN char        trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
 
90
/** Binlog file position, or -1 if unknown */
 
91
UNIV_INTERN ib_int64_t  trx_sys_mysql_bin_log_pos       = -1;
 
92
/* @} */
 
93
#endif /* !UNIV_HOTBACKUP */
 
94
 
 
95
/** List of animal names representing file format. */
 
96
static const char*      file_format_name_map[] = {
 
97
        "Antelope",
 
98
        "Barracuda",
 
99
        "Cheetah",
 
100
        "Dragon",
 
101
        "Elk",
 
102
        "Fox",
 
103
        "Gazelle",
 
104
        "Hornet",
 
105
        "Impala",
 
106
        "Jaguar",
 
107
        "Kangaroo",
 
108
        "Leopard",
 
109
        "Moose",
 
110
        "Nautilus",
 
111
        "Ocelot",
 
112
        "Porpoise",
 
113
        "Quail",
 
114
        "Rabbit",
 
115
        "Shark",
 
116
        "Tiger",
 
117
        "Urchin",
 
118
        "Viper",
 
119
        "Whale",
 
120
        "Xenops",
 
121
        "Yak",
 
122
        "Zebra"
 
123
};
 
124
 
 
125
/** The number of elements in the file format name array. */
 
126
static const ulint      FILE_FORMAT_NAME_N
 
127
        = sizeof(file_format_name_map) / sizeof(file_format_name_map[0]);
 
128
 
 
129
#ifndef UNIV_HOTBACKUP
 
130
/** This is used to track the maximum file format id known to InnoDB. It's
 
131
updated via SET GLOBAL innodb_file_format_check = 'x' or when we open
 
132
or create a table. */
 
133
static  file_format_t   file_format_max;
 
134
 
 
135
/****************************************************************//**
 
136
Determines if a page number is located inside the doublewrite buffer.
 
137
@return TRUE if the location is inside the two blocks of the
 
138
doublewrite buffer */
 
139
UNIV_INTERN
 
140
ibool
 
141
trx_doublewrite_page_inside(
 
142
/*========================*/
 
143
        ulint   page_no)        /*!< in: page number */
 
144
{
 
145
        if (trx_doublewrite == NULL) {
 
146
 
 
147
                return(FALSE);
 
148
        }
 
149
 
 
150
        if (page_no >= trx_doublewrite->block1
 
151
            && page_no < trx_doublewrite->block1
 
152
            + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
 
153
                return(TRUE);
 
154
        }
 
155
 
 
156
        if (page_no >= trx_doublewrite->block2
 
157
            && page_no < trx_doublewrite->block2
 
158
            + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
 
159
                return(TRUE);
 
160
        }
 
161
 
 
162
        return(FALSE);
 
163
}
 
164
 
 
165
/****************************************************************//**
 
166
Creates or initialializes the doublewrite buffer at a database start. */
 
167
static
 
168
void
 
169
trx_doublewrite_init(
 
170
/*=================*/
 
171
        byte*   doublewrite)    /*!< in: pointer to the doublewrite buf
 
172
                                header on trx sys page */
 
173
{
 
174
        trx_doublewrite = mem_alloc(sizeof(trx_doublewrite_t));
 
175
 
 
176
        /* Since we now start to use the doublewrite buffer, no need to call
 
177
        fsync() after every write to a data file */
 
178
#ifdef UNIV_DO_FLUSH
 
179
        os_do_not_call_flush_at_each_write = TRUE;
 
180
#endif /* UNIV_DO_FLUSH */
 
181
 
 
182
        mutex_create(&trx_doublewrite->mutex, SYNC_DOUBLEWRITE);
 
183
 
 
184
        trx_doublewrite->first_free = 0;
 
185
 
 
186
        trx_doublewrite->block1 = mach_read_from_4(
 
187
                doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK1);
 
188
        trx_doublewrite->block2 = mach_read_from_4(
 
189
                doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2);
 
190
        trx_doublewrite->write_buf_unaligned = ut_malloc(
 
191
                (1 + 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE);
 
192
 
 
193
        trx_doublewrite->write_buf = ut_align(
 
194
                trx_doublewrite->write_buf_unaligned, UNIV_PAGE_SIZE);
 
195
        trx_doublewrite->buf_block_arr = mem_alloc(
 
196
                2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * sizeof(void*));
 
197
}
 
198
 
 
199
/****************************************************************//**
 
200
Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
 
201
multiple tablespace format. */
 
202
UNIV_INTERN
 
203
void
 
204
trx_sys_mark_upgraded_to_multiple_tablespaces(void)
 
205
/*===============================================*/
 
206
{
 
207
        buf_block_t*    block;
 
208
        byte*           doublewrite;
 
209
        mtr_t           mtr;
 
210
 
 
211
        /* We upgraded to 4.1.x and reset the space id fields in the
 
212
        doublewrite buffer. Let us mark to the trx_sys header that the upgrade
 
213
        has been done. */
 
214
 
 
215
        mtr_start(&mtr);
 
216
 
 
217
        block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
 
218
                             RW_X_LATCH, &mtr);
 
219
        buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
 
220
 
 
221
        doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
 
222
 
 
223
        mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
 
224
                         TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
 
225
                         MLOG_4BYTES, &mtr);
 
226
        mtr_commit(&mtr);
 
227
 
 
228
        /* Flush the modified pages to disk and make a checkpoint */
 
229
        log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
 
230
 
 
231
        trx_sys_multiple_tablespace_format = TRUE;
 
232
}
 
233
 
 
234
/****************************************************************//**
 
235
Creates the doublewrite buffer to a new InnoDB installation. The header of the
 
236
doublewrite buffer is placed on the trx system header page. */
 
237
UNIV_INTERN
 
238
void
 
239
trx_sys_create_doublewrite_buf(void)
 
240
/*================================*/
 
241
{
 
242
        buf_block_t*    block;
 
243
        buf_block_t*    block2;
 
244
        buf_block_t*    new_block;
 
245
        byte*   doublewrite;
 
246
        byte*   fseg_header;
 
247
        ulint   page_no;
 
248
        ulint   prev_page_no;
 
249
        ulint   i;
 
250
        mtr_t   mtr;
 
251
 
 
252
        if (trx_doublewrite) {
 
253
                /* Already inited */
 
254
 
 
255
                return;
 
256
        }
 
257
 
 
258
start_again:
 
259
        mtr_start(&mtr);
 
260
        trx_doublewrite_buf_is_being_created = TRUE;
 
261
 
 
262
        block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
 
263
                             RW_X_LATCH, &mtr);
 
264
        buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
 
265
 
 
266
        doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
 
267
 
 
268
        if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
 
269
            == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
 
270
                /* The doublewrite buffer has already been created:
 
271
                just read in some numbers */
 
272
 
 
273
                trx_doublewrite_init(doublewrite);
 
274
 
 
275
                mtr_commit(&mtr);
 
276
                trx_doublewrite_buf_is_being_created = FALSE;
 
277
        } else {
 
278
                fprintf(stderr,
 
279
                        "InnoDB: Doublewrite buffer not found:"
 
280
                        " creating new\n");
 
281
 
 
282
                if (buf_pool_get_curr_size()
 
283
                    < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
 
284
                        + FSP_EXTENT_SIZE / 2 + 100)
 
285
                       * UNIV_PAGE_SIZE)) {
 
286
                        fprintf(stderr,
 
287
                                "InnoDB: Cannot create doublewrite buffer:"
 
288
                                " you must\n"
 
289
                                "InnoDB: increase your buffer pool size.\n"
 
290
                                "InnoDB: Cannot continue operation.\n");
 
291
 
 
292
                        exit(1);
 
293
                }
 
294
 
 
295
                block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
 
296
                                     TRX_SYS_DOUBLEWRITE
 
297
                                     + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
 
298
 
 
299
                /* fseg_create acquires a second latch on the page,
 
300
                therefore we must declare it: */
 
301
 
 
302
                buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
 
303
 
 
304
                if (block2 == NULL) {
 
305
                        fprintf(stderr,
 
306
                                "InnoDB: Cannot create doublewrite buffer:"
 
307
                                " you must\n"
 
308
                                "InnoDB: increase your tablespace size.\n"
 
309
                                "InnoDB: Cannot continue operation.\n");
 
310
 
 
311
                        /* We exit without committing the mtr to prevent
 
312
                        its modifications to the database getting to disk */
 
313
 
 
314
                        exit(1);
 
315
                }
 
316
 
 
317
                fseg_header = buf_block_get_frame(block)
 
318
                        + TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG;
 
319
                prev_page_no = 0;
 
320
 
 
321
                for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
 
322
                             + FSP_EXTENT_SIZE / 2; i++) {
 
323
                        page_no = fseg_alloc_free_page(fseg_header,
 
324
                                                       prev_page_no + 1,
 
325
                                                       FSP_UP, &mtr);
 
326
                        if (page_no == FIL_NULL) {
 
327
                                fprintf(stderr,
 
328
                                        "InnoDB: Cannot create doublewrite"
 
329
                                        " buffer: you must\n"
 
330
                                        "InnoDB: increase your"
 
331
                                        " tablespace size.\n"
 
332
                                        "InnoDB: Cannot continue operation.\n"
 
333
                                        );
 
334
 
 
335
                                exit(1);
 
336
                        }
 
337
 
 
338
                        /* We read the allocated pages to the buffer pool;
 
339
                        when they are written to disk in a flush, the space
 
340
                        id and page number fields are also written to the
 
341
                        pages. When we at database startup read pages
 
342
                        from the doublewrite buffer, we know that if the
 
343
                        space id and page number in them are the same as
 
344
                        the page position in the tablespace, then the page
 
345
                        has not been written to in doublewrite. */
 
346
 
 
347
                        new_block = buf_page_get(TRX_SYS_SPACE, 0, page_no,
 
348
                                                 RW_X_LATCH, &mtr);
 
349
                        buf_block_dbg_add_level(new_block,
 
350
                                                SYNC_NO_ORDER_CHECK);
 
351
 
 
352
                        if (i == FSP_EXTENT_SIZE / 2) {
 
353
                                ut_a(page_no == FSP_EXTENT_SIZE);
 
354
                                mlog_write_ulint(doublewrite
 
355
                                                 + TRX_SYS_DOUBLEWRITE_BLOCK1,
 
356
                                                 page_no, MLOG_4BYTES, &mtr);
 
357
                                mlog_write_ulint(doublewrite
 
358
                                                 + TRX_SYS_DOUBLEWRITE_REPEAT
 
359
                                                 + TRX_SYS_DOUBLEWRITE_BLOCK1,
 
360
                                                 page_no, MLOG_4BYTES, &mtr);
 
361
                        } else if (i == FSP_EXTENT_SIZE / 2
 
362
                                   + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
 
363
                                ut_a(page_no == 2 * FSP_EXTENT_SIZE);
 
364
                                mlog_write_ulint(doublewrite
 
365
                                                 + TRX_SYS_DOUBLEWRITE_BLOCK2,
 
366
                                                 page_no, MLOG_4BYTES, &mtr);
 
367
                                mlog_write_ulint(doublewrite
 
368
                                                 + TRX_SYS_DOUBLEWRITE_REPEAT
 
369
                                                 + TRX_SYS_DOUBLEWRITE_BLOCK2,
 
370
                                                 page_no, MLOG_4BYTES, &mtr);
 
371
                        } else if (i > FSP_EXTENT_SIZE / 2) {
 
372
                                ut_a(page_no == prev_page_no + 1);
 
373
                        }
 
374
 
 
375
                        prev_page_no = page_no;
 
376
                }
 
377
 
 
378
                mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
 
379
                                 TRX_SYS_DOUBLEWRITE_MAGIC_N,
 
380
                                 MLOG_4BYTES, &mtr);
 
381
                mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
 
382
                                 + TRX_SYS_DOUBLEWRITE_REPEAT,
 
383
                                 TRX_SYS_DOUBLEWRITE_MAGIC_N,
 
384
                                 MLOG_4BYTES, &mtr);
 
385
 
 
386
                mlog_write_ulint(doublewrite
 
387
                                 + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
 
388
                                 TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
 
389
                                 MLOG_4BYTES, &mtr);
 
390
                mtr_commit(&mtr);
 
391
 
 
392
                /* Flush the modified pages to disk and make a checkpoint */
 
393
                log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
 
394
 
 
395
                fprintf(stderr, "InnoDB: Doublewrite buffer created\n");
 
396
 
 
397
                trx_sys_multiple_tablespace_format = TRUE;
 
398
 
 
399
                goto start_again;
 
400
        }
 
401
}
 
402
 
 
403
/****************************************************************//**
 
404
At a database startup initializes the doublewrite buffer memory structure if
 
405
we already have a doublewrite buffer created in the data files. If we are
 
406
upgrading to an InnoDB version which supports multiple tablespaces, then this
 
407
function performs the necessary update operations. If we are in a crash
 
408
recovery, this function uses a possible doublewrite buffer to restore
 
409
half-written pages in the data files. */
 
410
UNIV_INTERN
 
411
void
 
412
trx_sys_doublewrite_init_or_restore_pages(
 
413
/*======================================*/
 
414
        ibool   restore_corrupt_pages)  /*!< in: TRUE=restore pages */
 
415
{
 
416
        byte*   buf;
 
417
        byte*   read_buf;
 
418
        byte*   unaligned_read_buf;
 
419
        ulint   block1;
 
420
        ulint   block2;
 
421
        ulint   source_page_no;
 
422
        byte*   page;
 
423
        byte*   doublewrite;
 
424
        ulint   space_id;
 
425
        ulint   page_no;
 
426
        ulint   i;
 
427
 
 
428
        /* We do the file i/o past the buffer pool */
 
429
 
 
430
        unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
 
431
        read_buf = ut_align(unaligned_read_buf, UNIV_PAGE_SIZE);
 
432
 
 
433
        /* Read the trx sys header to check if we are using the doublewrite
 
434
        buffer */
 
435
 
 
436
        fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0,
 
437
               UNIV_PAGE_SIZE, read_buf, NULL);
 
438
        doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
 
439
 
 
440
        if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
 
441
            == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
 
442
                /* The doublewrite buffer has been created */
 
443
 
 
444
                trx_doublewrite_init(doublewrite);
 
445
 
 
446
                block1 = trx_doublewrite->block1;
 
447
                block2 = trx_doublewrite->block2;
 
448
 
 
449
                buf = trx_doublewrite->write_buf;
 
450
        } else {
 
451
                goto leave_func;
 
452
        }
 
453
 
 
454
        if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
 
455
            != TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
 
456
 
 
457
                /* We are upgrading from a version < 4.1.x to a version where
 
458
                multiple tablespaces are supported. We must reset the space id
 
459
                field in the pages in the doublewrite buffer because starting
 
460
                from this version the space id is stored to
 
461
                FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
 
462
 
 
463
                trx_doublewrite_must_reset_space_ids = TRUE;
 
464
 
 
465
                fprintf(stderr,
 
466
                        "InnoDB: Resetting space id's in the"
 
467
                        " doublewrite buffer\n");
 
468
        } else {
 
469
                trx_sys_multiple_tablespace_format = TRUE;
 
470
        }
 
471
 
 
472
        /* Read the pages from the doublewrite buffer to memory */
 
473
 
 
474
        fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0,
 
475
               TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
 
476
               buf, NULL);
 
477
        fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0,
 
478
               TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
 
479
               buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
 
480
               NULL);
 
481
        /* Check if any of these pages is half-written in data files, in the
 
482
        intended position */
 
483
 
 
484
        page = buf;
 
485
 
 
486
        for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
 
487
 
 
488
                page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
 
489
 
 
490
                if (trx_doublewrite_must_reset_space_ids) {
 
491
 
 
492
                        space_id = 0;
 
493
                        mach_write_to_4(page
 
494
                                        + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0);
 
495
                        /* We do not need to calculate new checksums for the
 
496
                        pages because the field .._SPACE_ID does not affect
 
497
                        them. Write the page back to where we read it from. */
 
498
 
 
499
                        if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
 
500
                                source_page_no = block1 + i;
 
501
                        } else {
 
502
                                source_page_no = block2
 
503
                                        + i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
 
504
                        }
 
505
 
 
506
                        fil_io(OS_FILE_WRITE, TRUE, 0, 0, source_page_no, 0,
 
507
                               UNIV_PAGE_SIZE, page, NULL);
 
508
                        /* printf("Resetting space id in page %lu\n",
 
509
                        source_page_no); */
 
510
                } else {
 
511
                        space_id = mach_read_from_4(
 
512
                                page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
 
513
                }
 
514
 
 
515
                if (!restore_corrupt_pages) {
 
516
                        /* The database was shut down gracefully: no need to
 
517
                        restore pages */
 
518
 
 
519
                } else if (!fil_tablespace_exists_in_mem(space_id)) {
 
520
                        /* Maybe we have dropped the single-table tablespace
 
521
                        and this page once belonged to it: do nothing */
 
522
 
 
523
                } else if (!fil_check_adress_in_tablespace(space_id,
 
524
                                                           page_no)) {
 
525
                        fprintf(stderr,
 
526
                                "InnoDB: Warning: a page in the"
 
527
                                " doublewrite buffer is not within space\n"
 
528
                                "InnoDB: bounds; space id %lu"
 
529
                                " page number %lu, page %lu in"
 
530
                                " doublewrite buf.\n",
 
531
                                (ulong) space_id, (ulong) page_no, (ulong) i);
 
532
 
 
533
                } else if (space_id == TRX_SYS_SPACE
 
534
                           && ((page_no >= block1
 
535
                                && page_no
 
536
                                < block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
 
537
                               || (page_no >= block2
 
538
                                   && page_no
 
539
                                   < (block2
 
540
                                      + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)))) {
 
541
 
 
542
                        /* It is an unwritten doublewrite buffer page:
 
543
                        do nothing */
 
544
                } else {
 
545
                        ulint   zip_size = fil_space_get_zip_size(space_id);
 
546
 
 
547
                        /* Read in the actual page from the file */
 
548
                        fil_io(OS_FILE_READ, TRUE, space_id, zip_size,
 
549
                               page_no, 0,
 
550
                               zip_size ? zip_size : UNIV_PAGE_SIZE,
 
551
                               read_buf, NULL);
 
552
 
 
553
                        /* Check if the page is corrupt */
 
554
 
 
555
                        if (UNIV_UNLIKELY
 
556
                            (buf_page_is_corrupted(read_buf, zip_size))) {
 
557
 
 
558
                                fprintf(stderr,
 
559
                                        "InnoDB: Warning: database page"
 
560
                                        " corruption or a failed\n"
 
561
                                        "InnoDB: file read of"
 
562
                                        " space %lu page %lu.\n"
 
563
                                        "InnoDB: Trying to recover it from"
 
564
                                        " the doublewrite buffer.\n",
 
565
                                        (ulong) space_id, (ulong) page_no);
 
566
 
 
567
                                if (buf_page_is_corrupted(page, zip_size)) {
 
568
                                        fprintf(stderr,
 
569
                                                "InnoDB: Dump of the page:\n");
 
570
                                        buf_page_print(read_buf, zip_size);
 
571
                                        fprintf(stderr,
 
572
                                                "InnoDB: Dump of"
 
573
                                                " corresponding page"
 
574
                                                " in doublewrite buffer:\n");
 
575
                                        buf_page_print(page, zip_size);
 
576
 
 
577
                                        fprintf(stderr,
 
578
                                                "InnoDB: Also the page in the"
 
579
                                                " doublewrite buffer"
 
580
                                                " is corrupt.\n"
 
581
                                                "InnoDB: Cannot continue"
 
582
                                                " operation.\n"
 
583
                                                "InnoDB: You can try to"
 
584
                                                " recover the database"
 
585
                                                " with the my.cnf\n"
 
586
                                                "InnoDB: option:\n"
 
587
                                                "InnoDB: set-variable="
 
588
                                                "innodb_force_recovery=6\n");
 
589
                                        exit(1);
 
590
                                }
 
591
 
 
592
                                /* Write the good page from the
 
593
                                doublewrite buffer to the intended
 
594
                                position */
 
595
 
 
596
                                fil_io(OS_FILE_WRITE, TRUE, space_id,
 
597
                                       zip_size, page_no, 0,
 
598
                                       zip_size ? zip_size : UNIV_PAGE_SIZE,
 
599
                                       page, NULL);
 
600
                                fprintf(stderr,
 
601
                                        "InnoDB: Recovered the page from"
 
602
                                        " the doublewrite buffer.\n");
 
603
                        }
 
604
                }
 
605
 
 
606
                page += UNIV_PAGE_SIZE;
 
607
        }
 
608
 
 
609
        fil_flush_file_spaces(FIL_TABLESPACE);
 
610
 
 
611
leave_func:
 
612
        ut_free(unaligned_read_buf);
 
613
}
 
614
 
 
615
/****************************************************************//**
 
616
Checks that trx is in the trx list.
 
617
@return TRUE if is in */
 
618
UNIV_INTERN
 
619
ibool
 
620
trx_in_trx_list(
 
621
/*============*/
 
622
        trx_t*  in_trx) /*!< in: trx */
 
623
{
 
624
        trx_t*  trx;
 
625
 
 
626
        ut_ad(mutex_own(&(kernel_mutex)));
 
627
 
 
628
        trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
 
629
 
 
630
        while (trx != NULL) {
 
631
 
 
632
                if (trx == in_trx) {
 
633
 
 
634
                        return(TRUE);
 
635
                }
 
636
 
 
637
                trx = UT_LIST_GET_NEXT(trx_list, trx);
 
638
        }
 
639
 
 
640
        return(FALSE);
 
641
}
 
642
 
 
643
/*****************************************************************//**
 
644
Writes the value of max_trx_id to the file based trx system header. */
 
645
UNIV_INTERN
 
646
void
 
647
trx_sys_flush_max_trx_id(void)
 
648
/*==========================*/
 
649
{
 
650
        trx_sysf_t*     sys_header;
 
651
        mtr_t           mtr;
 
652
 
 
653
        ut_ad(mutex_own(&kernel_mutex));
 
654
 
 
655
        mtr_start(&mtr);
 
656
 
 
657
        sys_header = trx_sysf_get(&mtr);
 
658
 
 
659
        mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
 
660
                          trx_sys->max_trx_id, &mtr);
 
661
        mtr_commit(&mtr);
 
662
}
 
663
 
 
664
/*****************************************************************//**
 
665
Updates the offset information about the end of the MySQL binlog entry
 
666
which corresponds to the transaction just being committed. In a MySQL
 
667
replication slave updates the latest master binlog position up to which
 
668
replication has proceeded. */
 
669
UNIV_INTERN
 
670
void
 
671
trx_sys_update_mysql_binlog_offset(
 
672
/*===============================*/
 
673
        const char*     file_name,/*!< in: MySQL log file name */
 
674
        ib_int64_t      offset, /*!< in: position in that log file */
 
675
        ulint           field,  /*!< in: offset of the MySQL log info field in
 
676
                                the trx sys header */
 
677
        mtr_t*          mtr)    /*!< in: mtr */
 
678
{
 
679
        trx_sysf_t*     sys_header;
 
680
 
 
681
        if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
 
682
 
 
683
                /* We cannot fit the name to the 512 bytes we have reserved */
 
684
 
 
685
                return;
 
686
        }
 
687
 
 
688
        sys_header = trx_sysf_get(mtr);
 
689
 
 
690
        if (mach_read_from_4(sys_header + field
 
691
                             + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
 
692
            != TRX_SYS_MYSQL_LOG_MAGIC_N) {
 
693
 
 
694
                mlog_write_ulint(sys_header + field
 
695
                                 + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD,
 
696
                                 TRX_SYS_MYSQL_LOG_MAGIC_N,
 
697
                                 MLOG_4BYTES, mtr);
 
698
        }
 
699
 
 
700
        if (0 != strcmp((char*) (sys_header + field + TRX_SYS_MYSQL_LOG_NAME),
 
701
                        file_name)) {
 
702
 
 
703
                mlog_write_string(sys_header + field
 
704
                                  + TRX_SYS_MYSQL_LOG_NAME,
 
705
                                  (byte*) file_name, 1 + ut_strlen(file_name),
 
706
                                  mtr);
 
707
        }
 
708
 
 
709
        if (mach_read_from_4(sys_header + field
 
710
                             + TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0
 
711
            || (offset >> 32) > 0) {
 
712
 
 
713
                mlog_write_ulint(sys_header + field
 
714
                                 + TRX_SYS_MYSQL_LOG_OFFSET_HIGH,
 
715
                                 (ulint)(offset >> 32),
 
716
                                 MLOG_4BYTES, mtr);
 
717
        }
 
718
 
 
719
        mlog_write_ulint(sys_header + field
 
720
                         + TRX_SYS_MYSQL_LOG_OFFSET_LOW,
 
721
                         (ulint)(offset & 0xFFFFFFFFUL),
 
722
                         MLOG_4BYTES, mtr);
 
723
}
 
724
 
 
725
/*****************************************************************//**
 
726
Stores the MySQL binlog offset info in the trx system header if
 
727
the magic number shows it valid, and print the info to stderr */
 
728
UNIV_INTERN
 
729
void
 
730
trx_sys_print_mysql_binlog_offset(void)
 
731
/*===================================*/
 
732
{
 
733
        trx_sysf_t*     sys_header;
 
734
        mtr_t           mtr;
 
735
        ulint           trx_sys_mysql_bin_log_pos_high;
 
736
        ulint           trx_sys_mysql_bin_log_pos_low;
 
737
 
 
738
        mtr_start(&mtr);
 
739
 
 
740
        sys_header = trx_sysf_get(&mtr);
 
741
 
 
742
        if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
 
743
                             + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
 
744
            != TRX_SYS_MYSQL_LOG_MAGIC_N) {
 
745
 
 
746
                mtr_commit(&mtr);
 
747
 
 
748
                return;
 
749
        }
 
750
 
 
751
        trx_sys_mysql_bin_log_pos_high = mach_read_from_4(
 
752
                sys_header + TRX_SYS_MYSQL_LOG_INFO
 
753
                + TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
 
754
        trx_sys_mysql_bin_log_pos_low = mach_read_from_4(
 
755
                sys_header + TRX_SYS_MYSQL_LOG_INFO
 
756
                + TRX_SYS_MYSQL_LOG_OFFSET_LOW);
 
757
 
 
758
        trx_sys_mysql_bin_log_pos
 
759
                = (((ib_int64_t)trx_sys_mysql_bin_log_pos_high) << 32)
 
760
                + (ib_int64_t)trx_sys_mysql_bin_log_pos_low;
 
761
 
 
762
        ut_memcpy(trx_sys_mysql_bin_log_name,
 
763
                  sys_header + TRX_SYS_MYSQL_LOG_INFO
 
764
                  + TRX_SYS_MYSQL_LOG_NAME, TRX_SYS_MYSQL_LOG_NAME_LEN);
 
765
 
 
766
        fprintf(stderr,
 
767
                "InnoDB: Last MySQL binlog file position %lu %lu,"
 
768
                " file name %s\n",
 
769
                trx_sys_mysql_bin_log_pos_high, trx_sys_mysql_bin_log_pos_low,
 
770
                trx_sys_mysql_bin_log_name);
 
771
 
 
772
        mtr_commit(&mtr);
 
773
}
 
774
 
 
775
/*****************************************************************//**
 
776
Prints to stderr the MySQL master log offset info in the trx system header if
 
777
the magic number shows it valid. */
 
778
UNIV_INTERN
 
779
void
 
780
trx_sys_print_mysql_master_log_pos(void)
 
781
/*====================================*/
 
782
{
 
783
        trx_sysf_t*     sys_header;
 
784
        mtr_t           mtr;
 
785
 
 
786
        mtr_start(&mtr);
 
787
 
 
788
        sys_header = trx_sysf_get(&mtr);
 
789
 
 
790
        if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
791
                             + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
 
792
            != TRX_SYS_MYSQL_LOG_MAGIC_N) {
 
793
 
 
794
                mtr_commit(&mtr);
 
795
 
 
796
                return;
 
797
        }
 
798
 
 
799
        fprintf(stderr,
 
800
                "InnoDB: In a MySQL replication slave the last"
 
801
                " master binlog file\n"
 
802
                "InnoDB: position %lu %lu, file name %s\n",
 
803
                (ulong) mach_read_from_4(sys_header
 
804
                                         + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
805
                                         + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
 
806
                (ulong) mach_read_from_4(sys_header
 
807
                                         + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
808
                                         + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
 
809
                sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
810
                + TRX_SYS_MYSQL_LOG_NAME);
 
811
        /* Copy the master log position info to global variables we can
 
812
        use in ha_innobase.cc to initialize glob_mi to right values */
 
813
 
 
814
        ut_memcpy(trx_sys_mysql_master_log_name,
 
815
                  sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
816
                  + TRX_SYS_MYSQL_LOG_NAME,
 
817
                  TRX_SYS_MYSQL_LOG_NAME_LEN);
 
818
 
 
819
        trx_sys_mysql_master_log_pos
 
820
                = (((ib_int64_t) mach_read_from_4(
 
821
                            sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
822
                            + TRX_SYS_MYSQL_LOG_OFFSET_HIGH)) << 32)
 
823
                + ((ib_int64_t) mach_read_from_4(
 
824
                           sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
 
825
                           + TRX_SYS_MYSQL_LOG_OFFSET_LOW));
 
826
        mtr_commit(&mtr);
 
827
}
 
828
 
 
829
/****************************************************************//**
 
830
Looks for a free slot for a rollback segment in the trx system file copy.
 
831
@return slot index or ULINT_UNDEFINED if not found */
 
832
UNIV_INTERN
 
833
ulint
 
834
trx_sysf_rseg_find_free(
 
835
/*====================*/
 
836
        mtr_t*  mtr)    /*!< in: mtr */
 
837
{
 
838
        trx_sysf_t*     sys_header;
 
839
        ulint           page_no;
 
840
        ulint           i;
 
841
 
 
842
        ut_ad(mutex_own(&(kernel_mutex)));
 
843
 
 
844
        sys_header = trx_sysf_get(mtr);
 
845
 
 
846
        for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
 
847
 
 
848
                page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
 
849
 
 
850
                if (page_no == FIL_NULL) {
 
851
 
 
852
                        return(i);
 
853
                }
 
854
        }
 
855
 
 
856
        return(ULINT_UNDEFINED);
 
857
}
 
858
 
 
859
/*****************************************************************//**
 
860
Creates the file page for the transaction system. This function is called only
 
861
at the database creation, before trx_sys_init. */
 
862
static
 
863
void
 
864
trx_sysf_create(
 
865
/*============*/
 
866
        mtr_t*  mtr)    /*!< in: mtr */
 
867
{
 
868
        trx_sysf_t*     sys_header;
 
869
        ulint           slot_no;
 
870
        buf_block_t*    block;
 
871
        page_t*         page;
 
872
        ulint           page_no;
 
873
        ulint           i;
 
874
 
 
875
        ut_ad(mtr);
 
876
 
 
877
        /* Note that below we first reserve the file space x-latch, and
 
878
        then enter the kernel: we must do it in this order to conform
 
879
        to the latching order rules. */
 
880
 
 
881
        mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE, NULL), mtr);
 
882
        mutex_enter(&kernel_mutex);
 
883
 
 
884
        /* Create the trx sys file block in a new allocated file segment */
 
885
        block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
 
886
                            mtr);
 
887
        buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
 
888
 
 
889
        ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);
 
890
 
 
891
        page = buf_block_get_frame(block);
 
892
 
 
893
        mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
 
894
                         MLOG_2BYTES, mtr);
 
895
 
 
896
        /* Reset the doublewrite buffer magic number to zero so that we
 
897
        know that the doublewrite buffer has not yet been created (this
 
898
        suppresses a Valgrind warning) */
 
899
 
 
900
        mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
 
901
                         + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
 
902
 
 
903
        sys_header = trx_sysf_get(mtr);
 
904
 
 
905
        /* Start counting transaction ids from number 1 up */
 
906
        mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
 
907
                          ut_dulint_create(0, 1), mtr);
 
908
 
 
909
        /* Reset the rollback segment slots */
 
910
        for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
 
911
 
 
912
                trx_sysf_rseg_set_space(sys_header, i, ULINT_UNDEFINED, mtr);
 
913
                trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr);
 
914
        }
 
915
 
 
916
        /* The remaining area (up to the page trailer) is uninitialized.
 
917
        Silence Valgrind warnings about it. */
 
918
        UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS
 
919
                                     + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
 
920
                                     + TRX_SYS_RSEG_SPACE),
 
921
                       (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
 
922
                        - (TRX_SYS_RSEGS
 
923
                           + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
 
924
                           + TRX_SYS_RSEG_SPACE))
 
925
                       + page - sys_header);
 
926
 
 
927
        /* Create the first rollback segment in the SYSTEM tablespace */
 
928
        page_no = trx_rseg_header_create(TRX_SYS_SPACE, 0, ULINT_MAX, &slot_no,
 
929
                                         mtr);
 
930
        ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
 
931
        ut_a(page_no != FIL_NULL);
 
932
 
 
933
        mutex_exit(&kernel_mutex);
 
934
}
 
935
 
 
936
/*****************************************************************//**
 
937
Creates and initializes the central memory structures for the transaction
 
938
system. This is called when the database is started. */
 
939
UNIV_INTERN
 
940
void
 
941
trx_sys_init_at_db_start(void)
 
942
/*==========================*/
 
943
{
 
944
        trx_sysf_t*     sys_header;
 
945
        ib_int64_t      rows_to_undo    = 0;
 
946
        const char*     unit            = "";
 
947
        trx_t*          trx;
 
948
        mtr_t           mtr;
 
949
 
 
950
        mtr_start(&mtr);
 
951
 
 
952
        ut_ad(trx_sys == NULL);
 
953
 
 
954
        mutex_enter(&kernel_mutex);
 
955
 
 
956
        trx_sys = mem_alloc(sizeof(trx_sys_t));
 
957
 
 
958
        sys_header = trx_sysf_get(&mtr);
 
959
 
 
960
        trx_rseg_list_and_array_init(sys_header, &mtr);
 
961
 
 
962
        trx_sys->latest_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
 
963
 
 
964
        /* VERY important: after the database is started, max_trx_id value is
 
965
        divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
 
966
        trx_sys_get_new_trx_id will evaluate to TRUE when the function
 
967
        is first time called, and the value for trx id will be written
 
968
        to the disk-based header! Thus trx id values will not overlap when
 
969
        the database is repeatedly started! */
 
970
 
 
971
        trx_sys->max_trx_id = ut_dulint_add(
 
972
                ut_dulint_align_up(mtr_read_dulint(
 
973
                                           sys_header
 
974
                                           + TRX_SYS_TRX_ID_STORE, &mtr),
 
975
                                   TRX_SYS_TRX_ID_WRITE_MARGIN),
 
976
                2 * TRX_SYS_TRX_ID_WRITE_MARGIN);
 
977
 
 
978
        UT_LIST_INIT(trx_sys->mysql_trx_list);
 
979
        trx_dummy_sess = sess_open();
 
980
        trx_lists_init_at_db_start();
 
981
 
 
982
        if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
 
983
                trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
 
984
 
 
985
                for (;;) {
 
986
 
 
987
                        if ( trx->conc_state != TRX_PREPARED) {
 
988
                                rows_to_undo += ut_conv_dulint_to_longlong(
 
989
                                        trx->undo_no);
 
990
                        }
 
991
 
 
992
                        trx = UT_LIST_GET_NEXT(trx_list, trx);
 
993
 
 
994
                        if (!trx) {
 
995
                                break;
 
996
                        }
 
997
                }
 
998
 
 
999
                if (rows_to_undo > 1000000000) {
 
1000
                        unit = "M";
 
1001
                        rows_to_undo = rows_to_undo / 1000000;
 
1002
                }
 
1003
 
 
1004
                fprintf(stderr,
 
1005
                        "InnoDB: %lu transaction(s) which must be"
 
1006
                        " rolled back or cleaned up\n"
 
1007
                        "InnoDB: in total %lu%s row operations to undo\n",
 
1008
                        (ulong) UT_LIST_GET_LEN(trx_sys->trx_list),
 
1009
                        (ulong) rows_to_undo, unit);
 
1010
 
 
1011
                fprintf(stderr, "InnoDB: Trx id counter is " TRX_ID_FMT "\n",
 
1012
                        TRX_ID_PREP_PRINTF(trx_sys->max_trx_id));
 
1013
        }
 
1014
 
 
1015
        UT_LIST_INIT(trx_sys->view_list);
 
1016
 
 
1017
        trx_purge_sys_create();
 
1018
 
 
1019
        mutex_exit(&kernel_mutex);
 
1020
 
 
1021
        mtr_commit(&mtr);
 
1022
}
 
1023
 
 
1024
/*****************************************************************//**
 
1025
Creates and initializes the transaction system at the database creation. */
 
1026
UNIV_INTERN
 
1027
void
 
1028
trx_sys_create(void)
 
1029
/*================*/
 
1030
{
 
1031
        mtr_t   mtr;
 
1032
 
 
1033
        mtr_start(&mtr);
 
1034
 
 
1035
        trx_sysf_create(&mtr);
 
1036
 
 
1037
        mtr_commit(&mtr);
 
1038
 
 
1039
        trx_sys_init_at_db_start();
 
1040
}
 
1041
 
 
1042
/*****************************************************************//**
 
1043
Update the file format tag.
 
1044
@return always TRUE */
 
1045
static
 
1046
ibool
 
1047
trx_sys_file_format_max_write(
 
1048
/*==========================*/
 
1049
        ulint           format_id,      /*!< in: file format id */
 
1050
        const char**    name)           /*!< out: max file format name, can
 
1051
                                        be NULL */
 
1052
{
 
1053
        mtr_t           mtr;
 
1054
        byte*           ptr;
 
1055
        buf_block_t*    block;
 
1056
        ulint           tag_value_low;
 
1057
 
 
1058
        mtr_start(&mtr);
 
1059
 
 
1060
        block = buf_page_get(
 
1061
                TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
 
1062
 
 
1063
        file_format_max.id = format_id;
 
1064
        file_format_max.name = trx_sys_file_format_id_to_name(format_id);
 
1065
 
 
1066
        ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
 
1067
        tag_value_low = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
 
1068
 
 
1069
        if (name) {
 
1070
                *name = file_format_max.name;
 
1071
        }
 
1072
 
 
1073
        mlog_write_dulint(
 
1074
                ptr,
 
1075
                ut_dulint_create(TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH,
 
1076
                                 tag_value_low),
 
1077
                &mtr);
 
1078
 
 
1079
        mtr_commit(&mtr);
 
1080
 
 
1081
        return(TRUE);
 
1082
}
 
1083
 
 
1084
/*****************************************************************//**
 
1085
Read the file format tag.
 
1086
@return the file format or ULINT_UNDEFINED if not set. */
 
1087
static
 
1088
ulint
 
1089
trx_sys_file_format_max_read(void)
 
1090
/*==============================*/
 
1091
{
 
1092
        mtr_t                   mtr;
 
1093
        const byte*             ptr;
 
1094
        const buf_block_t*      block;
 
1095
        ulint                   format_id;
 
1096
        dulint                  file_format_id;
 
1097
 
 
1098
        /* Since this is called during the startup phase it's safe to
 
1099
        read the value without a covering mutex. */
 
1100
        mtr_start(&mtr);
 
1101
 
 
1102
        block = buf_page_get(
 
1103
                TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
 
1104
 
 
1105
        ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
 
1106
        file_format_id = mach_read_from_8(ptr);
 
1107
 
 
1108
        mtr_commit(&mtr);
 
1109
 
 
1110
        format_id = file_format_id.low - TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
 
1111
 
 
1112
        if (file_format_id.high != TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH
 
1113
            || format_id >= FILE_FORMAT_NAME_N) {
 
1114
 
 
1115
                /* Either it has never been tagged, or garbage in it. */
 
1116
                return(ULINT_UNDEFINED);
 
1117
        }
 
1118
 
 
1119
        return(format_id);
 
1120
}
 
1121
 
 
1122
/*****************************************************************//**
 
1123
Get the name representation of the file format from its id.
 
1124
@return pointer to the name */
 
1125
UNIV_INTERN
 
1126
const char*
 
1127
trx_sys_file_format_id_to_name(
 
1128
/*===========================*/
 
1129
        const ulint     id)     /*!< in: id of the file format */
 
1130
{
 
1131
        ut_a(id < FILE_FORMAT_NAME_N);
 
1132
 
 
1133
        return(file_format_name_map[id]);
 
1134
}
 
1135
 
 
1136
/*****************************************************************//**
 
1137
Check for the max file format tag stored on disk. Note: If max_format_id
 
1138
is == DICT_TF_FORMAT_MAX + 1 then we only print a warning.
 
1139
@return DB_SUCCESS or error code */
 
1140
UNIV_INTERN
 
1141
ulint
 
1142
trx_sys_file_format_max_check(
 
1143
/*==========================*/
 
1144
        ulint   max_format_id)  /*!< in: max format id to check */
 
1145
{
 
1146
        ulint   format_id;
 
1147
 
 
1148
        /* Check the file format in the tablespace. Do not try to
 
1149
        recover if the file format is not supported by the engine
 
1150
        unless forced by the user. */
 
1151
        format_id = trx_sys_file_format_max_read();
 
1152
        if (format_id == ULINT_UNDEFINED) {
 
1153
                /* Format ID was not set. Set it to minimum possible
 
1154
                value. */
 
1155
                format_id = DICT_TF_FORMAT_51;
 
1156
        }
 
1157
 
 
1158
        ut_print_timestamp(stderr);
 
1159
        fprintf(stderr,
 
1160
                "  InnoDB: highest supported file format is %s.\n",
 
1161
                trx_sys_file_format_id_to_name(DICT_TF_FORMAT_MAX));
 
1162
 
 
1163
        if (format_id > DICT_TF_FORMAT_MAX) {
 
1164
 
 
1165
                ut_a(format_id < FILE_FORMAT_NAME_N);
 
1166
 
 
1167
                ut_print_timestamp(stderr);
 
1168
                fprintf(stderr,
 
1169
                        "  InnoDB: %s: the system tablespace is in a file "
 
1170
                        "format that this version doesn't support - %s\n",
 
1171
                        ((max_format_id <= DICT_TF_FORMAT_MAX)
 
1172
                                ? "Error" : "Warning"),
 
1173
                        trx_sys_file_format_id_to_name(format_id));
 
1174
 
 
1175
                if (max_format_id <= DICT_TF_FORMAT_MAX) {
 
1176
                        return(DB_ERROR);
 
1177
                }
 
1178
        }
 
1179
 
 
1180
        format_id = (format_id > max_format_id) ? format_id : max_format_id;
 
1181
 
 
1182
        /* We don't need a mutex here, as this function should only
 
1183
        be called once at start up. */
 
1184
        file_format_max.id = format_id;
 
1185
        file_format_max.name = trx_sys_file_format_id_to_name(format_id);
 
1186
 
 
1187
        return(DB_SUCCESS);
 
1188
}
 
1189
 
 
1190
/*****************************************************************//**
 
1191
Set the file format id unconditionally except if it's already the
 
1192
same value.
 
1193
@return TRUE if value updated */
 
1194
UNIV_INTERN
 
1195
ibool
 
1196
trx_sys_file_format_max_set(
 
1197
/*========================*/
 
1198
        ulint           format_id,      /*!< in: file format id */
 
1199
        const char**    name)           /*!< out: max file format name or
 
1200
                                        NULL if not needed. */
 
1201
{
 
1202
        ibool           ret = FALSE;
 
1203
 
 
1204
        ut_a(format_id <= DICT_TF_FORMAT_MAX);
 
1205
 
 
1206
        mutex_enter(&file_format_max.mutex);
 
1207
 
 
1208
        /* Only update if not already same value. */
 
1209
        if (format_id != file_format_max.id) {
 
1210
 
 
1211
                ret = trx_sys_file_format_max_write(format_id, name);
 
1212
        }
 
1213
 
 
1214
        mutex_exit(&file_format_max.mutex);
 
1215
 
 
1216
        return(ret);
 
1217
}
 
1218
 
 
1219
/********************************************************************//**
 
1220
Tags the system table space with minimum format id if it has not been
 
1221
tagged yet.
 
1222
WARNING: This function is only called during the startup and AFTER the
 
1223
redo log application during recovery has finished. */
 
1224
UNIV_INTERN
 
1225
void
 
1226
trx_sys_file_format_tag_init(void)
 
1227
/*==============================*/
 
1228
{
 
1229
        ulint   format_id;
 
1230
 
 
1231
        format_id = trx_sys_file_format_max_read();
 
1232
 
 
1233
        /* If format_id is not set then set it to the minimum. */
 
1234
        if (format_id == ULINT_UNDEFINED) {
 
1235
                trx_sys_file_format_max_set(DICT_TF_FORMAT_51, NULL);
 
1236
        }
 
1237
}
 
1238
 
 
1239
/********************************************************************//**
 
1240
Update the file format tag in the system tablespace only if the given
 
1241
format id is greater than the known max id.
 
1242
@return TRUE if format_id was bigger than the known max id */
 
1243
UNIV_INTERN
 
1244
ibool
 
1245
trx_sys_file_format_max_upgrade(
 
1246
/*============================*/
 
1247
        const char**    name,           /*!< out: max file format name */
 
1248
        ulint           format_id)      /*!< in: file format identifier */
 
1249
{
 
1250
        ibool           ret = FALSE;
 
1251
 
 
1252
        ut_a(name);
 
1253
        ut_a(file_format_max.name != NULL);
 
1254
        ut_a(format_id <= DICT_TF_FORMAT_MAX);
 
1255
 
 
1256
        mutex_enter(&file_format_max.mutex);
 
1257
 
 
1258
        if (format_id > file_format_max.id) {
 
1259
 
 
1260
                ret = trx_sys_file_format_max_write(format_id, name);
 
1261
        }
 
1262
 
 
1263
        mutex_exit(&file_format_max.mutex);
 
1264
 
 
1265
        return(ret);
 
1266
}
 
1267
 
 
1268
/*****************************************************************//**
 
1269
Get the name representation of the file format from its id.
 
1270
@return pointer to the max format name */
 
1271
UNIV_INTERN
 
1272
const char*
 
1273
trx_sys_file_format_max_get(void)
 
1274
/*=============================*/
 
1275
{
 
1276
        return(file_format_max.name);
 
1277
}
 
1278
 
 
1279
/*****************************************************************//**
 
1280
Initializes the tablespace tag system. */
 
1281
UNIV_INTERN
 
1282
void
 
1283
trx_sys_file_format_init(void)
 
1284
/*==========================*/
 
1285
{
 
1286
        mutex_create(&file_format_max.mutex, SYNC_FILE_FORMAT_TAG);
 
1287
 
 
1288
        /* We don't need a mutex here, as this function should only
 
1289
        be called once at start up. */
 
1290
        file_format_max.id = DICT_TF_FORMAT_51;
 
1291
 
 
1292
        file_format_max.name = trx_sys_file_format_id_to_name(
 
1293
                file_format_max.id);
 
1294
}
 
1295
 
 
1296
/*****************************************************************//**
 
1297
Closes the tablespace tag system. */
 
1298
UNIV_INTERN
 
1299
void
 
1300
trx_sys_file_format_close(void)
 
1301
/*===========================*/
 
1302
{
 
1303
        /* Does nothing at the moment */
 
1304
}
 
1305
#else /* !UNIV_HOTBACKUP */
 
1306
/*****************************************************************//**
 
1307
Prints to stderr the MySQL binlog info in the system header if the
 
1308
magic number shows it valid. */
 
1309
UNIV_INTERN
 
1310
void
 
1311
trx_sys_print_mysql_binlog_offset_from_page(
 
1312
/*========================================*/
 
1313
        const byte*     page)   /*!< in: buffer containing the trx
 
1314
                                system header page, i.e., page number
 
1315
                                TRX_SYS_PAGE_NO in the tablespace */
 
1316
{
 
1317
        const trx_sysf_t*       sys_header;
 
1318
 
 
1319
        sys_header = page + TRX_SYS;
 
1320
 
 
1321
        if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
 
1322
                             + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
 
1323
            == TRX_SYS_MYSQL_LOG_MAGIC_N) {
 
1324
 
 
1325
                fprintf(stderr,
 
1326
                        "ibbackup: Last MySQL binlog file position %lu %lu,"
 
1327
                        " file name %s\n",
 
1328
                        (ulong) mach_read_from_4(
 
1329
                                sys_header + TRX_SYS_MYSQL_LOG_INFO
 
1330
                                + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
 
1331
                        (ulong) mach_read_from_4(
 
1332
                                sys_header + TRX_SYS_MYSQL_LOG_INFO
 
1333
                                + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
 
1334
                        sys_header + TRX_SYS_MYSQL_LOG_INFO
 
1335
                        + TRX_SYS_MYSQL_LOG_NAME);
 
1336
        }
 
1337
}
 
1338
 
 
1339
 
 
1340
/* THESE ARE COPIED FROM NON-HOTBACKUP PART OF THE INNODB SOURCE TREE
 
1341
   (This code duplication should be fixed at some point!)
 
1342
*/
 
1343
 
 
1344
#define TRX_SYS_SPACE   0       /* the SYSTEM tablespace */
 
1345
/* The offset of the file format tag on the trx system header page */
 
1346
#define TRX_SYS_FILE_FORMAT_TAG         (UNIV_PAGE_SIZE - 16)
 
1347
/* We use these random constants to reduce the probability of reading
 
1348
garbage (from previous versions) that maps to an actual format id. We
 
1349
use these as bit masks at the time of reading and writing from/to disk. */
 
1350
#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW     3645922177UL
 
1351
#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH    2745987765UL
 
1352
 
 
1353
/* END OF COPIED DEFINITIONS */
 
1354
 
 
1355
 
 
1356
/*****************************************************************//**
 
1357
Reads the file format id from the first system table space file.
 
1358
Even if the call succeeds and returns TRUE, the returned format id
 
1359
may be ULINT_UNDEFINED signalling that the format id was not present
 
1360
in the data file.
 
1361
@return TRUE if call succeeds */
 
1362
UNIV_INTERN
 
1363
ibool
 
1364
trx_sys_read_file_format_id(
 
1365
/*========================*/
 
1366
        const char *pathname,  /*!< in: pathname of the first system
 
1367
                                        table space file */
 
1368
        ulint *format_id)      /*!< out: file format of the system table
 
1369
                                         space */
 
1370
{
 
1371
        os_file_t       file;
 
1372
        ibool           success;
 
1373
        byte            buf[UNIV_PAGE_SIZE * 2];
 
1374
        page_t*         page = ut_align(buf, UNIV_PAGE_SIZE);
 
1375
        const byte*     ptr;
 
1376
        dulint          file_format_id;
 
1377
 
 
1378
        *format_id = ULINT_UNDEFINED;
 
1379
        
 
1380
        file = os_file_create_simple_no_error_handling(
 
1381
                pathname,
 
1382
                OS_FILE_OPEN,
 
1383
                OS_FILE_READ_ONLY,
 
1384
                &success
 
1385
        );
 
1386
        if (!success) {
 
1387
                /* The following call prints an error message */
 
1388
                os_file_get_last_error(TRUE);
 
1389
        
 
1390
                ut_print_timestamp(stderr);
 
1391
        
 
1392
                fprintf(stderr,
 
1393
"  ibbackup: Error: trying to read system tablespace file format,\n"
 
1394
"  ibbackup: but could not open the tablespace file %s!\n",
 
1395
                        pathname
 
1396
                );
 
1397
                return(FALSE);
 
1398
        }
 
1399
 
 
1400
        /* Read the page on which file format is stored */
 
1401
 
 
1402
        success = os_file_read_no_error_handling(
 
1403
                file, page, TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE, 0, UNIV_PAGE_SIZE
 
1404
        );
 
1405
        if (!success) {
 
1406
                /* The following call prints an error message */
 
1407
                os_file_get_last_error(TRUE);
 
1408
        
 
1409
                ut_print_timestamp(stderr);
 
1410
        
 
1411
                fprintf(stderr,
 
1412
"  ibbackup: Error: trying to read system table space file format,\n"
 
1413
"  ibbackup: but failed to read the tablespace file %s!\n",
 
1414
                        pathname
 
1415
                );
 
1416
                os_file_close(file);
 
1417
                return(FALSE);
 
1418
        }
 
1419
        os_file_close(file);
 
1420
 
 
1421
        /* get the file format from the page */
 
1422
        ptr = page + TRX_SYS_FILE_FORMAT_TAG;
 
1423
        file_format_id = mach_read_from_8(ptr);
 
1424
 
 
1425
        *format_id = file_format_id.low - TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
 
1426
 
 
1427
        if (file_format_id.high != TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH
 
1428
            || *format_id >= FILE_FORMAT_NAME_N) {
 
1429
 
 
1430
                /* Either it has never been tagged, or garbage in it. */
 
1431
                *format_id = ULINT_UNDEFINED;
 
1432
                return(TRUE);
 
1433
        }
 
1434
        
 
1435
        return(TRUE);
 
1436
}
 
1437
 
 
1438
 
 
1439
/*****************************************************************//**
 
1440
Reads the file format id from the given per-table data file.
 
1441
@return TRUE if call succeeds */
 
1442
UNIV_INTERN
 
1443
ibool
 
1444
trx_sys_read_pertable_file_format_id(
 
1445
/*=================================*/
 
1446
        const char *pathname,  /*!< in: pathname of a per-table
 
1447
                                        datafile */
 
1448
        ulint *format_id)      /*!< out: file format of the per-table
 
1449
                                         data file */
 
1450
{
 
1451
        os_file_t       file;
 
1452
        ibool           success;
 
1453
        byte            buf[UNIV_PAGE_SIZE * 2];
 
1454
        page_t*         page = ut_align(buf, UNIV_PAGE_SIZE);
 
1455
        const byte*     ptr;
 
1456
        ib_uint32_t     flags;
 
1457
 
 
1458
        *format_id = ULINT_UNDEFINED;
 
1459
        
 
1460
        file = os_file_create_simple_no_error_handling(
 
1461
                pathname,
 
1462
                OS_FILE_OPEN,
 
1463
                OS_FILE_READ_ONLY,
 
1464
                &success
 
1465
        );
 
1466
        if (!success) {
 
1467
                /* The following call prints an error message */
 
1468
                os_file_get_last_error(TRUE);
 
1469
        
 
1470
                ut_print_timestamp(stderr);
 
1471
        
 
1472
                fprintf(stderr,
 
1473
"  ibbackup: Error: trying to read per-table tablespace format,\n"
 
1474
"  ibbackup: but could not open the tablespace file %s!\n",
 
1475
                        pathname
 
1476
                );
 
1477
                return(FALSE);
 
1478
        }
 
1479
 
 
1480
        /* Read the first page of the per-table datafile */
 
1481
 
 
1482
        success = os_file_read_no_error_handling(
 
1483
                file, page, 0, 0, UNIV_PAGE_SIZE
 
1484
        );
 
1485
        if (!success) {
 
1486
                /* The following call prints an error message */
 
1487
                os_file_get_last_error(TRUE);
 
1488
        
 
1489
                ut_print_timestamp(stderr);
 
1490
        
 
1491
                fprintf(stderr,
 
1492
"  ibbackup: Error: trying to per-table data file format,\n"
 
1493
"  ibbackup: but failed to read the tablespace file %s!\n",
 
1494
                        pathname
 
1495
                );
 
1496
                os_file_close(file);
 
1497
                return(FALSE);
 
1498
        }
 
1499
        os_file_close(file);
 
1500
 
 
1501
        /* get the file format from the page */
 
1502
        ptr = page + 54;
 
1503
        flags = mach_read_from_4(ptr);
 
1504
        if (flags == 0) {
 
1505
                /* file format is Antelope */
 
1506
                *format_id = 0;
 
1507
                return (TRUE);
 
1508
        } else if (flags & 1) {
 
1509
                /* tablespace flags are ok */
 
1510
                *format_id = (flags / 32) % 128;
 
1511
                return (TRUE);
 
1512
        } else {
 
1513
                /* bad tablespace flags */
 
1514
                return(FALSE);
 
1515
        }
 
1516
}
 
1517
 
 
1518
 
 
1519
/*****************************************************************//**
 
1520
Get the name representation of the file format from its id.
 
1521
@return pointer to the name */
 
1522
UNIV_INTERN
 
1523
const char*
 
1524
trx_sys_file_format_id_to_name(
 
1525
/*===========================*/
 
1526
        const ulint     id)     /*!< in: id of the file format */
 
1527
{
 
1528
        if (!(id < FILE_FORMAT_NAME_N)) {
 
1529
                /* unknown id */
 
1530
                return ("Unknown");
 
1531
        }
 
1532
 
 
1533
        return(file_format_name_map[id]);
 
1534
}
 
1535
 
 
1536
#endif /* !UNIV_HOTBACKUP */
 
1537
 
 
1538
/*********************************************************************
 
1539
Shutdown/Close the transaction system. */
 
1540
UNIV_INTERN
 
1541
void
 
1542
trx_sys_close(void)
 
1543
/*===============*/
 
1544
{
 
1545
        trx_rseg_t*     rseg;
 
1546
        read_view_t*    view;
 
1547
 
 
1548
        ut_ad(trx_sys != NULL);
 
1549
 
 
1550
        /* Check that all read views are closed except read view owned
 
1551
        by a purge. */
 
1552
 
 
1553
        if (UT_LIST_GET_LEN(trx_sys->view_list) > 1) {
 
1554
                fprintf(stderr,
 
1555
                        "InnoDB: Error: all read views were not closed"
 
1556
                        " before shutdown:\n"
 
1557
                        "InnoDB: %lu read views open \n",
 
1558
                        UT_LIST_GET_LEN(trx_sys->view_list) - 1);
 
1559
        }
 
1560
 
 
1561
        sess_close(trx_dummy_sess);
 
1562
        trx_dummy_sess = NULL;
 
1563
 
 
1564
        trx_purge_sys_close();
 
1565
 
 
1566
        mutex_enter(&kernel_mutex);
 
1567
 
 
1568
        /* Free the double write data structures. */
 
1569
        ut_a(trx_doublewrite != NULL);
 
1570
        ut_free(trx_doublewrite->write_buf_unaligned);
 
1571
        trx_doublewrite->write_buf_unaligned = NULL;
 
1572
 
 
1573
        mem_free(trx_doublewrite->buf_block_arr);
 
1574
        trx_doublewrite->buf_block_arr = NULL;
 
1575
 
 
1576
        mutex_free(&trx_doublewrite->mutex);
 
1577
        mem_free(trx_doublewrite);
 
1578
        trx_doublewrite = NULL;
 
1579
 
 
1580
        /* There can't be any active transactions. */
 
1581
        rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
 
1582
 
 
1583
        while (rseg != NULL) {
 
1584
                trx_rseg_t*     prev_rseg = rseg;
 
1585
 
 
1586
                rseg = UT_LIST_GET_NEXT(rseg_list, prev_rseg);
 
1587
                UT_LIST_REMOVE(rseg_list, trx_sys->rseg_list, prev_rseg);
 
1588
 
 
1589
                trx_rseg_mem_free(prev_rseg);
 
1590
        }
 
1591
 
 
1592
        view = UT_LIST_GET_FIRST(trx_sys->view_list);
 
1593
 
 
1594
        while (view != NULL) {
 
1595
                read_view_t*    prev_view = view;
 
1596
 
 
1597
                view = UT_LIST_GET_NEXT(view_list, prev_view);
 
1598
 
 
1599
                /* Views are allocated from the trx_sys->global_read_view_heap.
 
1600
                So, we simply remove the element here. */
 
1601
                UT_LIST_REMOVE(view_list, trx_sys->view_list, prev_view);
 
1602
        }
 
1603
 
 
1604
        ut_a(UT_LIST_GET_LEN(trx_sys->trx_list) == 0);
 
1605
        ut_a(UT_LIST_GET_LEN(trx_sys->rseg_list) == 0);
 
1606
        ut_a(UT_LIST_GET_LEN(trx_sys->view_list) == 0);
 
1607
        ut_a(UT_LIST_GET_LEN(trx_sys->mysql_trx_list) == 0);
 
1608
 
 
1609
        mem_free(trx_sys);
 
1610
 
 
1611
        trx_sys = NULL;
 
1612
        mutex_exit(&kernel_mutex);
 
1613
}