1
/*****************************************************************************
3
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
5
This program is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free Software
7
Foundation; version 2 of the License.
9
This program is distributed in the hope that it will be useful, but WITHOUT
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
You should have received a copy of the GNU General Public License along with
14
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15
Place, Suite 330, Boston, MA 02111-1307 USA
17
*****************************************************************************/
19
/**************************************************//**
23
Created 3/26/1996 Heikki Tuuri
24
*******************************************************/
32
#ifndef UNIV_HOTBACKUP
40
#include "trx0purge.h"
43
#include "read0read.h"
45
/** The file format tag structure with id and name. */
46
struct file_format_struct {
47
ulint id; /*!< id of the file format */
48
const char* name; /*!< text representation of the
50
mutex_t mutex; /*!< covers changes to the above
54
/** The file format tag */
55
typedef struct file_format_struct file_format_t;
57
/** The transaction system */
58
UNIV_INTERN trx_sys_t* trx_sys = NULL;
59
/** The doublewrite buffer */
60
UNIV_INTERN trx_doublewrite_t* trx_doublewrite = NULL;
62
/** The following is set to TRUE when we are upgrading from pre-4.1
63
format data files to the multiple tablespaces format data files */
64
UNIV_INTERN ibool trx_doublewrite_must_reset_space_ids = FALSE;
65
/** Set to TRUE when the doublewrite buffer is being created */
66
UNIV_INTERN ibool trx_doublewrite_buf_is_being_created = FALSE;
68
/** The following is TRUE when we are using the database in the
69
post-4.1 format, i.e., we have successfully upgraded, or have created
70
a new database installation */
71
UNIV_INTERN ibool trx_sys_multiple_tablespace_format = FALSE;
73
/** In a MySQL replication slave, in crash recovery we store the master log
74
file name and position here. */
76
/** Master binlog file name */
77
UNIV_INTERN char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
78
/** Master binlog file position. We have successfully got the updates
79
up to this position. -1 means that no crash recovery was needed, or
80
there was no master log position info inside InnoDB.*/
81
UNIV_INTERN ib_int64_t trx_sys_mysql_master_log_pos = -1;
84
/** If this MySQL server uses binary logging, after InnoDB has been inited
85
and if it has done a crash recovery, we store the binlog file name and position
88
/** Binlog file name */
89
UNIV_INTERN char trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
90
/** Binlog file position, or -1 if unknown */
91
UNIV_INTERN ib_int64_t trx_sys_mysql_bin_log_pos = -1;
93
#endif /* !UNIV_HOTBACKUP */
95
/** List of animal names representing file format. */
96
static const char* file_format_name_map[] = {
125
/** The number of elements in the file format name array. */
126
static const ulint FILE_FORMAT_NAME_N
127
= sizeof(file_format_name_map) / sizeof(file_format_name_map[0]);
129
#ifndef UNIV_HOTBACKUP
130
/** This is used to track the maximum file format id known to InnoDB. It's
131
updated via SET GLOBAL innodb_file_format_check = 'x' or when we open
132
or create a table. */
133
static file_format_t file_format_max;
135
/****************************************************************//**
136
Determines if a page number is located inside the doublewrite buffer.
137
@return TRUE if the location is inside the two blocks of the
138
doublewrite buffer */
141
trx_doublewrite_page_inside(
142
/*========================*/
143
ulint page_no) /*!< in: page number */
145
if (trx_doublewrite == NULL) {
150
if (page_no >= trx_doublewrite->block1
151
&& page_no < trx_doublewrite->block1
152
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
156
if (page_no >= trx_doublewrite->block2
157
&& page_no < trx_doublewrite->block2
158
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
165
/****************************************************************//**
166
Creates or initializes the doublewrite buffer at a database start. */
169
trx_doublewrite_init(
170
/*=================*/
171
byte* doublewrite) /*!< in: pointer to the doublewrite buf
172
header on trx sys page */
174
trx_doublewrite = mem_alloc(sizeof(trx_doublewrite_t));
176
/* Since we now start to use the doublewrite buffer, no need to call
177
fsync() after every write to a data file */
179
os_do_not_call_flush_at_each_write = TRUE;
180
#endif /* UNIV_DO_FLUSH */
182
mutex_create(&trx_doublewrite->mutex, SYNC_DOUBLEWRITE);
184
trx_doublewrite->first_free = 0;
186
trx_doublewrite->block1 = mach_read_from_4(
187
doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK1);
188
trx_doublewrite->block2 = mach_read_from_4(
189
doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2);
190
trx_doublewrite->write_buf_unaligned = ut_malloc(
191
(1 + 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE);
193
trx_doublewrite->write_buf = ut_align(
194
trx_doublewrite->write_buf_unaligned, UNIV_PAGE_SIZE);
195
trx_doublewrite->buf_block_arr = mem_alloc(
196
2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * sizeof(void*));
199
/****************************************************************//**
200
Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
201
multiple tablespace format. */
204
trx_sys_mark_upgraded_to_multiple_tablespaces(void)
205
/*===============================================*/
211
/* We upgraded to 4.1.x and reset the space id fields in the
212
doublewrite buffer. Let us mark to the trx_sys header that the upgrade
217
block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
219
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
221
doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
223
mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
224
TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
228
/* Flush the modified pages to disk and make a checkpoint */
229
log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
231
trx_sys_multiple_tablespace_format = TRUE;
234
/****************************************************************//**
235
Creates the doublewrite buffer for a new InnoDB installation. The header of the
236
doublewrite buffer is placed on the trx system header page. */
239
trx_sys_create_doublewrite_buf(void)
240
/*================================*/
244
buf_block_t* new_block;
252
if (trx_doublewrite) {
260
trx_doublewrite_buf_is_being_created = TRUE;
262
block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
264
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
266
doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
268
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
269
== TRX_SYS_DOUBLEWRITE_MAGIC_N) {
270
/* The doublewrite buffer has already been created:
271
just read in some numbers */
273
trx_doublewrite_init(doublewrite);
276
trx_doublewrite_buf_is_being_created = FALSE;
279
"InnoDB: Doublewrite buffer not found:"
282
if (buf_pool_get_curr_size()
283
< ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
284
+ FSP_EXTENT_SIZE / 2 + 100)
287
"InnoDB: Cannot create doublewrite buffer:"
289
"InnoDB: increase your buffer pool size.\n"
290
"InnoDB: Cannot continue operation.\n");
295
block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
297
+ TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
299
/* fseg_create acquires a second latch on the page,
300
therefore we must declare it: */
302
buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
304
if (block2 == NULL) {
306
"InnoDB: Cannot create doublewrite buffer:"
308
"InnoDB: increase your tablespace size.\n"
309
"InnoDB: Cannot continue operation.\n");
311
/* We exit without committing the mtr to prevent
312
its modifications to the database getting to disk */
317
fseg_header = buf_block_get_frame(block)
318
+ TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG;
321
for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
322
+ FSP_EXTENT_SIZE / 2; i++) {
323
page_no = fseg_alloc_free_page(fseg_header,
326
if (page_no == FIL_NULL) {
328
"InnoDB: Cannot create doublewrite"
329
" buffer: you must\n"
330
"InnoDB: increase your"
331
" tablespace size.\n"
332
"InnoDB: Cannot continue operation.\n"
338
/* We read the allocated pages to the buffer pool;
339
when they are written to disk in a flush, the space
340
id and page number fields are also written to the
341
pages. When we at database startup read pages
342
from the doublewrite buffer, we know that if the
343
space id and page number in them are the same as
344
the page position in the tablespace, then the page
345
has not been written to in doublewrite. */
347
new_block = buf_page_get(TRX_SYS_SPACE, 0, page_no,
349
buf_block_dbg_add_level(new_block,
350
SYNC_NO_ORDER_CHECK);
352
if (i == FSP_EXTENT_SIZE / 2) {
353
ut_a(page_no == FSP_EXTENT_SIZE);
354
mlog_write_ulint(doublewrite
355
+ TRX_SYS_DOUBLEWRITE_BLOCK1,
356
page_no, MLOG_4BYTES, &mtr);
357
mlog_write_ulint(doublewrite
358
+ TRX_SYS_DOUBLEWRITE_REPEAT
359
+ TRX_SYS_DOUBLEWRITE_BLOCK1,
360
page_no, MLOG_4BYTES, &mtr);
361
} else if (i == FSP_EXTENT_SIZE / 2
362
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
363
ut_a(page_no == 2 * FSP_EXTENT_SIZE);
364
mlog_write_ulint(doublewrite
365
+ TRX_SYS_DOUBLEWRITE_BLOCK2,
366
page_no, MLOG_4BYTES, &mtr);
367
mlog_write_ulint(doublewrite
368
+ TRX_SYS_DOUBLEWRITE_REPEAT
369
+ TRX_SYS_DOUBLEWRITE_BLOCK2,
370
page_no, MLOG_4BYTES, &mtr);
371
} else if (i > FSP_EXTENT_SIZE / 2) {
372
ut_a(page_no == prev_page_no + 1);
375
prev_page_no = page_no;
378
mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
379
TRX_SYS_DOUBLEWRITE_MAGIC_N,
381
mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
382
+ TRX_SYS_DOUBLEWRITE_REPEAT,
383
TRX_SYS_DOUBLEWRITE_MAGIC_N,
386
mlog_write_ulint(doublewrite
387
+ TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
388
TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
392
/* Flush the modified pages to disk and make a checkpoint */
393
log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
395
fprintf(stderr, "InnoDB: Doublewrite buffer created\n");
397
trx_sys_multiple_tablespace_format = TRUE;
403
/****************************************************************//**
404
At a database startup initializes the doublewrite buffer memory structure if
405
we already have a doublewrite buffer created in the data files. If we are
406
upgrading to an InnoDB version which supports multiple tablespaces, then this
407
function performs the necessary update operations. If we are in a crash
408
recovery, this function uses a possible doublewrite buffer to restore
409
half-written pages in the data files. */
412
trx_sys_doublewrite_init_or_restore_pages(
413
/*======================================*/
414
ibool restore_corrupt_pages) /*!< in: TRUE=restore pages */
418
byte* unaligned_read_buf;
421
ulint source_page_no;
428
/* We do the file i/o past the buffer pool */
430
unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
431
read_buf = ut_align(unaligned_read_buf, UNIV_PAGE_SIZE);
433
/* Read the trx sys header to check if we are using the doublewrite
436
fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0,
437
UNIV_PAGE_SIZE, read_buf, NULL);
438
doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
440
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
441
== TRX_SYS_DOUBLEWRITE_MAGIC_N) {
442
/* The doublewrite buffer has been created */
444
trx_doublewrite_init(doublewrite);
446
block1 = trx_doublewrite->block1;
447
block2 = trx_doublewrite->block2;
449
buf = trx_doublewrite->write_buf;
454
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
455
!= TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
457
/* We are upgrading from a version < 4.1.x to a version where
458
multiple tablespaces are supported. We must reset the space id
459
field in the pages in the doublewrite buffer because starting
460
from this version the space id is stored to
461
FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
463
trx_doublewrite_must_reset_space_ids = TRUE;
466
"InnoDB: Resetting space id's in the"
467
" doublewrite buffer\n");
469
trx_sys_multiple_tablespace_format = TRUE;
472
/* Read the pages from the doublewrite buffer to memory */
474
fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0,
475
TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
477
fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0,
478
TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
479
buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
481
/* Check if any of these pages is half-written in data files, in the
486
for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
488
page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
490
if (trx_doublewrite_must_reset_space_ids) {
494
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0);
495
/* We do not need to calculate new checksums for the
496
pages because the field .._SPACE_ID does not affect
497
them. Write the page back to where we read it from. */
499
if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
500
source_page_no = block1 + i;
502
source_page_no = block2
503
+ i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
506
fil_io(OS_FILE_WRITE, TRUE, 0, 0, source_page_no, 0,
507
UNIV_PAGE_SIZE, page, NULL);
508
/* printf("Resetting space id in page %lu\n",
511
space_id = mach_read_from_4(
512
page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
515
if (!restore_corrupt_pages) {
516
/* The database was shut down gracefully: no need to
519
} else if (!fil_tablespace_exists_in_mem(space_id)) {
520
/* Maybe we have dropped the single-table tablespace
521
and this page once belonged to it: do nothing */
523
} else if (!fil_check_adress_in_tablespace(space_id,
526
"InnoDB: Warning: a page in the"
527
" doublewrite buffer is not within space\n"
528
"InnoDB: bounds; space id %lu"
529
" page number %lu, page %lu in"
530
" doublewrite buf.\n",
531
(ulong) space_id, (ulong) page_no, (ulong) i);
533
} else if (space_id == TRX_SYS_SPACE
534
&& ((page_no >= block1
536
< block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
537
|| (page_no >= block2
540
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)))) {
542
/* It is an unwritten doublewrite buffer page:
545
ulint zip_size = fil_space_get_zip_size(space_id);
547
/* Read in the actual page from the file */
548
fil_io(OS_FILE_READ, TRUE, space_id, zip_size,
550
zip_size ? zip_size : UNIV_PAGE_SIZE,
553
/* Check if the page is corrupt */
556
(buf_page_is_corrupted(read_buf, zip_size))) {
559
"InnoDB: Warning: database page"
560
" corruption or a failed\n"
561
"InnoDB: file read of"
562
" space %lu page %lu.\n"
563
"InnoDB: Trying to recover it from"
564
" the doublewrite buffer.\n",
565
(ulong) space_id, (ulong) page_no);
567
if (buf_page_is_corrupted(page, zip_size)) {
569
"InnoDB: Dump of the page:\n");
570
buf_page_print(read_buf, zip_size);
573
" corresponding page"
574
" in doublewrite buffer:\n");
575
buf_page_print(page, zip_size);
578
"InnoDB: Also the page in the"
579
" doublewrite buffer"
581
"InnoDB: Cannot continue"
583
"InnoDB: You can try to"
584
" recover the database"
587
"InnoDB: set-variable="
588
"innodb_force_recovery=6\n");
592
/* Write the good page from the
593
doublewrite buffer to the intended
596
fil_io(OS_FILE_WRITE, TRUE, space_id,
597
zip_size, page_no, 0,
598
zip_size ? zip_size : UNIV_PAGE_SIZE,
601
"InnoDB: Recovered the page from"
602
" the doublewrite buffer.\n");
606
page += UNIV_PAGE_SIZE;
609
fil_flush_file_spaces(FIL_TABLESPACE);
612
ut_free(unaligned_read_buf);
615
/****************************************************************//**
616
Checks that trx is in the trx list.
617
@return TRUE if it is in the list */
622
trx_t* in_trx) /*!< in: trx */
626
ut_ad(mutex_own(&(kernel_mutex)));
628
trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
630
while (trx != NULL) {
637
trx = UT_LIST_GET_NEXT(trx_list, trx);
643
/*****************************************************************//**
644
Writes the value of max_trx_id to the file based trx system header. */
647
trx_sys_flush_max_trx_id(void)
648
/*==========================*/
650
trx_sysf_t* sys_header;
653
ut_ad(mutex_own(&kernel_mutex));
657
sys_header = trx_sysf_get(&mtr);
659
mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
660
trx_sys->max_trx_id, &mtr);
664
/*****************************************************************//**
665
Updates the offset information about the end of the MySQL binlog entry
666
which corresponds to the transaction just being committed. In a MySQL
667
replication slave updates the latest master binlog position up to which
668
replication has proceeded. */
671
trx_sys_update_mysql_binlog_offset(
672
/*===============================*/
673
const char* file_name,/*!< in: MySQL log file name */
674
ib_int64_t offset, /*!< in: position in that log file */
675
ulint field, /*!< in: offset of the MySQL log info field in
676
the trx sys header */
677
mtr_t* mtr) /*!< in: mtr */
679
trx_sysf_t* sys_header;
681
if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
683
/* We cannot fit the name to the 512 bytes we have reserved */
688
sys_header = trx_sysf_get(mtr);
690
if (mach_read_from_4(sys_header + field
691
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
692
!= TRX_SYS_MYSQL_LOG_MAGIC_N) {
694
mlog_write_ulint(sys_header + field
695
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD,
696
TRX_SYS_MYSQL_LOG_MAGIC_N,
700
if (0 != strcmp((char*) (sys_header + field + TRX_SYS_MYSQL_LOG_NAME),
703
mlog_write_string(sys_header + field
704
+ TRX_SYS_MYSQL_LOG_NAME,
705
(byte*) file_name, 1 + ut_strlen(file_name),
709
if (mach_read_from_4(sys_header + field
710
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0
711
|| (offset >> 32) > 0) {
713
mlog_write_ulint(sys_header + field
714
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH,
715
(ulint)(offset >> 32),
719
mlog_write_ulint(sys_header + field
720
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW,
721
(ulint)(offset & 0xFFFFFFFFUL),
725
/*****************************************************************//**
726
Stores to global variables the MySQL binlog offset info read from the trx system header if
727
the magic number shows it valid, and prints the info to stderr */
730
trx_sys_print_mysql_binlog_offset(void)
731
/*===================================*/
733
trx_sysf_t* sys_header;
735
ulint trx_sys_mysql_bin_log_pos_high;
736
ulint trx_sys_mysql_bin_log_pos_low;
740
sys_header = trx_sysf_get(&mtr);
742
if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
743
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
744
!= TRX_SYS_MYSQL_LOG_MAGIC_N) {
751
trx_sys_mysql_bin_log_pos_high = mach_read_from_4(
752
sys_header + TRX_SYS_MYSQL_LOG_INFO
753
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
754
trx_sys_mysql_bin_log_pos_low = mach_read_from_4(
755
sys_header + TRX_SYS_MYSQL_LOG_INFO
756
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW);
758
trx_sys_mysql_bin_log_pos
759
= (((ib_int64_t)trx_sys_mysql_bin_log_pos_high) << 32)
760
+ (ib_int64_t)trx_sys_mysql_bin_log_pos_low;
762
ut_memcpy(trx_sys_mysql_bin_log_name,
763
sys_header + TRX_SYS_MYSQL_LOG_INFO
764
+ TRX_SYS_MYSQL_LOG_NAME, TRX_SYS_MYSQL_LOG_NAME_LEN);
767
"InnoDB: Last MySQL binlog file position %lu %lu,"
769
trx_sys_mysql_bin_log_pos_high, trx_sys_mysql_bin_log_pos_low,
770
trx_sys_mysql_bin_log_name);
775
/*****************************************************************//**
776
Prints to stderr the MySQL master log offset info in the trx system header if
777
the magic number shows it valid. */
780
trx_sys_print_mysql_master_log_pos(void)
781
/*====================================*/
783
trx_sysf_t* sys_header;
788
sys_header = trx_sysf_get(&mtr);
790
if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
791
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
792
!= TRX_SYS_MYSQL_LOG_MAGIC_N) {
800
"InnoDB: In a MySQL replication slave the last"
801
" master binlog file\n"
802
"InnoDB: position %lu %lu, file name %s\n",
803
(ulong) mach_read_from_4(sys_header
804
+ TRX_SYS_MYSQL_MASTER_LOG_INFO
805
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
806
(ulong) mach_read_from_4(sys_header
807
+ TRX_SYS_MYSQL_MASTER_LOG_INFO
808
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW),
809
sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
810
+ TRX_SYS_MYSQL_LOG_NAME);
811
/* Copy the master log position info to global variables we can
812
use in ha_innobase.cc to initialize glob_mi to right values */
814
ut_memcpy(trx_sys_mysql_master_log_name,
815
sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
816
+ TRX_SYS_MYSQL_LOG_NAME,
817
TRX_SYS_MYSQL_LOG_NAME_LEN);
819
trx_sys_mysql_master_log_pos
820
= (((ib_int64_t) mach_read_from_4(
821
sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
822
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH)) << 32)
823
+ ((ib_int64_t) mach_read_from_4(
824
sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
825
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW));
829
/****************************************************************//**
830
Looks for a free slot for a rollback segment in the trx system file copy.
831
@return slot index or ULINT_UNDEFINED if not found */
834
trx_sysf_rseg_find_free(
835
/*====================*/
836
mtr_t* mtr) /*!< in: mtr */
838
trx_sysf_t* sys_header;
842
ut_ad(mutex_own(&(kernel_mutex)));
844
sys_header = trx_sysf_get(mtr);
846
for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
848
page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
850
if (page_no == FIL_NULL) {
856
return(ULINT_UNDEFINED);
859
/*****************************************************************//**
860
Creates the file page for the transaction system. This function is called only
861
at the database creation, before trx_sys_init. */
866
mtr_t* mtr) /*!< in: mtr */
868
trx_sysf_t* sys_header;
877
/* Note that below we first reserve the file space x-latch, and
878
then enter the kernel: we must do it in this order to conform
879
to the latching order rules. */
881
mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE, NULL), mtr);
882
mutex_enter(&kernel_mutex);
884
/* Create the trx sys file block in a new allocated file segment */
885
block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
887
buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
889
ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);
891
page = buf_block_get_frame(block);
893
mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
896
/* Reset the doublewrite buffer magic number to zero so that we
897
know that the doublewrite buffer has not yet been created (this
898
suppresses a Valgrind warning) */
900
mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
901
+ TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
903
sys_header = trx_sysf_get(mtr);
905
/* Start counting transaction ids from number 1 up */
906
mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
907
ut_dulint_create(0, 1), mtr);
909
/* Reset the rollback segment slots */
910
for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
912
trx_sysf_rseg_set_space(sys_header, i, ULINT_UNDEFINED, mtr);
913
trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr);
916
/* The remaining area (up to the page trailer) is uninitialized.
917
Silence Valgrind warnings about it. */
918
UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS
919
+ TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
920
+ TRX_SYS_RSEG_SPACE),
921
(UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
923
+ TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
924
+ TRX_SYS_RSEG_SPACE))
925
+ page - sys_header);
927
/* Create the first rollback segment in the SYSTEM tablespace */
928
page_no = trx_rseg_header_create(TRX_SYS_SPACE, 0, ULINT_MAX, &slot_no,
930
ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
931
ut_a(page_no != FIL_NULL);
933
mutex_exit(&kernel_mutex);
936
/*****************************************************************//**
937
Creates and initializes the central memory structures for the transaction
938
system. This is called when the database is started. */
941
trx_sys_init_at_db_start(void)
942
/*==========================*/
944
trx_sysf_t* sys_header;
945
ib_int64_t rows_to_undo = 0;
946
const char* unit = "";
952
ut_ad(trx_sys == NULL);
954
mutex_enter(&kernel_mutex);
956
trx_sys = mem_alloc(sizeof(trx_sys_t));
958
sys_header = trx_sysf_get(&mtr);
960
trx_rseg_list_and_array_init(sys_header, &mtr);
962
trx_sys->latest_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
964
/* VERY important: after the database is started, max_trx_id value is
965
divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
966
trx_sys_get_new_trx_id will evaluate to TRUE when the function
967
is first time called, and the value for trx id will be written
968
to the disk-based header! Thus trx id values will not overlap when
969
the database is repeatedly started! */
971
trx_sys->max_trx_id = ut_dulint_add(
972
ut_dulint_align_up(mtr_read_dulint(
974
+ TRX_SYS_TRX_ID_STORE, &mtr),
975
TRX_SYS_TRX_ID_WRITE_MARGIN),
976
2 * TRX_SYS_TRX_ID_WRITE_MARGIN);
978
UT_LIST_INIT(trx_sys->mysql_trx_list);
979
trx_dummy_sess = sess_open();
980
trx_lists_init_at_db_start();
982
if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
983
trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
987
if ( trx->conc_state != TRX_PREPARED) {
988
rows_to_undo += ut_conv_dulint_to_longlong(
992
trx = UT_LIST_GET_NEXT(trx_list, trx);
999
if (rows_to_undo > 1000000000) {
1001
rows_to_undo = rows_to_undo / 1000000;
1005
"InnoDB: %lu transaction(s) which must be"
1006
" rolled back or cleaned up\n"
1007
"InnoDB: in total %lu%s row operations to undo\n",
1008
(ulong) UT_LIST_GET_LEN(trx_sys->trx_list),
1009
(ulong) rows_to_undo, unit);
1011
fprintf(stderr, "InnoDB: Trx id counter is " TRX_ID_FMT "\n",
1012
TRX_ID_PREP_PRINTF(trx_sys->max_trx_id));
1015
UT_LIST_INIT(trx_sys->view_list);
1017
trx_purge_sys_create();
1019
mutex_exit(&kernel_mutex);
1024
/*****************************************************************//**
1025
Creates and initializes the transaction system at the database creation. */
1028
trx_sys_create(void)
1029
/*================*/
1035
trx_sysf_create(&mtr);
1039
trx_sys_init_at_db_start();
1042
/*****************************************************************//**
1043
Update the file format tag.
1044
@return always TRUE */
1047
trx_sys_file_format_max_write(
1048
/*==========================*/
1049
ulint format_id, /*!< in: file format id */
1050
const char** name) /*!< out: max file format name, can
1056
ulint tag_value_low;
1060
block = buf_page_get(
1061
TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
1063
file_format_max.id = format_id;
1064
file_format_max.name = trx_sys_file_format_id_to_name(format_id);
1066
ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
1067
tag_value_low = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
1070
*name = file_format_max.name;
1075
ut_dulint_create(TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH,
1084
/*****************************************************************//**
1085
Read the file format tag.
1086
@return the file format or ULINT_UNDEFINED if not set. */
1089
trx_sys_file_format_max_read(void)
1090
/*==============================*/
1094
const buf_block_t* block;
1096
dulint file_format_id;
1098
/* Since this is called during the startup phase it's safe to
1099
read the value without a covering mutex. */
1102
block = buf_page_get(
1103
TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
1105
ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
1106
file_format_id = mach_read_from_8(ptr);
1110
format_id = file_format_id.low - TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
1112
if (file_format_id.high != TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH
1113
|| format_id >= FILE_FORMAT_NAME_N) {
1115
/* Either it has never been tagged, or garbage in it. */
1116
return(ULINT_UNDEFINED);
1122
/*****************************************************************//**
1123
Get the name representation of the file format from its id.
1124
@return pointer to the name */
1127
trx_sys_file_format_id_to_name(
1128
/*===========================*/
1129
const ulint id) /*!< in: id of the file format */
1131
ut_a(id < FILE_FORMAT_NAME_N);
1133
return(file_format_name_map[id]);
1136
/*****************************************************************//**
1137
Check for the max file format tag stored on disk. Note: If max_format_id
1138
is == DICT_TF_FORMAT_MAX + 1 then we only print a warning.
1139
@return DB_SUCCESS or error code */
1142
trx_sys_file_format_max_check(
1143
/*==========================*/
1144
ulint max_format_id) /*!< in: max format id to check */
1148
/* Check the file format in the tablespace. Do not try to
1149
recover if the file format is not supported by the engine
1150
unless forced by the user. */
1151
format_id = trx_sys_file_format_max_read();
1152
if (format_id == ULINT_UNDEFINED) {
1153
/* Format ID was not set. Set it to minimum possible
1155
format_id = DICT_TF_FORMAT_51;
1158
ut_print_timestamp(stderr);
1160
" InnoDB: highest supported file format is %s.\n",
1161
trx_sys_file_format_id_to_name(DICT_TF_FORMAT_MAX));
1163
if (format_id > DICT_TF_FORMAT_MAX) {
1165
ut_a(format_id < FILE_FORMAT_NAME_N);
1167
ut_print_timestamp(stderr);
1169
" InnoDB: %s: the system tablespace is in a file "
1170
"format that this version doesn't support - %s\n",
1171
((max_format_id <= DICT_TF_FORMAT_MAX)
1172
? "Error" : "Warning"),
1173
trx_sys_file_format_id_to_name(format_id));
1175
if (max_format_id <= DICT_TF_FORMAT_MAX) {
1180
format_id = (format_id > max_format_id) ? format_id : max_format_id;
1182
/* We don't need a mutex here, as this function should only
1183
be called once at start up. */
1184
file_format_max.id = format_id;
1185
file_format_max.name = trx_sys_file_format_id_to_name(format_id);
1190
/*****************************************************************//**
1191
Set the file format id unconditionally except if it's already the
1193
@return TRUE if value updated */
1196
trx_sys_file_format_max_set(
1197
/*========================*/
1198
ulint format_id, /*!< in: file format id */
1199
const char** name) /*!< out: max file format name or
1200
NULL if not needed. */
1204
ut_a(format_id <= DICT_TF_FORMAT_MAX);
1206
mutex_enter(&file_format_max.mutex);
1208
/* Only update if not already same value. */
1209
if (format_id != file_format_max.id) {
1211
ret = trx_sys_file_format_max_write(format_id, name);
1214
mutex_exit(&file_format_max.mutex);
1219
/********************************************************************//**
1220
Tags the system table space with minimum format id if it has not been
1222
WARNING: This function is only called during the startup and AFTER the
1223
redo log application during recovery has finished. */
1226
trx_sys_file_format_tag_init(void)
1227
/*==============================*/
1231
format_id = trx_sys_file_format_max_read();
1233
/* If format_id is not set then set it to the minimum. */
1234
if (format_id == ULINT_UNDEFINED) {
1235
trx_sys_file_format_max_set(DICT_TF_FORMAT_51, NULL);
1239
/********************************************************************//**
1240
Update the file format tag in the system tablespace only if the given
1241
format id is greater than the known max id.
1242
@return TRUE if format_id was bigger than the known max id */
1245
trx_sys_file_format_max_upgrade(
1246
/*============================*/
1247
const char** name, /*!< out: max file format name */
1248
ulint format_id) /*!< in: file format identifier */
1253
ut_a(file_format_max.name != NULL);
1254
ut_a(format_id <= DICT_TF_FORMAT_MAX);
1256
mutex_enter(&file_format_max.mutex);
1258
if (format_id > file_format_max.id) {
1260
ret = trx_sys_file_format_max_write(format_id, name);
1263
mutex_exit(&file_format_max.mutex);
1268
/*****************************************************************//**
1269
Get the name of the max file format known to InnoDB.
1270
@return pointer to the max format name */
1273
trx_sys_file_format_max_get(void)
1274
/*=============================*/
1276
return(file_format_max.name);
1279
/*****************************************************************//**
1280
Initializes the tablespace tag system. */
1283
trx_sys_file_format_init(void)
1284
/*==========================*/
1286
mutex_create(&file_format_max.mutex, SYNC_FILE_FORMAT_TAG);
1288
/* We don't need a mutex here, as this function should only
1289
be called once at start up. */
1290
file_format_max.id = DICT_TF_FORMAT_51;
1292
file_format_max.name = trx_sys_file_format_id_to_name(
1293
file_format_max.id);
1296
/*****************************************************************//**
1297
Closes the tablespace tag system. */
1300
trx_sys_file_format_close(void)
1301
/*===========================*/
1303
/* Does nothing at the moment */
1305
#else /* !UNIV_HOTBACKUP */
1306
/*****************************************************************//**
1307
Prints to stderr the MySQL binlog info in the system header if the
1308
magic number shows it valid. */
1311
trx_sys_print_mysql_binlog_offset_from_page(
1312
/*========================================*/
1313
const byte* page) /*!< in: buffer containing the trx
1314
system header page, i.e., page number
1315
TRX_SYS_PAGE_NO in the tablespace */
1317
const trx_sysf_t* sys_header;
1319
sys_header = page + TRX_SYS;
1321
if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
1322
+ TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
1323
== TRX_SYS_MYSQL_LOG_MAGIC_N) {
1326
"ibbackup: Last MySQL binlog file position %lu %lu,"
1328
(ulong) mach_read_from_4(
1329
sys_header + TRX_SYS_MYSQL_LOG_INFO
1330
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
1331
(ulong) mach_read_from_4(
1332
sys_header + TRX_SYS_MYSQL_LOG_INFO
1333
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW),
1334
sys_header + TRX_SYS_MYSQL_LOG_INFO
1335
+ TRX_SYS_MYSQL_LOG_NAME);
/* THESE ARE COPIED FROM NON-HOTBACKUP PART OF THE INNODB SOURCE TREE
   (This code duplication should be fixed at some point!)
*/

#define	TRX_SYS_SPACE	0	/* the SYSTEM tablespace */
/* The offset of the file format tag on the trx system header page */
#define TRX_SYS_FILE_FORMAT_TAG		(UNIV_PAGE_SIZE - 16)
/* We use these random constants to reduce the probability of reading
garbage (from previous versions) that maps to an actual format id. We
use these as bit masks at the time of reading and writing from/to disk. */
#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW	3645922177UL
#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH	2745987765UL

/* END OF COPIED DEFINITIONS */
1356
/*****************************************************************//**
1357
Reads the file format id from the first system table space file.
1358
Even if the call succeeds and returns TRUE, the returned format id
1359
may be ULINT_UNDEFINED signalling that the format id was not present
1361
@return TRUE if call succeeds */
1364
trx_sys_read_file_format_id(
1365
/*========================*/
1366
const char *pathname, /*!< in: pathname of the first system
1368
ulint *format_id) /*!< out: file format of the system table
1373
byte buf[UNIV_PAGE_SIZE * 2];
1374
page_t* page = ut_align(buf, UNIV_PAGE_SIZE);
1376
dulint file_format_id;
1378
*format_id = ULINT_UNDEFINED;
1380
file = os_file_create_simple_no_error_handling(
1387
/* The following call prints an error message */
1388
os_file_get_last_error(TRUE);
1390
ut_print_timestamp(stderr);
1393
" ibbackup: Error: trying to read system tablespace file format,\n"
1394
" ibbackup: but could not open the tablespace file %s!\n",
1400
/* Read the page on which file format is stored */
1402
success = os_file_read_no_error_handling(
1403
file, page, TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE, 0, UNIV_PAGE_SIZE
1406
/* The following call prints an error message */
1407
os_file_get_last_error(TRUE);
1409
ut_print_timestamp(stderr);
1412
" ibbackup: Error: trying to read system table space file format,\n"
1413
" ibbackup: but failed to read the tablespace file %s!\n",
1416
os_file_close(file);
1419
os_file_close(file);
1421
/* get the file format from the page */
1422
ptr = page + TRX_SYS_FILE_FORMAT_TAG;
1423
file_format_id = mach_read_from_8(ptr);
1425
*format_id = file_format_id.low - TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW;
1427
if (file_format_id.high != TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH
1428
|| *format_id >= FILE_FORMAT_NAME_N) {
1430
/* Either it has never been tagged, or garbage in it. */
1431
*format_id = ULINT_UNDEFINED;
1439
/*****************************************************************//**
1440
Reads the file format id from the given per-table data file.
1441
@return TRUE if call succeeds */
1444
trx_sys_read_pertable_file_format_id(
1445
/*=================================*/
1446
const char *pathname, /*!< in: pathname of a per-table
1448
ulint *format_id) /*!< out: file format of the per-table
1453
byte buf[UNIV_PAGE_SIZE * 2];
1454
page_t* page = ut_align(buf, UNIV_PAGE_SIZE);
1458
*format_id = ULINT_UNDEFINED;
1460
file = os_file_create_simple_no_error_handling(
1467
/* The following call prints an error message */
1468
os_file_get_last_error(TRUE);
1470
ut_print_timestamp(stderr);
1473
" ibbackup: Error: trying to read per-table tablespace format,\n"
1474
" ibbackup: but could not open the tablespace file %s!\n",
1480
/* Read the first page of the per-table datafile */
1482
success = os_file_read_no_error_handling(
1483
file, page, 0, 0, UNIV_PAGE_SIZE
1486
/* The following call prints an error message */
1487
os_file_get_last_error(TRUE);
1489
ut_print_timestamp(stderr);
1492
" ibbackup: Error: trying to per-table data file format,\n"
1493
" ibbackup: but failed to read the tablespace file %s!\n",
1496
os_file_close(file);
1499
os_file_close(file);
1501
/* get the file format from the page */
1503
flags = mach_read_from_4(ptr);
1505
/* file format is Antelope */
1508
} else if (flags & 1) {
1509
/* tablespace flags are ok */
1510
*format_id = (flags / 32) % 128;
1513
/* bad tablespace flags */
1519
/*****************************************************************//**
1520
Get the name representation of the file format from its id.
1521
@return pointer to the name */
1524
trx_sys_file_format_id_to_name(
1525
/*===========================*/
1526
const ulint id) /*!< in: id of the file format */
1528
if (!(id < FILE_FORMAT_NAME_N)) {
1533
return(file_format_name_map[id]);
1536
#endif /* !UNIV_HOTBACKUP */
1538
/*********************************************************************
1539
Shutdown/Close the transaction system. */
1548
ut_ad(trx_sys != NULL);
1550
/* Check that all read views are closed except read view owned
1553
if (UT_LIST_GET_LEN(trx_sys->view_list) > 1) {
1555
"InnoDB: Error: all read views were not closed"
1556
" before shutdown:\n"
1557
"InnoDB: %lu read views open \n",
1558
UT_LIST_GET_LEN(trx_sys->view_list) - 1);
1561
sess_close(trx_dummy_sess);
1562
trx_dummy_sess = NULL;
1564
trx_purge_sys_close();
1566
mutex_enter(&kernel_mutex);
1568
/* Free the double write data structures. */
1569
ut_a(trx_doublewrite != NULL);
1570
ut_free(trx_doublewrite->write_buf_unaligned);
1571
trx_doublewrite->write_buf_unaligned = NULL;
1573
mem_free(trx_doublewrite->buf_block_arr);
1574
trx_doublewrite->buf_block_arr = NULL;
1576
mutex_free(&trx_doublewrite->mutex);
1577
mem_free(trx_doublewrite);
1578
trx_doublewrite = NULL;
1580
/* There can't be any active transactions. */
1581
rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
1583
while (rseg != NULL) {
1584
trx_rseg_t* prev_rseg = rseg;
1586
rseg = UT_LIST_GET_NEXT(rseg_list, prev_rseg);
1587
UT_LIST_REMOVE(rseg_list, trx_sys->rseg_list, prev_rseg);
1589
trx_rseg_mem_free(prev_rseg);
1592
view = UT_LIST_GET_FIRST(trx_sys->view_list);
1594
while (view != NULL) {
1595
read_view_t* prev_view = view;
1597
view = UT_LIST_GET_NEXT(view_list, prev_view);
1599
/* Views are allocated from the trx_sys->global_read_view_heap.
1600
So, we simply remove the element here. */
1601
UT_LIST_REMOVE(view_list, trx_sys->view_list, prev_view);
1604
ut_a(UT_LIST_GET_LEN(trx_sys->trx_list) == 0);
1605
ut_a(UT_LIST_GET_LEN(trx_sys->rseg_list) == 0);
1606
ut_a(UT_LIST_GET_LEN(trx_sys->view_list) == 0);
1607
ut_a(UT_LIST_GET_LEN(trx_sys->mysql_trx_list) == 0);
1612
mutex_exit(&kernel_mutex);