1
/******************************************************
2
The tablespace memory cache
6
Created 10/25/1995 Heikki Tuuri
7
*******************************************************/
12
#include "sync0sync.h"
13
#include "hash0hash.h"
16
#include "mach0data.h"
17
#include "ibuf0ibuf.h"
25
#include "srv0start.h"
28
#include "dict0dict.h"
32
IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
33
=============================================
35
The tablespace cache is responsible for providing fast read/write access to
36
tablespaces and logs of the database. File creation and deletion are done
37
in other modules which know more of the logic of the operation, however.
39
A tablespace consists of a chain of files. The size of the files does not
40
have to be divisible by the database block size, because we may just leave
41
the last incomplete block unused. When a new file is appended to the
42
tablespace, the maximum size of the file is also specified. At the moment,
43
we think that it is best to extend the file to its maximum size already at
44
the creation of the file, because then we can avoid dynamically extending
45
the file when more space is needed for the tablespace.
47
A block's position in the tablespace is specified with a 32-bit unsigned
48
integer. The files in the chain are thought to be catenated, and the block
49
corresponding to an address n is the nth block in the catenated file (where
50
the first block is named the 0th block, and the incomplete block fragments
51
at the end of files are not taken into account). A tablespace can be extended
52
by appending a new file at the end of the chain.
54
Our tablespace concept is similar to the one of Oracle.
56
To acquire more speed in disk transfers, a technique called disk striping is
57
sometimes used. This means that logical block addresses are divided in a
58
round-robin fashion across several disks. Windows NT supports disk striping,
59
so there we do not need to support it in the database. Disk striping is
60
implemented in hardware in RAID disks. We conclude that it is not necessary
61
to implement it in the database. Oracle 7 does not support disk striping,
64
Another trick used at some database sites is replacing tablespace files by
65
raw disks, that is, the whole physical disk drive, or a partition of it, is
66
opened as a single file, and it is accessed through byte offsets calculated
67
from the start of the disk or the partition. This is recommended in some
68
books on database tuning to achieve more speed in i/o. Using raw disk
69
certainly prevents the OS from fragmenting disk space, but it is not clear
70
if it really adds speed. We measured on the Pentium 100 MHz + NT + NTFS file
71
system + EIDE Conner disk only a negligible difference in speed when reading
72
from a file, versus reading from a raw disk.
74
To have fast access to a tablespace or a log file, we put the data structures
75
in a hash table. Each tablespace and log file is given a unique 32-bit
78
Some operating systems do not support many open files at the same time,
79
though NT seems to tolerate at least 900 open files. Therefore, we put the
80
open files in an LRU-list. If we need to open another file, we may close the
81
file at the end of the LRU-list. When an i/o-operation is pending on a file,
82
the file cannot be closed. We take the file nodes with pending i/o-operations
83
out of the LRU-list and keep a count of pending operations. When an operation
84
completes, we decrement the count and return the file node to the LRU-list if
85
the count drops to zero. */
87
/* When mysqld is run, the default directory "." is the mysqld datadir,
88
but in the MySQL Embedded Server Library and ibbackup it is not the default
89
directory, and we must set the base file path explicitly */
90
const char* fil_path_to_mysql_datadir = ".";
92
/* The number of fsyncs done to the log */
93
ulint fil_n_log_flushes = 0;
95
ulint fil_n_pending_log_flushes = 0;
96
ulint fil_n_pending_tablespace_flushes = 0;
98
/* Null file address */
99
fil_addr_t fil_addr_null = {FIL_NULL, 0};
101
/* File node of a tablespace or the log data space */
102
struct fil_node_struct {
103
fil_space_t* space; /* backpointer to the space where this node
105
char* name; /* path to the file */
106
ibool open; /* TRUE if file open */
107
os_file_t handle; /* OS handle to the file, if file open */
108
ibool is_raw_disk;/* TRUE if the 'file' is actually a raw
109
device or a raw disk partition */
110
ulint size; /* size of the file in database pages, 0 if
111
not known yet; the possible last incomplete
112
megabyte may be ignored if space == 0 */
114
/* count of pending i/o's on this file;
115
closing of the file is not allowed if
117
ulint n_pending_flushes;
118
/* count of pending flushes on this file;
119
closing of the file is not allowed if
121
ib_longlong modification_counter;/* when we write to the file we
122
increment this by one */
123
ib_longlong flush_counter;/* up to what modification_counter value
124
we have flushed the modifications to disk */
125
UT_LIST_NODE_T(fil_node_t) chain;
126
/* link field for the file chain */
127
UT_LIST_NODE_T(fil_node_t) LRU;
128
/* link field for the LRU list */
132
#define FIL_NODE_MAGIC_N 89389
134
/* Tablespace or log data space: let us call them by a common name space */
135
struct fil_space_struct {
136
char* name; /* space name = the path to the first file in
138
ulint id; /* space id */
139
ib_longlong tablespace_version;
140
/* in DISCARD/IMPORT this timestamp is used to
141
check if we should ignore an insert buffer
142
merge request for a page because it actually
143
was for the previous incarnation of the
145
ibool mark; /* this is set to TRUE at database startup if
146
the space corresponds to a table in the InnoDB
147
data dictionary; so we can print a warning of
148
orphaned tablespaces */
149
ibool stop_ios;/* TRUE if we want to rename the .ibd file of
150
tablespace and want to stop temporarily
151
posting of new i/o requests on the file */
152
ibool stop_ibuf_merges;
153
/* we set this TRUE when we start deleting a
154
single-table tablespace */
155
ibool is_being_deleted;
156
/* this is set to TRUE when we start
157
deleting a single-table tablespace and its
158
file; when this flag is set no further i/o
159
or flush requests can be placed on this space,
160
though there may be such requests still being
161
processed on this space */
162
ulint purpose;/* FIL_TABLESPACE, FIL_LOG, or FIL_ARCH_LOG */
163
UT_LIST_BASE_NODE_T(fil_node_t) chain;
164
/* base node for the file chain */
165
ulint size; /* space size in pages; 0 if a single-table
166
tablespace whose size we do not know yet;
167
last incomplete megabytes in data files may be
168
ignored if space == 0 */
169
ulint n_reserved_extents;
170
/* number of reserved free extents for
171
ongoing operations like B-tree page split */
172
ulint n_pending_flushes; /* this is > 0 when flushing
173
the tablespace to disk; dropping of the
174
tablespace is forbidden if this is > 0 */
175
ulint n_pending_ibuf_merges;/* this is > 0 when merging
176
insert buffer entries to a page so that we
177
may need to access the ibuf bitmap page in the
178
tablespace: dropping of the tablespace is
179
forbidden if this is > 0 */
180
hash_node_t hash; /* hash chain node */
181
hash_node_t name_hash;/* hash chain the name_hash table */
182
rw_lock_t latch; /* latch protecting the file space storage
184
UT_LIST_NODE_T(fil_space_t) unflushed_spaces;
185
/* list of spaces with at least one unflushed
186
file we have written to */
187
ibool is_in_unflushed_spaces; /* TRUE if this space is
188
currently in the list above */
189
UT_LIST_NODE_T(fil_space_t) space_list;
190
/* list of all spaces */
191
ibuf_data_t* ibuf_data;
192
/* insert buffer data */
196
#define FIL_SPACE_MAGIC_N 89472
198
/* The tablespace memory cache; also the totality of logs = the log data space,
199
is stored here; below we talk about tablespaces, but also the ib_logfiles
200
form a 'space' and it is handled here */
202
typedef struct fil_system_struct fil_system_t;
203
struct fil_system_struct {
204
mutex_t mutex; /* The mutex protecting the cache */
205
hash_table_t* spaces; /* The hash table of spaces in the
206
system; they are hashed on the space
208
hash_table_t* name_hash; /* hash table based on the space
210
UT_LIST_BASE_NODE_T(fil_node_t) LRU;
211
/* base node for the LRU list of the
212
most recently used open files with no
213
pending i/o's; if we start an i/o on
214
the file, we first remove it from this
215
list, and return it to the start of
216
the list when the i/o ends;
217
log files and the system tablespace are
218
not put to this list: they are opened
219
after the startup, and kept open until
221
UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces;
222
/* base node for the list of those
223
tablespaces whose files contain
224
unflushed writes; those spaces have
225
at least one file node where
226
modification_counter > flush_counter */
227
ulint n_open; /* number of files currently open */
228
ulint max_n_open; /* n_open is not allowed to exceed
230
ib_longlong modification_counter;/* when we write to a file we
231
increment this by one */
232
ulint max_assigned_id;/* maximum space id in the existing
233
tables, or assigned during the time
234
mysqld has been up; at an InnoDB
235
startup we scan the data dictionary
236
and set here the maximum of the
237
space id's of the tables there */
238
ib_longlong tablespace_version;
239
/* a counter which is incremented for
240
every space object memory creation;
241
every space mem object gets a
242
'timestamp' from this; in DISCARD/
243
IMPORT this is used to check if we
244
should ignore an insert buffer merge
246
UT_LIST_BASE_NODE_T(fil_space_t) space_list;
247
/* list of all file spaces */
250
/* The tablespace memory cache. This variable is NULL before the module is
252
fil_system_t* fil_system = NULL;
255
/************************************************************************
256
NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
258
Prepares a file node for i/o. Opens the file if it is closed. Updates the
259
pending i/o's field in the node and the system appropriately. Takes the node
260
off the LRU list if it is in the LRU list. The caller must hold the fil_sys
264
fil_node_prepare_for_io(
265
/*====================*/
266
fil_node_t* node, /* in: file node */
267
fil_system_t* system, /* in: tablespace memory cache */
268
fil_space_t* space); /* in: space */
269
/************************************************************************
270
Updates the data structures when an i/o operation finishes. Updates the
271
pending i/o's field in the node appropriately. */
274
fil_node_complete_io(
275
/*=================*/
276
fil_node_t* node, /* in: file node */
277
fil_system_t* system, /* in: tablespace memory cache */
278
ulint type); /* in: OS_FILE_WRITE or OS_FILE_READ; marks
279
the node as modified if
280
type == OS_FILE_WRITE */
281
/***********************************************************************
282
Checks if a single-table tablespace for a given table name exists in the
283
tablespace memory cache. */
286
fil_get_space_id_for_table(
287
/*=======================*/
288
/* out: space id, ULINT_UNDEFINED if not
290
const char* name); /* in: table name in the standard
291
'databasename/tablename' format */
294
/***********************************************************************
295
Returns the version number of a tablespace, -1 if not found. */
298
fil_space_get_version(
299
/*==================*/
300
/* out: version number, -1 if the tablespace does not
301
exist in the memory cache */
302
ulint id) /* in: space id */
304
fil_system_t* system = fil_system;
306
ib_longlong version = -1;
310
mutex_enter(&(system->mutex));
312
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
315
version = space->tablespace_version;
318
mutex_exit(&(system->mutex));
323
/***********************************************************************
324
Returns the latch of a file space. */
329
/* out: latch protecting storage allocation */
330
ulint id) /* in: space id */
332
fil_system_t* system = fil_system;
337
mutex_enter(&(system->mutex));
339
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
343
mutex_exit(&(system->mutex));
345
return(&(space->latch));
348
/***********************************************************************
349
Returns the type of a file space. */
354
/* out: FIL_TABLESPACE or FIL_LOG */
355
ulint id) /* in: space id */
357
fil_system_t* system = fil_system;
362
mutex_enter(&(system->mutex));
364
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
368
mutex_exit(&(system->mutex));
370
return(space->purpose);
373
/***********************************************************************
374
Returns the ibuf data of a file space. */
377
fil_space_get_ibuf_data(
378
/*====================*/
379
/* out: ibuf data for this space */
380
ulint id) /* in: space id */
382
fil_system_t* system = fil_system;
389
mutex_enter(&(system->mutex));
391
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
393
mutex_exit(&(system->mutex));
397
return(space->ibuf_data);
400
/**************************************************************************
401
Checks if all the file nodes in a space are flushed. The caller must hold
402
the fil_system mutex. */
405
fil_space_is_flushed(
406
/*=================*/
407
/* out: TRUE if all are flushed */
408
fil_space_t* space) /* in: space */
412
ut_ad(mutex_own(&(fil_system->mutex)));
414
node = UT_LIST_GET_FIRST(space->chain);
417
if (node->modification_counter > node->flush_counter) {
422
node = UT_LIST_GET_NEXT(chain, node);
428
/***********************************************************************
429
Appends a new file to the chain of files of a space. File must be closed. */
434
const char* name, /* in: file name (file must be closed) */
435
ulint size, /* in: file size in database blocks, rounded
436
downwards to an integer */
437
ulint id, /* in: space id where to append */
438
ibool is_raw) /* in: TRUE if a raw device or
439
a raw disk partition */
441
fil_system_t* system = fil_system;
448
mutex_enter(&(system->mutex));
450
node = mem_alloc(sizeof(fil_node_t));
452
node->name = mem_strdup(name);
455
ut_a(!is_raw || srv_start_raw_disk_in_use);
457
node->is_raw_disk = is_raw;
459
node->magic_n = FIL_NODE_MAGIC_N;
461
node->n_pending_flushes = 0;
463
node->modification_counter = 0;
464
node->flush_counter = 0;
466
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
469
ut_print_timestamp(stderr);
471
" InnoDB: Error: Could not find tablespace %lu for\n"
472
"InnoDB: file ", (ulong) id);
473
ut_print_filename(stderr, name);
474
fputs(" in the tablespace memory cache.\n", stderr);
475
mem_free(node->name);
479
mutex_exit(&(system->mutex));
488
UT_LIST_ADD_LAST(chain, space->chain, node);
490
mutex_exit(&(system->mutex));
493
/************************************************************************
494
Opens the file of a node of a tablespace. The caller must own the fil_system
500
fil_node_t* node, /* in: file node */
501
fil_system_t* system, /* in: tablespace memory cache */
502
fil_space_t* space) /* in: space */
504
ib_longlong size_bytes;
509
#ifndef UNIV_HOTBACKUP
513
#endif /* !UNIV_HOTBACKUP */
515
ut_ad(mutex_own(&(system->mutex)));
516
ut_a(node->n_pending == 0);
517
ut_a(node->open == FALSE);
519
if (node->size == 0) {
520
/* It must be a single-table tablespace and we do not know the
521
size of the file yet. First we open the file in the normal
522
mode, no async I/O here, for simplicity. Then do some checks,
523
and close the file again.
524
NOTE that we could not use the simple file read function
525
os_file_read() in Windows to read from a file opened for
528
node->handle = os_file_create_simple_no_error_handling(
529
node->name, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
531
/* The following call prints an error message */
532
os_file_get_last_error(TRUE);
534
ut_print_timestamp(stderr);
537
" InnoDB: Fatal error: cannot open %s\n."
538
"InnoDB: Have you deleted .ibd files"
539
" under a running mysqld server?\n",
544
os_file_get_size(node->handle, &size_low, &size_high);
546
size_bytes = (((ib_longlong)size_high) << 32)
547
+ (ib_longlong)size_low;
548
#ifdef UNIV_HOTBACKUP
549
node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
552
ut_a(space->purpose != FIL_LOG);
553
ut_a(space->id != 0);
555
if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
557
"InnoDB: Error: the size of single-table"
558
" tablespace file %s\n"
559
"InnoDB: is only %lu %lu,"
560
" should be at least %lu!\n",
564
(ulong) (FIL_IBD_FILE_INITIAL_SIZE
570
/* Read the first page of the tablespace */
572
buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
573
/* Align the memory for file i/o if we might have O_DIRECT
575
page = ut_align(buf2, UNIV_PAGE_SIZE);
577
success = os_file_read(node->handle, page, 0, 0,
579
space_id = fsp_header_get_space_id(page);
583
/* Close the file now that we have read the space id from it */
585
os_file_close(node->handle);
587
if (space_id == ULINT_UNDEFINED || space_id == 0) {
589
"InnoDB: Error: tablespace id %lu"
590
" in file %s is not sensible\n",
591
(ulong) space_id, node->name);
596
if (space_id != space->id) {
598
"InnoDB: Error: tablespace id is %lu"
599
" in the data dictionary\n"
600
"InnoDB: but in file %s it is %lu!\n",
601
space->id, node->name, space_id);
606
if (size_bytes >= FSP_EXTENT_SIZE * UNIV_PAGE_SIZE) {
608
((size_bytes / (1024 * 1024))
609
* ((1024 * 1024) / UNIV_PAGE_SIZE));
611
node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
614
space->size += node->size;
617
/* printf("Opening file %s\n", node->name); */
619
/* Open the file for reading and writing, in Windows normally in the
620
unbuffered async I/O mode, though global variables may make
621
os_file_create() to fall back to the normal file I/O mode. */
623
if (space->purpose == FIL_LOG) {
624
node->handle = os_file_create(node->name, OS_FILE_OPEN,
625
OS_FILE_AIO, OS_LOG_FILE, &ret);
626
} else if (node->is_raw_disk) {
627
node->handle = os_file_create(node->name,
629
OS_FILE_AIO, OS_DATA_FILE, &ret);
631
node->handle = os_file_create(node->name, OS_FILE_OPEN,
632
OS_FILE_AIO, OS_DATA_FILE, &ret);
641
if (space->purpose == FIL_TABLESPACE && space->id != 0) {
642
/* Put the node to the LRU list */
643
UT_LIST_ADD_FIRST(LRU, system->LRU, node);
647
/**************************************************************************
653
fil_node_t* node, /* in: file node */
654
fil_system_t* system) /* in: tablespace memory cache */
658
ut_ad(node && system);
659
ut_ad(mutex_own(&(system->mutex)));
661
ut_a(node->n_pending == 0);
662
ut_a(node->n_pending_flushes == 0);
663
ut_a(node->modification_counter == node->flush_counter);
665
ret = os_file_close(node->handle);
668
/* printf("Closing file %s\n", node->name); */
671
ut_a(system->n_open > 0);
674
if (node->space->purpose == FIL_TABLESPACE && node->space->id != 0) {
675
ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
677
/* The node is in the LRU list, remove it */
678
UT_LIST_REMOVE(LRU, system->LRU, node);
682
/************************************************************************
683
Tries to close a file in the LRU list. The caller must hold the fil_sys
687
fil_try_to_close_file_in_LRU(
688
/*=========================*/
689
/* out: TRUE if success, FALSE if should retry
690
later; since i/o's generally complete in <
691
100 ms, and as InnoDB writes at most 128 pages
692
from the buffer pool in a batch, and then
693
immediately flushes the files, there is a good
694
chance that the next time we find a suitable
695
node from the LRU list */
696
ibool print_info) /* in: if TRUE, prints information why it
697
cannot close a file */
699
fil_system_t* system = fil_system;
702
ut_ad(mutex_own(&(system->mutex)));
704
node = UT_LIST_GET_LAST(system->LRU);
708
"InnoDB: fil_sys open file LRU len %lu\n",
709
(ulong) UT_LIST_GET_LEN(system->LRU));
712
while (node != NULL) {
713
if (node->modification_counter == node->flush_counter
714
&& node->n_pending_flushes == 0) {
716
fil_node_close_file(node, system);
721
if (print_info && node->n_pending_flushes > 0) {
722
fputs("InnoDB: cannot close file ", stderr);
723
ut_print_filename(stderr, node->name);
724
fprintf(stderr, ", because n_pending_flushes %lu\n",
725
(ulong) node->n_pending_flushes);
729
&& node->modification_counter != node->flush_counter) {
730
fputs("InnoDB: cannot close file ", stderr);
731
ut_print_filename(stderr, node->name);
733
", because mod_count %ld != fl_count %ld\n",
734
(long) node->modification_counter,
735
(long) node->flush_counter);
738
node = UT_LIST_GET_PREV(LRU, node);
744
/***********************************************************************
745
Reserves the fil_system mutex and tries to make sure we can open at least one
746
file while holding it. This should be called before calling
747
fil_node_prepare_for_io(), because that function may need to open a file. */
750
fil_mutex_enter_and_prepare_for_io(
751
/*===============================*/
752
ulint space_id) /* in: space id */
754
fil_system_t* system = fil_system;
757
ibool print_info = FALSE;
761
ut_ad(!mutex_own(&(system->mutex)));
763
mutex_enter(&(system->mutex));
765
if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
766
/* We keep log files and system tablespace files always open;
767
this is important in preventing deadlocks in this module, as
768
a page read completion often performs another read from the
769
insert buffer. The insert buffer is in tablespace 0, and we
770
cannot end up waiting in this function. */
775
if (system->n_open < system->max_n_open) {
780
HASH_SEARCH(hash, system->spaces, space_id, space,
781
space->id == space_id);
782
if (space != NULL && space->stop_ios) {
783
/* We are going to do a rename file and want to stop new i/o's
786
if (count2 > 20000) {
787
fputs("InnoDB: Warning: tablespace ", stderr);
788
ut_print_filename(stderr, space->name);
790
" has i/o ops stopped for a long time %lu\n",
794
mutex_exit(&(system->mutex));
796
os_thread_sleep(20000);
803
/* If the file is already open, no need to do anything; if the space
804
does not exist, we handle the situation in the function which called
807
if (!space || UT_LIST_GET_FIRST(space->chain)->open) {
816
/* Too many files are open, try to close some */
818
success = fil_try_to_close_file_in_LRU(print_info);
820
if (success && system->n_open >= system->max_n_open) {
825
if (system->n_open < system->max_n_open) {
832
ut_print_timestamp(stderr);
834
" InnoDB: Warning: too many (%lu) files stay open"
835
" while the maximum\n"
836
"InnoDB: allowed value would be %lu.\n"
837
"InnoDB: You may need to raise the value of"
838
" innodb_max_files_open in\n"
840
(ulong) system->n_open, (ulong) system->max_n_open);
845
mutex_exit(&(system->mutex));
847
#ifndef UNIV_HOTBACKUP
848
/* Wake the i/o-handler threads to make sure pending i/o's are
850
os_aio_simulated_wake_handler_threads();
852
os_thread_sleep(20000);
854
/* Flush tablespaces so that we can close modified files in the LRU
857
fil_flush_file_spaces(FIL_TABLESPACE);
864
/***********************************************************************
865
Frees a file node object from a tablespace memory cache. */
870
fil_node_t* node, /* in, own: file node */
871
fil_system_t* system, /* in: tablespace memory cache */
872
fil_space_t* space) /* in: space where the file node is chained */
874
ut_ad(node && system && space);
875
ut_ad(mutex_own(&(system->mutex)));
876
ut_a(node->magic_n == FIL_NODE_MAGIC_N);
877
ut_a(node->n_pending == 0);
880
/* We fool the assertion in fil_node_close_file() to think
881
there are no unflushed modifications in the file */
883
node->modification_counter = node->flush_counter;
885
if (space->is_in_unflushed_spaces
886
&& fil_space_is_flushed(space)) {
888
space->is_in_unflushed_spaces = FALSE;
890
UT_LIST_REMOVE(unflushed_spaces,
891
system->unflushed_spaces,
895
fil_node_close_file(node, system);
898
space->size -= node->size;
900
UT_LIST_REMOVE(chain, space->chain, node);
902
mem_free(node->name);
906
/********************************************************************
907
Drops files from the start of a file space, so that its size is cut by
911
fil_space_truncate_start(
912
/*=====================*/
913
ulint id, /* in: space id */
914
ulint trunc_len) /* in: truncate by this much; it is an error
915
if this does not equal the combined size of
916
some initial files in the space */
918
fil_system_t* system = fil_system;
922
mutex_enter(&(system->mutex));
924
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
928
while (trunc_len > 0) {
929
node = UT_LIST_GET_FIRST(space->chain);
931
ut_a(node->size * UNIV_PAGE_SIZE >= trunc_len);
933
trunc_len -= node->size * UNIV_PAGE_SIZE;
935
fil_node_free(node, system, space);
938
mutex_exit(&(system->mutex));
941
/***********************************************************************
942
Creates a space memory object and puts it to the tablespace memory cache. If
943
there is an error, prints an error message to the .err log. */
948
/* out: TRUE if success */
949
const char* name, /* in: space name */
950
ulint id, /* in: space id */
951
ulint purpose)/* in: FIL_TABLESPACE, or FIL_LOG if log */
953
fil_system_t* system = fil_system;
958
"InnoDB: Adding tablespace %lu of name %s, purpose %lu\n", id, name,
964
mutex_enter(&(system->mutex));
966
HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(name), space,
967
0 == strcmp(name, space->name));
969
ut_print_timestamp(stderr);
971
" InnoDB: Warning: trying to init to the"
972
" tablespace memory cache\n"
973
"InnoDB: a tablespace %lu of name ", (ulong) id);
974
ut_print_filename(stderr, name);
975
fprintf(stderr, ",\n"
976
"InnoDB: but a tablespace %lu of the same name\n"
977
"InnoDB: already exists in the"
978
" tablespace memory cache!\n",
981
if (id == 0 || purpose != FIL_TABLESPACE) {
983
mutex_exit(&(system->mutex));
989
"InnoDB: We assume that InnoDB did a crash recovery,"
991
"InnoDB: an .ibd file for which the table"
992
" did not exist in the\n"
993
"InnoDB: InnoDB internal data dictionary in the"
995
"InnoDB: We assume that you later removed the"
996
" .ibd and .frm files,\n"
997
"InnoDB: and are now trying to recreate the table."
998
" We now remove the\n"
999
"InnoDB: conflicting tablespace object"
1000
" from the memory cache and try\n"
1001
"InnoDB: the init again.\n");
1003
namesake_id = space->id;
1005
mutex_exit(&(system->mutex));
1007
fil_space_free(namesake_id);
1012
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
1014
if (space != NULL) {
1016
"InnoDB: Error: trying to add tablespace %lu"
1017
" of name ", (ulong) id);
1018
ut_print_filename(stderr, name);
1019
fprintf(stderr, "\n"
1020
"InnoDB: to the tablespace memory cache,"
1022
"InnoDB: %lu of name ", (ulong) space->id);
1023
ut_print_filename(stderr, space->name);
1024
fputs(" already exists in the tablespace\n"
1025
"InnoDB: memory cache!\n", stderr);
1027
mutex_exit(&(system->mutex));
1032
space = mem_alloc(sizeof(fil_space_t));
1034
space->name = mem_strdup(name);
1037
system->tablespace_version++;
1038
space->tablespace_version = system->tablespace_version;
1039
space->mark = FALSE;
1041
if (purpose == FIL_TABLESPACE && id > system->max_assigned_id) {
1042
system->max_assigned_id = id;
1045
space->stop_ios = FALSE;
1046
space->stop_ibuf_merges = FALSE;
1047
space->is_being_deleted = FALSE;
1048
space->purpose = purpose;
1051
space->n_reserved_extents = 0;
1053
space->n_pending_flushes = 0;
1054
space->n_pending_ibuf_merges = 0;
1056
UT_LIST_INIT(space->chain);
1057
space->magic_n = FIL_SPACE_MAGIC_N;
1059
space->ibuf_data = NULL;
1061
rw_lock_create(&space->latch, SYNC_FSP);
1063
HASH_INSERT(fil_space_t, hash, system->spaces, id, space);
1065
HASH_INSERT(fil_space_t, name_hash, system->name_hash,
1066
ut_fold_string(name), space);
1067
space->is_in_unflushed_spaces = FALSE;
1069
UT_LIST_ADD_LAST(space_list, system->space_list, space);
1071
mutex_exit(&(system->mutex));
1076
/***********************************************************************
1077
Assigns a new space id for a new single-table tablespace. This works simply by
1078
incrementing the global counter. If 4 billion ids are not enough, we may need
1082
fil_assign_new_space_id(void)
1083
/*=========================*/
1084
/* out: new tablespace id; ULINT_UNDEFINED if could
1087
fil_system_t* system = fil_system;
1090
mutex_enter(&(system->mutex));
1092
system->max_assigned_id++;
1094
id = system->max_assigned_id;
1096
if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) {
1097
ut_print_timestamp(stderr);
1099
"InnoDB: Warning: you are running out of new"
1100
" single-table tablespace id's.\n"
1101
"InnoDB: Current counter is %lu and it"
1102
" must not exceed %lu!\n"
1103
"InnoDB: To reset the counter to zero"
1104
" you have to dump all your tables and\n"
1105
"InnoDB: recreate the whole InnoDB installation.\n",
1107
(ulong) SRV_LOG_SPACE_FIRST_ID);
1110
if (id >= SRV_LOG_SPACE_FIRST_ID) {
1111
ut_print_timestamp(stderr);
1113
"InnoDB: You have run out of single-table"
1114
" tablespace id's!\n"
1115
"InnoDB: Current counter is %lu.\n"
1116
"InnoDB: To reset the counter to zero you"
1117
" have to dump all your tables and\n"
1118
"InnoDB: recreate the whole InnoDB installation.\n",
1120
system->max_assigned_id--;
1122
id = ULINT_UNDEFINED;
1125
mutex_exit(&(system->mutex));
1130
/***********************************************************************
1131
Frees a space object from the tablespace memory cache. Closes the files in
1132
the chain but does not delete them. There must not be any pending i/o's or
1133
flushes on the files. */
1138
/* out: TRUE if success */
1139
ulint id) /* in: space id */
1141
fil_system_t* system = fil_system;
1143
fil_space_t* namespace;
1144
fil_node_t* fil_node;
1146
mutex_enter(&(system->mutex));
1148
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
1151
ut_print_timestamp(stderr);
1153
" InnoDB: Error: trying to remove tablespace %lu"
1154
" from the cache but\n"
1155
"InnoDB: it is not there.\n", (ulong) id);
1157
mutex_exit(&(system->mutex));
1162
HASH_DELETE(fil_space_t, hash, system->spaces, id, space);
1164
HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(space->name),
1165
namespace, 0 == strcmp(space->name, namespace->name));
1167
ut_a(space == namespace);
1169
HASH_DELETE(fil_space_t, name_hash, system->name_hash,
1170
ut_fold_string(space->name), space);
1172
if (space->is_in_unflushed_spaces) {
1173
space->is_in_unflushed_spaces = FALSE;
1175
UT_LIST_REMOVE(unflushed_spaces, system->unflushed_spaces,
1179
UT_LIST_REMOVE(space_list, system->space_list, space);
1181
ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
1182
ut_a(0 == space->n_pending_flushes);
1184
fil_node = UT_LIST_GET_FIRST(space->chain);
1186
while (fil_node != NULL) {
1187
fil_node_free(fil_node, system, space);
1189
fil_node = UT_LIST_GET_FIRST(space->chain);
1192
ut_a(0 == UT_LIST_GET_LEN(space->chain));
1194
mutex_exit(&(system->mutex));
1196
rw_lock_free(&(space->latch));
1198
mem_free(space->name);
1204
#ifdef UNIV_HOTBACKUP
1205
/***********************************************************************
1206
Returns the tablespace object for a given id, or NULL if not found from the
1207
tablespace memory cache. */
1210
fil_get_space_for_id_low(
1211
/*=====================*/
1212
/* out: tablespace object or NULL; NOTE that you must
1213
own &(fil_system->mutex) to call this function! */
1214
ulint id) /* in: space id */
1216
fil_system_t* system = fil_system;
1221
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
1227
/***********************************************************************
1228
Returns the size of the space in pages. The tablespace must be cached in the
1234
/* out: space size, 0 if space not found */
1235
ulint id) /* in: space id */
1237
fil_system_t* system = fil_system;
1244
fil_mutex_enter_and_prepare_for_io(id);
1246
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
1248
if (space == NULL) {
1249
mutex_exit(&(system->mutex));
1254
if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
1257
ut_a(1 == UT_LIST_GET_LEN(space->chain));
1259
node = UT_LIST_GET_FIRST(space->chain);
1261
/* It must be a single-table tablespace and we have not opened
1262
the file yet; the following calls will open it and update the
1265
fil_node_prepare_for_io(node, system, space);
1266
fil_node_complete_io(node, system, OS_FILE_READ);
1271
mutex_exit(&(system->mutex));
1276
/***********************************************************************
1277
Checks if the pair space, page_no refers to an existing page in a tablespace
1278
file space. The tablespace must be cached in the memory cache. */
1281
fil_check_adress_in_tablespace(
1282
/*===========================*/
1283
/* out: TRUE if the address is meaningful */
1284
ulint id, /* in: space id */
1285
ulint page_no)/* in: page number */
1287
if (fil_space_get_size(id) > page_no) {
1295
/********************************************************************
1296
Creates the tablespace memory cache. */
1301
/* out, own: tablespace memory cache */
1302
ulint hash_size, /* in: hash table size */
1303
ulint max_n_open) /* in: maximum number of open files; must be
1306
fil_system_t* system;
1308
ut_a(hash_size > 0);
1309
ut_a(max_n_open > 0);
1311
system = mem_alloc(sizeof(fil_system_t));
1313
mutex_create(&system->mutex, SYNC_ANY_LATCH);
1315
system->spaces = hash_create(hash_size);
1316
system->name_hash = hash_create(hash_size);
1318
UT_LIST_INIT(system->LRU);
1321
system->max_n_open = max_n_open;
1323
system->modification_counter = 0;
1324
system->max_assigned_id = 0;
1326
system->tablespace_version = 0;
1328
UT_LIST_INIT(system->unflushed_spaces);
1329
UT_LIST_INIT(system->space_list);
1334
/********************************************************************
1335
Initializes the tablespace memory cache. */
1340
ulint max_n_open) /* in: max number of open files */
1344
ut_a(fil_system == NULL);
1346
if (srv_file_per_table) {
1352
fil_system = fil_system_create(hash_size, max_n_open);
1355
/***********************************************************************
1356
Opens all log files and system tablespace data files. They stay open until the
1357
database server shutdown. This should be called at a server startup after the
1358
space objects for the log and the system tablespace have been created. The
1359
purpose of this operation is to make sure we never run out of file descriptors
1360
if we need to read from the insert buffer or to write to the log.

The whole scan of system->space_list runs while holding the system mutex. */
1363
fil_open_log_and_system_tablespace_files(void)
1364
/*==========================================*/
1366
fil_system_t* system = fil_system;
1370
mutex_enter(&(system->mutex));
1372
space = UT_LIST_GET_FIRST(system->space_list);
1374
while (space != NULL) {
1375
/* Selects every space whose purpose is not FIL_TABLESPACE, and in
addition the space with id 0 */
if (space->purpose != FIL_TABLESPACE || space->id == 0) {
1376
node = UT_LIST_GET_FIRST(space->chain);
1378
while (node != NULL) {
1380
fil_node_open_file(node, system,
1383
/* Warn when fewer than 10 slots remain between the number of
open files and max_n_open */
if (system->max_n_open < 10 + system->n_open) {
1385
"InnoDB: Warning: you must"
1386
" raise the value of"
1387
" innodb_max_open_files in\n"
1388
"InnoDB: my.cnf! Remember that"
1389
" InnoDB keeps all log files"
1391
"InnoDB: tablespace files open"
1392
" for the whole time mysqld is"
1394
"InnoDB: needs to open also"
1395
" some .ibd files if the"
1396
" file-per-table storage\n"
1397
"InnoDB: model is used."
1398
" Current open files %lu,"
1400
" open files %lu.\n",
1401
(ulong) system->n_open,
1402
(ulong) system->max_n_open);
1404
node = UT_LIST_GET_NEXT(chain, node);
1407
space = UT_LIST_GET_NEXT(space_list, space);
1410
mutex_exit(&(system->mutex));
1413
/***********************************************************************
1414
Closes all open files. There must not be any pending i/o's or unflushed
1415
modifications in the files.

Walks every space on system->space_list and every node on each space's chain
while holding the system mutex, calling fil_node_close_file() on nodes. */
1418
fil_close_all_files(void)
1419
/*=====================*/
1421
fil_system_t* system = fil_system;
1425
mutex_enter(&(system->mutex));
1427
space = UT_LIST_GET_FIRST(system->space_list);
1429
/* Outer loop: spaces in the cache; inner loop: file nodes of one space */
while (space != NULL) {
1430
node = UT_LIST_GET_FIRST(space->chain);
1432
while (node != NULL) {
1434
fil_node_close_file(node, system);
1436
node = UT_LIST_GET_NEXT(chain, node);
1438
space = UT_LIST_GET_NEXT(space_list, space);
1441
mutex_exit(&(system->mutex));
1444
/***********************************************************************
1445
Sets the max tablespace id counter if the given number is bigger than the
1449
fil_set_max_space_id_if_bigger(
1450
/*===========================*/
1451
ulint max_id) /* in: maximum known id */
1453
fil_system_t* system = fil_system;
1455
if (max_id >= SRV_LOG_SPACE_FIRST_ID) {
1457
"InnoDB: Fatal error: max tablespace id"
1458
" is too high, %lu\n", (ulong) max_id);
1462
mutex_enter(&(system->mutex));
1464
if (system->max_assigned_id < max_id) {
1466
system->max_assigned_id = max_id;
1469
mutex_exit(&(system->mutex));
1472
/********************************************************************
1473
Initializes the ibuf data structure for space 0 == the system tablespace.
1474
This can be called after the file space headers have been created and the
1475
dictionary system has been initialized. */
1478
fil_ibuf_init_at_db_start(void)
1479
/*===========================*/
1483
space = UT_LIST_GET_FIRST(fil_system->space_list);
1486
ut_a(space->purpose == FIL_TABLESPACE);
1488
space->ibuf_data = ibuf_data_init_for_space(space->id);
1491
/********************************************************************
1492
Writes the flushed lsn and the latest archived log number to the page header
1493
of the first page of a data file.

NOTE(review): arch_log_no is marked unused and the code visible here stores
only the lsn (at FIL_PAGE_FILE_FLUSH_LSN); confirm whether the archived log
number is meant to be written at all. */
1496
fil_write_lsn_and_arch_no_to_file(
1497
/*==============================*/
1498
ulint space_id, /* in: space number */
1499
ulint sum_of_sizes, /* in: combined size of previous files in
1500
space, in database pages */
1501
dulint lsn, /* in: lsn to write */
1502
ulint arch_log_no /* in: archived log number to write */
1503
__attribute__((unused)))
1508
/* Two pages are allocated so that an address aligned to UNIV_PAGE_SIZE
with a full page behind it lies inside the allocation */
buf1 = mem_alloc(2 * UNIV_PAGE_SIZE);
1509
buf = ut_align(buf1, UNIV_PAGE_SIZE);
1511
/* Read-modify-write of one page: the page addressed by sum_of_sizes is
read, the flush lsn field is overwritten, and the page is written back
with the same arguments */
fil_read(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
1513
mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
1515
fil_write(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
1520
/********************************************************************
1521
Writes the flushed lsn and the latest archived log number to the page
1522
header of the first page of each data file in the system tablespace. */
1525
fil_write_flushed_lsn_to_data_files(
1526
/*================================*/
1527
/* out: DB_SUCCESS or error number */
1528
dulint lsn, /* in: lsn to write */
1529
ulint arch_log_no) /* in: latest archived log file number */
1536
mutex_enter(&(fil_system->mutex));
1538
space = UT_LIST_GET_FIRST(fil_system->space_list);
1541
/* We only write the lsn to all existing data files which have
1542
been open during the lifetime of the mysqld process; they are
1543
represented by the space objects in the tablespace memory
1544
cache. Note that all data files in the system tablespace 0 are
1547
if (space->purpose == FIL_TABLESPACE
1548
&& space->id == 0) {
1551
node = UT_LIST_GET_FIRST(space->chain);
1553
mutex_exit(&(fil_system->mutex));
1555
err = fil_write_lsn_and_arch_no_to_file(
1556
space->id, sum_of_sizes, lsn,
1558
if (err != DB_SUCCESS) {
1563
mutex_enter(&(fil_system->mutex));
1565
sum_of_sizes += node->size;
1566
node = UT_LIST_GET_NEXT(chain, node);
1569
space = UT_LIST_GET_NEXT(space_list, space);
1572
mutex_exit(&(fil_system->mutex));
1577
/***********************************************************************
1578
Reads the flushed lsn and arch no fields from a data file at database
1582
fil_read_flushed_lsn_and_arch_log_no(
1583
/*=================================*/
1584
os_file_t data_file, /* in: open data file */
1585
ibool one_read_already, /* in: TRUE if min and max parameters
1586
below already contain sensible data */
1587
#ifdef UNIV_LOG_ARCHIVE
1588
ulint* min_arch_log_no, /* in/out: */
1589
ulint* max_arch_log_no, /* in/out: */
1590
#endif /* UNIV_LOG_ARCHIVE */
1591
dulint* min_flushed_lsn, /* in/out: */
1592
dulint* max_flushed_lsn) /* in/out: */
1598
buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
1599
/* Align the memory for a possible read from a raw device */
1600
buf = ut_align(buf2, UNIV_PAGE_SIZE);
1602
os_file_read(data_file, buf, 0, 0, UNIV_PAGE_SIZE);
1604
flushed_lsn = mach_read_from_8(buf + FIL_PAGE_FILE_FLUSH_LSN);
1608
if (!one_read_already) {
1609
*min_flushed_lsn = flushed_lsn;
1610
*max_flushed_lsn = flushed_lsn;
1611
#ifdef UNIV_LOG_ARCHIVE
1612
*min_arch_log_no = arch_log_no;
1613
*max_arch_log_no = arch_log_no;
1614
#endif /* UNIV_LOG_ARCHIVE */
1618
if (ut_dulint_cmp(*min_flushed_lsn, flushed_lsn) > 0) {
1619
*min_flushed_lsn = flushed_lsn;
1621
if (ut_dulint_cmp(*max_flushed_lsn, flushed_lsn) < 0) {
1622
*max_flushed_lsn = flushed_lsn;
1624
#ifdef UNIV_LOG_ARCHIVE
1625
if (*min_arch_log_no > arch_log_no) {
1626
*min_arch_log_no = arch_log_no;
1628
if (*max_arch_log_no < arch_log_no) {
1629
*max_arch_log_no = arch_log_no;
1631
#endif /* UNIV_LOG_ARCHIVE */
1634
/*================ SINGLE-TABLE TABLESPACES ==========================*/
1636
/***********************************************************************
1637
Increments the count of pending insert buffer page merges, if space is not
1641
fil_inc_pending_ibuf_merges(
1642
/*========================*/
1643
/* out: TRUE if being deleted, and ibuf merges should
1645
ulint id) /* in: space id */
1647
fil_system_t* system = fil_system;
1650
mutex_enter(&(system->mutex));
1652
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
1654
if (space == NULL) {
1656
"InnoDB: Error: trying to do ibuf merge to a"
1657
" dropped tablespace %lu\n",
1661
if (space == NULL || space->stop_ibuf_merges) {
1662
mutex_exit(&(system->mutex));
1667
space->n_pending_ibuf_merges++;
1669
mutex_exit(&(system->mutex));
1674
/***********************************************************************
1675
Decrements the count of pending insert buffer page merges.

The space is looked up by id under the system mutex; the counter is only
decremented when the space is found. */
1678
fil_decr_pending_ibuf_merges(
1679
/*=========================*/
1680
ulint id) /* in: space id */
1682
fil_system_t* system = fil_system;
1685
mutex_enter(&(system->mutex));
1687
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
1689
/* A space that is no longer in the cache is reported with the message
below */
if (space == NULL) {
1691
"InnoDB: Error: decrementing ibuf merge of a"
1692
" dropped tablespace %lu\n",
1696
/* Guarded separately so that the not-found case does not dereference
a NULL space */
if (space != NULL) {
1697
space->n_pending_ibuf_merges--;
1700
mutex_exit(&(system->mutex));
1703
/************************************************************
1704
Creates the database directory for a table if it does not exist yet.

The directory path is built as fil_path_to_mysql_datadir, one separator
byte, and the part of name that precedes the first '/'. */
1707
fil_create_directory_for_tablename(
1708
/*===============================*/
1709
const char* name) /* in: name in the standard
1710
'databasename/tablename' format */
1716
len = strlen(fil_path_to_mysql_datadir);
1717
/* namend marks the end of the database name part.
NOTE(review): no NULL check of the strchr() result is visible in this
fragment although namend is used in pointer arithmetic below - confirm */
namend = strchr(name, '/');
1719
/* Room for the datadir, one separator byte, the database name and the
terminating NUL */
path = mem_alloc(len + (namend - name) + 2);
1721
memcpy(path, fil_path_to_mysql_datadir, len);
1723
/* The database name is copied after the byte at path[len]; the store to
path[len] itself is not visible in this fragment */
memcpy(path + len + 1, name, namend - name);
1724
path[len + (namend - name) + 1] = 0;
1726
srv_normalize_path_for_win(path);
1728
/* The call must report success, otherwise the assertion fails */
ut_a(os_file_create_directory(path, FALSE));
1732
#ifndef UNIV_HOTBACKUP
1733
/************************************************************
1734
Writes a log record about an .ibd file create/rename/delete. */
1739
ulint type, /* in: MLOG_FILE_CREATE,
1740
MLOG_FILE_DELETE, or
1742
ulint space_id, /* in: space id */
1743
ulint log_flags, /* in: redo log flags (stored
1744
in the page number field) */
1745
const char* name, /* in: table name in the familiar
1746
'databasename/tablename' format, or
1747
the file path in the case of
1749
const char* new_name, /* in: if type is MLOG_FILE_RENAME,
1750
the new table name in the
1751
'databasename/tablename' format */
1752
mtr_t* mtr) /* in: mini-transaction handle */
1757
log_ptr = mlog_open(mtr, 11 + 2);
1760
/* Logging in mtr is switched off during crash recovery:
1761
in that case mlog_open returns NULL */
1765
log_ptr = mlog_write_initial_log_record_for_file_op(
1766
type, space_id, log_flags, log_ptr, mtr);
1767
/* Let us store the strings as null-terminated for easier readability
1770
len = strlen(name) + 1;
1772
mach_write_to_2(log_ptr, len);
1774
mlog_close(mtr, log_ptr);
1776
mlog_catenate_string(mtr, (byte*) name, len);
1778
if (type == MLOG_FILE_RENAME) {
1779
ulint len = strlen(new_name) + 1;
1780
log_ptr = mlog_open(mtr, 2 + len);
1782
mach_write_to_2(log_ptr, len);
1784
mlog_close(mtr, log_ptr);
1786
mlog_catenate_string(mtr, (byte*) new_name, len);
1791
/***********************************************************************
1792
Parses the body of a log record written about an .ibd file operation. That is,
1793
the log record part after the standard (type, space id, page no) header of the
1796
If desired, also replays the delete or rename operation if the .ibd file
1797
exists and the space id in it matches. Replays the create operation if a file
1798
at that path does not exist yet. If the database directory for the file to be
1799
created does not exist, then we create the directory, too.
1801
Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
1802
datadir that we should use in replaying the file operations. */
1805
fil_op_log_parse_or_replay(
1806
/*=======================*/
1807
/* out: end of log record, or NULL if the
1808
record was not completely contained between
1810
byte* ptr, /* in: buffer containing the log record body,
1811
or an initial segment of it, if the record does
1812
not fit completely between ptr and end_ptr */
1813
byte* end_ptr, /* in: buffer end */
1814
ulint type, /* in: the type of this log record */
1815
ulint space_id, /* in: the space id of the tablespace in
1816
question, or 0 if the log record should
1817
only be parsed but not replayed */
1818
ulint log_flags) /* in: redo log flags
1819
(stored in the page number parameter) */
1824
const char* new_name = NULL;
1826
if (end_ptr < ptr + 2) {
1831
name_len = mach_read_from_2(ptr);
1835
if (end_ptr < ptr + name_len) {
1840
name = (const char*) ptr;
1844
if (type == MLOG_FILE_RENAME) {
1845
if (end_ptr < ptr + 2) {
1850
new_name_len = mach_read_from_2(ptr);
1854
if (end_ptr < ptr + new_name_len) {
1859
new_name = (const char*) ptr;
1861
ptr += new_name_len;
1864
/* We managed to parse a full log record body */
1866
printf("Parsed log rec of type %lu space %lu\n"
1867
"name %s\n", type, space_id, name);
1869
if (type == MLOG_FILE_RENAME) {
1870
printf("new name %s\n", new_name);
1878
/* Let us try to perform the file operation, if sensible. Note that
1879
ibbackup has at this stage already read in all space id info to the
1880
fil0fil.c data structures.
1882
NOTE that our algorithm is not guaranteed to work correctly if there
1883
were renames of tables during the backup. See ibbackup code for more
1886
if (type == MLOG_FILE_DELETE) {
1887
if (fil_tablespace_exists_in_mem(space_id)) {
1888
ut_a(fil_delete_tablespace(space_id));
1890
} else if (type == MLOG_FILE_RENAME) {
1891
/* We do the rename based on space id, not old file name;
1892
this should guarantee that after the log replay each .ibd file
1893
has the correct name for the latest log sequence number; the
1894
proof is left as an exercise :) */
1896
if (fil_tablespace_exists_in_mem(space_id)) {
1897
/* Create the database directory for the new name, if
1898
it does not exist yet */
1899
fil_create_directory_for_tablename(new_name);
1901
/* Rename the table if there is not yet a tablespace
1902
with the same name */
1904
if (fil_get_space_id_for_table(new_name)
1905
== ULINT_UNDEFINED) {
1906
/* We do not care about the old name, that is
1907
why we pass NULL as the first argument */
1908
if (!fil_rename_tablespace(NULL, space_id,
1915
ut_a(type == MLOG_FILE_CREATE);
1917
if (fil_tablespace_exists_in_mem(space_id)) {
1919
} else if (fil_get_space_id_for_table(name)
1920
!= ULINT_UNDEFINED) {
1922
} else if (log_flags & MLOG_FILE_FLAG_TEMP) {
1923
/* Temporary table, do nothing */
1925
/* Create the database directory for name, if it does
1927
fil_create_directory_for_tablename(name);
1929
ut_a(space_id != 0);
1931
if (fil_create_new_single_table_tablespace(
1932
&space_id, name, FALSE,
1933
FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
1942
/***********************************************************************
1943
Deletes a single-table tablespace. The tablespace must be cached in the
1947
fil_delete_tablespace(
1948
/*==================*/
1949
/* out: TRUE if success */
1950
ulint id) /* in: space id */
1952
fil_system_t* system = fil_system;
1961
mutex_enter(&(system->mutex));
1963
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
1965
if (space != NULL) {
1966
space->stop_ibuf_merges = TRUE;
1968
if (space->n_pending_ibuf_merges == 0) {
1969
mutex_exit(&(system->mutex));
1976
ut_print_timestamp(stderr);
1977
fputs(" InnoDB: Warning: trying to"
1978
" delete tablespace ", stderr);
1979
ut_print_filename(stderr, space->name);
1980
fprintf(stderr, ",\n"
1981
"InnoDB: but there are %lu pending"
1982
" ibuf merges on it.\n"
1983
"InnoDB: Loop %lu.\n",
1984
(ulong) space->n_pending_ibuf_merges,
1988
mutex_exit(&(system->mutex));
1990
os_thread_sleep(20000);
1993
goto stop_ibuf_merges;
1997
mutex_exit(&(system->mutex));
2001
mutex_enter(&(system->mutex));
2003
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
2005
if (space == NULL) {
2006
ut_print_timestamp(stderr);
2008
" InnoDB: Error: cannot delete tablespace %lu\n"
2009
"InnoDB: because it is not found in the"
2010
" tablespace memory cache.\n",
2013
mutex_exit(&(system->mutex));
2019
ut_a(space->n_pending_ibuf_merges == 0);
2021
space->is_being_deleted = TRUE;
2023
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2024
node = UT_LIST_GET_FIRST(space->chain);
2026
if (space->n_pending_flushes > 0 || node->n_pending > 0) {
2028
ut_print_timestamp(stderr);
2029
fputs(" InnoDB: Warning: trying to"
2030
" delete tablespace ", stderr);
2031
ut_print_filename(stderr, space->name);
2032
fprintf(stderr, ",\n"
2033
"InnoDB: but there are %lu flushes"
2034
" and %lu pending i/o's on it\n"
2035
"InnoDB: Loop %lu.\n",
2036
(ulong) space->n_pending_flushes,
2037
(ulong) node->n_pending,
2040
mutex_exit(&(system->mutex));
2041
os_thread_sleep(20000);
2048
path = mem_strdup(space->name);
2050
mutex_exit(&(system->mutex));
2051
#ifndef UNIV_HOTBACKUP
2052
/* Invalidate in the buffer pool all pages belonging to the
2053
tablespace. Since we have set space->is_being_deleted = TRUE, readahead
2054
or ibuf merge can no longer read more pages of this tablespace to the
2055
buffer pool. Thus we can clean the tablespace out of the buffer pool
2056
completely and permanently. The flag is_being_deleted also prevents
2057
fil_flush() from being applied to this tablespace. */
2059
buf_LRU_invalidate_tablespace(id);
2061
/* printf("Deleting tablespace %s id %lu\n", space->name, id); */
2063
success = fil_space_free(id);
2066
success = os_file_delete(path);
2069
success = os_file_delete_if_exists(path);
2074
#ifndef UNIV_HOTBACKUP
2075
/* Write a log record about the deletion of the .ibd
2076
file, so that ibbackup can replay it in the
2077
--apply-log phase. We use a dummy mtr and the familiar
2078
log write mechanism. */
2081
/* When replaying the operation in ibbackup, do not try
2082
to write any log record */
2085
fil_op_write_log(MLOG_FILE_DELETE, id, 0, path, NULL, &mtr);
2098
/***********************************************************************
2099
Discards a single-table tablespace. The tablespace must be cached in the
2100
memory cache. Discarding is like deleting a tablespace, but
2101
1) we do not drop the table from the data dictionary;
2102
2) we remove all insert buffer entries for the tablespace immediately; in DROP
2103
TABLE they are only removed gradually in the background;
2104
3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
2105
as it originally had.

The work is delegated to fil_delete_tablespace() followed by
ibuf_delete_for_discarded_space(). */
2108
fil_discard_tablespace(
2109
/*===================*/
2110
/* out: TRUE if success */
2111
ulint id) /* in: space id */
2115
success = fil_delete_tablespace(id);
2119
/* Warning text for a failed delete; according to the message the insert
buffer entries are removed nevertheless */
"InnoDB: Warning: cannot delete tablespace %lu"
2120
" in DISCARD TABLESPACE.\n"
2121
"InnoDB: But let us remove the"
2122
" insert buffer entries for this tablespace.\n",
2126
/* Remove all insert buffer entries for the tablespace */
2128
ibuf_delete_for_discarded_space(id);
2133
/***********************************************************************
2134
Renames the memory cache structures of a single-table tablespace.

The space must be findable in the name hash under its current name and the
new name must not be present there yet. On the success path the space is
removed from the name hash, space->name and node->name are replaced by
separate copies of path, and the space is inserted again under the new name.
No mutex is acquired in the code visible here. */
2137
fil_rename_tablespace_in_mem(
2138
/*=========================*/
2139
/* out: TRUE if success */
2140
fil_space_t* space, /* in: tablespace memory object */
2141
fil_node_t* node, /* in: file node of that tablespace */
2142
const char* path) /* in: new name */
2144
fil_system_t* system = fil_system;
2145
fil_space_t* space2;
2146
/* old_name aliases space->name: it is only valid until space->name is
freed further below */
const char* old_name = space->name;
2148
/* Check 1: the name hash must map the old name to this space object */
HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(old_name),
2149
space2, 0 == strcmp(old_name, space2->name));
2150
if (space != space2) {
2151
fputs("InnoDB: Error: cannot find ", stderr);
2152
ut_print_filename(stderr, old_name);
2153
fputs(" in tablespace memory cache\n", stderr);
2158
/* Check 2: the new name must not be in the name hash already */
HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(path),
2159
space2, 0 == strcmp(path, space2->name));
2160
if (space2 != NULL) {
2161
fputs("InnoDB: Error: ", stderr);
2162
ut_print_filename(stderr, path);
2163
fputs(" is already in tablespace memory cache\n", stderr);
2168
/* The hash key is the fold of the name, so the entry has to be deleted
under the old name before the name string is replaced */
HASH_DELETE(fil_space_t, name_hash, system->name_hash,
2169
ut_fold_string(space->name), space);
2170
mem_free(space->name);
2171
mem_free(node->name);
2173
/* The space and the node each get their own copy of the new name */
space->name = mem_strdup(path);
2174
node->name = mem_strdup(path);
2176
HASH_INSERT(fil_space_t, name_hash, system->name_hash,
2177
ut_fold_string(path), space);
2181
/***********************************************************************
2182
Allocates a file name for a single-table tablespace. The string must be freed
2183
by caller with mem_free(). */
2188
/* out, own: file name */
2189
const char* name, /* in: table name or a dir path of a
2191
ibool is_temp) /* in: TRUE if it is a dir path */
2193
ulint namelen = strlen(name);
2194
ulint dirlen = strlen(fil_path_to_mysql_datadir);
2195
char* filename = mem_alloc(namelen + dirlen + sizeof "/.ibd");
2198
memcpy(filename, name, namelen);
2199
memcpy(filename + namelen, ".ibd", sizeof ".ibd");
2201
memcpy(filename, fil_path_to_mysql_datadir, dirlen);
2202
filename[dirlen] = '/';
2204
memcpy(filename + dirlen + 1, name, namelen);
2205
memcpy(filename + dirlen + namelen + 1, ".ibd", sizeof ".ibd");
2208
srv_normalize_path_for_win(filename);
2213
/***********************************************************************
2214
Renames a single-table tablespace. The tablespace must be cached in the
2215
tablespace memory cache. */
2218
fil_rename_tablespace(
2219
/*==================*/
2220
/* out: TRUE if success */
2221
const char* old_name, /* in: old table name in the standard
2222
databasename/tablename format of
2223
InnoDB, or NULL if we do the rename
2224
based on the space id only */
2225
ulint id, /* in: space id */
2226
const char* new_name) /* in: new table name in the standard
2227
databasename/tablename format
2230
fil_system_t* system = fil_system;
2236
ibool old_name_was_specified = TRUE;
2241
if (old_name == NULL) {
2242
old_name = "(name not specified)";
2243
old_name_was_specified = FALSE;
2249
ut_print_timestamp(stderr);
2250
fputs(" InnoDB: Warning: problems renaming ", stderr);
2251
ut_print_filename(stderr, old_name);
2252
fputs(" to ", stderr);
2253
ut_print_filename(stderr, new_name);
2254
fprintf(stderr, ", %lu iterations\n", (ulong) count);
2257
mutex_enter(&(system->mutex));
2259
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
2261
if (space == NULL) {
2263
"InnoDB: Error: cannot find space id %lu"
2264
" in the tablespace memory cache\n"
2265
"InnoDB: though the table ", (ulong) id);
2266
ut_print_filename(stderr, old_name);
2267
fputs(" in a rename operation should have that id\n", stderr);
2268
mutex_exit(&(system->mutex));
2273
if (count > 25000) {
2274
space->stop_ios = FALSE;
2275
mutex_exit(&(system->mutex));
2280
/* We temporarily close the .ibd file because we do not trust that
2281
operating systems can rename an open file. For the closing we have to
2282
wait until there are no pending i/o's or flushes on the file. */
2284
space->stop_ios = TRUE;
2286
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2287
node = UT_LIST_GET_FIRST(space->chain);
2289
if (node->n_pending > 0 || node->n_pending_flushes > 0) {
2290
/* There are pending i/o's or flushes, sleep for a while and
2293
mutex_exit(&(system->mutex));
2295
os_thread_sleep(20000);
2299
} else if (node->modification_counter > node->flush_counter) {
2300
/* Flush the space */
2302
mutex_exit(&(system->mutex));
2304
os_thread_sleep(20000);
2310
} else if (node->open) {
2311
/* Close the file */
2313
fil_node_close_file(node, system);
2316
/* Check that the old name in the space is right */
2318
if (old_name_was_specified) {
2319
old_path = fil_make_ibd_name(old_name, FALSE);
2321
ut_a(strcmp(space->name, old_path) == 0);
2322
ut_a(strcmp(node->name, old_path) == 0);
2324
old_path = mem_strdup(space->name);
2327
/* Rename the tablespace and the node in the memory cache */
2328
path = fil_make_ibd_name(new_name, FALSE);
2329
success = fil_rename_tablespace_in_mem(space, node, path);
2332
success = os_file_rename(old_path, path);
2335
/* We have to revert the changes we made
2336
to the tablespace memory cache */
2338
ut_a(fil_rename_tablespace_in_mem(space, node,
2346
space->stop_ios = FALSE;
2348
mutex_exit(&(system->mutex));
2350
#ifndef UNIV_HOTBACKUP
2356
fil_op_write_log(MLOG_FILE_RENAME, id, 0, old_name, new_name,
2364
/***********************************************************************
2365
Creates a new single-table tablespace to a database directory of MySQL.
2366
Database directories are under the 'datadir' of MySQL. The datadir is the
2367
directory of a running mysqld program. We can refer to it by simply the
2368
path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
2369
dir of the mysqld server. */
2372
fil_create_new_single_table_tablespace(
2373
/*===================================*/
2374
/* out: DB_SUCCESS or error code */
2375
ulint* space_id, /* in/out: space id; if this is != 0,
2376
then this is an input parameter,
2378
const char* tablename, /* in: the table name in the usual
2379
databasename/tablename format
2380
of InnoDB, or a dir path to a temp
2382
ibool is_temp, /* in: TRUE if a table created with
2383
CREATE TEMPORARY TABLE */
2384
ulint size) /* in: the initial size of the
2385
tablespace file in pages,
2386
must be >= FIL_IBD_FILE_INITIAL_SIZE */
2396
ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
2398
path = fil_make_ibd_name(tablename, is_temp);
2400
file = os_file_create(path, OS_FILE_CREATE, OS_FILE_NORMAL,
2401
OS_DATA_FILE, &ret);
2403
ut_print_timestamp(stderr);
2404
fputs(" InnoDB: Error creating file ", stderr);
2405
ut_print_filename(stderr, path);
2406
fputs(".\n", stderr);
2408
/* The following call will print an error message */
2410
err = os_file_get_last_error(TRUE);
2412
if (err == OS_FILE_ALREADY_EXISTS) {
2413
fputs("InnoDB: The file already exists though"
2414
" the corresponding table did not\n"
2415
"InnoDB: exist in the InnoDB data dictionary."
2416
" Have you moved InnoDB\n"
2417
"InnoDB: .ibd files around without using the"
2419
"InnoDB: DISCARD TABLESPACE and"
2420
" IMPORT TABLESPACE, or did\n"
2421
"InnoDB: mysqld crash in the middle of"
2422
" CREATE TABLE? You can\n"
2423
"InnoDB: resolve the problem by"
2424
" removing the file ", stderr);
2425
ut_print_filename(stderr, path);
2427
"InnoDB: under the 'datadir' of MySQL.\n",
2431
return(DB_TABLESPACE_ALREADY_EXISTS);
2434
if (err == OS_FILE_DISK_FULL) {
2437
return(DB_OUT_OF_FILE_SPACE);
2444
buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
2445
/* Align the memory for file i/o if we might have O_DIRECT set */
2446
page = ut_align(buf2, UNIV_PAGE_SIZE);
2448
ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE, 0);
2452
os_file_close(file);
2453
os_file_delete(path);
2456
return(DB_OUT_OF_FILE_SPACE);
2459
if (*space_id == 0) {
2460
*space_id = fil_assign_new_space_id();
2463
/* printf("Creating tablespace %s id %lu\n", path, *space_id); */
2465
if (*space_id == ULINT_UNDEFINED) {
2468
os_file_close(file);
2470
os_file_delete(path);
2476
/* We have to write the space id to the file immediately and flush the
2477
file to disk. This is because in crash recovery we must be aware what
2478
tablespaces exist and what are their space id's, so that we can apply
2479
the log records to the right file. It may take quite a while until
2480
buffer pool flush algorithms write anything to the file and flush it to
2481
disk. If we would not write here anything, the file would be filled
2482
with zeros from the call of os_file_set_size(), until a buffer pool
2483
flush would write to it. */
2485
memset(page, '\0', UNIV_PAGE_SIZE);
2487
fsp_header_write_space_id(page, *space_id);
2489
buf_flush_init_for_writing(page, ut_dulint_zero, *space_id, 0);
2491
ret = os_file_write(path, file, page, 0, 0, UNIV_PAGE_SIZE);
2496
fputs("InnoDB: Error: could not write the first page"
2497
" to tablespace ", stderr);
2498
ut_print_filename(stderr, path);
2503
ret = os_file_flush(file);
2506
fputs("InnoDB: Error: file flush of tablespace ", stderr);
2507
ut_print_filename(stderr, path);
2508
fputs(" failed\n", stderr);
2512
os_file_close(file);
2514
if (*space_id == ULINT_UNDEFINED) {
2518
success = fil_space_create(path, *space_id, FIL_TABLESPACE);
2524
fil_node_create(path, size, *space_id, FALSE);
2526
#ifndef UNIV_HOTBACKUP
2532
fil_op_write_log(MLOG_FILE_CREATE, *space_id,
2533
is_temp ? MLOG_FILE_FLAG_TEMP : 0,
2534
tablename, NULL, &mtr);
2543
/************************************************************************
2544
It is possible, though very improbable, that the lsn's in the tablespace to be
2545
imported have risen above the current system lsn, if a lengthy purge, ibuf
2546
merge, or rollback was performed on a backup taken with ibbackup. If that is
2547
the case, reset page lsn's in the file. We assume that mysqld was shut down
2548
after it performed these cleanup operations on the .ibd file, so that it at
2549
the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
2550
first page of the .ibd file, and we can determine whether we need to reset the
2551
lsn's just by looking at that flush lsn. */
2554
fil_reset_too_high_lsns(
2555
/*====================*/
2556
/* out: TRUE if success */
2557
const char* name, /* in: table name in the
2558
databasename/tablename format */
2559
dulint current_lsn) /* in: reset lsn's if the lsn stamped
2560
to FIL_PAGE_FILE_FLUSH_LSN in the
2561
first page is too high */
2569
ib_longlong file_size;
2574
filepath = fil_make_ibd_name(name, FALSE);
2576
file = os_file_create_simple_no_error_handling(
2577
filepath, OS_FILE_OPEN, OS_FILE_READ_WRITE, &success);
2579
/* The following call prints an error message */
2580
os_file_get_last_error(TRUE);
2582
ut_print_timestamp(stderr);
2584
fputs(" InnoDB: Error: trying to open a table,"
2586
"InnoDB: open the tablespace file ", stderr);
2587
ut_print_filename(stderr, filepath);
2588
fputs("!\n", stderr);
2594
/* Read the first page of the tablespace */
2596
buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
2597
/* Align the memory for file i/o if we might have O_DIRECT set */
2598
page = ut_align(buf2, UNIV_PAGE_SIZE);
2600
success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
2606
/* We have to read the file flush lsn from the header of the file */
2608
flush_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN);
2610
if (ut_dulint_cmp(current_lsn, flush_lsn) >= 0) {
2617
space_id = fsp_header_get_space_id(page);
2619
ut_print_timestamp(stderr);
2621
" InnoDB: Flush lsn in the tablespace file %lu"
2623
"InnoDB: is %lu %lu, which exceeds current"
2624
" system lsn %lu %lu.\n"
2625
"InnoDB: We reset the lsn's in the file ",
2627
(ulong) ut_dulint_get_high(flush_lsn),
2628
(ulong) ut_dulint_get_low(flush_lsn),
2629
(ulong) ut_dulint_get_high(current_lsn),
2630
(ulong) ut_dulint_get_low(current_lsn));
2631
ut_print_filename(stderr, filepath);
2632
fputs(".\n", stderr);
2634
/* Loop through all the pages in the tablespace and reset the lsn and
2635
the page checksum if necessary */
2637
file_size = os_file_get_size_as_iblonglong(file);
2639
for (offset = 0; offset < file_size; offset += UNIV_PAGE_SIZE) {
2640
success = os_file_read(file, page,
2641
(ulint)(offset & 0xFFFFFFFFUL),
2642
(ulint)(offset >> 32), UNIV_PAGE_SIZE);
2647
if (ut_dulint_cmp(mach_read_from_8(page + FIL_PAGE_LSN),
2649
/* We have to reset the lsn */
2650
space_id = mach_read_from_4(
2651
page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
2652
page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
2654
buf_flush_init_for_writing(page, current_lsn, space_id,
2656
success = os_file_write(filepath, file, page,
2657
(ulint)(offset & 0xFFFFFFFFUL),
2658
(ulint)(offset >> 32),
2667
success = os_file_flush(file);
2673
/* We now update the flush_lsn stamp at the start of the file */
2674
success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
2680
mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
2682
success = os_file_write(filepath, file, page, 0, 0, UNIV_PAGE_SIZE);
2687
success = os_file_flush(file);
2689
os_file_close(file);
2696
/************************************************************************
2697
Tries to open a single-table tablespace and optionally checks the space id is
2698
right in it. If it does not succeed, prints an error message to the .err log. This
2699
function is used to open a tablespace when we start up mysqld, and also in
IMPORT TABLESPACE.
2701
NOTE that we assume this operation is used either at the database startup
2702
or under the protection of the dictionary mutex, so that two users cannot
2703
race here. This operation does not leave the file associated with the
2704
tablespace open, but closes it after we have looked at the space id in it. */
2707
fil_open_single_table_tablespace(
2708
/*=============================*/
2709
/* out: TRUE if success */
2710
ibool check_space_id, /* in: should we check that the space
2711
id in the file is right; we assume
2712
that this function runs much faster
2713
if no check is made, since accessing
2714
the file inode probably is much
2715
faster (the OS caches them) than
2716
accessing the first page of the file */
2717
ulint id, /* in: space id */
2718
const char* name) /* in: table name in the
2719
databasename/tablename format */
2729
filepath = fil_make_ibd_name(name, FALSE);
2731
file = os_file_create_simple_no_error_handling(
2732
filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
2734
/* The following call prints an error message */
2735
os_file_get_last_error(TRUE);
2737
ut_print_timestamp(stderr);
2739
fputs(" InnoDB: Error: trying to open a table,"
2741
"InnoDB: open the tablespace file ", stderr);
2742
ut_print_filename(stderr, filepath);
2744
"InnoDB: Have you moved InnoDB .ibd files around"
2745
" without using the\n"
2746
"InnoDB: commands DISCARD TABLESPACE and"
2747
" IMPORT TABLESPACE?\n"
2748
"InnoDB: It is also possible that this is"
2749
" a temporary table #sql...,\n"
2750
"InnoDB: and MySQL removed the .ibd file for this.\n"
2751
"InnoDB: Please refer to\n"
2752
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
2753
"innodb-troubleshooting-datadict.html\n"
2754
"InnoDB: for how to resolve the issue.\n", stderr);
2761
if (!check_space_id) {
2767
/* Read the first page of the tablespace */
2769
buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
2770
/* Align the memory for file i/o if we might have O_DIRECT set */
2771
page = ut_align(buf2, UNIV_PAGE_SIZE);
2773
success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
2775
/* We have to read the tablespace id from the file */
2777
space_id = fsp_header_get_space_id(page);
2781
if (space_id != id) {
2782
ut_print_timestamp(stderr);
2784
fputs(" InnoDB: Error: tablespace id in file ", stderr);
2785
ut_print_filename(stderr, filepath);
2786
fprintf(stderr, " is %lu, but in the InnoDB\n"
2787
"InnoDB: data dictionary it is %lu.\n"
2788
"InnoDB: Have you moved InnoDB .ibd files"
2789
" around without using the\n"
2790
"InnoDB: commands DISCARD TABLESPACE and"
2791
" IMPORT TABLESPACE?\n"
2792
"InnoDB: Please refer to\n"
2793
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
2794
"innodb-troubleshooting-datadict.html\n"
2795
"InnoDB: for how to resolve the issue.\n",
2796
(ulong) space_id, (ulong) id);
2804
success = fil_space_create(filepath, space_id, FIL_TABLESPACE);
2810
/* We do not measure the size of the file, that is why we pass the 0
below */
2813
fil_node_create(filepath, 0, space_id, FALSE);
2815
os_file_close(file);
2821
#ifdef UNIV_HOTBACKUP
2822
/***********************************************************************
2823
Allocates a file name for an old version of a single-table tablespace.
2824
The string must be freed by caller with mem_free()! */
2827
fil_make_ibbackup_old_name(
2828
/*=======================*/
2829
/* out, own: file name */
2830
const char* name) /* in: original file name */
2832
static const char suffix[] = "_ibbackup_old_vers_";
2833
ulint len = strlen(name);
2834
char* path = mem_alloc(len + (15 + sizeof suffix));
2836
memcpy(path, name, len);
2837
memcpy(path + len, suffix, (sizeof suffix) - 1);
2838
ut_sprintf_timestamp_without_extra_chars(path + len + sizeof suffix);
2841
#endif /* UNIV_HOTBACKUP */
2843
/************************************************************************
2844
Opens an .ibd file and adds the associated single-table tablespace to the
2845
InnoDB fil0fil.c data structures. */
2848
fil_load_single_table_tablespace(
2849
/*=============================*/
2850
const char* dbname, /* in: database name */
2851
const char* filename) /* in: file name (not a path),
2852
including the .ibd extension */
2863
#ifdef UNIV_HOTBACKUP
2866
filepath = mem_alloc(strlen(dbname) + strlen(filename)
2867
+ strlen(fil_path_to_mysql_datadir) + 3);
2869
sprintf(filepath, "%s/%s/%s", fil_path_to_mysql_datadir, dbname,
2871
srv_normalize_path_for_win(filepath);
2873
# ifndef UNIV_HOTBACKUP
2874
/* If lower_case_table_names is 0 or 2, then MySQL allows database
2875
directory names with upper case letters. On Windows, all table and
2876
database names in InnoDB are internally always in lower case. Put the
2877
file path to lower case, so that we are consistent with InnoDB's
2878
internal data dictionary. */
2880
dict_casedn_str(filepath);
2881
# endif /* !UNIV_HOTBACKUP */
2883
file = os_file_create_simple_no_error_handling(
2884
filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
2886
/* The following call prints an error message */
2887
os_file_get_last_error(TRUE);
2890
"InnoDB: Error: could not open single-table tablespace"
2893
"InnoDB: We do not continue the crash recovery,"
2894
" because the table may become\n"
2895
"InnoDB: corrupt if we cannot apply the log records"
2896
" in the InnoDB log to it.\n"
2897
"InnoDB: To fix the problem and start mysqld:\n"
2898
"InnoDB: 1) If there is a permission problem"
2899
" in the file and mysqld cannot\n"
2900
"InnoDB: open the file, you should"
2901
" modify the permissions.\n"
2902
"InnoDB: 2) If the table is not needed, or you can"
2903
" restore it from a backup,\n"
2904
"InnoDB: then you can remove the .ibd file,"
2905
" and InnoDB will do a normal\n"
2906
"InnoDB: crash recovery and ignore that table.\n"
2907
"InnoDB: 3) If the file system or the"
2908
" disk is broken, and you cannot remove\n"
2909
"InnoDB: the .ibd file, you can set"
2910
" innodb_force_recovery > 0 in my.cnf\n"
2911
"InnoDB: and force InnoDB to continue crash"
2912
" recovery here.\n", filepath);
2916
if (srv_force_recovery > 0) {
2918
"InnoDB: innodb_force_recovery"
2919
" was set to %lu. Continuing crash recovery\n"
2920
"InnoDB: even though we cannot access"
2921
" the .ibd file of this table.\n",
2922
srv_force_recovery);
2929
success = os_file_get_size(file, &size_low, &size_high);
2932
/* The following call prints an error message */
2933
os_file_get_last_error(TRUE);
2936
"InnoDB: Error: could not measure the size"
2937
" of single-table tablespace file\n"
2939
"InnoDB: We do not continue crash recovery,"
2940
" because the table will become\n"
2941
"InnoDB: corrupt if we cannot apply the log records"
2942
" in the InnoDB log to it.\n"
2943
"InnoDB: To fix the problem and start mysqld:\n"
2944
"InnoDB: 1) If there is a permission problem"
2945
" in the file and mysqld cannot\n"
2946
"InnoDB: access the file, you should"
2947
" modify the permissions.\n"
2948
"InnoDB: 2) If the table is not needed,"
2949
" or you can restore it from a backup,\n"
2950
"InnoDB: then you can remove the .ibd file,"
2951
" and InnoDB will do a normal\n"
2952
"InnoDB: crash recovery and ignore that table.\n"
2953
"InnoDB: 3) If the file system or the disk is broken,"
2954
" and you cannot remove\n"
2955
"InnoDB: the .ibd file, you can set"
2956
" innodb_force_recovery > 0 in my.cnf\n"
2957
"InnoDB: and force InnoDB to continue"
2958
" crash recovery here.\n", filepath);
2960
os_file_close(file);
2963
if (srv_force_recovery > 0) {
2965
"InnoDB: innodb_force_recovery"
2966
" was set to %lu. Continuing crash recovery\n"
2967
"InnoDB: even though we cannot access"
2968
" the .ibd file of this table.\n",
2969
srv_force_recovery);
2976
/* TODO: What to do in other cases where we cannot access an .ibd
2977
file during a crash recovery? */
2979
/* Every .ibd file is created >= 4 pages in size. Smaller files
are treated as invalid below. */
2982
size = (((ib_longlong)size_high) << 32) + (ib_longlong)size_low;
2983
#ifndef UNIV_HOTBACKUP
2984
if (size < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
2986
"InnoDB: Error: the size of single-table tablespace"
2988
"InnoDB: is only %lu %lu, should be at least %lu!",
2991
(ulong) size_low, (ulong) (4 * UNIV_PAGE_SIZE));
2992
os_file_close(file);
2998
/* Read the first page of the tablespace if the size big enough */
3000
buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
3001
/* Align the memory for file i/o if we might have O_DIRECT set */
3002
page = ut_align(buf2, UNIV_PAGE_SIZE);
3004
if (size >= FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
3005
success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
3007
/* We have to read the tablespace id from the file */
3009
space_id = fsp_header_get_space_id(page);
3011
space_id = ULINT_UNDEFINED;
3014
#ifndef UNIV_HOTBACKUP
3015
if (space_id == ULINT_UNDEFINED || space_id == 0) {
3017
"InnoDB: Error: tablespace id %lu in file %s"
3018
" is not sensible\n",
3024
if (space_id == ULINT_UNDEFINED || space_id == 0) {
3028
"InnoDB: Renaming tablespace %s of id %lu,\n"
3029
"InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
3030
"InnoDB: because its size %lld is too small"
3031
" (< 4 pages 16 kB each),\n"
3032
"InnoDB: or the space id in the file header"
3033
" is not sensible.\n"
3034
"InnoDB: This can happen in an ibbackup run,"
3035
" and is not dangerous.\n",
3036
filepath, space_id, filepath, size);
3037
os_file_close(file);
3039
new_path = fil_make_ibbackup_old_name(filepath);
3040
ut_a(os_file_rename(filepath, new_path));
3049
/* A backup may contain the same space several times, if the space got
3050
renamed at a sensitive time. Since it is enough to have one version of
3051
the space, we rename the file if a space with the same space id
3052
already exists in the tablespace memory cache. We rather rename the
3053
file than delete it, because if there is a bug, we do not want to
3054
destroy valuable data. */
3056
mutex_enter(&(fil_system->mutex));
3058
space = fil_get_space_for_id_low(space_id);
3064
"InnoDB: Renaming tablespace %s of id %lu,\n"
3065
"InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
3066
"InnoDB: because space %s with the same id\n"
3067
"InnoDB: was scanned earlier. This can happen"
3068
" if you have renamed tables\n"
3069
"InnoDB: during an ibbackup run.\n",
3070
filepath, space_id, filepath,
3072
os_file_close(file);
3074
new_path = fil_make_ibbackup_old_name(filepath);
3076
mutex_exit(&(fil_system->mutex));
3078
ut_a(os_file_rename(filepath, new_path));
3086
mutex_exit(&(fil_system->mutex));
3088
success = fil_space_create(filepath, space_id, FIL_TABLESPACE);
3095
/* We do not use the size information we have about the file, because
3096
the rounding formula for extents and pages is somewhat complex; we
3097
let fil_node_open() do that task. */
3099
fil_node_create(filepath, 0, space_id, FALSE);
3101
os_file_close(file);
3106
/***************************************************************************
3107
A fault-tolerant function that tries to read the next file name in the
3108
directory. We retry 100 times if os_file_readdir_next_file() returns -1. The
3109
idea is to read as much good data as we can and jump over bad data. */
3112
fil_file_readdir_next_file(
3113
/*=======================*/
3114
/* out: 0 if ok, -1 if error even after the
3115
retries, 1 if at the end of the directory */
3116
ulint* err, /* out: this is set to DB_ERROR if an error
3117
was encountered, otherwise not changed */
3118
const char* dirname,/* in: directory name or path */
3119
os_file_dir_t dir, /* in: directory stream */
3120
os_file_stat_t* info) /* in/out: buffer where the info is returned */
3125
for (i = 0; i < 100; i++) {
3126
ret = os_file_readdir_next_file(dirname, dir, info);
3134
"InnoDB: Error: os_file_readdir_next_file()"
3136
"InnoDB: directory %s\n"
3137
"InnoDB: Crash recovery may have failed"
3138
" for some .ibd files!\n", dirname);
3146
/************************************************************************
3147
At the server startup, if we need crash recovery, scans the database
3148
directories under the MySQL datadir, looking for .ibd files. Those files are
3149
single-table tablespaces. We need to know the space id in each of them so that
3150
we know into which file we should look to check the contents of a page stored
3151
in the doublewrite buffer, also to know where to apply log records where the
3152
space id is != 0. */
3155
fil_load_single_table_tablespaces(void)
3156
/*===================================*/
3157
/* out: DB_SUCCESS or error number */
3160
char* dbpath = NULL;
3161
ulint dbpath_len = 100;
3163
os_file_dir_t dbdir;
3164
os_file_stat_t dbinfo;
3165
os_file_stat_t fileinfo;
3166
ulint err = DB_SUCCESS;
3168
/* The datadir of MySQL is always the default directory of mysqld */
3170
dir = os_file_opendir(fil_path_to_mysql_datadir, TRUE);
3177
dbpath = mem_alloc(dbpath_len);
3179
/* Scan all directories under the datadir. They are the database
3180
directories of MySQL. */
3182
ret = fil_file_readdir_next_file(&err, fil_path_to_mysql_datadir, dir,
3186
/* printf("Looking at %s in datadir\n", dbinfo.name); */
3188
if (dbinfo.type == OS_FILE_TYPE_FILE
3189
|| dbinfo.type == OS_FILE_TYPE_UNKNOWN) {
3191
goto next_datadir_item;
3194
/* We found a symlink or a directory; try opening it to see
3195
if a symlink is a directory */
3197
len = strlen(fil_path_to_mysql_datadir)
3198
+ strlen (dbinfo.name) + 2;
3199
if (len > dbpath_len) {
3206
dbpath = mem_alloc(dbpath_len);
3208
sprintf(dbpath, "%s/%s", fil_path_to_mysql_datadir,
3210
srv_normalize_path_for_win(dbpath);
3212
dbdir = os_file_opendir(dbpath, FALSE);
3214
if (dbdir != NULL) {
3215
/* printf("Opened dir %s\n", dbinfo.name); */
3217
/* We found a database directory; loop through it,
3218
looking for possible .ibd files in it */
3220
ret = fil_file_readdir_next_file(&err, dbpath, dbdir,
3224
" Looking at file %s\n", fileinfo.name); */
3226
if (fileinfo.type == OS_FILE_TYPE_DIR) {
3228
goto next_file_item;
3231
/* We found a symlink or a file */
3232
if (strlen(fileinfo.name) > 4
3233
&& 0 == strcmp(fileinfo.name
3234
+ strlen(fileinfo.name) - 4,
3236
/* The name ends in .ibd; try opening
the file */
3238
fil_load_single_table_tablespace(
3239
dbinfo.name, fileinfo.name);
3242
ret = fil_file_readdir_next_file(&err,
3247
if (0 != os_file_closedir(dbdir)) {
3248
fputs("InnoDB: Warning: could not"
3249
" close database directory ", stderr);
3250
ut_print_filename(stderr, dbpath);
3258
ret = fil_file_readdir_next_file(&err,
3259
fil_path_to_mysql_datadir,
3265
if (0 != os_file_closedir(dir)) {
3267
"InnoDB: Error: could not close MySQL datadir\n");
3275
/************************************************************************
3276
If we need crash recovery, and we have called
3277
fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(),
3278
we can call this function to print an error message of orphaned .ibd files
3279
for which there is not a data dictionary entry with a matching table name
and space id. */
3283
fil_print_orphaned_tablespaces(void)
3284
/*================================*/
3286
fil_system_t* system = fil_system;
3289
mutex_enter(&(system->mutex));
3291
space = UT_LIST_GET_FIRST(system->space_list);
3294
if (space->purpose == FIL_TABLESPACE && space->id != 0
3296
fputs("InnoDB: Warning: tablespace ", stderr);
3297
ut_print_filename(stderr, space->name);
3298
fprintf(stderr, " of id %lu has no matching table in\n"
3299
"InnoDB: the InnoDB data dictionary.\n",
3303
space = UT_LIST_GET_NEXT(space_list, space);
3306
mutex_exit(&(system->mutex));
3309
/***********************************************************************
3310
Returns TRUE if a single-table tablespace does not exist in the memory cache,
3311
or is being deleted there. */
3314
fil_tablespace_deleted_or_being_deleted_in_mem(
3315
/*===========================================*/
3316
/* out: TRUE if does not exist or is being
deleted */
3318
ulint id, /* in: space id */
3319
ib_longlong version)/* in: tablespace_version should be this; if
3320
you pass -1 as the value of this, then this
3321
parameter is ignored */
3323
fil_system_t* system = fil_system;
3328
mutex_enter(&(system->mutex));
3330
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
3332
if (space == NULL || space->is_being_deleted) {
3333
mutex_exit(&(system->mutex));
3338
if (version != ((ib_longlong)-1)
3339
&& space->tablespace_version != version) {
3340
mutex_exit(&(system->mutex));
3345
mutex_exit(&(system->mutex));
3350
/***********************************************************************
3351
Returns TRUE if a single-table tablespace exists in the memory cache. */
3354
fil_tablespace_exists_in_mem(
3355
/*=========================*/
3356
/* out: TRUE if exists */
3357
ulint id) /* in: space id */
3359
fil_system_t* system = fil_system;
3364
mutex_enter(&(system->mutex));
3366
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
3368
if (space == NULL) {
3369
mutex_exit(&(system->mutex));
3374
mutex_exit(&(system->mutex));
3379
/***********************************************************************
3380
Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
3381
cache. Note that if we have not done a crash recovery at the database startup,
3382
there may be many tablespaces which are not yet in the memory cache. */
3385
fil_space_for_table_exists_in_mem(
3386
/*==============================*/
3387
/* out: TRUE if a matching tablespace
3388
exists in the memory cache */
3389
ulint id, /* in: space id */
3390
const char* name, /* in: table name in the standard
3391
'databasename/tablename' format or
3392
the dir path to a temp table */
3393
ibool is_temp, /* in: TRUE if created with CREATE
TEMPORARY TABLE */
3395
ibool mark_space, /* in: in crash recovery, at database
3396
startup we mark all spaces which have
3397
an associated table in the InnoDB
3398
data dictionary, so that
3399
we can print a warning about orphaned
tablespaces */
3401
ibool print_error_if_does_not_exist)
3402
/* in: print detailed error
3403
information to the .err log if a
3404
matching tablespace is not found from
the memory cache */
3407
fil_system_t* system = fil_system;
3408
fil_space_t* namespace;
3414
mutex_enter(&(system->mutex));
3416
path = fil_make_ibd_name(name, is_temp);
3418
/* Look if there is a space with the same id */
3420
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
3422
/* Look if there is a space with the same name; the name is the
3423
directory path from the datadir to the file */
3425
HASH_SEARCH(name_hash, system->name_hash,
3426
ut_fold_string(path), namespace,
3427
0 == strcmp(namespace->name, path));
3428
if (space && space == namespace) {
3436
mutex_exit(&(system->mutex));
3441
if (!print_error_if_does_not_exist) {
3444
mutex_exit(&(system->mutex));
3449
if (space == NULL) {
3450
if (namespace == NULL) {
3451
ut_print_timestamp(stderr);
3452
fputs(" InnoDB: Error: table ", stderr);
3453
ut_print_filename(stderr, name);
3454
fprintf(stderr, "\n"
3455
"InnoDB: in InnoDB data dictionary"
3456
" has tablespace id %lu,\n"
3457
"InnoDB: but tablespace with that id"
3458
" or name does not exist. Have\n"
3459
"InnoDB: you deleted or moved .ibd files?\n"
3460
"InnoDB: This may also be a table created with"
3461
" CREATE TEMPORARY TABLE\n"
3462
"InnoDB: whose .ibd and .frm files"
3463
" MySQL automatically removed, but the\n"
3464
"InnoDB: table still exists in the"
3465
" InnoDB internal data dictionary.\n",
3468
ut_print_timestamp(stderr);
3469
fputs(" InnoDB: Error: table ", stderr);
3470
ut_print_filename(stderr, name);
3471
fprintf(stderr, "\n"
3472
"InnoDB: in InnoDB data dictionary has"
3473
" tablespace id %lu,\n"
3474
"InnoDB: but a tablespace with that id"
3475
" does not exist. There is\n"
3476
"InnoDB: a tablespace of name %s and id %lu,"
3478
"InnoDB: you deleted or moved .ibd files?\n",
3479
(ulong) id, namespace->name,
3480
(ulong) namespace->id);
3483
fputs("InnoDB: Please refer to\n"
3484
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
3485
"innodb-troubleshooting-datadict.html\n"
3486
"InnoDB: for how to resolve the issue.\n", stderr);
3489
mutex_exit(&(system->mutex));
3494
if (0 != strcmp(space->name, path)) {
3495
ut_print_timestamp(stderr);
3496
fputs(" InnoDB: Error: table ", stderr);
3497
ut_print_filename(stderr, name);
3498
fprintf(stderr, "\n"
3499
"InnoDB: in InnoDB data dictionary has"
3500
" tablespace id %lu,\n"
3501
"InnoDB: but the tablespace with that id"
3503
"InnoDB: Have you deleted or moved .ibd files?\n",
3504
(ulong) id, space->name);
3506
if (namespace != NULL) {
3507
fputs("InnoDB: There is a tablespace"
3508
" with the right name\n"
3509
"InnoDB: ", stderr);
3510
ut_print_filename(stderr, namespace->name);
3511
fprintf(stderr, ", but its id is %lu.\n",
3512
(ulong) namespace->id);
3519
mutex_exit(&(system->mutex));
3524
/***********************************************************************
3525
Checks if a single-table tablespace for a given table name exists in the
3526
tablespace memory cache. */
3529
fil_get_space_id_for_table(
3530
/*=======================*/
3531
/* out: space id, ULINT_UNDEFINED if not
found */
3533
const char* name) /* in: table name in the standard
3534
'databasename/tablename' format */
3536
fil_system_t* system = fil_system;
3537
fil_space_t* namespace;
3538
ulint id = ULINT_UNDEFINED;
3543
mutex_enter(&(system->mutex));
3545
path = fil_make_ibd_name(name, FALSE);
3547
/* Look if there is a space with the same name; the name is the
3548
directory path to the file */
3550
HASH_SEARCH(name_hash, system->name_hash,
3551
ut_fold_string(path), namespace,
3552
0 == strcmp(namespace->name, path));
3559
mutex_exit(&(system->mutex));
3564
/**************************************************************************
3565
Tries to extend a data file so that it would accommodate the number of pages
3566
given. The tablespace must be cached in the memory cache. If the space is big
3567
enough already, does nothing. */
3570
fil_extend_space_to_desired_size(
3571
/*=============================*/
3572
/* out: TRUE if success */
3573
ulint* actual_size, /* out: size of the space after extension;
3574
if we ran out of disk space this may be lower
3575
than the desired size */
3576
ulint space_id, /* in: space id */
3577
ulint size_after_extend)/* in: desired size in pages after the
3578
extension; if the current space size is bigger
3579
than this already, the function does nothing */
3581
fil_system_t* system = fil_system;
3587
ulint start_page_no;
3588
ulint file_start_page_no;
3591
ibool success = TRUE;
3593
fil_mutex_enter_and_prepare_for_io(space_id);
3595
HASH_SEARCH(hash, system->spaces, space_id, space,
3596
space->id == space_id);
3599
if (space->size >= size_after_extend) {
3600
/* Space already big enough */
3602
*actual_size = space->size;
3604
mutex_exit(&(system->mutex));
3609
node = UT_LIST_GET_LAST(space->chain);
3611
fil_node_prepare_for_io(node, system, space);
3613
start_page_no = space->size;
3614
file_start_page_no = space->size - node->size;
3616
/* Extend at most 64 pages at a time */
3617
buf_size = ut_min(64, size_after_extend - start_page_no)
3619
buf2 = mem_alloc(buf_size + UNIV_PAGE_SIZE);
3620
buf = ut_align(buf2, UNIV_PAGE_SIZE);
3622
memset(buf, 0, buf_size);
3624
while (start_page_no < size_after_extend) {
3625
ulint n_pages = ut_min(buf_size / UNIV_PAGE_SIZE,
3626
size_after_extend - start_page_no);
3628
offset_high = (start_page_no - file_start_page_no)
3629
/ (4096 * ((1024 * 1024) / UNIV_PAGE_SIZE));
3630
offset_low = ((start_page_no - file_start_page_no)
3631
% (4096 * ((1024 * 1024) / UNIV_PAGE_SIZE)))
3633
#ifdef UNIV_HOTBACKUP
3634
success = os_file_write(node->name, node->handle, buf,
3635
offset_low, offset_high,
3636
UNIV_PAGE_SIZE * n_pages);
3638
success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
3639
node->name, node->handle, buf,
3640
offset_low, offset_high,
3641
UNIV_PAGE_SIZE * n_pages,
3645
node->size += n_pages;
3646
space->size += n_pages;
3648
os_has_said_disk_full = FALSE;
3650
/* Let us measure the size of the file to determine
3651
how much we were able to extend it */
3654
(os_file_get_size_as_iblonglong
3656
/ UNIV_PAGE_SIZE)) - node->size;
3658
node->size += n_pages;
3659
space->size += n_pages;
3664
start_page_no += n_pages;
3669
fil_node_complete_io(node, system, OS_FILE_WRITE);
3671
*actual_size = space->size;
3673
#ifndef UNIV_HOTBACKUP
3674
if (space_id == 0) {
3675
ulint pages_per_mb = (1024 * 1024) / UNIV_PAGE_SIZE;
3677
/* Keep the last data file size info up to date, rounded to
full megabytes */
3680
srv_data_file_sizes[srv_n_data_files - 1]
3681
= (node->size / pages_per_mb) * pages_per_mb;
3683
#endif /* !UNIV_HOTBACKUP */
3686
/* printf("Extended %s to %lu, actual size %lu pages\n", space->name,
3687
size_after_extend, *actual_size); */
3688
mutex_exit(&(system->mutex));
3690
fil_flush(space_id);
3695
#ifdef UNIV_HOTBACKUP
3696
/************************************************************************
3697
Extends all tablespaces to the size stored in the space header. During the
3698
ibbackup --apply-log phase we extended the spaces on-demand so that log records
3699
could be applied, but that may have left spaces still too small compared to
3700
the size stored in the space header. */
3703
fil_extend_tablespaces_to_stored_len(void)
3704
/*======================================*/
3706
fil_system_t* system = fil_system;
3710
ulint size_in_header;
3714
buf = mem_alloc(UNIV_PAGE_SIZE);
3716
mutex_enter(&(system->mutex));
3718
space = UT_LIST_GET_FIRST(system->space_list);
3721
ut_a(space->purpose == FIL_TABLESPACE);
3723
mutex_exit(&(system->mutex)); /* no need to protect with a
3724
mutex, because this is a
3725
single-threaded operation */
3726
error = fil_read(TRUE, space->id, 0, 0, UNIV_PAGE_SIZE, buf,
3728
ut_a(error == DB_SUCCESS);
3730
size_in_header = fsp_get_size_low(buf);
3732
success = fil_extend_space_to_desired_size(
3733
&actual_size, space->id, size_in_header);
3736
"InnoDB: Error: could not extend the"
3737
" tablespace of %s\n"
3738
"InnoDB: to the size stored in header,"
3740
"InnoDB: size after extension %lu pages\n"
3741
"InnoDB: Check that you have free disk space"
3743
space->name, size_in_header, actual_size);
3747
mutex_enter(&(system->mutex));
3749
space = UT_LIST_GET_NEXT(space_list, space);
3752
mutex_exit(&(system->mutex));
3758
/*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
3760
/***********************************************************************
3761
Tries to reserve free extents in a file space. */
3764
fil_space_reserve_free_extents(
3765
/*===========================*/
3766
/* out: TRUE if succeed */
3767
ulint id, /* in: space id */
3768
ulint n_free_now, /* in: number of free extents now */
3769
ulint n_to_reserve) /* in: how many one wants to reserve */
3771
fil_system_t* system = fil_system;
3777
mutex_enter(&(system->mutex));
3779
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
3783
if (space->n_reserved_extents + n_to_reserve > n_free_now) {
3786
space->n_reserved_extents += n_to_reserve;
3790
mutex_exit(&(system->mutex));
3795
/***********************************************************************
3796
Releases free extents in a file space. */
3799
fil_space_release_free_extents(
3800
/*===========================*/
3801
ulint id, /* in: space id */
3802
ulint n_reserved) /* in: how many one reserved */
3804
fil_system_t* system = fil_system;
3809
mutex_enter(&(system->mutex));
3811
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
3814
ut_a(space->n_reserved_extents >= n_reserved);
3816
space->n_reserved_extents -= n_reserved;
3818
mutex_exit(&(system->mutex));
3821
/***********************************************************************
3822
Gets the number of reserved extents. If the database is silent, this number
should be zero. */
3826
fil_space_get_n_reserved_extents(
3827
/*=============================*/
3828
ulint id) /* in: space id */
3830
fil_system_t* system = fil_system;
3836
mutex_enter(&(system->mutex));
3838
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
3842
n = space->n_reserved_extents;
3844
mutex_exit(&(system->mutex));
3849
/*============================ FILE I/O ================================*/
3851
/************************************************************************
3852
NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
3854
Prepares a file node for i/o. Opens the file if it is closed. Updates the
3855
pending i/o's field in the node and the system appropriately. Takes the node
3856
off the LRU list if it is in the LRU list. The caller must hold the fil_sys
mutex. */
3860
fil_node_prepare_for_io(
3861
/*====================*/
3862
fil_node_t* node, /* in: file node */
3863
fil_system_t* system, /* in: tablespace memory cache */
3864
fil_space_t* space) /* in: space */
3866
ut_ad(node && system && space);
3867
ut_ad(mutex_own(&(system->mutex)));
3869
if (system->n_open > system->max_n_open + 5) {
3870
ut_print_timestamp(stderr);
3872
" InnoDB: Warning: open files %lu"
3873
" exceeds the limit %lu\n",
3874
(ulong) system->n_open,
3875
(ulong) system->max_n_open);
3878
if (node->open == FALSE) {
3879
/* File is closed: open it */
3880
ut_a(node->n_pending == 0);
3882
fil_node_open_file(node, system, space);
3885
if (node->n_pending == 0 && space->purpose == FIL_TABLESPACE
3886
&& space->id != 0) {
3887
/* The node is in the LRU list, remove it */
3889
ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
3891
UT_LIST_REMOVE(LRU, system->LRU, node);
3897
/************************************************************************
3898
Updates the data structures when an i/o operation finishes. Updates the
3899
pending i/o's field in the node appropriately. The caller must own
system->mutex and node->n_pending must be greater than zero on entry; both
conditions are asserted at the top of the body. */
3902
fil_node_complete_io(
3903
/*=================*/
3904
fil_node_t* node, /* in: file node whose i/o has finished */
3905
fil_system_t* system, /* in: tablespace memory cache */
3906
ulint type) /* in: OS_FILE_WRITE or OS_FILE_READ; marks
3907
the node as modified if
3908
type == OS_FILE_WRITE */
3912
ut_ad(mutex_own(&(system->mutex)));
3914
ut_a(node->n_pending > 0);
3918
/* A write advances the system-wide modification counter and stamps the
node with the new value. The node's space is then flagged as being in
system->unflushed_spaces, unless the flag is already set. */
if (type == OS_FILE_WRITE) {
3919
system->modification_counter++;
3920
node->modification_counter = system->modification_counter;
3922
if (!node->space->is_in_unflushed_spaces) {
3924
node->space->is_in_unflushed_spaces = TRUE;
3925
UT_LIST_ADD_FIRST(unflushed_spaces,
3926
system->unflushed_spaces,
3931
/* An idle node (n_pending == 0) of a FIL_TABLESPACE space other than
space 0 is inserted at the front of system->LRU. */
if (node->n_pending == 0 && node->space->purpose == FIL_TABLESPACE
3932
&& node->space->id != 0) {
3933
/* The node must be put back to the LRU list */
3934
UT_LIST_ADD_FIRST(LRU, system->LRU, node);
3938
/************************************************************************
3939
Report information about an invalid page access. The message reports the
page number, the space name, the byte offset, the length and the i/o type of
the request, and tells the user to check that my.cnf matches the ibdata
files.
NOTE(review): six arguments follow the format text but only five conversions
(four %lu and one %s) are visible in this listing, and space_id has no
visible conversion; a format line appears to be missing here - confirm
against the complete file. */
3942
fil_report_invalid_page_access(
3943
/*===========================*/
3944
ulint block_offset, /* in: block offset */
3945
ulint space_id, /* in: space id */
3946
const char* space_name, /* in: space name */
3947
ulint byte_offset, /* in: byte offset */
3948
ulint len, /* in: I/O length */
3949
ulint type) /* in: I/O type */
3952
/* Format text; the arguments are passed in the order block_offset,
space_id, space_name, byte_offset, len, type, each integer cast to
ulong. */
"InnoDB: Error: trying to access page number %lu"
3954
"InnoDB: space name %s,\n"
3955
"InnoDB: which is outside the tablespace bounds.\n"
3956
"InnoDB: Byte offset %lu, len %lu, i/o type %lu.\n"
3957
"InnoDB: If you get this error at mysqld startup,"
3958
" please check that\n"
3959
"InnoDB: your my.cnf matches the ibdata files"
3960
" that you have in the\n"
3961
"InnoDB: MySQL server.\n",
3962
(ulong) block_offset, (ulong) space_id, space_name,
3963
(ulong) byte_offset, (ulong) len, (ulong) type);
3966
/************************************************************************
3967
Reads or writes data. This operation is asynchronous (aio). The space is
looked up by space_id under the fil_system mutex, block_offset is resolved
to a file node in the chain of the space, and the request is then passed to
os_aio() (or, when UNIV_HOTBACKUP is defined, to os_file_read() or
os_file_write()) with the file offset given as separate low and high 32-bit
parts. */
3972
/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
3973
if we are trying to do i/o on a tablespace
3974
which does not exist */
3975
ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE,
3976
ORed to OS_FILE_LOG, if a log i/o
3977
and ORed to OS_AIO_SIMULATED_WAKE_LATER
3978
if simulated aio and we want to post a
3979
batch of i/os; NOTE that a simulated batch
3980
may introduce hidden chances of deadlocks,
3981
because i/os are not actually handled until
3982
all have been posted: use with great
3984
ibool sync, /* in: TRUE if synchronous aio is desired */
3985
ulint space_id, /* in: space id */
3986
ulint block_offset, /* in: offset in number of blocks */
3987
ulint byte_offset, /* in: remainder of offset in bytes; in
3988
aio this must be divisible by the OS block
3990
ulint len, /* in: how many bytes to read or write; this
3991
must not cross a file boundary; in aio this
3992
must be a block size multiple */
3993
void* buf, /* in/out: buffer where to store read data
3994
or from where to write; in aio this must be
3995
appropriately aligned */
3996
void* message) /* in: message for aio handler if non-sync
3997
aio used, else ignored */
3999
fil_system_t* system = fil_system;
4009
/* Split the flag bits out of type: OS_FILE_LOG is remembered in is_log
and OS_AIO_SIMULATED_WAKE_LATER in wake_later, and each bit is cleared
from type once it has been recorded. */
is_log = type & OS_FILE_LOG;
4010
type = type & ~OS_FILE_LOG;
4012
wake_later = type & OS_AIO_SIMULATED_WAKE_LATER;
4013
type = type & ~OS_AIO_SIMULATED_WAKE_LATER;
4015
/* byte_offset is a remainder and must be smaller than one page. */
ut_ad(byte_offset < UNIV_PAGE_SIZE);
4018
/* The file offset below is computed by shifting with
UNIV_PAGE_SIZE_SHIFT, so the shift must agree with UNIV_PAGE_SIZE. */
ut_a((1 << UNIV_PAGE_SIZE_SHIFT) == UNIV_PAGE_SIZE);
4019
ut_ad(fil_validate());
4020
#ifndef UNIV_LOG_DEBUG
4021
/* ibuf bitmap pages must be read in the sync aio mode: */
4022
ut_ad(recv_no_ibuf_operations || (type == OS_FILE_WRITE)
4023
|| !ibuf_bitmap_page(block_offset) || sync || is_log);
4024
#ifdef UNIV_SYNC_DEBUG
4025
ut_ad(!ibuf_inside() || is_log || (type == OS_FILE_WRITE)
4026
|| ibuf_page(space_id, block_offset));
4031
} else if (type == OS_FILE_READ && !is_log
4032
&& ibuf_page(space_id, block_offset)) {
4034
} else if (is_log) {
4037
mode = OS_AIO_NORMAL;
4040
/* Add len to srv_data_read or srv_data_written, depending on the
direction of the transfer. */
if (type == OS_FILE_READ) {
4041
srv_data_read+= len;
4042
} else if (type == OS_FILE_WRITE) {
4043
srv_data_written+= len;
4046
/* Reserve the fil_system mutex and make sure that we can open at
4047
least one file while holding it, if the file is not already open */
4049
fil_mutex_enter_and_prepare_for_io(space_id);
4051
/* Look the space up by its id in the system->spaces hash table. */
HASH_SEARCH(hash, system->spaces, space_id, space,
4052
space->id == space_id);
4054
/* Error path for a space that does not exist: release the mutex, print
a timestamped error and return DB_TABLESPACE_DELETED. */
mutex_exit(&(system->mutex));
4056
ut_print_timestamp(stderr);
4058
" InnoDB: Error: trying to do i/o"
4059
" to a tablespace which does not exist.\n"
4060
"InnoDB: i/o type %lu, space id %lu,"
4061
" page no. %lu, i/o length %lu bytes\n",
4062
(ulong) type, (ulong) space_id, (ulong) block_offset,
4065
return(DB_TABLESPACE_DELETED);
4068
ut_ad((mode != OS_AIO_IBUF) || (space->purpose == FIL_TABLESPACE));
4070
/* Start the search for the file containing block_offset from the first
node in the chain of the space. */
node = UT_LIST_GET_FIRST(space->chain);
4074
fil_report_invalid_page_access(
4075
block_offset, space_id, space->name,
4076
byte_offset, len, type);
4081
if (space->id != 0 && node->size == 0) {
4082
/* We do not know the size of a single-table tablespace
4083
before we open the file */
4088
/* The block lies in this node when the node holds more than
block_offset pages. */
if (node->size > block_offset) {
4092
/* Otherwise make block_offset relative to the start of the next node
and move on in the chain. */
block_offset -= node->size;
4093
node = UT_LIST_GET_NEXT(chain, node);
4097
/* Open file if closed */
4098
fil_node_prepare_for_io(node, system, space);
4100
/* Check that at least the start offset is within the bounds of a
4101
single-table tablespace */
4102
if (space->purpose == FIL_TABLESPACE && space->id != 0
4103
&& node->size <= block_offset) {
4105
fil_report_invalid_page_access(
4106
block_offset, space_id, space->name, byte_offset,
4112
/* Now we have made the changes in the data structures of system */
4113
mutex_exit(&(system->mutex));
4115
/* Calculate the low 32 bits and the high 32 bits of the file offset */
4117
/* block_offset counts pages: shifting it left by UNIV_PAGE_SIZE_SHIFT
gives a byte position, whose bits above the low 32 are obtained by the
right shift below. */
offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT));
4118
offset_low = ((block_offset << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL)
4121
/* The pages left in this node after block_offset must be enough to
hold byte_offset + len bytes, rounded up to whole pages. */
ut_a(node->size - block_offset
4122
>= (byte_offset + len + (UNIV_PAGE_SIZE - 1)) / UNIV_PAGE_SIZE);
4126
/* Both the byte offset and the length must be multiples of
OS_FILE_LOG_BLOCK_SIZE. */
ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
4127
ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
4129
#ifdef UNIV_HOTBACKUP
4130
/* In ibbackup do normal i/o, not aio */
4131
if (type == OS_FILE_READ) {
4132
ret = os_file_read(node->handle, buf, offset_low, offset_high,
4135
ret = os_file_write(node->name, node->handle, buf,
4136
offset_low, offset_high, len);
4139
/* Queue the aio request */
4140
ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
4141
offset_low, offset_high, len, node, message);
4145
/* For OS_AIO_SYNC the completion bookkeeping is done right here:
fil_node_complete_io() is called with system->mutex held. */
if (mode == OS_AIO_SYNC) {
4146
/* The i/o operation is already completed when we return from
4149
mutex_enter(&(system->mutex));
4151
fil_node_complete_io(node, system, type);
4153
mutex_exit(&(system->mutex));
4155
ut_ad(fil_validate());
4161
/************************************************************************
4162
Reads data from a space to a buffer. Remember that the possible incomplete
4163
blocks at the end of file are ignored: they are not taken into account when
4164
calculating the byte offset within a space. All arguments are forwarded
unchanged to fil_io() with the i/o type OS_FILE_READ. */
4169
/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
4170
if we are trying to do i/o on a tablespace
4171
which does not exist */
4172
ibool sync, /* in: TRUE if synchronous aio is desired */
4173
ulint space_id, /* in: space id */
4174
ulint block_offset, /* in: offset in number of blocks */
4175
ulint byte_offset, /* in: remainder of offset in bytes; in aio
4176
this must be divisible by the OS block size */
4177
ulint len, /* in: how many bytes to read; this must not
4178
cross a file boundary; in aio this must be a
4179
block size multiple */
4180
void* buf, /* in/out: buffer where to store data read;
4181
in aio this must be appropriately aligned */
4182
void* message) /* in: message for aio handler if non-sync
4183
aio used, else ignored */
4185
/* The return value of fil_io() is passed straight back to the
caller. */
return(fil_io(OS_FILE_READ, sync, space_id, block_offset,
4186
byte_offset, len, buf, message));
4189
/************************************************************************
4190
Writes data to a space from a buffer. Remember that the possible incomplete
4191
blocks at the end of file are ignored: they are not taken into account when
4192
calculating the byte offset within a space. All arguments are forwarded
unchanged to fil_io() with the i/o type OS_FILE_WRITE. */
4197
/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
4198
if we are trying to do i/o on a tablespace
4199
which does not exist */
4200
ibool sync, /* in: TRUE if synchronous aio is desired */
4201
ulint space_id, /* in: space id */
4202
ulint block_offset, /* in: offset in number of blocks */
4203
ulint byte_offset, /* in: remainder of offset in bytes; in aio
4204
this must be divisible by the OS block size */
4205
ulint len, /* in: how many bytes to write; this must
4206
not cross a file boundary; in aio this must
4207
be a block size multiple */
4208
void* buf, /* in: buffer from which to write; in aio
4209
this must be appropriately aligned */
4210
void* message) /* in: message for aio handler if non-sync
4211
aio used, else ignored */
4213
/* The return value of fil_io() is passed straight back to the
caller. */
return(fil_io(OS_FILE_WRITE, sync, space_id, block_offset,
4214
byte_offset, len, buf, message));
4217
/**************************************************************************
4218
Waits for an aio operation to complete. This function is used to write the
4219
handler for completed requests. The aio array of pending requests is divided
4220
into segments (see os0file.c for more info). The thread specifies which
4221
segment it wants to wait for. Once a request has been picked up, the file
node is updated with fil_node_complete_io() under the fil_system mutex and
the message of the request is handed to buf_page_io_complete() or
log_io_complete(). */
4226
ulint segment) /* in: the number of the segment in the aio
4227
array to wait for */
4229
fil_system_t* system = fil_system;
4231
fil_node_t* fil_node;
4235
ut_ad(fil_validate());
4237
/* With native aio the handler variant is selected at compile time
(a Windows handler, or a POSIX one when POSIX_ASYNC_IO is defined).
The simulated aio handler further down presumably belongs to the else
branch, which is not visible in this listing. The op info string set
before each call names the step the i/o thread is in. */
if (os_aio_use_native_aio) {
4238
srv_set_io_thread_op_info(segment, "native aio handle");
4240
ret = os_aio_windows_handle(segment, 0, &fil_node,
4242
#elif defined(POSIX_ASYNC_IO)
4243
ret = os_aio_posix_handle(segment, &fil_node, &message);
4245
ret = 0; /* Eliminate compiler warning */
4249
srv_set_io_thread_op_info(segment, "simulated aio handle");
4251
ret = os_aio_simulated_handle(segment, &fil_node,
4257
srv_set_io_thread_op_info(segment, "complete io for fil node");
4259
/* Update the bookkeeping of the node that the handler returned, with
system->mutex held. */
mutex_enter(&(system->mutex));
4261
fil_node_complete_io(fil_node, fil_system, type);
4263
mutex_exit(&(system->mutex));
4265
ut_ad(fil_validate());
4267
/* Do the i/o handling */
4268
/* IMPORTANT: since i/o handling for reads will read also the insert
4269
buffer in tablespace 0, you have to be very careful not to introduce
4270
deadlocks in the i/o system. We keep tablespace 0 data files always
4271
open, and use a special i/o thread to serve insert buffer requests. */
4273
/* A message for which buf_pool_is_block() holds is completed as a
buffer page i/o; the log completion call below presumably sits in the
else branch, which is not visible in this listing. */
if (buf_pool_is_block(message)) {
4274
srv_set_io_thread_op_info(segment, "complete io for buf page");
4275
buf_page_io_complete(message);
4277
srv_set_io_thread_op_info(segment, "complete io for log");
4278
log_io_complete(message);
4282
/**************************************************************************
4283
Flushes to disk possible writes cached by the OS. If the space does not exist
4284
or is being dropped, does not do anything. For a node of the space whose
modification_counter is ahead of its flush_counter, os_file_flush() is called
on the node's file handle while the fil_system mutex is released, and
flush_counter is afterwards raised to the modification counter value that
was sampled before the flush. When another flush of the same node is already
pending, the thread sleeps with the mutex released and then re-checks the
node instead of flushing concurrently. The is_in_unflushed_spaces flag of
the space is cleared once fil_space_is_flushed() holds for it. */
4289
ulint space_id) /* in: file space id (this can be a group of
4290
log files or a tablespace of the database) */
4292
fil_system_t* system = fil_system;
4296
ib_longlong old_mod_counter;
4298
mutex_enter(&(system->mutex));
4300
/* Look the space up by its id in the system->spaces hash table. */
HASH_SEARCH(hash, system->spaces, space_id, space,
4301
space->id == space_id);
4302
/* A missing space, or one that is being deleted, is left alone: the
mutex is released again. */
if (!space || space->is_being_deleted) {
4303
mutex_exit(&(system->mutex));
4308
space->n_pending_flushes++; /* prevent dropping of the space while
4310
node = UT_LIST_GET_FIRST(space->chain);
4313
if (node->modification_counter > node->flush_counter) {
4316
/* We want to flush the changes at least up to
4318
old_mod_counter = node->modification_counter;
4320
if (space->purpose == FIL_TABLESPACE) {
4321
fil_n_pending_tablespace_flushes++;
4323
fil_n_pending_log_flushes++;
4324
fil_n_log_flushes++;
4327
if (node->is_raw_disk) {
4333
if (node->n_pending_flushes > 0) {
4334
/* We want to avoid calling os_file_flush() on
4335
the file twice at the same time, because we do
4336
not know what bugs OS's may contain in file
4337
i/o; sleep for a while */
4339
/* system->mutex is released for the duration of the sleep and taken
again afterwards. */
mutex_exit(&(system->mutex));
4341
os_thread_sleep(20000);
4343
mutex_enter(&(system->mutex));
4345
/* Presumably the flush that was pending during the sleep may already
have covered everything up to old_mod_counter - confirm. */
if (node->flush_counter >= old_mod_counter) {
4354
/* Copy the handle and count this flush as pending on the node before
the mutex is released for the os_file_flush() call. */
file = node->handle;
4355
node->n_pending_flushes++;
4357
mutex_exit(&(system->mutex));
4359
/* fprintf(stderr, "Flushing to file %s\n",
4362
os_file_flush(file);
4364
mutex_enter(&(system->mutex));
4366
node->n_pending_flushes--;
4368
if (node->flush_counter < old_mod_counter) {
4369
node->flush_counter = old_mod_counter;
4371
if (space->is_in_unflushed_spaces
4372
&& fil_space_is_flushed(space)) {
4374
space->is_in_unflushed_spaces = FALSE;
4378
system->unflushed_spaces,
4383
if (space->purpose == FIL_TABLESPACE) {
4384
fil_n_pending_tablespace_flushes--;
4386
fil_n_pending_log_flushes--;
4390
node = UT_LIST_GET_NEXT(chain, node);
4393
space->n_pending_flushes--;
4395
mutex_exit(&(system->mutex));
4398
/**************************************************************************
4399
Flushes to disk the writes in file spaces of the given type possibly cached by
4403
fil_flush_file_spaces(
4404
/*==================*/
4405
ulint purpose) /* in: FIL_TABLESPACE, FIL_LOG; only spaces whose
purpose field equals this value are flushed */
4407
fil_system_t* system = fil_system;
4413
mutex_enter(&(system->mutex));
4415
/* The current length of system->unflushed_spaces is the largest number
of ids that can be collected below; an empty list means there is
nothing to flush and the mutex is released again. */
n_space_ids = UT_LIST_GET_LEN(system->unflushed_spaces);
4416
if (n_space_ids == 0) {
4418
mutex_exit(&system->mutex);
4422
/* Assemble a list of space ids to flush. Previously, we
4423
traversed system->unflushed_spaces and called UT_LIST_GET_NEXT()
4424
on a space that was just removed from the list by fil_flush().
4425
Thus, the space could be dropped and the memory overwritten. */
4426
space_ids = mem_alloc(n_space_ids * sizeof *space_ids);
4430
/* NOTE(review): n_space_ids is used as the fill index of space_ids in
the loop below; its reset to 0 is not visible in this listing -
confirm against the complete file. */
for (space = UT_LIST_GET_FIRST(system->unflushed_spaces);
4432
space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
4434
/* Collect only spaces of the requested purpose that are not being
deleted. */
if (space->purpose == purpose && !space->is_being_deleted) {
4436
space_ids[n_space_ids++] = space->id;
4440
/* The flushing below works on the collected ids only, after the mutex
has been released. */
mutex_exit(&system->mutex);
4442
/* Flush the spaces. It will not hurt to call fil_flush() on
4443
a non-existing space id. */
4444
for (i = 0; i < n_space_ids; i++) {
4446
fil_flush(space_ids[i]);
4449
mem_free(space_ids);
4452
/**********************************************************************
4453
Checks the consistency of the tablespace cache. The fil_system mutex is held
from the first check to the last. The checks are made with ut_a() and with
UT_LIST_VALIDATE() on the node chain of every space and on the LRU list. */
4458
/* out: TRUE if ok */
4460
fil_system_t* system = fil_system;
4462
fil_node_t* fil_node;
4466
mutex_enter(&(system->mutex));
4468
/* Look for spaces in the hash table */
4470
/* Visit every cell of the system->spaces hash table and every space
chained in a cell. */
for (i = 0; i < hash_get_n_cells(system->spaces); i++) {
4472
space = HASH_GET_FIRST(system->spaces, i);
4474
while (space != NULL) {
4475
UT_LIST_VALIDATE(chain, fil_node_t, space->chain);
4477
fil_node = UT_LIST_GET_FIRST(space->chain);
4479
while (fil_node != NULL) {
4480
/* A node with pending i/o must be open. */
if (fil_node->n_pending > 0) {
4481
ut_a(fil_node->open);
4484
if (fil_node->open) {
4487
fil_node = UT_LIST_GET_NEXT(chain, fil_node);
4489
space = HASH_GET_NEXT(hash, space);
4493
/* NOTE(review): n_open is presumably the number of open nodes counted
in the loop above; the counting statement is not visible in this
listing - confirm. */
ut_a(system->n_open == n_open);
4495
UT_LIST_VALIDATE(LRU, fil_node_t, system->LRU);
4497
fil_node = UT_LIST_GET_FIRST(system->LRU);
4499
/* Every node in the LRU list must be open, have no pending i/o, and
belong to a FIL_TABLESPACE space whose id is not 0. */
while (fil_node != NULL) {
4500
ut_a(fil_node->n_pending == 0);
4501
ut_a(fil_node->open);
4502
ut_a(fil_node->space->purpose == FIL_TABLESPACE);
4503
ut_a(fil_node->space->id != 0);
4505
fil_node = UT_LIST_GET_NEXT(LRU, fil_node);
4508
mutex_exit(&(system->mutex));
4513
/************************************************************************
4514
Returns TRUE if file address is undefined. The test made is whether the page
field of the address equals FIL_NULL. */
4518
/* out: TRUE if undefined */
4519
fil_addr_t addr) /* in: address; passed by value */
4521
/* Only the page field of the address is examined here. */
if (addr.page == FIL_NULL) {
4529
/************************************************************************
4530
Accessor functions for a file page. fil_page_get_prev() returns the value
that mach_read_from_4() reads at offset FIL_PAGE_PREV of the page. */
4533
fil_page_get_prev(byte* page)
4535
return(mach_read_from_4(page + FIL_PAGE_PREV));
4539
/* Returns the value that mach_read_from_4() reads at offset FIL_PAGE_NEXT
of the page. */
fil_page_get_next(byte* page)
4541
return(mach_read_from_4(page + FIL_PAGE_NEXT));
4544
/*************************************************************************
4545
Sets the file page type. The value is stored with mach_write_to_2() at
offset FIL_PAGE_TYPE of the page. */
4550
byte* page, /* in: file page */
4551
ulint type) /* in: type value to store */
4555
mach_write_to_2(page + FIL_PAGE_TYPE, type);
4558
/*************************************************************************
4559
Gets the file page type. The value is read with mach_read_from_2() at offset
FIL_PAGE_TYPE of the page. */
4564
/* out: type; NOTE that if the type has not been
4565
written to the page, the return value is not defined */
4566
byte* page) /* in: file page */
4570
return(mach_read_from_2(page + FIL_PAGE_TYPE));