~akopytov/percona-xtrabackup/bug1210266-2.1

477.1.1 by Alexey Kopytov
Compact backups implementation:
/******************************************************
XtraBackup: hot backup tool for InnoDB
(c) 2009-2013 Percona Ireland Ltd.
Originally Created 3/3/2009 Yasufumi Kinoshita
Written by Alexey Kopytov, Aleksandr Kuzminsky, Stewart Smith, Vadim Tkachenko,
Yasufumi Kinoshita, Ignacio Nin and Baron Schwartz.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

*******************************************************/
22
23
/* Compact backups implementation */
24
25
#include <my_base.h>
26
#include "common.h"
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
27
#if MYSQL_VERSION_ID >= 50600
28
#include "table.h"
29
#endif
488 by Laurynas Biveinis
Merge C++ build support from 2.0
30
#include "innodb_int.h"
477.1.1 by Alexey Kopytov
Compact backups implementation:
31
#include "write_filt.h"
32
#include "fil_cur.h"
33
#include "xtrabackup.h"
34
#include "ds_buffer.h"
35
#include "xb0xb.h"
36
37
/* Number of the first primary key page in an .ibd file */
38
#define XB_FIRST_CLUSTERED_INDEX_PAGE_NO 3
39
40
/* Suffix for page map files */
41
#define XB_PAGE_MAP_SUFFIX ".pmap"
42
#define XB_TMPFILE_SUFFIX ".tmp"
43
44
/* Page range */
583.4.1 by Alexey Kopytov
Implementation of
45
struct page_range_t {
477.1.1 by Alexey Kopytov
Compact backups implementation:
46
	ulint	from;			/*!< range start */
47
	ulint	to;			/*!< range end */
583.4.1 by Alexey Kopytov
Implementation of
48
};
477.1.1 by Alexey Kopytov
Compact backups implementation:
49
50
/* Cursor in a page map file */
583.4.1 by Alexey Kopytov
Implementation of
51
struct page_map_cursor_t {
477.1.1 by Alexey Kopytov
Compact backups implementation:
52
	File		fd;	/*!< file descriptor */
53
	IO_CACHE	cache;	/*!< IO_CACHE associated with fd */
583.4.1 by Alexey Kopytov
Implementation of
54
};
55
56
/* Table descriptor for the index rebuild operation */
57
struct index_rebuild_table_t {
58
	char*	name;					/* table name */
59
	ulint	space_id;				/* space ID */
60
	UT_LIST_NODE_T(index_rebuild_table_t)	list;	/* list node */
61
};
62
63
/* Thread descriptor for the index rebuild operation */
64
struct index_rebuild_thread_t {
65
	ulint		num;    /* thread number */
66
	pthread_t	id;	/* thread ID */
67
};
477.1.1 by Alexey Kopytov
Compact backups implementation:
68
69
/* Empty page use to replace skipped pages in the data files */
70
static byte		empty_page[UNIV_PAGE_SIZE_MAX];
71
static const char	compacted_page_magic[] = "COMPACTP";
72
static const size_t	compacted_page_magic_size =
73
	sizeof(compacted_page_magic) - 1;
74
static const ulint	compacted_page_magic_offset = FIL_PAGE_DATA;
75
583.4.1 by Alexey Kopytov
Implementation of
76
/* Mutex protecting table_list */
77
static pthread_mutex_t					table_list_mutex;
78
/* List of tablespaces to process by the index rebuild operation */
79
static UT_LIST_BASE_NODE_T(index_rebuild_table_t)	table_list;
80
81
477.1.1 by Alexey Kopytov
Compact backups implementation:
82
/************************************************************************
83
Compact page filter. */
84
static my_bool wf_compact_init(xb_write_filt_ctxt_t *ctxt, char *dst_name,
85
			       xb_fil_cur_t *cursor);
86
static my_bool wf_compact_process(xb_write_filt_ctxt_t *ctxt,
87
				  ds_file_t *dstfile);
88
static my_bool wf_compact_finalize(xb_write_filt_ctxt_t *ctxt,
89
				   ds_file_t *dstfile);
90
xb_write_filt_t wf_compact = {
91
	&wf_compact_init,
92
	&wf_compact_process,
93
	&wf_compact_finalize,
94
	NULL
95
};
96
97
/************************************************************************
98
Initialize the compact page filter.
99
100
@return TRUE on success, FALSE on error. */
101
static my_bool
102
wf_compact_init(xb_write_filt_ctxt_t *ctxt,
103
		char *dst_name __attribute__((unused)), xb_fil_cur_t *cursor)
104
{
105
	xb_wf_compact_ctxt_t	*cp = &(ctxt->u.wf_compact_ctxt);
106
	char			 page_map_name[FN_REFLEN];
107
	MY_STAT			 mystat;
108
109
	ctxt->cursor = cursor;
110
	cp->clustered_index_found = FALSE;
111
	cp->inside_skipped_range = FALSE;
112
	cp->free_limit = 0;
113
114
	/* Don't compact the system table space */
115
	cp->skip = cursor->is_system;
116
	if (cp->skip) {
117
		return(TRUE);
118
	}
119
120
	snprintf(page_map_name, sizeof(page_map_name), "%s%s", dst_name,
121
		 XB_PAGE_MAP_SUFFIX);
122
123
	cp->ds_buffer = ds_create(xtrabackup_target_dir, DS_TYPE_BUFFER);
124
	if (cp->ds_buffer == NULL) {
125
		return(FALSE);
126
	}
127
128
	ds_set_pipe(cp->ds_buffer, ds_meta);
129
130
	memset(&mystat, 0, sizeof(mystat));
131
	mystat.st_mtime = my_time(0);
132
	cp->buffer = ds_open(cp->ds_buffer, page_map_name, &mystat);
133
	if (cp->buffer == NULL) {
134
		msg("xtrabackup: Error: cannot open output stream for %s\n",
135
		    page_map_name);
136
		return(FALSE);
137
	}
138
139
	return(TRUE);
140
}
141
142
/************************************************************************
143
Check if the specified page should be skipped. We currently skip all
144
non-clustered index pages for compact backups.
145
146
@return TRUE if the page should be skipped. */
147
static my_bool
148
check_if_skip_page(xb_wf_compact_ctxt_t *cp, xb_fil_cur_t *cursor, ulint offset)
149
{
150
	byte		*page;
151
	ulint		 page_no;
152
	ulint		 page_type;
153
	INDEX_ID_T	 index_id;
154
155
156
	xb_ad(cursor->is_system == FALSE);
157
158
	page = cursor->buf + cursor->page_size * offset;
159
	page_no = cursor->buf_page_no + offset;
160
	page_type = fil_page_get_type(page);
161
162
	if (UNIV_UNLIKELY(page_no == 0)) {
163
164
		cp->free_limit = mach_read_from_4(page + FSP_HEADER_OFFSET +
165
						  FSP_FREE_LIMIT);
166
	} else if (UNIV_UNLIKELY(page_no == XB_FIRST_CLUSTERED_INDEX_PAGE_NO)) {
167
168
		xb_ad(cp->clustered_index_found == FALSE);
169
170
		if (page_type != FIL_PAGE_INDEX) {
171
172
			/* Uninitialized clustered index root page, there's
173
			nothing we can do to compact the space.*/
174
175
			msg("[%02u] Uninitialized page type value (%lu) in the "
176
			    "clustered index root page of tablespace %s. "
177
			    "Will not be compacted.\n",
178
			    cursor->thread_n,
546.1.3 by Alexey Kopytov
Manual merge from 2.0.
179
			    page_type, cursor->rel_path);
477.1.1 by Alexey Kopytov
Compact backups implementation:
180
181
			cp->skip = TRUE;
182
183
			return(FALSE);
184
		}
185
186
		cp->clustered_index =
187
			mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID);
188
		cp->clustered_index_found = TRUE;
189
	} else if (UNIV_UNLIKELY(page_no >= cp->free_limit)) {
190
191
		/* Skip unused pages above free limit, if that value is set in
192
		the FSP header.*/
193
194
		return(cp->free_limit > 0);
195
	} else if (cp->clustered_index_found && page_type == FIL_PAGE_INDEX) {
196
197
		index_id = mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID);
198
		if (INDEX_ID_CMP(index_id, cp->clustered_index) != 0) {
199
			ulint	fseg_hdr_space =
200
				mach_read_from_4(page + PAGE_HEADER +
201
						 PAGE_BTR_SEG_TOP);
202
			ulint	fseg_hdr_page_no =
203
				mach_read_from_4(page + PAGE_HEADER +
204
						 PAGE_BTR_SEG_TOP + 4);
205
			ulint fseg_hdr_offset =
206
				mach_read_from_2(page + PAGE_HEADER +
207
						 PAGE_BTR_SEG_TOP + 8);
208
209
			/* Don't skip root index pages, i.e. the ones where the
210
			above fields are defined. We need root index pages to be
211
			able to correctly drop the indexes later, as they
212
			contain fseg inode pointers. */
213
214
			return(fseg_hdr_space == 0 &&
215
			       fseg_hdr_page_no == 0 &&
216
			       fseg_hdr_offset == 0);
217
		}
218
	}
219
220
	return(FALSE);
221
}
222
223
/************************************************************************
224
Run the next batch of pages through the compact page filter.
225
226
@return TRUE on success, FALSE on error. */
227
static my_bool
228
wf_compact_process(xb_write_filt_ctxt_t *ctxt, ds_file_t *dstfile)
229
{
230
	xb_fil_cur_t		*cursor = ctxt->cursor;
231
	ulint			 page_size = cursor->page_size;
232
	byte			*page;
233
	byte 			*buf_end;
234
	byte			*write_from;
235
	xb_wf_compact_ctxt_t	*cp = &(ctxt->u.wf_compact_ctxt);
236
	ulint 			i;
237
	ulint			page_no;
238
	byte			tmp[4];
239
240
	if (cp->skip) {
241
		return(!ds_write(dstfile, cursor->buf, cursor->buf_read));
242
	}
243
244
	write_from = NULL;
245
	buf_end = cursor->buf + cursor->buf_read;
246
	for (i = 0, page = cursor->buf; page < buf_end;
247
	     i++, page += page_size) {
248
249
		page_no = cursor->buf_page_no + i;
250
251
		if (!check_if_skip_page(cp, cursor, i)) {
252
253
			if (write_from == NULL) {
254
				write_from = page;
255
			}
256
257
			if (cp->inside_skipped_range) {
258
				cp->inside_skipped_range = FALSE;
259
260
				/* Write the last range endpoint to the
261
				skipped pages map */
262
263
				xb_ad(page_no > 0);
264
				mach_write_to_4(tmp, page_no - 1);
265
				if (ds_write(cp->buffer, tmp, sizeof(tmp))) {
266
					return(FALSE);
267
				}
268
			}
269
			continue;
270
		}
271
272
		if (write_from != NULL) {
273
274
			/* The first skipped page in this block, write the
275
			non-skipped ones to the data file */
276
277
			if (ds_write(dstfile, write_from, page - write_from)) {
278
				return(FALSE);
279
			}
280
281
			write_from = NULL;
282
		}
283
284
		if (!cp->inside_skipped_range) {
285
286
			/* The first skipped page in range, write the first
287
			range endpoint to the skipped pages map */
288
289
			cp->inside_skipped_range = TRUE;
290
291
			mach_write_to_4(tmp, page_no);
292
			if (ds_write(cp->buffer, tmp, sizeof(tmp))) {
293
				return(FALSE);
294
			}
295
		}
296
	}
297
298
	/* Write the remaining pages in the buffer, if any */
299
	if (write_from != NULL &&
300
	    ds_write(dstfile, write_from, buf_end - write_from)) {
301
		return(FALSE);
302
	}
303
304
	return(TRUE);
305
}
306
307
/************************************************************************
308
Close the compact filter's page map stream.
309
310
@return TRUE on success, FALSE on error. */
311
static my_bool
312
wf_compact_finalize(xb_write_filt_ctxt_t *ctxt,
313
		    ds_file_t *dstfile __attribute__((unused)))
314
{
315
	xb_fil_cur_t		*cursor = ctxt->cursor;
316
	xb_wf_compact_ctxt_t	*cp = &(ctxt->u.wf_compact_ctxt);
317
318
	/* Write the last endpoint of the current range, if the last pages of
319
	the space have been skipped. */
320
	if (cp->inside_skipped_range) {
321
		byte	tmp[4];
322
323
		mach_write_to_4(tmp, cursor->space_size - 1);
324
		if (ds_write(cp->buffer, tmp, sizeof(tmp))) {
325
			return(FALSE);
326
		}
327
328
		cp->inside_skipped_range = FALSE;
329
	}
330
331
	if (cp->buffer) {
332
		ds_close(cp->buffer);
333
	}
334
	if (cp->ds_buffer) {
335
		ds_destroy(cp->ds_buffer);
336
	}
337
338
	return(TRUE);
339
}
340
341
/************************************************************************
342
Open a page map file and return a cursor.
343
344
@return page map cursor, or NULL if the file doesn't exist. */
345
static page_map_cursor_t *
346
page_map_file_open(const char *path)
347
{
348
	MY_STAT			 statinfo;
349
	page_map_cursor_t	*pmap_cur;
350
	int			 rc;
351
352
	if (my_stat(path, &statinfo, MYF(0)) == NULL) {
353
354
		return(NULL);
355
	}
356
357
	/* The maximum possible page map file corresponds to a 64 TB tablespace
358
	and the worst case when every other page was skipped. That is, 2^32/2
359
	page ranges = 16 GB. */
360
	xb_a(statinfo.st_size < (off_t) 16 * 1024 * 1024 * 1024);
361
362
	/* Must be a series of 8-byte tuples */
363
	xb_a(statinfo.st_size % 8 == 0);
364
365
	pmap_cur = (page_map_cursor_t *) my_malloc(sizeof(page_map_cursor_t),
366
						   MYF(MY_FAE));
367
368
	pmap_cur->fd = my_open(path, O_RDONLY, MYF(MY_WME));
369
	xb_a(pmap_cur->fd != 0);
370
371
	rc = init_io_cache(&pmap_cur->cache, pmap_cur->fd, 0, READ_CACHE,
372
			   0, 0, MYF(MY_WME));
373
	xb_a(rc == 0);
374
375
	return(pmap_cur);
376
}
377
378
/************************************************************************
379
Read the next range from a page map file and update the cursor.
380
381
@return TRUE on success, FALSE on end-of-file. */
382
static ibool
383
page_map_file_next(page_map_cursor_t *pmap_cur, page_range_t *range)
384
{
385
	byte buf[8];
386
387
	xb_ad(pmap_cur != NULL);
388
389
	if (my_b_read(&pmap_cur->cache, buf, sizeof(buf))) {
390
		return(FALSE);
391
	}
392
393
	range->from = mach_read_from_4(buf);
394
	range->to = mach_read_from_4(buf + 4);
395
396
	return(TRUE);
397
}
398
399
/************************************************************************
400
Close the page map cursor.*/
401
static void
402
page_map_file_close(page_map_cursor_t *pmap_cur)
403
{
404
	int	rc;
405
406
	xb_ad(pmap_cur != NULL);
407
408
	rc = end_io_cache(&pmap_cur->cache);
409
	xb_a(rc == 0);
410
411
	posix_fadvise(pmap_cur->fd, 0, 0, POSIX_FADV_DONTNEED);
412
413
	rc = my_close(pmap_cur->fd, MY_WME);
414
	xb_a(rc == 0);
415
}
416
417
/****************************************************************************
418
Expand a single data file according to the skipped pages maps created by
419
--compact.
420
421
@return TRUE on success, FALSE on failure. */
422
static my_bool
423
xb_expand_file(fil_node_t *node)
424
{
425
	char			 pmapfile_path[FN_REFLEN];
426
	char			 tmpfile_path[FN_REFLEN];
427
	xb_fil_cur_t		 cursor;
428
	xb_fil_cur_result_t	 res;
429
	ds_ctxt_t		*ds_local;
430
	ds_ctxt_t		*ds_buffer;
431
	ds_file_t		*tmpfile;
432
	my_bool			 success = FALSE;
433
	ulint			 i;
434
	byte			*page;
435
	ulint			 page_expected_no;
436
	page_map_cursor_t	*pmap_cur;
437
	ibool			 have_next_range;
438
	page_range_t		 pmap_range;
439
440
	xb_ad(trx_sys_sys_space(node->space->id) == FALSE);
441
442
	snprintf(pmapfile_path, sizeof(pmapfile_path), "%s%s",
443
		 node->name, XB_PAGE_MAP_SUFFIX);
444
445
	/* Skip files that don't have a corresponding page map file */
446
447
	if (!(pmap_cur = page_map_file_open(pmapfile_path))) {
448
449
		msg("Not expanding %s\n", node->name);
450
451
		return(FALSE);
452
	}
453
454
	msg("Expanding %s\n", node->name);
455
456
	ds_local = ds_create(".", DS_TYPE_LOCAL);
457
	ds_buffer = ds_create(".", DS_TYPE_BUFFER);
458
459
	xb_a(ds_local != NULL && ds_buffer != NULL);
460
461
	ds_buffer_set_size(ds_buffer, FSP_EXTENT_SIZE * UNIV_PAGE_SIZE_MAX);
462
463
	ds_set_pipe(ds_buffer, ds_local);
464
534.7.1 by Laurynas Biveinis
Implement bitmap-based incremental backups.
465
	res = xb_fil_cur_open(&cursor, &rf_pass_through, node, 1);
477.1.1 by Alexey Kopytov
Compact backups implementation:
466
	xb_a(res == XB_FIL_CUR_SUCCESS);
467
468
	snprintf(tmpfile_path, sizeof(tmpfile_path), "%s%s",
469
		 node->name, XB_TMPFILE_SUFFIX);
470
471
	tmpfile = ds_open(ds_buffer, tmpfile_path, &cursor.statinfo);
472
	if (tmpfile == NULL) {
473
474
		msg("Could not open temporary file '%s'\n", tmpfile_path);
475
		goto error;
476
	}
477
478
	have_next_range = page_map_file_next(pmap_cur, &pmap_range);
479
480
	page_expected_no = 0;
481
482
	/* Initialize and mark the empty page which is used to replace
483
	skipped pages. */
484
	memset(empty_page, 0, cursor.page_size);
485
	memcpy(empty_page + compacted_page_magic_offset,
486
	       compacted_page_magic, compacted_page_magic_size);
487
	mach_write_to_4(empty_page + FIL_PAGE_SPACE_OR_CHKSUM,
488
			BUF_NO_CHECKSUM_MAGIC);
489
	mach_write_to_4(empty_page + cursor.page_size -
490
			FIL_PAGE_END_LSN_OLD_CHKSUM,
491
			BUF_NO_CHECKSUM_MAGIC);
492
493
494
	/* Main copy loop */
495
496
	while ((res = xb_fil_cur_read(&cursor)) == XB_FIL_CUR_SUCCESS) {
497
498
		for (i = 0, page = cursor.buf; i < cursor.buf_npages;
499
		     i++, page += cursor.page_size) {
500
501
			ulint	page_read_no;
502
503
			page_read_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
504
			xb_a(!page_read_no || page_expected_no <= page_read_no);
505
506
			if (have_next_range &&
507
			    page_expected_no == pmap_range.from) {
508
509
				xb_a(pmap_range.from <= pmap_range.to);
510
511
				/* Write empty pages instead of skipped ones, if
512
				necessary. */
513
514
				while (page_expected_no <= pmap_range.to) {
515
516
					if (ds_write(tmpfile, empty_page,
517
						     cursor.page_size)) {
518
519
						goto write_error;
520
					}
521
522
					page_expected_no++;
523
				}
524
525
				have_next_range =
526
					page_map_file_next(pmap_cur,
527
							   &pmap_range);
528
			}
529
530
			/* Write the current page */
531
532
			if (ds_write(tmpfile, page, cursor.page_size)) {
533
534
				goto write_error;
535
			}
536
537
			page_expected_no++;
538
		}
539
	}
540
541
	if (res != XB_FIL_CUR_EOF) {
542
543
		goto error;
544
	}
545
546
	/* Write empty pages instead of trailing skipped ones, if any */
547
548
	if (have_next_range) {
549
550
		xb_a(page_expected_no == pmap_range.from);
551
		xb_a(pmap_range.from <= pmap_range.to);
552
553
		while (page_expected_no <= pmap_range.to) {
554
555
			if (ds_write(tmpfile, empty_page,
556
				     cursor.page_size)) {
557
558
				goto write_error;
559
			}
560
561
			page_expected_no++;
562
		}
563
564
		xb_a(!page_map_file_next(pmap_cur, &pmap_range));
565
	}
566
567
	/* Replace the original .ibd file with the expanded file */
568
	if (my_rename(tmpfile_path, node->name, MYF(MY_WME))) {
569
570
		msg("Failed to rename '%s' to '%s'\n",
571
		    tmpfile_path, node->name);
572
		goto error;
573
	}
574
575
	my_delete(pmapfile_path, MYF(MY_WME));
576
577
	ds_close(tmpfile);
578
	tmpfile = NULL;
579
580
	success = TRUE;
581
582
	goto end;
583
584
write_error:
585
	msg("Write to '%s' failed\n", tmpfile_path);
586
587
error:
588
	if (tmpfile != NULL) {
589
590
		ds_close(tmpfile);
591
		my_delete(tmpfile_path, MYF(MY_WME));
592
	}
593
594
end:
595
	ds_destroy(ds_buffer);
596
	ds_destroy(ds_local);
597
598
	page_map_file_close(pmap_cur);
599
600
	return(success);
601
}
602
603
/******************************************************************************
604
Expand the data files according to the skipped pages maps created by --compact.
605
@return TRUE on success, FALSE on failure. */
606
my_bool
607
xb_expand_datafiles(void)
608
/*=====================*/
609
{
610
	ulint			 nfiles;
611
	datafiles_iter_t	*it = NULL;
612
	fil_node_t		*node;
613
	fil_space_t		*space;
614
615
	msg("Starting to expand compacted .ibd files.\n");
616
617
	/* Initialize the tablespace cache */
618
	if (xb_data_files_init() != DB_SUCCESS) {
619
		return(FALSE);
620
	}
621
622
	nfiles = UT_LIST_GET_LEN(fil_system->space_list);
623
	xb_a(nfiles > 0);
624
625
	it = datafiles_iter_new(fil_system);
626
	if (it == NULL) {
627
		msg("xtrabackup: error: datafiles_iter_new() failed.\n");
628
		goto error;
629
	}
630
631
	while ((node = datafiles_iter_next(it)) != NULL) {
632
633
		space = node->space;
634
635
		/* System tablespace cannot be compacted */
636
		if (trx_sys_sys_space(space->id)) {
637
638
			continue;
639
		}
640
641
		if (!xb_expand_file(node)) {
642
643
			goto error;
644
		}
645
	}
646
647
	datafiles_iter_free(it);
648
	xb_data_files_close();
649
650
	return(TRUE);
651
652
error:
653
	if (it != NULL) {
654
		datafiles_iter_free(it);
655
	}
656
657
	xb_data_files_close();
658
659
	return(FALSE);
660
}
661
662
/******************************************************************************
663
Callback used in buf_page_io_complete() to detect compacted pages.
664
@return TRUE if the page is marked as compacted, FALSE otherwise. */
665
ibool
666
buf_page_is_compacted(
667
/*==================*/
668
	const byte*	page)	/*!< in: a database page */
669
{
670
	return !memcmp(page + compacted_page_magic_offset,
671
		       compacted_page_magic, compacted_page_magic_size);
672
}
673
674
/*****************************************************************************
675
Builds an index definition corresponding to an index object. It is roughly
676
similar to innobase_create_index_def() / innobase_create_index_field_def() and
677
the opposite to dict_mem_index_create() / dict_mem_index_add_field(). */
678
static
679
void
680
xb_build_index_def(
681
/*=======================*/
682
	mem_heap_t*		heap,		/*!< in: heap */
683
	const dict_index_t*	index,		/*!< in: index */
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
684
	index_def_t*		index_def)	/*!< out: index definition */
477.1.1 by Alexey Kopytov
Compact backups implementation:
685
{
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
686
	index_field_t*	fields;
687
	ulint		n_fields;
688
	ulint		i;
477.1.1 by Alexey Kopytov
Compact backups implementation:
689
690
	ut_a(index->n_fields);
691
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
692
693
	/* Use n_user_defined_cols instead of n_fields, as the index will
694
	contain a part of the primary key after n_user_defined_cols, and those
695
	columns will be created automatically in
696
	dict_index_build_internal_clust(). */
697
	n_fields = index->n_user_defined_cols;
698
699
	memset(index_def, 0, sizeof(*index_def));
700
701
	index_def->name = mem_heap_strdup(heap, index->name);
702
	index_def->ind_type = index->type;
703
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
704
	fields = static_cast<index_field_t *>
488 by Laurynas Biveinis
Merge C++ build support from 2.0
705
		(mem_heap_alloc(heap, n_fields * sizeof(*fields)));
477.1.1 by Alexey Kopytov
Compact backups implementation:
706
707
	for (i = 0; i < n_fields; i++) {
708
		dict_field_t*	field;
709
710
		field = dict_index_get_nth_field(index, i);
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
711
		xb_dict_index_field_to_index_field(heap, field, &fields[i]);
477.1.1 by Alexey Kopytov
Compact backups implementation:
712
713
	}
714
715
	index_def->fields = fields;
716
	index_def->n_fields = n_fields;
717
}
718
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
719
#if MYSQL_VERSION_ID >= 50600
/* Placeholder auto-increment sequence passed to row_merge_build_indexes() */
static ib_sequence_t null_seq(NULL, 0, 0);

/* Placeholder table share and table passed to row_merge_build_indexes() for
error reporting. Assumes that no errors are going to be reported. */
static TABLE_SHARE dummy_table_share;
static TABLE dummy_table;
#endif
727
477.1.1 by Alexey Kopytov
Compact backups implementation:
728
/********************************************************************//**
729
Rebuild secondary indexes for a given table. */
730
static
731
void
732
xb_rebuild_indexes_for_table(
583.4.1 by Alexey Kopytov
Implementation of
733
/*=========================*/
734
	dict_table_t*	table,		/*!< in: table */
735
	trx_t*		trx,		/*!< in: transaction handle */
736
	ulint		thread_n)	/*!< in: thread number */
477.1.1 by Alexey Kopytov
Compact backups implementation:
737
{
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
738
	dict_index_t*	index;
739
	dict_index_t**	indexes;
740
	ulint		n_indexes;
741
	index_def_t*	index_defs;
742
	ulint		i;
743
	mem_heap_t*	heap;
744
	ulint		error;
745
#if MYSQL_VERSION_ID >= 50600
746
	ulint*		add_key_nums;
747
#endif
477.1.1 by Alexey Kopytov
Compact backups implementation:
748
583.4.1 by Alexey Kopytov
Implementation of
749
	ut_ad(!mutex_own(&(dict_sys->mutex)));
477.1.1 by Alexey Kopytov
Compact backups implementation:
750
	ut_ad(table);
751
752
	ut_a(UT_LIST_GET_LEN(table->indexes) > 0);
753
754
	n_indexes = UT_LIST_GET_LEN(table->indexes) - 1;
755
	if (!n_indexes) {
756
		/* Only the primary key, nothing to do. */
757
		return;
758
	}
759
760
	heap = mem_heap_create(1024);
761
762
	indexes = (dict_index_t**) mem_heap_alloc(heap,
763
						  n_indexes * sizeof(*indexes));
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
764
	index_defs = (index_def_t*) mem_heap_alloc(heap, n_indexes *
477.1.1 by Alexey Kopytov
Compact backups implementation:
765
							 sizeof(*index_defs));
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
766
#if MYSQL_VERSION_ID >= 50600
767
	add_key_nums = static_cast<ulint *>
768
		(mem_heap_alloc(heap, n_indexes * sizeof(*add_key_nums)));
769
#endif
477.1.1 by Alexey Kopytov
Compact backups implementation:
770
771
	/* Skip the primary key. */
772
	index = dict_table_get_first_index(table);
773
	ut_a(dict_index_is_clust(index));
774
583.4.1 by Alexey Kopytov
Implementation of
775
	row_mysql_lock_data_dictionary(trx);
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
776
477.1.1 by Alexey Kopytov
Compact backups implementation:
777
	for (i = 0; (index = dict_table_get_next_index(index)); i++) {
778
583.4.1 by Alexey Kopytov
Implementation of
779
		msg("[%02lu]   Found index %s\n", thread_n, index->name);
477.1.1 by Alexey Kopytov
Compact backups implementation:
780
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
781
		/* Pretend that it's the current trx that created this index.
782
		Required to avoid 5.6+ debug assertions. */
783
		index->trx_id = xb_trx_id_to_index_trx_id(trx->id);
784
785
		xb_build_index_def(heap, index, &index_defs[i]);
786
787
#if MYSQL_VERSION_ID >= 50600
788
		/* In 5.6+, row_merge_drop_indexes() drops all the indexes on
789
		the table that have the temp index prefix.  It does not accept
790
		an array of indexes to drop as in 5.5-.  */
791
		row_merge_rename_index_to_drop(trx, table->id, index->id);
792
#else
477.1.1 by Alexey Kopytov
Compact backups implementation:
793
		indexes[i] = index;
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
794
#endif
477.1.1 by Alexey Kopytov
Compact backups implementation:
795
	}
796
797
	ut_ad(i == n_indexes);
798
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
799
#if MYSQL_VERSION_ID >= 50600
800
	ut_d(table->n_ref_count++);
801
	row_merge_drop_indexes(trx, table, TRUE);
802
	ut_d(table->n_ref_count--);
803
804
	index = dict_table_get_first_index(table);
805
	ut_a(dict_index_is_clust(index));
806
	index = dict_table_get_next_index(index);
807
	while (index) {
808
809
		/* In 5.6+, row_merge_drop_indexes() does not remove the
810
		indexes from the dictionary cache nor from any foreign key
811
		list.  This may cause invalid dereferences as we try to access
812
		the dropped indexes from other tables as FKs.  */
813
814
		dict_index_t* next_index = dict_table_get_next_index(index);
815
		index->to_be_dropped = 1;
816
817
		/* Patch up any FK referencing this index with NULL */
630 by Alexey Kopytov
Rebased XtraBackup on MySQL versions 5.1.70, 5.5.30, 5.6.11 and Percona
818
		dict_foreign_replace_index(table, NULL, index);
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
819
820
		dict_index_remove_from_cache(table, index);
821
822
		index = next_index;
823
	}
824
#else
477.1.1 by Alexey Kopytov
Compact backups implementation:
825
	row_merge_drop_indexes(trx, table, indexes, n_indexes);
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
826
#endif
477.1.1 by Alexey Kopytov
Compact backups implementation:
827
583.4.1 by Alexey Kopytov
Implementation of
828
	msg("[%02lu]   Rebuilding %lu index(es).\n", thread_n, n_indexes);
477.1.1 by Alexey Kopytov
Compact backups implementation:
829
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
830
	error = row_merge_lock_table(trx, table, LOCK_X);
831
	xb_a(error == DB_SUCCESS);
477.1.1 by Alexey Kopytov
Compact backups implementation:
832
833
	for (i = 0; i < n_indexes; i++) {
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
834
		indexes[i] = row_merge_create_index(trx, table,
835
						    &index_defs[i]);
836
#if MYSQL_VERSION_ID >= 50600
837
		add_key_nums[i] = index_defs[i].key_number;
838
#endif
477.1.1 by Alexey Kopytov
Compact backups implementation:
839
	}
840
583.4.1 by Alexey Kopytov
Implementation of
841
	/* Commit trx to release latches on system tables */
842
	trx_commit_for_mysql(trx);
843
	trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
844
845
	row_mysql_unlock_data_dictionary(trx);
846
847
	/* Reacquire table lock for row_merge_build_indexes() */
848
	error = row_merge_lock_table(trx, table, LOCK_X);
849
	xb_a(error == DB_SUCCESS);
850
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
851
#if MYSQL_VERSION_ID >= 50600
852
	error = row_merge_build_indexes(trx, table, table, FALSE, indexes,
853
					add_key_nums, n_indexes, &dummy_table,
854
					NULL, NULL, ULINT_UNDEFINED, null_seq);
855
#else
477.1.1 by Alexey Kopytov
Compact backups implementation:
856
	error = row_merge_build_indexes(trx, table, table, indexes, n_indexes,
857
					NULL);
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
858
#endif
477.1.1 by Alexey Kopytov
Compact backups implementation:
859
	ut_a(error == DB_SUCCESS);
860
861
	mem_heap_free(heap);
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
862
863
	trx_commit_for_mysql(trx);
864
865
	trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
477.1.1 by Alexey Kopytov
Compact backups implementation:
866
}
867
583.4.1 by Alexey Kopytov
Implementation of
868
/**************************************************************************
869
Worker thread function for index rebuild. */
870
static
871
void *
872
xb_rebuild_indexes_thread_func(
873
/*===========================*/
874
	void*	arg)	/* thread context */
875
{
876
	dict_table_t*		table;
877
	index_rebuild_table_t*	rebuild_table;
878
	index_rebuild_thread_t*	thread;
879
	trx_t*			trx;
880
881
	thread = (index_rebuild_thread_t *) arg;
882
883
	trx = trx_allocate_for_mysql();
884
885
	/* Suppress foreign key checks, as we are going to drop and recreate all
886
	secondary keys. */
887
	trx->check_foreigns = FALSE;
888
	trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
889
890
	/* Loop until there are no more tables in tables list */
891
	for (;;) {
892
		pthread_mutex_lock(&table_list_mutex);
893
894
		rebuild_table = UT_LIST_GET_FIRST(table_list);
895
896
		if (rebuild_table == NULL) {
897
898
			pthread_mutex_unlock(&table_list_mutex);
899
			break;
900
		}
901
902
		UT_LIST_REMOVE(list, table_list, rebuild_table);
903
904
		pthread_mutex_unlock(&table_list_mutex);
905
906
		ut_ad(rebuild_table->name);
907
		ut_ad(!trx_sys_sys_space(rebuild_table->space_id));
908
909
		row_mysql_lock_data_dictionary(trx);
910
911
		table = dict_table_get_low(rebuild_table->name);
912
913
		row_mysql_unlock_data_dictionary(trx);
914
915
		ut_a(table != NULL);
916
		ut_a(table->space == rebuild_table->space_id);
917
918
		/* Discard change buffer entries for this space */
919
		ibuf_delete_for_discarded_space(rebuild_table->space_id);
920
921
		msg("[%02lu] Checking if there are indexes to rebuild in table "
922
		    "%s (space id: %lu)\n",
923
		    thread->num,
924
		    rebuild_table->name, rebuild_table->space_id);
925
926
		xb_rebuild_indexes_for_table(table, trx, thread->num);
927
928
		mem_free(rebuild_table->name);
929
		mem_free(rebuild_table);
930
	}
931
932
	trx_commit_for_mysql(trx);
933
934
	trx_free_for_mysql(trx);
935
936
	return(NULL);
937
}
938
477.1.1 by Alexey Kopytov
Compact backups implementation:
939
/******************************************************************************
940
Rebuild all secondary indexes in all tables in separate spaces. Called from
941
innobase_start_or_create_for_mysql(). */
942
void
943
xb_compact_rebuild_indexes(void)
944
/*=============================*/
945
{
583.4.1 by Alexey Kopytov
Implementation of
946
	dict_table_t*		sys_tables;
947
	dict_index_t*		sys_index;
948
	btr_pcur_t		pcur;
949
	const rec_t*		rec;
950
	mtr_t			mtr;
951
	const byte*		field;
952
	ulint			len;
953
	ulint			space_id;
954
	trx_t*			trx;
955
	index_rebuild_table_t*	rebuild_table;
956
	index_rebuild_thread_t*	threads;
957
	ulint			i;
477.1.1 by Alexey Kopytov
Compact backups implementation:
958
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
959
#if MYSQL_VERSION_ID >= 50600
960
	/* Set up the dummy table for the index rebuild error reporting */
961
	dummy_table_share.fields = 0;
962
	dummy_table.s = &dummy_table_share;
963
#endif
964
583.4.1 by Alexey Kopytov
Implementation of
965
	/* Iterate all tables that are not in the system tablespace and add them
966
	to the list of tables to be rebuilt later. */
477.1.1 by Alexey Kopytov
Compact backups implementation:
967
968
	trx = trx_allocate_for_mysql();
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
969
	trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
477.1.1 by Alexey Kopytov
Compact backups implementation:
970
971
	row_mysql_lock_data_dictionary(trx);
972
973
	/* Enlarge the fatal lock wait timeout during index rebuild
974
	operation. */
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
975
	xb_adjust_fatal_semaphore_wait_threshold(7200); /* 2 hours */
477.1.1 by Alexey Kopytov
Compact backups implementation:
976
977
	mtr_start(&mtr);
978
979
	sys_tables = dict_table_get_low("SYS_TABLES");
980
	sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
981
	ut_a(!dict_table_is_comp(sys_tables));
982
583.4.1 by Alexey Kopytov
Implementation of
983
	pthread_mutex_init(&table_list_mutex, NULL);
984
	UT_LIST_INIT(table_list);
985
522.6.1 by Laurynas Biveinis
Merge build against MySQL 5.6 support from 2.0.
986
	xb_btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur,
987
				       TRUE, 0, &mtr);
477.1.1 by Alexey Kopytov
Compact backups implementation:
988
	for (;;) {
989
		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
990
991
		rec = btr_pcur_get_rec(&pcur);
992
993
		if (!btr_pcur_is_on_user_rec(&pcur)) {
994
			/* end of index */
995
996
			break;
997
		}
998
999
		if (rec_get_deleted_flag(rec, 0)) {
1000
			continue;
1001
		}
1002
1003
		field = rec_get_nth_field_old(rec, 9, &len);
1004
		ut_a(len == 4);
1005
1006
		space_id = mach_read_from_4(field);
1007
1008
		/* Don't touch tables in the system tablespace */
1009
		if (trx_sys_sys_space(space_id)) {
1010
1011
			continue;
1012
		}
1013
1014
		field = rec_get_nth_field_old(rec, 0, &len);
583.4.1 by Alexey Kopytov
Implementation of
1015
1016
		rebuild_table = static_cast<index_rebuild_table_t *>
1017
			(mem_alloc(sizeof(*rebuild_table)));
1018
		rebuild_table->name = mem_strdupl((char*) field, len);
1019
		rebuild_table->space_id = space_id;
1020
1021
		UT_LIST_ADD_LAST(list, table_list, rebuild_table);
477.1.1 by Alexey Kopytov
Compact backups implementation:
1022
	}
1023
1024
	btr_pcur_close(&pcur);
1025
	mtr_commit(&mtr);
1026
1027
	row_mysql_unlock_data_dictionary(trx);
1028
1029
	trx_commit_for_mysql(trx);
1030
1031
	trx_free_for_mysql(trx);
583.4.1 by Alexey Kopytov
Implementation of
1032
1033
	/* Start worker threads for the index rebuild operation */
1034
	ut_ad(xtrabackup_rebuild_threads > 0);
1035
1036
	if (xtrabackup_rebuild_threads > 1) {
1037
		msg("Starting %lu threads to rebuild indexes.\n",
1038
		    xtrabackup_rebuild_threads);
1039
	}
1040
1041
	threads = (index_rebuild_thread_t *)
1042
		mem_alloc(sizeof(*threads) *
1043
			  xtrabackup_rebuild_threads);
1044
1045
	for (i = 0; i < xtrabackup_rebuild_threads; i++) {
1046
1047
		threads[i].num = i+1;
1048
		if (pthread_create(&threads[i].id, NULL,
1049
				   xb_rebuild_indexes_thread_func,
1050
				   &threads[i])) {
1051
1052
			msg("error: pthread_create() failed: errno = %d\n",
1053
			    errno);
1054
			ut_a(0);
1055
		}
1056
	}
1057
1058
	/* Wait for worker threads to finish */
1059
	for (i = 0; i < xtrabackup_rebuild_threads; i++) {
1060
		pthread_join(threads[i].id, NULL);
1061
	}
1062
1063
	mem_free(threads);
477.1.1 by Alexey Kopytov
Compact backups implementation:
1064
}