/******************************************************
Database log

(c) 1995-1997 Innobase Oy

Created 12/9/1995 Heikki Tuuri
*******************************************************/
#include "dict0boot.h"
#include "srv0start.h"
/*
General philosophy of InnoDB redo-logs:

1) Every change to a contents of a data page must be done
through mtr, which in mtr_commit() writes log records
to the InnoDB redo log.

2) Normally these changes are performed using a mlog_write_ulint()
or similar function.

3) In some page level operations only a code number of a
c-function and its parameters are written to the log to
reduce the size of the log.

  3a) You should not add parameters to these kind of functions
  (e.g. trx_undo_header_create(), trx_undo_insert_header_reuse())

  3b) You should not add such functionality which either change
  working when compared with the old or are dependent on data
  outside of the page. These kind of functions should implement
  self-contained page transformation and it should be unchanged
  if you don't have very essential reasons to change log
  semantics or format.

*/
/* Current free limit of space 0; protected by the log sys mutex; 0 means
55
UNIV_INTERN ulint log_fsp_current_free_limit = 0;
57
/* Global log system variable */
58
UNIV_INTERN log_t* log_sys = NULL;
61
UNIV_INTERN ibool log_do_write = TRUE;
63
UNIV_INTERN ibool log_debug_writes = FALSE;
64
#endif /* UNIV_DEBUG */
66
/* These control how often we print warnings if the last checkpoint is too
68
UNIV_INTERN ibool log_has_printed_chkp_warning = FALSE;
69
UNIV_INTERN time_t log_last_warning_time;
71
#ifdef UNIV_LOG_ARCHIVE
72
/* Pointer to this variable is used as the i/o-message when we do i/o to an
74
UNIV_INTERN byte log_archive_io;
75
#endif /* UNIV_LOG_ARCHIVE */
77
/* A margin for free space in the log buffer before a log entry is catenated */
#define LOG_BUF_WRITE_MARGIN	(4 * OS_FILE_LOG_BLOCK_SIZE)

/* Margins for free space in the log buffer after a log entry is catenated */
#define LOG_BUF_FLUSH_RATIO	2
#define LOG_BUF_FLUSH_MARGIN	(LOG_BUF_WRITE_MARGIN + 4 * UNIV_PAGE_SIZE)

/* Margin for the free space in the smallest log group, before a new query
step which modifies the database, is started */

#define LOG_CHECKPOINT_FREE_PER_THREAD	(4 * UNIV_PAGE_SIZE)
#define LOG_CHECKPOINT_EXTRA_FREE	(8 * UNIV_PAGE_SIZE)

/* This parameter controls asynchronous making of a new checkpoint; the value
should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */

#define LOG_POOL_CHECKPOINT_RATIO_ASYNC	32

/* This parameter controls synchronous preflushing of modified buffer pages */
#define LOG_POOL_PREFLUSH_RATIO_SYNC	16

/* The same ratio for asynchronous preflushing; this value should be less than
the previous */
#define LOG_POOL_PREFLUSH_RATIO_ASYNC	8

/* Extra margin, in addition to one log file, used in archiving */
#define LOG_ARCHIVE_EXTRA_MARGIN	(4 * UNIV_PAGE_SIZE)

#ifdef UNIV_LOG_ARCHIVE
/* This parameter controls asynchronous writing to the archive */
#define LOG_ARCHIVE_RATIO_ASYNC	16

/* States of an archiving operation */
#define	LOG_ARCHIVE_READ	1
#define	LOG_ARCHIVE_WRITE	2
#endif /* UNIV_LOG_ARCHIVE */

/* Codes used in unlocking flush latches */
#define LOG_UNLOCK_NONE_FLUSHED_LOCK	1
#define LOG_UNLOCK_FLUSH_LOCK		2
/**********************************************************
Completes a checkpoint write i/o to a log file. */
static
void
log_io_complete_checkpoint(void);
/*============================*/
#ifdef UNIV_LOG_ARCHIVE
/**********************************************************
Completes an archiving i/o. */
static
void
log_io_complete_archive(void);
/*=========================*/
#endif /* UNIV_LOG_ARCHIVE */
/********************************************************************
135
Sets the global variable log_fsp_current_free_limit. Also makes a checkpoint,
136
so that we know that the limit has been written to a log checkpoint field
140
log_fsp_current_free_limit_set_and_checkpoint(
141
/*==========================================*/
142
ulint limit) /* in: limit to set */
146
mutex_enter(&(log_sys->mutex));
148
log_fsp_current_free_limit = limit;
150
mutex_exit(&(log_sys->mutex));
152
/* Try to make a synchronous checkpoint */
157
success = log_checkpoint(TRUE, TRUE);
161
/********************************************************************
162
Returns the oldest modified block lsn in the pool, or log_sys->lsn if none
166
log_buf_pool_get_oldest_modification(void)
167
/*======================================*/
171
ut_ad(mutex_own(&(log_sys->mutex)));
173
lsn = buf_pool_get_oldest_modification();
183
/****************************************************************
184
Opens the log for log_write_low. The log must be closed with log_close and
185
released with log_release. */
188
log_reserve_and_open(
189
/*=================*/
190
/* out: start lsn of the log record */
191
ulint len) /* in: length of data to be catenated */
193
log_t* log = log_sys;
194
ulint len_upper_limit;
195
#ifdef UNIV_LOG_ARCHIVE
196
ulint archived_lsn_age;
198
#endif /* UNIV_LOG_ARCHIVE */
201
#endif /* UNIV_DEBUG */
203
ut_a(len < log->buf_size / 2);
205
mutex_enter(&(log->mutex));
207
/* Calculate an upper limit for the space the string may take in the
210
len_upper_limit = LOG_BUF_WRITE_MARGIN + (5 * len) / 4;
212
if (log->buf_free + len_upper_limit > log->buf_size) {
214
mutex_exit(&(log->mutex));
216
/* Not enough free space, do a syncronous flush of the log
219
log_buffer_flush_to_disk();
228
#ifdef UNIV_LOG_ARCHIVE
229
if (log->archiving_state != LOG_ARCH_OFF) {
231
archived_lsn_age = log->lsn - log->archived_lsn;
232
if (archived_lsn_age + len_upper_limit
233
> log->max_archived_lsn_age) {
234
/* Not enough free archived space in log groups: do a
235
synchronous archive write batch: */
237
mutex_exit(&(log->mutex));
239
ut_ad(len_upper_limit <= log->max_archived_lsn_age);
241
log_archive_do(TRUE, &dummy);
248
#endif /* UNIV_LOG_ARCHIVE */
250
#ifdef UNIV_LOG_DEBUG
251
log->old_buf_free = log->buf_free;
252
log->old_lsn = log->lsn;
257
/****************************************************************
258
Writes to the log the string given. It is assumed that the caller holds the
264
byte* str, /* in: string */
265
ulint str_len) /* in: string length */
267
log_t* log = log_sys;
272
ut_ad(mutex_own(&(log->mutex)));
274
/* Calculate a part length */
276
data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len;
278
if (data_len <= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
280
/* The string fits within the current log block */
284
data_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
286
len = OS_FILE_LOG_BLOCK_SIZE
287
- (log->buf_free % OS_FILE_LOG_BLOCK_SIZE)
288
- LOG_BLOCK_TRL_SIZE;
291
ut_memcpy(log->buf + log->buf_free, str, len);
296
log_block = ut_align_down(log->buf + log->buf_free,
297
OS_FILE_LOG_BLOCK_SIZE);
298
log_block_set_data_len(log_block, data_len);
300
if (data_len == OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
301
/* This block became full */
302
log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE);
303
log_block_set_checkpoint_no(log_block,
304
log_sys->next_checkpoint_no);
305
len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE;
309
/* Initialize the next block header */
310
log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, log->lsn);
315
log->buf_free += len;
317
ut_ad(log->buf_free <= log->buf_size);
323
srv_log_write_requests++;
326
/****************************************************************
335
ulint first_rec_group;
336
ib_uint64_t oldest_lsn;
338
log_t* log = log_sys;
339
ib_uint64_t checkpoint_age;
341
ut_ad(mutex_own(&(log->mutex)));
345
log_block = ut_align_down(log->buf + log->buf_free,
346
OS_FILE_LOG_BLOCK_SIZE);
347
first_rec_group = log_block_get_first_rec_group(log_block);
349
if (first_rec_group == 0) {
350
/* We initialized a new log block which was not written
351
full by the current mtr: the next mtr log record group
352
will start within this block at the offset data_len */
354
log_block_set_first_rec_group(
355
log_block, log_block_get_data_len(log_block));
358
if (log->buf_free > log->max_buf_free) {
360
log->check_flush_or_checkpoint = TRUE;
363
checkpoint_age = lsn - log->last_checkpoint_lsn;
365
if (checkpoint_age >= log->log_group_capacity) {
366
/* TODO: split btr_store_big_rec_extern_fields() into small
367
steps so that we can release all latches in the middle, and
368
call log_free_check() to ensure we never write over log written
369
after the latest checkpoint. In principle, we should split all
370
big_rec operations, but other operations are smaller. */
372
if (!log_has_printed_chkp_warning
373
|| difftime(time(NULL), log_last_warning_time) > 15) {
375
log_has_printed_chkp_warning = TRUE;
376
log_last_warning_time = time(NULL);
378
ut_print_timestamp(stderr);
380
" InnoDB: ERROR: the age of the last"
381
" checkpoint is %lu,\n"
382
"InnoDB: which exceeds the log group"
384
"InnoDB: If you are using big"
385
" BLOB or TEXT rows, you must set the\n"
386
"InnoDB: combined size of log files"
387
" at least 10 times bigger than the\n"
388
"InnoDB: largest such row.\n",
389
(ulong) checkpoint_age,
390
(ulong) log->log_group_capacity);
394
if (checkpoint_age <= log->max_modified_age_async) {
399
oldest_lsn = buf_pool_get_oldest_modification();
402
|| lsn - oldest_lsn > log->max_modified_age_async
403
|| checkpoint_age > log->max_checkpoint_age_async) {
405
log->check_flush_or_checkpoint = TRUE;
409
#ifdef UNIV_LOG_DEBUG
410
log_check_log_recs(log->buf + log->old_buf_free,
411
log->buf_free - log->old_buf_free, log->old_lsn);
417
#ifdef UNIV_LOG_ARCHIVE
/**********************************************************
Pads the current log block full with dummy log records. Used in producing
consistent archived log files. */
static
void
log_pad_current_log_block(void)
/*===========================*/
{
	byte		b	= MLOG_DUMMY_RECORD;
	ulint		pad_length;
	ulint		i;
	ib_uint64_t	lsn;

	/* We retrieve lsn only because otherwise gcc crashed on HP-UX */
	lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE);

	pad_length = OS_FILE_LOG_BLOCK_SIZE
		- (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE)
		- LOG_BLOCK_TRL_SIZE;

	for (i = 0; i < pad_length; i++) {
		log_write_low(&b, 1);
	}

	lsn = log_sys->lsn;

	log_close();
	log_release();

	/* After padding, the lsn must point to the start of the next
	block, right after its header. */
	ut_a(lsn % OS_FILE_LOG_BLOCK_SIZE == LOG_BLOCK_HDR_SIZE);
}
#endif /* UNIV_LOG_ARCHIVE */
/**********************************************************
452
Calculates the data capacity of a log group, when the log file headers are not
456
log_group_get_capacity(
457
/*===================*/
458
/* out: capacity in bytes */
459
log_group_t* group) /* in: log group */
461
ut_ad(mutex_own(&(log_sys->mutex)));
463
return((group->file_size - LOG_FILE_HDR_SIZE) * group->n_files);
466
/**********************************************************
467
Calculates the offset within a log group, when the log file headers are not
471
log_group_calc_size_offset(
472
/*=======================*/
473
/* out: size offset (<= offset) */
474
ulint offset, /* in: real offset within the log group */
475
log_group_t* group) /* in: log group */
477
ut_ad(mutex_own(&(log_sys->mutex)));
479
return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size));
482
/**********************************************************
483
Calculates the offset within a log group, when the log file headers are
487
log_group_calc_real_offset(
488
/*=======================*/
489
/* out: real offset (>= offset) */
490
ulint offset, /* in: size offset within the log group */
491
log_group_t* group) /* in: log group */
493
ut_ad(mutex_own(&(log_sys->mutex)));
495
return(offset + LOG_FILE_HDR_SIZE
496
* (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE)));
499
/**********************************************************
500
Calculates the offset of an lsn within a log group. */
503
log_group_calc_lsn_offset(
504
/*======================*/
505
/* out: offset within the log group */
506
ib_uint64_t lsn, /* in: lsn, must be within 4 GB of
508
log_group_t* group) /* in: log group */
511
ib_int64_t gr_lsn_size_offset;
512
ib_int64_t difference;
513
ib_int64_t group_size;
516
ut_ad(mutex_own(&(log_sys->mutex)));
518
/* If total log file size is > 2 GB we can easily get overflows
519
with 32-bit integers. Use 64-bit integers instead. */
523
gr_lsn_size_offset = (ib_int64_t)
524
log_group_calc_size_offset(group->lsn_offset, group);
526
group_size = (ib_int64_t) log_group_get_capacity(group);
530
difference = (ib_int64_t) (lsn - gr_lsn);
532
difference = (ib_int64_t) (gr_lsn - lsn);
534
difference = difference % group_size;
536
difference = group_size - difference;
539
offset = (gr_lsn_size_offset + difference) % group_size;
541
ut_a(offset < (((ib_int64_t) 1) << 32)); /* offset must be < 4 GB */
544
"Offset is %lu gr_lsn_offset is %lu difference is %lu\n",
545
(ulint)offset,(ulint)gr_lsn_size_offset, (ulint)difference);
548
return(log_group_calc_real_offset((ulint)offset, group));
551
/***********************************************************************
552
Calculates where in log files we find a specified lsn. */
555
log_calc_where_lsn_is(
556
/*==================*/
557
/* out: log file number */
558
ib_int64_t* log_file_offset, /* out: offset in that file
559
(including the header) */
560
ib_uint64_t first_header_lsn, /* in: first log file start
562
ib_uint64_t lsn, /* in: lsn whose position to
564
ulint n_log_files, /* in: total number of log
566
ib_int64_t log_file_size) /* in: log file size
567
(including the header) */
569
ib_int64_t capacity = log_file_size - LOG_FILE_HDR_SIZE;
571
ib_int64_t add_this_many;
573
if (lsn < first_header_lsn) {
574
add_this_many = 1 + (first_header_lsn - lsn)
575
/ (capacity * (ib_int64_t)n_log_files);
577
* capacity * (ib_int64_t)n_log_files;
580
ut_a(lsn >= first_header_lsn);
582
file_no = ((ulint)((lsn - first_header_lsn) / capacity))
584
*log_file_offset = (lsn - first_header_lsn) % capacity;
586
*log_file_offset = *log_file_offset + LOG_FILE_HDR_SIZE;
591
/************************************************************
592
Sets the field values in group to correspond to a given lsn. For this function
593
to work, the values must already be correctly initialized to correspond to
594
some lsn, for instance, a checkpoint lsn. */
597
log_group_set_fields(
598
/*=================*/
599
log_group_t* group, /* in: group */
600
ib_uint64_t lsn) /* in: lsn for which the values should be
603
group->lsn_offset = log_group_calc_lsn_offset(lsn, group);
607
/*********************************************************************
608
Calculates the recommended highest values for lsn - last_checkpoint_lsn,
609
lsn - buf_get_oldest_modification(), and lsn - max_archive_lsn_age. */
612
log_calc_max_ages(void)
613
/*===================*/
614
/* out: error value FALSE if the smallest log group is
615
too small to accommodate the number of OS threads in
616
the database server */
621
ibool success = TRUE;
622
ulint smallest_capacity;
623
ulint archive_margin;
624
ulint smallest_archive_margin;
626
ut_ad(!mutex_own(&(log_sys->mutex)));
628
mutex_enter(&(log_sys->mutex));
630
group = UT_LIST_GET_FIRST(log_sys->log_groups);
634
smallest_capacity = ULINT_MAX;
635
smallest_archive_margin = ULINT_MAX;
638
if (log_group_get_capacity(group) < smallest_capacity) {
640
smallest_capacity = log_group_get_capacity(group);
643
archive_margin = log_group_get_capacity(group)
644
- (group->file_size - LOG_FILE_HDR_SIZE)
645
- LOG_ARCHIVE_EXTRA_MARGIN;
647
if (archive_margin < smallest_archive_margin) {
649
smallest_archive_margin = archive_margin;
652
group = UT_LIST_GET_NEXT(log_groups, group);
655
/* Add extra safety */
656
smallest_capacity = smallest_capacity - smallest_capacity / 10;
658
/* For each OS thread we must reserve so much free space in the
659
smallest log group that it can accommodate the log entries produced
660
by single query steps: running out of free log space is a serious
661
system error which requires rebooting the database. */
663
free = LOG_CHECKPOINT_FREE_PER_THREAD * (10 + srv_thread_concurrency)
664
+ LOG_CHECKPOINT_EXTRA_FREE;
665
if (free >= smallest_capacity / 2) {
670
margin = smallest_capacity - free;
673
margin = ut_min(margin, log_sys->adm_checkpoint_interval);
675
margin = margin - margin / 10; /* Add still some extra safety */
677
log_sys->log_group_capacity = smallest_capacity;
679
log_sys->max_modified_age_async = margin
680
- margin / LOG_POOL_PREFLUSH_RATIO_ASYNC;
681
log_sys->max_modified_age_sync = margin
682
- margin / LOG_POOL_PREFLUSH_RATIO_SYNC;
684
log_sys->max_checkpoint_age_async = margin - margin
685
/ LOG_POOL_CHECKPOINT_RATIO_ASYNC;
686
log_sys->max_checkpoint_age = margin;
688
#ifdef UNIV_LOG_ARCHIVE
689
log_sys->max_archived_lsn_age = smallest_archive_margin;
691
log_sys->max_archived_lsn_age_async = smallest_archive_margin
692
- smallest_archive_margin / LOG_ARCHIVE_RATIO_ASYNC;
693
#endif /* UNIV_LOG_ARCHIVE */
695
mutex_exit(&(log_sys->mutex));
699
"InnoDB: Error: ib_logfiles are too small"
700
" for innodb_thread_concurrency %lu.\n"
701
"InnoDB: The combined size of ib_logfiles"
702
" should be bigger than\n"
703
"InnoDB: 200 kB * innodb_thread_concurrency.\n"
704
"InnoDB: To get mysqld to start up, set"
705
" innodb_thread_concurrency in my.cnf\n"
706
"InnoDB: to a lower value, for example, to 8."
707
" After an ERROR-FREE shutdown\n"
708
"InnoDB: of mysqld you can adjust the size of"
709
" ib_logfiles, as explained in\n"
710
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
711
"adding-and-removing.html\n"
712
"InnoDB: Cannot continue operation."
713
" Calling exit(1).\n",
714
(ulong)srv_thread_concurrency);
722
/**********************************************************
723
Initializes the log. */
731
log_sys = mem_alloc(sizeof(log_t));
733
mutex_create(&log_sys->mutex, SYNC_LOG);
735
mutex_enter(&(log_sys->mutex));
737
/* Start the lsn from one log block from zero: this way every
738
log record has a start lsn != zero, a fact which we will use */
740
log_sys->lsn = LOG_START_LSN;
742
ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE);
743
ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);
745
buf = ut_malloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE);
746
log_sys->buf = ut_align(buf, OS_FILE_LOG_BLOCK_SIZE);
748
log_sys->buf_size = LOG_BUFFER_SIZE;
750
memset(log_sys->buf, '\0', LOG_BUFFER_SIZE);
752
log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
753
- LOG_BUF_FLUSH_MARGIN;
754
log_sys->check_flush_or_checkpoint = TRUE;
755
UT_LIST_INIT(log_sys->log_groups);
757
log_sys->n_log_ios = 0;
759
log_sys->n_log_ios_old = log_sys->n_log_ios;
760
log_sys->last_printout_time = time(NULL);
761
/*----------------------------*/
763
log_sys->buf_next_to_write = 0;
765
log_sys->write_lsn = 0;
766
log_sys->current_flush_lsn = 0;
767
log_sys->flushed_to_disk_lsn = 0;
769
log_sys->written_to_some_lsn = log_sys->lsn;
770
log_sys->written_to_all_lsn = log_sys->lsn;
772
log_sys->n_pending_writes = 0;
774
log_sys->no_flush_event = os_event_create(NULL);
776
os_event_set(log_sys->no_flush_event);
778
log_sys->one_flushed_event = os_event_create(NULL);
780
os_event_set(log_sys->one_flushed_event);
782
/*----------------------------*/
783
log_sys->adm_checkpoint_interval = ULINT_MAX;
785
log_sys->next_checkpoint_no = 0;
786
log_sys->last_checkpoint_lsn = log_sys->lsn;
787
log_sys->n_pending_checkpoint_writes = 0;
789
rw_lock_create(&log_sys->checkpoint_lock, SYNC_NO_ORDER_CHECK);
791
log_sys->checkpoint_buf
792
= ut_align(mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE),
793
OS_FILE_LOG_BLOCK_SIZE);
794
memset(log_sys->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE);
795
/*----------------------------*/
797
#ifdef UNIV_LOG_ARCHIVE
798
/* Under MySQL, log archiving is always off */
799
log_sys->archiving_state = LOG_ARCH_OFF;
800
log_sys->archived_lsn = log_sys->lsn;
801
log_sys->next_archived_lsn = 0;
803
log_sys->n_pending_archive_ios = 0;
805
rw_lock_create(&log_sys->archive_lock, SYNC_NO_ORDER_CHECK);
807
log_sys->archive_buf = NULL;
810
ut_malloc(LOG_ARCHIVE_BUF_SIZE
811
+ OS_FILE_LOG_BLOCK_SIZE),
812
OS_FILE_LOG_BLOCK_SIZE); */
813
log_sys->archive_buf_size = 0;
815
/* memset(log_sys->archive_buf, '\0', LOG_ARCHIVE_BUF_SIZE); */
817
log_sys->archiving_on = os_event_create(NULL);
818
#endif /* UNIV_LOG_ARCHIVE */
820
/*----------------------------*/
822
log_block_init(log_sys->buf, log_sys->lsn);
823
log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
825
log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
826
log_sys->lsn = LOG_START_LSN + LOG_BLOCK_HDR_SIZE;
828
mutex_exit(&(log_sys->mutex));
830
#ifdef UNIV_LOG_DEBUG
832
recv_sys_init(FALSE, buf_pool_get_curr_size());
834
recv_sys->parse_start_lsn = log_sys->lsn;
835
recv_sys->scanned_lsn = log_sys->lsn;
836
recv_sys->scanned_checkpoint_no = 0;
837
recv_sys->recovered_lsn = log_sys->lsn;
838
recv_sys->limit_lsn = IB_ULONGLONG_MAX;
842
/**********************************************************************
843
Inits a log group to the log system. */
848
ulint id, /* in: group id */
849
ulint n_files, /* in: number of log files */
850
ulint file_size, /* in: log file size in bytes */
851
ulint space_id, /* in: space id of the file space
852
which contains the log files of this
854
ulint archive_space_id __attribute__((unused)))
855
/* in: space id of the file space
856
which contains some archived log
857
files for this group; currently, only
858
for the first log group this is
865
group = mem_alloc(sizeof(log_group_t));
868
group->n_files = n_files;
869
group->file_size = file_size;
870
group->space_id = space_id;
871
group->state = LOG_GROUP_OK;
872
group->lsn = LOG_START_LSN;
873
group->lsn_offset = LOG_FILE_HDR_SIZE;
874
group->n_pending_writes = 0;
876
group->file_header_bufs = mem_alloc(sizeof(byte*) * n_files);
877
#ifdef UNIV_LOG_ARCHIVE
878
group->archive_file_header_bufs = mem_alloc(sizeof(byte*) * n_files);
879
#endif /* UNIV_LOG_ARCHIVE */
881
for (i = 0; i < n_files; i++) {
882
*(group->file_header_bufs + i) = ut_align(
883
mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE),
884
OS_FILE_LOG_BLOCK_SIZE);
886
memset(*(group->file_header_bufs + i), '\0',
889
#ifdef UNIV_LOG_ARCHIVE
890
*(group->archive_file_header_bufs + i) = ut_align(
891
mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE),
892
OS_FILE_LOG_BLOCK_SIZE);
893
memset(*(group->archive_file_header_bufs + i), '\0',
895
#endif /* UNIV_LOG_ARCHIVE */
898
#ifdef UNIV_LOG_ARCHIVE
899
group->archive_space_id = archive_space_id;
901
group->archived_file_no = 0;
902
group->archived_offset = 0;
903
#endif /* UNIV_LOG_ARCHIVE */
905
group->checkpoint_buf = ut_align(
906
mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE), OS_FILE_LOG_BLOCK_SIZE);
908
memset(group->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE);
910
UT_LIST_ADD_LAST(log_groups, log_sys->log_groups, group);
912
ut_a(log_calc_max_ages());
915
/**********************************************************************
916
Does the unlockings needed in flush i/o completion. */
919
log_flush_do_unlocks(
920
/*=================*/
921
ulint code) /* in: any ORed combination of LOG_UNLOCK_FLUSH_LOCK
922
and LOG_UNLOCK_NONE_FLUSHED_LOCK */
924
ut_ad(mutex_own(&(log_sys->mutex)));
926
/* NOTE that we must own the log mutex when doing the setting of the
927
events: this is because transactions will wait for these events to
928
be set, and at that moment the log flush they were waiting for must
929
have ended. If the log mutex were not reserved here, the i/o-thread
930
calling this function might be preempted for a while, and when it
931
resumed execution, it might be that a new flush had been started, and
932
this function would erroneously signal the NEW flush as completed.
933
Thus, the changes in the state of these events are performed
934
atomically in conjunction with the changes in the state of
935
log_sys->n_pending_writes etc. */
937
if (code & LOG_UNLOCK_NONE_FLUSHED_LOCK) {
938
os_event_set(log_sys->one_flushed_event);
941
if (code & LOG_UNLOCK_FLUSH_LOCK) {
942
os_event_set(log_sys->no_flush_event);
946
/**********************************************************************
947
Checks if a flush is completed for a log group and does the completion
951
log_group_check_flush_completion(
952
/*=============================*/
953
/* out: LOG_UNLOCK_NONE_FLUSHED_LOCK or 0 */
954
log_group_t* group) /* in: log group */
956
ut_ad(mutex_own(&(log_sys->mutex)));
958
if (!log_sys->one_flushed && group->n_pending_writes == 0) {
960
if (log_debug_writes) {
962
"Log flushed first to group %lu\n",
965
#endif /* UNIV_DEBUG */
966
log_sys->written_to_some_lsn = log_sys->write_lsn;
967
log_sys->one_flushed = TRUE;
969
return(LOG_UNLOCK_NONE_FLUSHED_LOCK);
973
if (log_debug_writes && (group->n_pending_writes == 0)) {
975
fprintf(stderr, "Log flushed to group %lu\n",
978
#endif /* UNIV_DEBUG */
982
/**********************************************************
983
Checks if a flush is completed and does the completion routine if yes. */
986
log_sys_check_flush_completion(void)
987
/*================================*/
988
/* out: LOG_UNLOCK_FLUSH_LOCK or 0 */
993
ut_ad(mutex_own(&(log_sys->mutex)));
995
if (log_sys->n_pending_writes == 0) {
997
log_sys->written_to_all_lsn = log_sys->write_lsn;
998
log_sys->buf_next_to_write = log_sys->write_end_offset;
1000
if (log_sys->write_end_offset > log_sys->max_buf_free / 2) {
1001
/* Move the log buffer content to the start of the
1004
move_start = ut_calc_align_down(
1005
log_sys->write_end_offset,
1006
OS_FILE_LOG_BLOCK_SIZE);
1007
move_end = ut_calc_align(log_sys->buf_free,
1008
OS_FILE_LOG_BLOCK_SIZE);
1010
ut_memmove(log_sys->buf, log_sys->buf + move_start,
1011
move_end - move_start);
1012
log_sys->buf_free -= move_start;
1014
log_sys->buf_next_to_write -= move_start;
1017
return(LOG_UNLOCK_FLUSH_LOCK);
1023
/**********************************************************
1024
Completes an i/o to a log file. */
1029
log_group_t* group) /* in: log group or a dummy pointer */
1033
#ifdef UNIV_LOG_ARCHIVE
1034
if ((byte*)group == &log_archive_io) {
1035
/* It was an archive write */
1037
log_io_complete_archive();
1041
#endif /* UNIV_LOG_ARCHIVE */
1043
if ((ulint)group & 0x1UL) {
1044
/* It was a checkpoint write */
1045
group = (log_group_t*)((ulint)group - 1);
1047
if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
1048
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
1050
fil_flush(group->space_id);
1054
if (log_debug_writes) {
1056
"Checkpoint info written to group %lu\n",
1059
#endif /* UNIV_DEBUG */
1060
log_io_complete_checkpoint();
1065
ut_error; /* We currently use synchronous writing of the
1066
logs and cannot end up here! */
1068
if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
1069
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC
1070
&& srv_flush_log_at_trx_commit != 2) {
1072
fil_flush(group->space_id);
1075
mutex_enter(&(log_sys->mutex));
1077
ut_a(group->n_pending_writes > 0);
1078
ut_a(log_sys->n_pending_writes > 0);
1080
group->n_pending_writes--;
1081
log_sys->n_pending_writes--;
1083
unlock = log_group_check_flush_completion(group);
1084
unlock = unlock | log_sys_check_flush_completion();
1086
log_flush_do_unlocks(unlock);
1088
mutex_exit(&(log_sys->mutex));
1091
/**********************************************************
1092
Writes a log file header to a log file space. */
1095
log_group_file_header_flush(
1096
/*========================*/
1097
log_group_t* group, /* in: log group */
1098
ulint nth_file, /* in: header to the nth file in the
1100
ib_uint64_t start_lsn) /* in: log file data starts at this
1106
ut_ad(mutex_own(&(log_sys->mutex)));
1107
ut_a(nth_file < group->n_files);
1109
buf = *(group->file_header_bufs + nth_file);
1111
mach_write_to_4(buf + LOG_GROUP_ID, group->id);
1112
mach_write_ull(buf + LOG_FILE_START_LSN, start_lsn);
1114
/* Wipe over possible label of ibbackup --restore */
1115
memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, " ", 4);
1117
dest_offset = nth_file * group->file_size;
1120
if (log_debug_writes) {
1122
"Writing log file header to group %lu file %lu\n",
1123
(ulong) group->id, (ulong) nth_file);
1125
#endif /* UNIV_DEBUG */
1127
log_sys->n_log_ios++;
1129
srv_os_log_pending_writes++;
1131
fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, 0,
1132
dest_offset / UNIV_PAGE_SIZE,
1133
dest_offset % UNIV_PAGE_SIZE,
1134
OS_FILE_LOG_BLOCK_SIZE,
1137
srv_os_log_pending_writes--;
1141
/**********************************************************
1142
Stores a 4-byte checksum to the trailer checksum field of a log block
1143
before writing it to a log file. This checksum is used in recovery to
1144
check the consistency of a log block. */
1147
log_block_store_checksum(
1148
/*=====================*/
1149
byte* block) /* in/out: pointer to a log block */
1151
log_block_set_checksum(block, log_block_calc_checksum(block));
1154
/**********************************************************
1155
Writes a buffer to a log file group. */
1158
log_group_write_buf(
1159
/*================*/
1160
log_group_t* group, /* in: log group */
1161
byte* buf, /* in: buffer */
1162
ulint len, /* in: buffer len; must be divisible
1163
by OS_FILE_LOG_BLOCK_SIZE */
1164
ib_uint64_t start_lsn, /* in: start lsn of the buffer; must
1166
OS_FILE_LOG_BLOCK_SIZE */
1167
ulint new_data_offset)/* in: start offset of new data in
1168
buf: this parameter is used to decide
1169
if we have to write a new log file
1177
ut_ad(mutex_own(&(log_sys->mutex)));
1178
ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0);
1179
ut_a(((ulint) start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
1181
if (new_data_offset == 0) {
1182
write_header = TRUE;
1184
write_header = FALSE;
1192
next_offset = log_group_calc_lsn_offset(start_lsn, group);
1194
if ((next_offset % group->file_size == LOG_FILE_HDR_SIZE)
1196
/* We start to write a new log file instance in the group */
1198
log_group_file_header_flush(group,
1199
next_offset / group->file_size,
1201
srv_os_log_written+= OS_FILE_LOG_BLOCK_SIZE;
1205
if ((next_offset % group->file_size) + len > group->file_size) {
1207
write_len = group->file_size
1208
- (next_offset % group->file_size);
1214
if (log_debug_writes) {
1217
"Writing log file segment to group %lu"
1218
" offset %lu len %lu\n"
1219
"start lsn %"PRIu64"\n"
1220
"First block n:o %lu last block n:o %lu\n",
1221
(ulong) group->id, (ulong) next_offset,
1224
(ulong) log_block_get_hdr_no(buf),
1225
(ulong) log_block_get_hdr_no(
1226
buf + write_len - OS_FILE_LOG_BLOCK_SIZE));
1227
ut_a(log_block_get_hdr_no(buf)
1228
== log_block_convert_lsn_to_no(start_lsn));
1230
for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
1232
ut_a(log_block_get_hdr_no(buf) + i
1233
== log_block_get_hdr_no(
1234
buf + i * OS_FILE_LOG_BLOCK_SIZE));
1237
#endif /* UNIV_DEBUG */
1238
/* Calculate the checksums for each log block and write them to
1239
the trailer fields of the log blocks */
1241
for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
1242
log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE);
1246
log_sys->n_log_ios++;
1248
srv_os_log_pending_writes++;
1250
fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, 0,
1251
next_offset / UNIV_PAGE_SIZE,
1252
next_offset % UNIV_PAGE_SIZE, write_len, buf, group);
1254
srv_os_log_pending_writes--;
1256
srv_os_log_written+= write_len;
1260
if (write_len < len) {
1261
start_lsn += write_len;
1265
write_header = TRUE;
1271
/**********************************************************
1272
This function is called, e.g., when a transaction wants to commit. It checks
1273
that the log has been written to the log file up to the last log entry written
1274
by the transaction. If there is a flush running, it waits and checks if the
1275
flush flushed enough. If not, starts a new flush. */
1280
ib_uint64_t lsn, /* in: log sequence number up to which
1281
the log should be written,
1282
IB_ULONGLONG_MAX if not specified */
1283
ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
1284
or LOG_WAIT_ALL_GROUPS */
1285
ibool flush_to_disk)
1286
/* in: TRUE if we want the written log
1287
also to be flushed to disk */
1295
ulint loop_count = 0;
1296
#endif /* UNIV_DEBUG */
1299
if (recv_no_ibuf_operations) {
1300
/* Recovery is running and no operations on the log files are
1301
allowed yet (the variable name .._no_ibuf_.. is misleading) */
1310
ut_ad(loop_count < 5);
1313
if (loop_count > 2) {
1314
fprintf(stderr, "Log loop count %lu\n", loop_count);
1319
mutex_enter(&(log_sys->mutex));
1322
&& log_sys->flushed_to_disk_lsn >= lsn) {
1324
mutex_exit(&(log_sys->mutex));
1330
&& (log_sys->written_to_all_lsn >= lsn
1331
|| (log_sys->written_to_some_lsn >= lsn
1332
&& wait != LOG_WAIT_ALL_GROUPS))) {
1334
mutex_exit(&(log_sys->mutex));
1339
if (log_sys->n_pending_writes > 0) {
1340
/* A write (+ possibly flush to disk) is running */
1343
&& log_sys->current_flush_lsn >= lsn) {
1344
/* The write + flush will write enough: wait for it to
1351
&& log_sys->write_lsn >= lsn) {
1352
/* The write will write enough: wait for it to
1358
mutex_exit(&(log_sys->mutex));
1360
/* Wait for the write to complete and try to start a new
1363
os_event_wait(log_sys->no_flush_event);
1369
&& log_sys->buf_free == log_sys->buf_next_to_write) {
1370
/* Nothing to write and no flush to disk requested */
1372
mutex_exit(&(log_sys->mutex));
1378
if (log_debug_writes) {
1380
"Writing log from %"PRIu64" up to lsn %"PRIu64"\n",
1381
log_sys->written_to_all_lsn,
1384
#endif /* UNIV_DEBUG */
1385
log_sys->n_pending_writes++;
1387
group = UT_LIST_GET_FIRST(log_sys->log_groups);
1388
group->n_pending_writes++; /* We assume here that we have only
1391
os_event_reset(log_sys->no_flush_event);
1392
os_event_reset(log_sys->one_flushed_event);
1394
start_offset = log_sys->buf_next_to_write;
1395
end_offset = log_sys->buf_free;
1397
area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE);
1398
area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE);
1400
ut_ad(area_end - area_start > 0);
1402
log_sys->write_lsn = log_sys->lsn;
1404
if (flush_to_disk) {
1405
log_sys->current_flush_lsn = log_sys->lsn;
1408
log_sys->one_flushed = FALSE;
1410
log_block_set_flush_bit(log_sys->buf + area_start, TRUE);
1411
log_block_set_checkpoint_no(
1412
log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
1413
log_sys->next_checkpoint_no);
1415
/* Copy the last, incompletely written, log block a log block length
1416
up, so that when the flush operation writes from the log buffer, the
1417
segment to write will not be changed by writers to the log */
1419
ut_memcpy(log_sys->buf + area_end,
1420
log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
1421
OS_FILE_LOG_BLOCK_SIZE);
1423
log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE;
1424
log_sys->write_end_offset = log_sys->buf_free;
1426
group = UT_LIST_GET_FIRST(log_sys->log_groups);
1428
/* Do the write to the log files */
1431
log_group_write_buf(
1432
group, log_sys->buf + area_start,
1433
area_end - area_start,
1434
ut_uint64_align_down(log_sys->written_to_all_lsn,
1435
OS_FILE_LOG_BLOCK_SIZE),
1436
start_offset - area_start);
1438
log_group_set_fields(group, log_sys->write_lsn);
1440
group = UT_LIST_GET_NEXT(log_groups, group);
1443
mutex_exit(&(log_sys->mutex));
1445
if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
1446
/* O_DSYNC means the OS did not buffer the log file at all:
1447
so we have also flushed to disk what we have written */
1449
log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
1451
} else if (flush_to_disk) {
1453
group = UT_LIST_GET_FIRST(log_sys->log_groups);
1455
fil_flush(group->space_id);
1456
log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
1459
mutex_enter(&(log_sys->mutex));
1461
group = UT_LIST_GET_FIRST(log_sys->log_groups);
1463
ut_a(group->n_pending_writes == 1);
1464
ut_a(log_sys->n_pending_writes == 1);
1466
group->n_pending_writes--;
1467
log_sys->n_pending_writes--;
1469
unlock = log_group_check_flush_completion(group);
1470
unlock = unlock | log_sys_check_flush_completion();
1472
log_flush_do_unlocks(unlock);
1474
mutex_exit(&(log_sys->mutex));
1479
mutex_exit(&(log_sys->mutex));
1482
case LOG_WAIT_ONE_GROUP:
1483
os_event_wait(log_sys->one_flushed_event);
1485
case LOG_WAIT_ALL_GROUPS:
1486
os_event_wait(log_sys->no_flush_event);
1493
#endif /* UNIV_DEBUG */
1497
/********************************************************************
1498
Does a syncronous flush of the log buffer to disk. */
1501
log_buffer_flush_to_disk(void)
1502
/*==========================*/
1506
mutex_enter(&(log_sys->mutex));
1510
mutex_exit(&(log_sys->mutex));
1512
log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE);
1515
/********************************************************************
1516
Tries to establish a big enough margin of free space in the log buffer, such
1517
that a new log entry can be catenated without an immediate need for a flush. */
1520
log_flush_margin(void)
1521
/*==================*/
1523
log_t* log = log_sys;
1524
ib_uint64_t lsn = 0;
1526
mutex_enter(&(log->mutex));
1528
if (log->buf_free > log->max_buf_free) {
1530
if (log->n_pending_writes > 0) {
1531
/* A flush is running: hope that it will provide enough
1538
mutex_exit(&(log->mutex));
1541
log_write_up_to(lsn, LOG_NO_WAIT, FALSE);
1545
/********************************************************************
1546
Advances the smallest lsn for which there are unflushed dirty blocks in the
1547
buffer pool. NOTE: this function may only be called if the calling thread owns
1548
no synchronization objects! */
1551
log_preflush_pool_modified_pages(
1552
/*=============================*/
1553
/* out: FALSE if there was a
1554
flush batch of the same type
1555
running, which means that we
1556
could not start this flush
1558
ib_uint64_t new_oldest, /* in: try to advance
1559
oldest_modified_lsn at least
1561
ibool sync) /* in: TRUE if synchronous
1562
operation is desired */
1566
if (recv_recovery_on) {
1567
/* If the recovery is running, we must first apply all
1568
log records to their respective file pages to get the
1569
right modify lsn values to these pages: otherwise, there
1570
might be pages on disk which are not yet recovered to the
1571
current lsn, and even after calling this function, we could
1572
not know how up-to-date the disk version of the database is,
1573
and we could not make a new checkpoint on the basis of the
1574
info on the buffer pool only. */
1576
recv_apply_hashed_log_recs(TRUE);
1579
n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX, new_oldest);
1582
buf_flush_wait_batch_end(BUF_FLUSH_LIST);
1585
if (n_pages == ULINT_UNDEFINED) {
1593
/**********************************************************
1594
Completes a checkpoint. */
1597
log_complete_checkpoint(void)
1598
/*=========================*/
1600
ut_ad(mutex_own(&(log_sys->mutex)));
1601
ut_ad(log_sys->n_pending_checkpoint_writes == 0);
1603
log_sys->next_checkpoint_no++;
1605
log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn;
1607
rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
1610
/**********************************************************
1611
Completes an asynchronous checkpoint info write i/o to a log file. */
1614
log_io_complete_checkpoint(void)
1615
/*============================*/
1617
mutex_enter(&(log_sys->mutex));
1619
ut_ad(log_sys->n_pending_checkpoint_writes > 0);
1621
log_sys->n_pending_checkpoint_writes--;
1623
if (log_sys->n_pending_checkpoint_writes == 0) {
1624
log_complete_checkpoint();
1627
mutex_exit(&(log_sys->mutex));
1630
/***********************************************************************
1631
Writes info to a checkpoint about a log group. */
1634
log_checkpoint_set_nth_group_info(
1635
/*==============================*/
1636
byte* buf, /* in: buffer for checkpoint info */
1637
ulint n, /* in: nth slot */
1638
ulint file_no,/* in: archived file number */
1639
ulint offset) /* in: archived file offset */
1641
ut_ad(n < LOG_MAX_N_GROUPS);
1643
mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
1644
+ 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO, file_no);
1645
mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
1646
+ 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET, offset);
1649
/***********************************************************************
1650
Gets info from a checkpoint about a log group. */
1653
log_checkpoint_get_nth_group_info(
1654
/*==============================*/
1655
byte* buf, /* in: buffer containing checkpoint info */
1656
ulint n, /* in: nth slot */
1657
ulint* file_no,/* out: archived file number */
1658
ulint* offset) /* out: archived file offset */
1660
ut_ad(n < LOG_MAX_N_GROUPS);
1662
*file_no = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
1663
+ 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO);
1664
*offset = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
1665
+ 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET);
1668
/**********************************************************
1669
Writes the checkpoint info to a log group header. */
1672
log_group_checkpoint(
1673
/*=================*/
1674
log_group_t* group) /* in: log group */
1676
log_group_t* group2;
1677
#ifdef UNIV_LOG_ARCHIVE
1678
ib_uint64_t archived_lsn;
1679
ib_uint64_t next_archived_lsn;
1680
#endif /* UNIV_LOG_ARCHIVE */
1686
ut_ad(mutex_own(&(log_sys->mutex)));
1687
#if LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE
1688
# error "LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE"
1691
buf = group->checkpoint_buf;
1693
mach_write_ull(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
1694
mach_write_ull(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
1696
mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
1697
log_group_calc_lsn_offset(
1698
log_sys->next_checkpoint_lsn, group));
1700
mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);
1702
#ifdef UNIV_LOG_ARCHIVE
1703
if (log_sys->archiving_state == LOG_ARCH_OFF) {
1704
archived_lsn = IB_ULONGLONG_MAX;
1706
archived_lsn = log_sys->archived_lsn;
1708
if (archived_lsn != log_sys->next_archived_lsn) {
1709
next_archived_lsn = log_sys->next_archived_lsn;
1710
/* For debugging only */
1714
mach_write_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn);
1715
#else /* UNIV_LOG_ARCHIVE */
1716
mach_write_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN, IB_ULONGLONG_MAX);
1717
#endif /* UNIV_LOG_ARCHIVE */
1719
for (i = 0; i < LOG_MAX_N_GROUPS; i++) {
1720
log_checkpoint_set_nth_group_info(buf, i, 0, 0);
1723
group2 = UT_LIST_GET_FIRST(log_sys->log_groups);
1726
log_checkpoint_set_nth_group_info(buf, group2->id,
1727
#ifdef UNIV_LOG_ARCHIVE
1728
group2->archived_file_no,
1729
group2->archived_offset
1730
#else /* UNIV_LOG_ARCHIVE */
1732
#endif /* UNIV_LOG_ARCHIVE */
1735
group2 = UT_LIST_GET_NEXT(log_groups, group2);
1738
fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
1739
mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
1741
fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
1742
LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
1743
mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
1745
/* Starting from InnoDB-3.23.50, we also write info on allocated
1746
size in the tablespace */
1748
mach_write_to_4(buf + LOG_CHECKPOINT_FSP_FREE_LIMIT,
1749
log_fsp_current_free_limit);
1751
mach_write_to_4(buf + LOG_CHECKPOINT_FSP_MAGIC_N,
1752
LOG_CHECKPOINT_FSP_MAGIC_N_VAL);
1754
/* We alternate the physical place of the checkpoint info in the first
1757
if ((log_sys->next_checkpoint_no & 1) == 0) {
1758
write_offset = LOG_CHECKPOINT_1;
1760
write_offset = LOG_CHECKPOINT_2;
1764
if (log_sys->n_pending_checkpoint_writes == 0) {
1766
rw_lock_x_lock_gen(&(log_sys->checkpoint_lock),
1770
log_sys->n_pending_checkpoint_writes++;
1772
log_sys->n_log_ios++;
1774
/* We send as the last parameter the group machine address
1775
added with 1, as we want to distinguish between a normal log
1776
file write and a checkpoint field write */
1778
fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->space_id, 0,
1779
write_offset / UNIV_PAGE_SIZE,
1780
write_offset % UNIV_PAGE_SIZE,
1781
OS_FILE_LOG_BLOCK_SIZE,
1782
buf, ((byte*)group + 1));
1784
ut_ad(((ulint)group & 0x1UL) == 0);
1788
/**********************************************************
1789
Writes info to a buffer of a log group when log files are created in
1790
backup restoration. */
1793
log_reset_first_header_and_checkpoint(
1794
/*==================================*/
1795
byte* hdr_buf,/* in: buffer which will be written to the
1796
start of the first log file */
1797
ib_uint64_t start) /* in: lsn of the start of the first log file;
1798
we pretend that there is a checkpoint at
1799
start + LOG_BLOCK_HDR_SIZE */
1805
mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0);
1806
mach_write_ull(hdr_buf + LOG_FILE_START_LSN, start);
1808
lsn = start + LOG_BLOCK_HDR_SIZE;
1810
/* Write the label of ibbackup --restore */
1811
strcpy((char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
1813
ut_sprintf_timestamp((char*) hdr_buf
1814
+ (LOG_FILE_WAS_CREATED_BY_HOT_BACKUP
1815
+ (sizeof "ibbackup ") - 1));
1816
buf = hdr_buf + LOG_CHECKPOINT_1;
1818
mach_write_ull(buf + LOG_CHECKPOINT_NO, 0);
1819
mach_write_ull(buf + LOG_CHECKPOINT_LSN, lsn);
1821
mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
1822
LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE);
1824
mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, 2 * 1024 * 1024);
1826
mach_write_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN, IB_ULONGLONG_MAX);
1828
fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
1829
mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
1831
fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
1832
LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
1833
mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
1835
/* Starting from InnoDB-3.23.50, we should also write info on
1836
allocated size in the tablespace, but unfortunately we do not
1840
/**********************************************************
1841
Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */
1844
log_group_read_checkpoint_info(
1845
/*===========================*/
1846
log_group_t* group, /* in: log group */
1847
ulint field) /* in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
1849
ut_ad(mutex_own(&(log_sys->mutex)));
1851
log_sys->n_log_ios++;
1853
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->space_id, 0,
1854
field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE,
1855
OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
1858
/**********************************************************
1859
Writes checkpoint info to groups. */
1862
log_groups_write_checkpoint_info(void)
1863
/*==================================*/
1867
ut_ad(mutex_own(&(log_sys->mutex)));
1869
group = UT_LIST_GET_FIRST(log_sys->log_groups);
1872
log_group_checkpoint(group);
1874
group = UT_LIST_GET_NEXT(log_groups, group);
1878
/**********************************************************
1879
Makes a checkpoint. Note that this function does not flush dirty
1880
blocks from the buffer pool: it only checks what is lsn of the oldest
1881
modification in the pool, and writes information about the lsn in
1882
log files. Use log_make_checkpoint_at to flush also the pool. */
1887
/* out: TRUE if success, FALSE if a checkpoint
1888
write was already running */
1889
ibool sync, /* in: TRUE if synchronous operation is
1891
ibool write_always) /* in: the function normally checks if the
1892
the new checkpoint would have a greater
1893
lsn than the previous one: if not, then no
1894
physical write is done; by setting this
1895
parameter TRUE, a physical write will always be
1896
made to log files */
1898
ib_uint64_t oldest_lsn;
1900
if (recv_recovery_is_on()) {
1901
recv_apply_hashed_log_recs(TRUE);
1904
if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
1905
fil_flush_file_spaces(FIL_TABLESPACE);
1908
mutex_enter(&(log_sys->mutex));
1910
oldest_lsn = log_buf_pool_get_oldest_modification();
1912
mutex_exit(&(log_sys->mutex));
1914
/* Because log also contains headers and dummy log records,
1915
if the buffer pool contains no dirty buffers, oldest_lsn
1916
gets the value log_sys->lsn from the previous function,
1917
and we must make sure that the log is flushed up to that
1918
lsn. If there are dirty buffers in the buffer pool, then our
1919
write-ahead-logging algorithm ensures that the log has been flushed
1920
up to oldest_lsn. */
1922
log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
1924
mutex_enter(&(log_sys->mutex));
1927
&& log_sys->last_checkpoint_lsn >= oldest_lsn) {
1929
mutex_exit(&(log_sys->mutex));
1934
ut_ad(log_sys->written_to_all_lsn >= oldest_lsn);
1936
if (log_sys->n_pending_checkpoint_writes > 0) {
1937
/* A checkpoint write is running */
1939
mutex_exit(&(log_sys->mutex));
1942
/* Wait for the checkpoint write to complete */
1943
rw_lock_s_lock(&(log_sys->checkpoint_lock));
1944
rw_lock_s_unlock(&(log_sys->checkpoint_lock));
1950
log_sys->next_checkpoint_lsn = oldest_lsn;
1953
if (log_debug_writes) {
1954
fprintf(stderr, "Making checkpoint no %lu at lsn %"PRIu64"\n",
1955
(ulong) log_sys->next_checkpoint_no,
1958
#endif /* UNIV_DEBUG */
1960
log_groups_write_checkpoint_info();
1962
mutex_exit(&(log_sys->mutex));
1965
/* Wait for the checkpoint write to complete */
1966
rw_lock_s_lock(&(log_sys->checkpoint_lock));
1967
rw_lock_s_unlock(&(log_sys->checkpoint_lock));
1973
/********************************************************************
1974
Makes a checkpoint at a given lsn or later. */
1977
log_make_checkpoint_at(
1978
/*===================*/
1979
ib_uint64_t lsn, /* in: make a checkpoint at this or a
1980
later lsn, if IB_ULONGLONG_MAX, makes
1981
a checkpoint at the latest lsn */
1982
ibool write_always) /* in: the function normally checks if
1983
the the new checkpoint would have a
1984
greater lsn than the previous one: if
1985
not, then no physical write is done;
1986
by setting this parameter TRUE, a
1987
physical write will always be made to
1990
/* Preflush pages synchronously */
1992
while (!log_preflush_pool_modified_pages(lsn, TRUE));
1994
while (!log_checkpoint(TRUE, write_always));
1997
/********************************************************************
1998
Tries to establish a big enough margin of free space in the log groups, such
1999
that a new log entry can be catenated without an immediate need for a
2000
checkpoint. NOTE: this function may only be called if the calling thread
2001
owns no synchronization objects! */
2004
log_checkpoint_margin(void)
2005
/*=======================*/
2007
log_t* log = log_sys;
2009
ib_uint64_t checkpoint_age;
2010
ib_uint64_t advance;
2011
ib_uint64_t oldest_lsn;
2013
ibool checkpoint_sync;
2014
ibool do_checkpoint;
2018
checkpoint_sync = FALSE;
2019
do_checkpoint = FALSE;
2021
mutex_enter(&(log->mutex));
2023
if (log->check_flush_or_checkpoint == FALSE) {
2024
mutex_exit(&(log->mutex));
2029
oldest_lsn = log_buf_pool_get_oldest_modification();
2031
age = log->lsn - oldest_lsn;
2033
if (age > log->max_modified_age_sync) {
2035
/* A flush is urgent: we have to do a synchronous preflush */
2038
advance = 2 * (age - log->max_modified_age_sync);
2039
} else if (age > log->max_modified_age_async) {
2041
/* A flush is not urgent: we do an asynchronous preflush */
2042
advance = age - log->max_modified_age_async;
2047
checkpoint_age = log->lsn - log->last_checkpoint_lsn;
2049
if (checkpoint_age > log->max_checkpoint_age) {
2050
/* A checkpoint is urgent: we do it synchronously */
2052
checkpoint_sync = TRUE;
2054
do_checkpoint = TRUE;
2056
} else if (checkpoint_age > log->max_checkpoint_age_async) {
2057
/* A checkpoint is not urgent: do it asynchronously */
2059
do_checkpoint = TRUE;
2061
log->check_flush_or_checkpoint = FALSE;
2063
log->check_flush_or_checkpoint = FALSE;
2066
mutex_exit(&(log->mutex));
2069
ib_uint64_t new_oldest = oldest_lsn + advance;
2071
success = log_preflush_pool_modified_pages(new_oldest, sync);
2073
/* If the flush succeeded, this thread has done its part
2074
and can proceed. If it did not succeed, there was another
2075
thread doing a flush at the same time. If sync was FALSE,
2076
the flush was not urgent, and we let this thread proceed.
2077
Otherwise, we let it start from the beginning again. */
2079
if (sync && !success) {
2080
mutex_enter(&(log->mutex));
2082
log->check_flush_or_checkpoint = TRUE;
2084
mutex_exit(&(log->mutex));
2089
if (do_checkpoint) {
2090
log_checkpoint(checkpoint_sync, FALSE);
2092
if (checkpoint_sync) {
2099
/**********************************************************
2100
Reads a specified log segment to a buffer. */
2103
log_group_read_log_seg(
2104
/*===================*/
2105
ulint type, /* in: LOG_ARCHIVE or LOG_RECOVER */
2106
byte* buf, /* in: buffer where to read */
2107
log_group_t* group, /* in: log group */
2108
ib_uint64_t start_lsn, /* in: read area start */
2109
ib_uint64_t end_lsn) /* in: read area end */
2112
ulint source_offset;
2115
ut_ad(mutex_own(&(log_sys->mutex)));
2117
sync = (type == LOG_RECOVER);
2119
source_offset = log_group_calc_lsn_offset(start_lsn, group);
2121
len = (ulint) (end_lsn - start_lsn);
2125
if ((source_offset % group->file_size) + len > group->file_size) {
2127
len = group->file_size - (source_offset % group->file_size);
2130
#ifdef UNIV_LOG_ARCHIVE
2131
if (type == LOG_ARCHIVE) {
2133
log_sys->n_pending_archive_ios++;
2135
#endif /* UNIV_LOG_ARCHIVE */
2137
log_sys->n_log_ios++;
2139
fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 0,
2140
source_offset / UNIV_PAGE_SIZE, source_offset % UNIV_PAGE_SIZE,
2146
if (start_lsn != end_lsn) {
2152
#ifdef UNIV_LOG_ARCHIVE
2153
/**********************************************************
2154
Generates an archived log file name. */
2157
log_archived_file_name_gen(
2158
/*=======================*/
2159
char* buf, /* in: buffer where to write */
2160
ulint id __attribute__((unused)),
2162
currently we only archive the first group */
2163
ulint file_no)/* in: file number */
2165
sprintf(buf, "%sib_arch_log_%010lu", srv_arch_dir, (ulong) file_no);
2168
/**********************************************************
2169
Writes a log file header to a log file space. */
2172
log_group_archive_file_header_write(
2173
/*================================*/
2174
log_group_t* group, /* in: log group */
2175
ulint nth_file, /* in: header to the nth file in the
2176
archive log file space */
2177
ulint file_no, /* in: archived file number */
2178
ib_uint64_t start_lsn) /* in: log file data starts at this
2184
ut_ad(mutex_own(&(log_sys->mutex)));
2186
ut_a(nth_file < group->n_files);
2188
buf = *(group->archive_file_header_bufs + nth_file);
2190
mach_write_to_4(buf + LOG_GROUP_ID, group->id);
2191
mach_write_ull(buf + LOG_FILE_START_LSN, start_lsn);
2192
mach_write_to_4(buf + LOG_FILE_NO, file_no);
2194
mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, FALSE);
2196
dest_offset = nth_file * group->file_size;
2198
log_sys->n_log_ios++;
2200
fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id,
2201
dest_offset / UNIV_PAGE_SIZE,
2202
dest_offset % UNIV_PAGE_SIZE,
2203
2 * OS_FILE_LOG_BLOCK_SIZE,
2204
buf, &log_archive_io);
2207
/**********************************************************
2208
Writes a log file header to a completed archived log file. */
2211
log_group_archive_completed_header_write(
2212
/*=====================================*/
2213
log_group_t* group, /* in: log group */
2214
ulint nth_file, /* in: header to the nth file in the
2215
archive log file space */
2216
ib_uint64_t end_lsn) /* in: end lsn of the file */
2221
ut_ad(mutex_own(&(log_sys->mutex)));
2222
ut_a(nth_file < group->n_files);
2224
buf = *(group->archive_file_header_bufs + nth_file);
2226
mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, TRUE);
2227
mach_write_ull(buf + LOG_FILE_END_LSN, end_lsn);
2229
dest_offset = nth_file * group->file_size + LOG_FILE_ARCH_COMPLETED;
2231
log_sys->n_log_ios++;
2233
fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id,
2234
dest_offset / UNIV_PAGE_SIZE,
2235
dest_offset % UNIV_PAGE_SIZE,
2236
OS_FILE_LOG_BLOCK_SIZE,
2237
buf + LOG_FILE_ARCH_COMPLETED,
2241
/**********************************************************
2242
Does the archive writes for a single log group. */
2247
log_group_t* group) /* in: log group */
2249
os_file_t file_handle;
2250
ib_uint64_t start_lsn;
2251
ib_uint64_t end_lsn;
2260
ut_ad(mutex_own(&(log_sys->mutex)));
2262
start_lsn = log_sys->archived_lsn;
2264
ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
2266
end_lsn = log_sys->next_archived_lsn;
2268
ut_a(end_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
2270
buf = log_sys->archive_buf;
2274
next_offset = group->archived_offset;
2276
if ((next_offset % group->file_size == 0)
2277
|| (fil_space_get_size(group->archive_space_id) == 0)) {
2279
/* Add the file to the archive file space; create or open the
2282
if (next_offset % group->file_size == 0) {
2283
open_mode = OS_FILE_CREATE;
2285
open_mode = OS_FILE_OPEN;
2288
log_archived_file_name_gen(name, group->id,
2289
group->archived_file_no + n_files);
2291
file_handle = os_file_create(name, open_mode, OS_FILE_AIO,
2292
OS_DATA_FILE, &ret);
2294
if (!ret && (open_mode == OS_FILE_CREATE)) {
2295
file_handle = os_file_create(
2296
name, OS_FILE_OPEN, OS_FILE_AIO,
2297
OS_DATA_FILE, &ret);
2302
"InnoDB: Cannot create or open"
2303
" archive log file %s.\n"
2304
"InnoDB: Cannot continue operation.\n"
2305
"InnoDB: Check that the log archive"
2306
" directory exists,\n"
2307
"InnoDB: you have access rights to it, and\n"
2308
"InnoDB: there is space available.\n", name);
2313
if (log_debug_writes) {
2314
fprintf(stderr, "Created archive file %s\n", name);
2316
#endif /* UNIV_DEBUG */
2318
ret = os_file_close(file_handle);
2322
/* Add the archive file as a node to the space */
2324
fil_node_create(name, group->file_size / UNIV_PAGE_SIZE,
2325
group->archive_space_id, FALSE);
2327
if (next_offset % group->file_size == 0) {
2328
log_group_archive_file_header_write(
2330
group->archived_file_no + n_files,
2333
next_offset += LOG_FILE_HDR_SIZE;
2337
len = end_lsn - start_lsn;
2339
if (group->file_size < (next_offset % group->file_size) + len) {
2341
len = group->file_size - (next_offset % group->file_size);
2345
if (log_debug_writes) {
2347
"Archiving starting at lsn %"PRIu64", len %lu"
2350
(ulong) len, (ulong) group->id);
2352
#endif /* UNIV_DEBUG */
2354
log_sys->n_pending_archive_ios++;
2356
log_sys->n_log_ios++;
2358
fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->archive_space_id,
2359
next_offset / UNIV_PAGE_SIZE, next_offset % UNIV_PAGE_SIZE,
2360
ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf,
2367
if (next_offset % group->file_size == 0) {
2371
if (end_lsn != start_lsn) {
2376
group->next_archived_file_no = group->archived_file_no + n_files;
2377
group->next_archived_offset = next_offset % group->file_size;
2379
ut_a(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
2382
/*********************************************************
2383
(Writes to the archive of each log group.) Currently, only the first
2384
group is archived. */
2387
log_archive_groups(void)
2388
/*====================*/
2392
ut_ad(mutex_own(&(log_sys->mutex)));
2394
group = UT_LIST_GET_FIRST(log_sys->log_groups);
2396
log_group_archive(group);
2399
/*********************************************************
2400
Completes the archiving write phase for (each log group), currently,
2401
the first log group. */
2404
log_archive_write_complete_groups(void)
2405
/*===================================*/
2411
ib_uint64_t start_lsn;
2412
ib_uint64_t end_lsn;
2415
ut_ad(mutex_own(&(log_sys->mutex)));
2417
group = UT_LIST_GET_FIRST(log_sys->log_groups);
2419
group->archived_file_no = group->next_archived_file_no;
2420
group->archived_offset = group->next_archived_offset;
2422
/* Truncate from the archive file space all but the last
2423
file, or if it has been written full, all files */
2425
n_files = (UNIV_PAGE_SIZE
2426
* fil_space_get_size(group->archive_space_id))
2430
end_offset = group->archived_offset;
2432
if (end_offset % group->file_size == 0) {
2434
trunc_files = n_files;
2436
trunc_files = n_files - 1;
2440
if (log_debug_writes && trunc_files) {
2442
"Complete file(s) archived to group %lu\n",
2445
#endif /* UNIV_DEBUG */
2447
/* Calculate the archive file space start lsn */
2448
start_lsn = log_sys->next_archived_lsn
2449
- (end_offset - LOG_FILE_HDR_SIZE + trunc_files
2450
* (group->file_size - LOG_FILE_HDR_SIZE));
2451
end_lsn = start_lsn;
2453
for (i = 0; i < trunc_files; i++) {
2455
end_lsn += group->file_size - LOG_FILE_HDR_SIZE;
2457
/* Write a notice to the headers of archived log
2458
files that the file write has been completed */
2460
log_group_archive_completed_header_write(group, i, end_lsn);
2463
fil_space_truncate_start(group->archive_space_id,
2464
trunc_files * group->file_size);
2467
if (log_debug_writes) {
2468
fputs("Archiving writes completed\n", stderr);
2470
#endif /* UNIV_DEBUG */
2473
/**********************************************************
2474
Completes an archiving i/o. */
2477
log_archive_check_completion_low(void)
2478
/*==================================*/
2480
ut_ad(mutex_own(&(log_sys->mutex)));
2482
if (log_sys->n_pending_archive_ios == 0
2483
&& log_sys->archiving_phase == LOG_ARCHIVE_READ) {
2486
if (log_debug_writes) {
2487
fputs("Archiving read completed\n", stderr);
2489
#endif /* UNIV_DEBUG */
2491
/* Archive buffer has now been read in: start archive writes */
2493
log_sys->archiving_phase = LOG_ARCHIVE_WRITE;
2495
log_archive_groups();
2498
if (log_sys->n_pending_archive_ios == 0
2499
&& log_sys->archiving_phase == LOG_ARCHIVE_WRITE) {
2501
log_archive_write_complete_groups();
2503
log_sys->archived_lsn = log_sys->next_archived_lsn;
2505
rw_lock_x_unlock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
2509
/**********************************************************
2510
Completes an archiving i/o. */
2513
log_io_complete_archive(void)
2514
/*=========================*/
2518
mutex_enter(&(log_sys->mutex));
2520
group = UT_LIST_GET_FIRST(log_sys->log_groups);
2522
mutex_exit(&(log_sys->mutex));
2524
fil_flush(group->archive_space_id);
2526
mutex_enter(&(log_sys->mutex));
2528
ut_ad(log_sys->n_pending_archive_ios > 0);
2530
log_sys->n_pending_archive_ios--;
2532
log_archive_check_completion_low();
2534
mutex_exit(&(log_sys->mutex));
2537
/* NOTE(review): corrupted extraction of log0log.c.  Stray decimal
line-number tokens (e.g. "2538") are interleaved with the code, and
several original lines are missing here: the UNIV_INTERN return-type
line, the function-name line "log_archive_do(", braces, return
statements, and some switch case labels.  This fragment does not
compile; restore from a pristine source before building.  Comments
below only annotate the visible lines. */
/************************************************************************
2538
Starts an archiving operation. */
2543
/* out: TRUE if succeed, FALSE if an archiving
2544
operation was already running */
2545
ibool sync, /* in: TRUE if synchronous operation is desired */
2546
ulint* n_bytes)/* out: archive log buffer size, 0 if nothing to
2549
ibool calc_new_limit;
2550
ib_uint64_t start_lsn;
2551
ib_uint64_t limit_lsn;
2553
calc_new_limit = TRUE;
2555
mutex_enter(&(log_sys->mutex));
2557
/* NOTE(review): the case labels for other archiving states (e.g.
LOG_ARCH_OFF / LOG_ARCH_ON) appear to have been lost in extraction;
only STOPPED and STOPPING2 survive below -- confirm against a clean
copy. */
switch (log_sys->archiving_state) {
2560
mutex_exit(&(log_sys->mutex));
2565
case LOG_ARCH_STOPPED:
2566
case LOG_ARCH_STOPPING2:
2567
mutex_exit(&(log_sys->mutex));
2569
os_event_wait(log_sys->archiving_on);
2574
start_lsn = log_sys->archived_lsn;
2576
if (calc_new_limit) {
2577
ut_a(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0);
2578
limit_lsn = start_lsn + log_sys->archive_buf_size;
2580
*n_bytes = log_sys->archive_buf_size;
2582
if (limit_lsn >= log_sys->lsn) {
2584
/* Cap the archive target at the last complete log block
below the current lsn. */
limit_lsn = ut_uint64_align_down(
2585
log_sys->lsn, OS_FILE_LOG_BLOCK_SIZE);
2589
if (log_sys->archived_lsn >= limit_lsn) {
2594
if (log_sys->written_to_all_lsn < limit_lsn) {
2596
mutex_exit(&(log_sys->mutex));
2598
/* Ensure the log is written to all groups up to the target
before archiving it. */
log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
2600
calc_new_limit = FALSE;
2605
if (log_sys->n_pending_archive_ios > 0) {
2606
/* An archiving operation is running */
2608
mutex_exit(&(log_sys->mutex));
2611
/* Taking the archive_lock in s-mode waits for the running
archiving i/o to complete (it holds the lock in x-mode). */
rw_lock_s_lock(&(log_sys->archive_lock));
2612
rw_lock_s_unlock(&(log_sys->archive_lock));
2615
*n_bytes = log_sys->archive_buf_size;
2620
rw_lock_x_lock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
2622
log_sys->archiving_phase = LOG_ARCHIVE_READ;
2624
log_sys->next_archived_lsn = limit_lsn;
2627
if (log_debug_writes) {
2629
"Archiving from lsn %"PRIu64" to lsn %"PRIu64"\n",
2630
log_sys->archived_lsn, limit_lsn);
2632
#endif /* UNIV_DEBUG */
2634
/* Read the log segment to the archive buffer */
2636
log_group_read_log_seg(LOG_ARCHIVE, log_sys->archive_buf,
2637
UT_LIST_GET_FIRST(log_sys->log_groups),
2638
start_lsn, limit_lsn);
2640
mutex_exit(&(log_sys->mutex));
2643
rw_lock_s_lock(&(log_sys->archive_lock));
2644
rw_lock_s_unlock(&(log_sys->archive_lock));
2647
*n_bytes = log_sys->archive_buf_size;
2652
/* NOTE(review): corrupted extraction -- interleaved line-number tokens;
the UNIV_INTERN return-type line, braces, return statements and the
declaration of the local `dummy` passed to log_archive_do() are
missing.  Does not compile as-is; restore from a pristine log0log.c. */
/********************************************************************
2653
Writes the log contents to the archive at least up to the lsn when this
2654
function was called. */
2657
log_archive_all(void)
2658
/*=================*/
2660
ib_uint64_t present_lsn;
2663
mutex_enter(&(log_sys->mutex));
2665
if (log_sys->archiving_state == LOG_ARCH_OFF) {
2666
mutex_exit(&(log_sys->mutex));
2671
present_lsn = log_sys->lsn;
2673
mutex_exit(&(log_sys->mutex));
2675
/* Pad the current log block so the lsn to archive falls on a
block boundary. */
log_pad_current_log_block();
2678
mutex_enter(&(log_sys->mutex));
2680
if (present_lsn <= log_sys->archived_lsn) {
2682
mutex_exit(&(log_sys->mutex));
2687
mutex_exit(&(log_sys->mutex));
2689
/* Synchronous archive step; presumably looped until archived_lsn
catches up -- the loop construct was lost in extraction. */
log_archive_do(TRUE, &dummy);
2693
/* NOTE(review): corrupted extraction -- interleaved line-number tokens;
missing pieces include the UNIV_INTERN line, local declarations for
`group` and `trunc_len`, braces, the second argument of
fil_space_truncate_start(), and the fprintf() call head inside the
debug block.  Does not compile as-is. */
/*********************************************************
2694
Closes the possible open archive log file (for each group) the first group,
2695
and if it was open, increments the group file count by 2, if desired. */
2698
log_archive_close_groups(
2699
/*=====================*/
2700
ibool increment_file_count) /* in: TRUE if we want to increment
2706
ut_ad(mutex_own(&(log_sys->mutex)));
2708
if (log_sys->archiving_state == LOG_ARCH_OFF) {
2713
group = UT_LIST_GET_FIRST(log_sys->log_groups);
2715
trunc_len = UNIV_PAGE_SIZE
2716
* fil_space_get_size(group->archive_space_id);
2717
/* trunc_len > 0 means an archive file is currently open for
this group. */
if (trunc_len > 0) {
2718
ut_a(trunc_len == group->file_size);
2720
/* Write a notice to the headers of archived log
2721
files that the file write has been completed */
2723
log_group_archive_completed_header_write(
2724
group, 0, log_sys->archived_lsn);
2726
fil_space_truncate_start(group->archive_space_id,
2728
if (increment_file_count) {
2729
group->archived_offset = 0;
2730
group->archived_file_no += 2;
2734
if (log_debug_writes) {
2736
"Incrementing arch file no to %lu"
2737
" in log group %lu\n",
2738
(ulong) group->archived_file_no + 2,
2741
#endif /* UNIV_DEBUG */
2745
/* NOTE(review): corrupted extraction -- interleaved line-number tokens;
the UNIV_INTERN ulint return-type line, braces, the declaration of
`success`, return statements, and an apparent intermediate archiving
step between STOPPING and STOPPING2 are missing.  Does not compile
as-is. */
/********************************************************************
2746
Writes the log contents to the archive up to the lsn when this function was
2747
called, and stops the archiving. When archiving is started again, the archived
2748
log file numbers start from 2 higher, so that the archiving will not write
2749
again to the archived log files which exist when this function returns. */
2752
log_archive_stop(void)
2753
/*==================*/
2754
/* out: DB_SUCCESS or DB_ERROR */
2758
mutex_enter(&(log_sys->mutex));
2760
if (log_sys->archiving_state != LOG_ARCH_ON) {
2762
mutex_exit(&(log_sys->mutex));
2767
log_sys->archiving_state = LOG_ARCH_STOPPING;
2769
mutex_exit(&(log_sys->mutex));
2773
mutex_enter(&(log_sys->mutex));
2775
log_sys->archiving_state = LOG_ARCH_STOPPING2;
2776
os_event_reset(log_sys->archiving_on);
2778
mutex_exit(&(log_sys->mutex));
2780
/* Wait for a possible archiving operation to end */
2782
rw_lock_s_lock(&(log_sys->archive_lock));
2783
rw_lock_s_unlock(&(log_sys->archive_lock));
2785
mutex_enter(&(log_sys->mutex));
2787
/* Close all archived log files, incrementing the file count by 2,
2790
log_archive_close_groups(TRUE);
2792
mutex_exit(&(log_sys->mutex));
2794
/* Make a checkpoint, so that if recovery is needed, the file numbers
2795
of new archived log files will start from the right value */
2800
/* NOTE(review): `success` has no visible declaration -- presumably a
local ibool lost to the corruption. */
success = log_checkpoint(TRUE, TRUE);
2803
mutex_enter(&(log_sys->mutex));
2805
log_sys->archiving_state = LOG_ARCH_STOPPED;
2807
mutex_exit(&(log_sys->mutex));
2812
/* NOTE(review): corrupted extraction -- interleaved line-number tokens;
the UNIV_INTERN ulint return-type line, braces, and the return
statements (presumably DB_ERROR on wrong state, DB_SUCCESS otherwise)
are missing.  Does not compile as-is. */
/********************************************************************
2813
Starts again archiving which has been stopped. */
2816
log_archive_start(void)
2817
/*===================*/
2818
/* out: DB_SUCCESS or DB_ERROR */
2820
mutex_enter(&(log_sys->mutex));
2822
if (log_sys->archiving_state != LOG_ARCH_STOPPED) {
2824
mutex_exit(&(log_sys->mutex));
2829
log_sys->archiving_state = LOG_ARCH_ON;
2831
/* Wake threads blocked in log_archive_do() waiting for archiving
to be switched on. */
os_event_set(log_sys->archiving_on);
2833
mutex_exit(&(log_sys->mutex));
2838
/* NOTE(review): corrupted extraction -- interleaved line-number tokens;
the UNIV_INTERN ulint return-type line, braces, return statements, and
what is presumably a log_archive_stop() call before the retry sleep are
missing.  Does not compile as-is. */
/********************************************************************
2839
Stop archiving the log so that a gap may occur in the archived log files. */
2842
log_archive_noarchivelog(void)
2843
/*==========================*/
2844
/* out: DB_SUCCESS or DB_ERROR */
2847
mutex_enter(&(log_sys->mutex));
2849
if (log_sys->archiving_state == LOG_ARCH_STOPPED
2850
|| log_sys->archiving_state == LOG_ARCH_OFF) {
2852
log_sys->archiving_state = LOG_ARCH_OFF;
2854
os_event_set(log_sys->archiving_on);
2856
mutex_exit(&(log_sys->mutex));
2861
mutex_exit(&(log_sys->mutex));
2865
/* Presumably a back-off before retrying the state transition --
the surrounding retry construct was lost in extraction. */
os_thread_sleep(500000);
2870
/* NOTE(review): corrupted extraction -- interleaved line-number tokens;
the UNIV_INTERN ulint return-type line, braces, and return statements
are missing.  Does not compile as-is. */
/********************************************************************
2871
Start archiving the log so that a gap may occur in the archived log files. */
2874
log_archive_archivelog(void)
2875
/*========================*/
2876
/* out: DB_SUCCESS or DB_ERROR */
2878
mutex_enter(&(log_sys->mutex));
2880
if (log_sys->archiving_state == LOG_ARCH_OFF) {
2882
log_sys->archiving_state = LOG_ARCH_ON;
2884
/* Restart archiving from the last complete log block boundary,
accepting a gap relative to previously archived files. */
log_sys->archived_lsn
2885
= ut_uint64_align_down(log_sys->lsn,
2886
OS_FILE_LOG_BLOCK_SIZE);
2887
mutex_exit(&(log_sys->mutex));
2892
mutex_exit(&(log_sys->mutex));
2897
/* NOTE(review): corrupted extraction -- interleaved line-number tokens;
missing pieces include the static/UNIV_INTERN line, braces, the local
declarations of `age`, `sync` and `dummy`, the `sync = TRUE/FALSE`
assignments in the two branches, and the loop construct implied by the
trailing "Check again" comment.  Does not compile as-is. */
/********************************************************************
2898
Tries to establish a big enough margin of free space in the log groups, such
2899
that a new log entry can be catenated without an immediate need for
2903
log_archive_margin(void)
2904
/*====================*/
2906
log_t* log = log_sys;
2911
mutex_enter(&(log->mutex));
2913
if (log->archiving_state == LOG_ARCH_OFF) {
2914
mutex_exit(&(log->mutex));
2919
/* Age of the oldest not-yet-archived log. */
age = log->lsn - log->archived_lsn;
2921
if (age > log->max_archived_lsn_age) {
2923
/* An archiving is urgent: we have to do synchronous i/o */
2927
} else if (age > log->max_archived_lsn_age_async) {
2929
/* An archiving is not urgent: we do asynchronous i/o */
2933
/* No archiving required yet */
2935
mutex_exit(&(log->mutex));
2940
mutex_exit(&(log->mutex));
2942
log_archive_do(sync, &dummy);
2945
/* Check again that enough was written to the archive */
2950
#endif /* UNIV_LOG_ARCHIVE */
2952
/* NOTE(review): corrupted extraction -- interleaved line-number tokens;
the UNIV_INTERN void line, braces, the log_flush_margin() call that
presumably accompanied log_checkpoint_margin(), and the loop implied by
the check_flush_or_checkpoint retest are missing.  Does not compile
as-is. */
/************************************************************************
2953
Checks that there is enough free space in the log to start a new query step.
2954
Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
2955
function may only be called if the calling thread owns no synchronization
2959
log_check_margins(void)
2960
/*===================*/
2965
log_checkpoint_margin();
2967
#ifdef UNIV_LOG_ARCHIVE
2968
log_archive_margin();
2969
#endif /* UNIV_LOG_ARCHIVE */
2971
mutex_enter(&(log_sys->mutex));
2973
/* Retest under the mutex; if still set, presumably loop back --
the loop construct was lost in extraction. */
if (log_sys->check_flush_or_checkpoint) {
2975
mutex_exit(&(log_sys->mutex));
2980
mutex_exit(&(log_sys->mutex));
2983
/* NOTE(review): corrupted extraction -- interleaved line-number tokens;
missing pieces include the UNIV_INTERN void line, braces, the local
declarations of `lsn` and `arch_log_no`, the `loop:` label and `goto
loop` statements that the repeated sleep/retest structure implies, the
assignment `lsn = log_sys->lsn` after the checkpoint, and the fprintf()
call heads before several bare format strings.  Does not compile
as-is; restore from a pristine log0log.c. */
/********************************************************************
2984
Makes a checkpoint at the latest lsn and writes it to first page of each
2985
data file in the database, so that we know that the file spaces contain
2986
all modifications up to that lsn. This can only be called at database
2987
shutdown. This function also writes all log in log files to the log archive. */
2990
logs_empty_and_mark_files_at_shutdown(void)
2991
/*=======================================*/
2996
if (srv_print_verbose_log) {
2997
ut_print_timestamp(stderr);
2998
fprintf(stderr, " InnoDB: Starting shutdown...\n");
3000
/* Wait until the master thread and all other operations are idle: our
3001
algorithm only works if the server is idle at shutdown */
3003
srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
3005
os_thread_sleep(100000);
3007
mutex_enter(&kernel_mutex);
3009
/* We need the monitor threads to stop before we proceed with a
3010
normal shutdown. In case of very fast shutdown, however, we can
3011
proceed without waiting for monitor threads. */
3013
if (srv_fast_shutdown < 2
3014
&& (srv_error_monitor_active
3015
|| srv_lock_timeout_and_monitor_active)) {
3017
mutex_exit(&kernel_mutex);
3022
/* Check that there are no longer transactions. We need this wait even
3023
for the 'very fast' shutdown, because the InnoDB layer may have
3024
committed or prepared transactions and we don't want to lose them. */
3026
if (trx_n_mysql_transactions > 0
3027
|| UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
3029
mutex_exit(&kernel_mutex);
3034
if (srv_fast_shutdown == 2) {
3035
/* In this fastest shutdown we do not flush the buffer pool:
3036
it is essentially a 'crash' of the InnoDB server. Make sure
3037
that the log is all flushed to disk, so that we can recover
3038
all committed transactions in a crash recovery. We must not
3039
write the lsn stamps to the data files, since at a startup
3040
InnoDB deduces from the stamps if the previous shutdown was
3043
log_buffer_flush_to_disk();
3045
return; /* We SKIP ALL THE REST !! */
3048
/* Check that the master thread is suspended */
3050
if (srv_n_threads_active[SRV_MASTER] != 0) {
3052
mutex_exit(&kernel_mutex);
3057
mutex_exit(&kernel_mutex);
3059
mutex_enter(&(log_sys->mutex));
3061
if (log_sys->n_pending_checkpoint_writes
3062
#ifdef UNIV_LOG_ARCHIVE
3063
|| log_sys->n_pending_archive_ios
3064
#endif /* UNIV_LOG_ARCHIVE */
3065
|| log_sys->n_pending_writes) {
3067
mutex_exit(&(log_sys->mutex));
3072
mutex_exit(&(log_sys->mutex));
3074
if (!buf_pool_check_no_pending_io()) {
3079
#ifdef UNIV_LOG_ARCHIVE
3081
#endif /* UNIV_LOG_ARCHIVE */
3083
/* Final checkpoint at the latest lsn. */
log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
3085
mutex_enter(&(log_sys->mutex));
3089
/* NOTE(review): `lsn` is used from here on without a visible
definition -- presumably assigned from log_sys->lsn in a line lost to
the corruption. */
if (lsn != log_sys->last_checkpoint_lsn
3090
#ifdef UNIV_LOG_ARCHIVE
3091
|| (srv_log_archive_on
3092
&& lsn != log_sys->archived_lsn + LOG_BLOCK_HDR_SIZE)
3093
#endif /* UNIV_LOG_ARCHIVE */
3096
mutex_exit(&(log_sys->mutex));
3103
#ifdef UNIV_LOG_ARCHIVE
3104
UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no;
3106
if (0 == UT_LIST_GET_FIRST(log_sys->log_groups)->archived_offset) {
3111
log_archive_close_groups(TRUE);
3112
#endif /* UNIV_LOG_ARCHIVE */
3114
mutex_exit(&(log_sys->mutex));
3116
mutex_enter(&kernel_mutex);
3117
/* Check that the master thread has stayed suspended */
3118
if (srv_n_threads_active[SRV_MASTER] != 0) {
3120
"InnoDB: Warning: the master thread woke up"
3121
" during shutdown\n");
3123
mutex_exit(&kernel_mutex);
3127
mutex_exit(&kernel_mutex);
3129
fil_flush_file_spaces(FIL_TABLESPACE);
3130
fil_flush_file_spaces(FIL_LOG);
3132
/* The call fil_write_flushed_lsn_to_data_files() will pass the buffer
3133
pool: therefore it is essential that the buffer pool has been
3134
completely flushed to disk! (We do not call fil_write... if the
3135
'very fast' shutdown is enabled.) */
3137
if (!buf_all_freed()) {
3142
srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
3144
/* Make some checks that the server really is quiet */
3145
ut_a(srv_n_threads_active[SRV_MASTER] == 0);
3146
ut_a(buf_all_freed());
3147
ut_a(lsn == log_sys->lsn);
3149
if (lsn < srv_start_lsn) {
3151
"InnoDB: Error: log sequence number"
3152
" at shutdown %"PRIu64"\n"
3153
"InnoDB: is lower than at startup %"PRIu64"!\n",
3154
lsn, srv_start_lsn);
3157
srv_shutdown_lsn = lsn;
3159
fil_write_flushed_lsn_to_data_files(lsn, arch_log_no);
3161
fil_flush_file_spaces(FIL_TABLESPACE);
3163
fil_close_all_files();
3165
/* Make some checks that the server really is quiet */
3166
ut_a(srv_n_threads_active[SRV_MASTER] == 0);
3167
ut_a(buf_all_freed());
3168
ut_a(lsn == log_sys->lsn);
3171
/* NOTE(review): corrupted extraction -- interleaved line-number tokens;
the function-name line is missing (the surviving header comment and
parameters suggest a debug check that parses a catenated mtr log
segment), as are the return-type line, braces, the declarations of
`start`, `end`, `buf1` and `scan_buf`, and the mem_free()/return at the
end.  Does not compile as-is. */
/**********************************************************
3172
Checks by parsing that the catenated log segment for a single mtr is
3178
byte* buf, /* in: pointer to the start of
3179
the log segment in the
3180
log_sys->buf log buffer */
3181
ulint len, /* in: segment length in bytes */
3182
ib_uint64_t buf_start_lsn) /* in: buffer start lsn */
3184
ib_uint64_t contiguous_lsn;
3185
ib_uint64_t scanned_lsn;
3191
ut_ad(mutex_own(&(log_sys->mutex)));
3198
/* Work on whole log blocks enclosing [buf, buf + len). */
start = ut_align_down(buf, OS_FILE_LOG_BLOCK_SIZE);
3199
end = ut_align(buf + len, OS_FILE_LOG_BLOCK_SIZE);
3201
buf1 = mem_alloc((end - start) + OS_FILE_LOG_BLOCK_SIZE);
3202
scan_buf = ut_align(buf1, OS_FILE_LOG_BLOCK_SIZE);
3204
ut_memcpy(scan_buf, start, end - start);
3206
recv_scan_log_recs(TRUE,
3207
(buf_pool->curr_size
3208
- recv_n_pool_free_frames) * UNIV_PAGE_SIZE,
3209
FALSE, scan_buf, end - start,
3210
ut_uint64_align_down(buf_start_lsn,
3211
OS_FILE_LOG_BLOCK_SIZE),
3212
&contiguous_lsn, &scanned_lsn);
3214
ut_a(scanned_lsn == buf_start_lsn + len);
3215
ut_a(recv_sys->recovered_lsn == scanned_lsn);
3222
/* NOTE(review): corrupted extraction -- interleaved line-number tokens;
the return-type and function-name lines are missing (the header
comment and out-parameter documentation indicate a non-blocking
current-lsn peek returning ibool), as are braces and the TRUE/FALSE
return statements.  Does not compile as-is. */
/**********************************************************
3223
Peeks the current lsn. */
3228
/* out: TRUE if success, FALSE if
3229
could not get the log system mutex */
3230
ib_uint64_t* lsn) /* out: if returns TRUE, current lsn is here */
3232
/* Non-blocking acquisition: 0 from mutex_enter_nowait() means the
mutex was obtained. */
if (0 == mutex_enter_nowait(&(log_sys->mutex))) {
3233
*lsn = log_sys->lsn;
3235
mutex_exit(&(log_sys->mutex));
3243
/* NOTE(review): corrupted extraction -- interleaved line-number tokens;
the return-type and function-name lines are missing, as are braces,
the fprintf(file, ...) call heads before the bare format-string runs,
the log_sys->lsn argument of the first print, and the divisor
(/ time_elapsed) of the i/o-rate expression.  Does not compile
as-is. */
/**********************************************************
3244
Prints info of the log. */
3249
FILE* file) /* in: file where to print */
3251
double time_elapsed;
3252
time_t current_time;
3254
mutex_enter(&(log_sys->mutex));
3257
"Log sequence number %"PRIu64"\n"
3258
"Log flushed up to %"PRIu64"\n"
3259
"Last checkpoint at %"PRIu64"\n",
3261
log_sys->flushed_to_disk_lsn,
3262
log_sys->last_checkpoint_lsn);
3264
current_time = time(NULL);
3266
/* 0.001 guards against division by zero when no time has elapsed
since the last printout. */
time_elapsed = 0.001 + difftime(current_time,
3267
log_sys->last_printout_time);
3269
"%lu pending log writes, %lu pending chkp writes\n"
3270
"%lu log i/o's done, %.2f log i/o's/second\n",
3271
(ulong) log_sys->n_pending_writes,
3272
(ulong) log_sys->n_pending_checkpoint_writes,
3273
(ulong) log_sys->n_log_ios,
3274
((log_sys->n_log_ios - log_sys->n_log_ios_old)
3277
/* Reset the per-second averaging window. */
log_sys->n_log_ios_old = log_sys->n_log_ios;
3278
log_sys->last_printout_time = current_time;
3280
mutex_exit(&(log_sys->mutex));
3283
/**************************************************************************
3284
Refreshes the statistics used to print per-second averages. */
3287
log_refresh_stats(void)
3288
/*===================*/
3290
log_sys->n_log_ios_old = log_sys->n_log_ios;
3291
log_sys->last_printout_time = time(NULL);