~jaypipes/drizzle/new-test-runner

« back to all changes in this revision

Viewing changes to storage/innobase/log/log0log.c

  • Committer: Jay Pipes
  • Date: 2008-12-11 17:52:34 UTC
  • mfrom: (482.16.152 testable)
  • Revision ID: jpipes@serialcoder-20081211175234-uqsfvmgxejvmellq
merge with trunk

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/******************************************************
 
2
Database log
 
3
 
 
4
(c) 1995-1997 Innobase Oy
 
5
 
 
6
Created 12/9/1995 Heikki Tuuri
 
7
*******************************************************/
 
8
 
 
9
#include "log0log.h"
 
10
 
 
11
#ifdef UNIV_NONINL
 
12
#include "log0log.ic"
 
13
#endif
 
14
 
 
15
#include "mem0mem.h"
 
16
#include "buf0buf.h"
 
17
#include "buf0flu.h"
 
18
#include "srv0srv.h"
 
19
#include "log0recv.h"
 
20
#include "fil0fil.h"
 
21
#include "dict0boot.h"
 
22
#include "srv0srv.h"
 
23
#include "srv0start.h"
 
24
#include "trx0sys.h"
 
25
#include "trx0trx.h"
 
26
 
 
27
/*
 
28
General philosophy of InnoDB redo-logs:
 
29
 
 
30
1) Every change to the contents of a data page must be done
 
31
through mtr, which in mtr_commit() writes log records
 
32
to the InnoDB redo log.
 
33
 
 
34
2) Normally these changes are performed using a mlog_write_ulint()
 
35
or similar function.
 
36
 
 
37
3) In some page level operations only a code number of a
 
38
c-function and its parameters are written to the log to
 
39
reduce the size of the log.
 
40
 
 
41
  3a) You should not add parameters to these kinds of functions
 
42
  (e.g. trx_undo_header_create(), trx_undo_insert_header_reuse())
 
43
 
 
44
  3b) You should not add such functionality which either change
 
45
  working when compared with the old or are dependent on data
 
46
  outside of the page. These kind of functions should implement
 
47
  self-contained page transformation and it should be unchanged
 
48
  if you don't have very essential reasons to change log
 
49
  semantics or format.
 
50
 
 
51
*/
 
52
 
 
53
/* Current free limit of space 0; protected by the log sys mutex; 0 means
 
54
uninitialized */
 
55
UNIV_INTERN ulint       log_fsp_current_free_limit              = 0;
 
56
 
 
57
/* Global log system variable */
 
58
UNIV_INTERN log_t*      log_sys = NULL;
 
59
 
 
60
#ifdef UNIV_DEBUG
 
61
UNIV_INTERN ibool       log_do_write = TRUE;
 
62
 
 
63
UNIV_INTERN ibool       log_debug_writes = FALSE;
 
64
#endif /* UNIV_DEBUG */
 
65
 
 
66
/* These control how often we print warnings if the last checkpoint is too
 
67
old */
 
68
UNIV_INTERN ibool       log_has_printed_chkp_warning = FALSE;
 
69
UNIV_INTERN time_t      log_last_warning_time;
 
70
 
 
71
#ifdef UNIV_LOG_ARCHIVE
 
72
/* Pointer to this variable is used as the i/o-message when we do i/o to an
 
73
archive */
 
74
UNIV_INTERN byte        log_archive_io;
 
75
#endif /* UNIV_LOG_ARCHIVE */
 
76
 
 
77
/* A margin for free space in the log buffer before a log entry is catenated */
 
78
#define LOG_BUF_WRITE_MARGIN    (4 * OS_FILE_LOG_BLOCK_SIZE)
 
79
 
 
80
/* Margins for free space in the log buffer after a log entry is catenated */
 
81
#define LOG_BUF_FLUSH_RATIO     2
 
82
#define LOG_BUF_FLUSH_MARGIN    (LOG_BUF_WRITE_MARGIN + 4 * UNIV_PAGE_SIZE)
 
83
 
 
84
/* Margin for the free space in the smallest log group, before a new query
 
85
step which modifies the database, is started */
 
86
 
 
87
#define LOG_CHECKPOINT_FREE_PER_THREAD  (4 * UNIV_PAGE_SIZE)
 
88
#define LOG_CHECKPOINT_EXTRA_FREE       (8 * UNIV_PAGE_SIZE)
 
89
 
 
90
/* This parameter controls asynchronous making of a new checkpoint; the value
 
91
should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */
 
92
 
 
93
#define LOG_POOL_CHECKPOINT_RATIO_ASYNC 32
 
94
 
 
95
/* This parameter controls synchronous preflushing of modified buffer pages */
 
96
#define LOG_POOL_PREFLUSH_RATIO_SYNC    16
 
97
 
 
98
/* The same ratio for asynchronous preflushing; this value should be less than
 
99
the previous */
 
100
#define LOG_POOL_PREFLUSH_RATIO_ASYNC   8
 
101
 
 
102
/* Extra margin, in addition to one log file, used in archiving */
 
103
#define LOG_ARCHIVE_EXTRA_MARGIN        (4 * UNIV_PAGE_SIZE)
 
104
 
 
105
#ifdef UNIV_LOG_ARCHIVE
 
106
/* This parameter controls asynchronous writing to the archive */
 
107
#define LOG_ARCHIVE_RATIO_ASYNC         16
 
108
 
 
109
/* States of an archiving operation */
 
110
#define LOG_ARCHIVE_READ        1
 
111
#define LOG_ARCHIVE_WRITE       2
 
112
 
 
113
#endif /* UNIV_LOG_ARCHIVE */
 
114
 
 
115
/* Codes used in unlocking flush latches */
 
116
#define LOG_UNLOCK_NONE_FLUSHED_LOCK    1
 
117
#define LOG_UNLOCK_FLUSH_LOCK           2
 
118
 
 
119
/**********************************************************
Forward declaration: completes a checkpoint write i/o to a log file. */
static
void
log_io_complete_checkpoint(void);
/*============================*/

#ifdef UNIV_LOG_ARCHIVE
/**********************************************************
Forward declaration: completes an archiving i/o. */
static
void
log_io_complete_archive(void);
/*=========================*/
#endif /* UNIV_LOG_ARCHIVE */
 
133
 
 
134
/********************************************************************
 
135
Sets the global variable log_fsp_current_free_limit. Also makes a checkpoint,
 
136
so that we know that the limit has been written to a log checkpoint field
 
137
on disk. */
 
138
UNIV_INTERN
 
139
void
 
140
log_fsp_current_free_limit_set_and_checkpoint(
 
141
/*==========================================*/
 
142
        ulint   limit)  /* in: limit to set */
 
143
{
 
144
        ibool   success;
 
145
 
 
146
        mutex_enter(&(log_sys->mutex));
 
147
 
 
148
        log_fsp_current_free_limit = limit;
 
149
 
 
150
        mutex_exit(&(log_sys->mutex));
 
151
 
 
152
        /* Try to make a synchronous checkpoint */
 
153
 
 
154
        success = FALSE;
 
155
 
 
156
        while (!success) {
 
157
                success = log_checkpoint(TRUE, TRUE);
 
158
        }
 
159
}
 
160
 
 
161
/********************************************************************
 
162
Returns the oldest modified block lsn in the pool, or log_sys->lsn if none
 
163
exists. */
 
164
static
 
165
ib_uint64_t
 
166
log_buf_pool_get_oldest_modification(void)
 
167
/*======================================*/
 
168
{
 
169
        ib_uint64_t     lsn;
 
170
 
 
171
        ut_ad(mutex_own(&(log_sys->mutex)));
 
172
 
 
173
        lsn = buf_pool_get_oldest_modification();
 
174
 
 
175
        if (!lsn) {
 
176
 
 
177
                lsn = log_sys->lsn;
 
178
        }
 
179
 
 
180
        return(lsn);
 
181
}
 
182
 
 
183
/****************************************************************
 
184
Opens the log for log_write_low. The log must be closed with log_close and
 
185
released with log_release. */
 
186
UNIV_INTERN
 
187
ib_uint64_t
 
188
log_reserve_and_open(
 
189
/*=================*/
 
190
                        /* out: start lsn of the log record */
 
191
        ulint   len)    /* in: length of data to be catenated */
 
192
{
 
193
        log_t*  log                     = log_sys;
 
194
        ulint   len_upper_limit;
 
195
#ifdef UNIV_LOG_ARCHIVE
 
196
        ulint   archived_lsn_age;
 
197
        ulint   dummy;
 
198
#endif /* UNIV_LOG_ARCHIVE */
 
199
#ifdef UNIV_DEBUG
 
200
        ulint   count                   = 0;
 
201
#endif /* UNIV_DEBUG */
 
202
 
 
203
        ut_a(len < log->buf_size / 2);
 
204
loop:
 
205
        mutex_enter(&(log->mutex));
 
206
 
 
207
        /* Calculate an upper limit for the space the string may take in the
 
208
        log buffer */
 
209
 
 
210
        len_upper_limit = LOG_BUF_WRITE_MARGIN + (5 * len) / 4;
 
211
 
 
212
        if (log->buf_free + len_upper_limit > log->buf_size) {
 
213
 
 
214
                mutex_exit(&(log->mutex));
 
215
 
 
216
                /* Not enough free space, do a syncronous flush of the log
 
217
                buffer */
 
218
 
 
219
                log_buffer_flush_to_disk();
 
220
 
 
221
                srv_log_waits++;
 
222
 
 
223
                ut_ad(++count < 50);
 
224
 
 
225
                goto loop;
 
226
        }
 
227
 
 
228
#ifdef UNIV_LOG_ARCHIVE
 
229
        if (log->archiving_state != LOG_ARCH_OFF) {
 
230
 
 
231
                archived_lsn_age = log->lsn - log->archived_lsn;
 
232
                if (archived_lsn_age + len_upper_limit
 
233
                    > log->max_archived_lsn_age) {
 
234
                        /* Not enough free archived space in log groups: do a
 
235
                        synchronous archive write batch: */
 
236
 
 
237
                        mutex_exit(&(log->mutex));
 
238
 
 
239
                        ut_ad(len_upper_limit <= log->max_archived_lsn_age);
 
240
 
 
241
                        log_archive_do(TRUE, &dummy);
 
242
 
 
243
                        ut_ad(++count < 50);
 
244
 
 
245
                        goto loop;
 
246
                }
 
247
        }
 
248
#endif /* UNIV_LOG_ARCHIVE */
 
249
 
 
250
#ifdef UNIV_LOG_DEBUG
 
251
        log->old_buf_free = log->buf_free;
 
252
        log->old_lsn = log->lsn;
 
253
#endif
 
254
        return(log->lsn);
 
255
}
 
256
 
 
257
/****************************************************************
 
258
Writes to the log the string given. It is assumed that the caller holds the
 
259
log mutex. */
 
260
UNIV_INTERN
 
261
void
 
262
log_write_low(
 
263
/*==========*/
 
264
        byte*   str,            /* in: string */
 
265
        ulint   str_len)        /* in: string length */
 
266
{
 
267
        log_t*  log     = log_sys;
 
268
        ulint   len;
 
269
        ulint   data_len;
 
270
        byte*   log_block;
 
271
 
 
272
        ut_ad(mutex_own(&(log->mutex)));
 
273
part_loop:
 
274
        /* Calculate a part length */
 
275
 
 
276
        data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len;
 
277
 
 
278
        if (data_len <= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
 
279
 
 
280
                /* The string fits within the current log block */
 
281
 
 
282
                len = str_len;
 
283
        } else {
 
284
                data_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
 
285
 
 
286
                len = OS_FILE_LOG_BLOCK_SIZE
 
287
                        - (log->buf_free % OS_FILE_LOG_BLOCK_SIZE)
 
288
                        - LOG_BLOCK_TRL_SIZE;
 
289
        }
 
290
 
 
291
        ut_memcpy(log->buf + log->buf_free, str, len);
 
292
 
 
293
        str_len -= len;
 
294
        str = str + len;
 
295
 
 
296
        log_block = ut_align_down(log->buf + log->buf_free,
 
297
                                  OS_FILE_LOG_BLOCK_SIZE);
 
298
        log_block_set_data_len(log_block, data_len);
 
299
 
 
300
        if (data_len == OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
 
301
                /* This block became full */
 
302
                log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE);
 
303
                log_block_set_checkpoint_no(log_block,
 
304
                                            log_sys->next_checkpoint_no);
 
305
                len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE;
 
306
 
 
307
                log->lsn += len;
 
308
 
 
309
                /* Initialize the next block header */
 
310
                log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, log->lsn);
 
311
        } else {
 
312
                log->lsn += len;
 
313
        }
 
314
 
 
315
        log->buf_free += len;
 
316
 
 
317
        ut_ad(log->buf_free <= log->buf_size);
 
318
 
 
319
        if (str_len > 0) {
 
320
                goto part_loop;
 
321
        }
 
322
 
 
323
        srv_log_write_requests++;
 
324
}
 
325
 
 
326
/****************************************************************
 
327
Closes the log. */
 
328
UNIV_INTERN
 
329
ib_uint64_t
 
330
log_close(void)
 
331
/*===========*/
 
332
                        /* out: lsn */
 
333
{
 
334
        byte*           log_block;
 
335
        ulint           first_rec_group;
 
336
        ib_uint64_t     oldest_lsn;
 
337
        ib_uint64_t     lsn;
 
338
        log_t*          log     = log_sys;
 
339
        ib_uint64_t     checkpoint_age;
 
340
 
 
341
        ut_ad(mutex_own(&(log->mutex)));
 
342
 
 
343
        lsn = log->lsn;
 
344
 
 
345
        log_block = ut_align_down(log->buf + log->buf_free,
 
346
                                  OS_FILE_LOG_BLOCK_SIZE);
 
347
        first_rec_group = log_block_get_first_rec_group(log_block);
 
348
 
 
349
        if (first_rec_group == 0) {
 
350
                /* We initialized a new log block which was not written
 
351
                full by the current mtr: the next mtr log record group
 
352
                will start within this block at the offset data_len */
 
353
 
 
354
                log_block_set_first_rec_group(
 
355
                        log_block, log_block_get_data_len(log_block));
 
356
        }
 
357
 
 
358
        if (log->buf_free > log->max_buf_free) {
 
359
 
 
360
                log->check_flush_or_checkpoint = TRUE;
 
361
        }
 
362
 
 
363
        checkpoint_age = lsn - log->last_checkpoint_lsn;
 
364
 
 
365
        if (checkpoint_age >= log->log_group_capacity) {
 
366
                /* TODO: split btr_store_big_rec_extern_fields() into small
 
367
                steps so that we can release all latches in the middle, and
 
368
                call log_free_check() to ensure we never write over log written
 
369
                after the latest checkpoint. In principle, we should split all
 
370
                big_rec operations, but other operations are smaller. */
 
371
 
 
372
                if (!log_has_printed_chkp_warning
 
373
                    || difftime(time(NULL), log_last_warning_time) > 15) {
 
374
 
 
375
                        log_has_printed_chkp_warning = TRUE;
 
376
                        log_last_warning_time = time(NULL);
 
377
 
 
378
                        ut_print_timestamp(stderr);
 
379
                        fprintf(stderr,
 
380
                                "  InnoDB: ERROR: the age of the last"
 
381
                                " checkpoint is %lu,\n"
 
382
                                "InnoDB: which exceeds the log group"
 
383
                                " capacity %lu.\n"
 
384
                                "InnoDB: If you are using big"
 
385
                                " BLOB or TEXT rows, you must set the\n"
 
386
                                "InnoDB: combined size of log files"
 
387
                                " at least 10 times bigger than the\n"
 
388
                                "InnoDB: largest such row.\n",
 
389
                                (ulong) checkpoint_age,
 
390
                                (ulong) log->log_group_capacity);
 
391
                }
 
392
        }
 
393
 
 
394
        if (checkpoint_age <= log->max_modified_age_async) {
 
395
 
 
396
                goto function_exit;
 
397
        }
 
398
 
 
399
        oldest_lsn = buf_pool_get_oldest_modification();
 
400
 
 
401
        if (!oldest_lsn
 
402
            || lsn - oldest_lsn > log->max_modified_age_async
 
403
            || checkpoint_age > log->max_checkpoint_age_async) {
 
404
 
 
405
                log->check_flush_or_checkpoint = TRUE;
 
406
        }
 
407
function_exit:
 
408
 
 
409
#ifdef UNIV_LOG_DEBUG
 
410
        log_check_log_recs(log->buf + log->old_buf_free,
 
411
                           log->buf_free - log->old_buf_free, log->old_lsn);
 
412
#endif
 
413
 
 
414
        return(lsn);
 
415
}
 
416
 
 
417
#ifdef UNIV_LOG_ARCHIVE
/**********************************************************
Fills the remainder of the current log block with one-byte dummy log
records (MLOG_DUMMY_RECORD). Used in producing consistent archived log
files. On exit the log lsn is asserted to sit right after a block header. */
static
void
log_pad_current_log_block(void)
/*===========================*/
{
        byte            dummy_rec       = MLOG_DUMMY_RECORD;
        ulint           n_left;
        ib_uint64_t     lsn;

        /* We retrieve lsn only because otherwise gcc crashed on HP-UX */
        lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE);

        /* Number of bytes between the current position and the block
        trailer */
        n_left = OS_FILE_LOG_BLOCK_SIZE
                - (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE)
                - LOG_BLOCK_TRL_SIZE;

        while (n_left > 0) {
                log_write_low(&dummy_rec, 1);
                n_left--;
        }

        lsn = log_sys->lsn;

        log_close();
        log_release();

        ut_a(lsn % OS_FILE_LOG_BLOCK_SIZE == LOG_BLOCK_HDR_SIZE);
}
#endif /* UNIV_LOG_ARCHIVE */
 
450
 
 
451
/**********************************************************
 
452
Calculates the data capacity of a log group, when the log file headers are not
 
453
included. */
 
454
UNIV_INTERN
 
455
ulint
 
456
log_group_get_capacity(
 
457
/*===================*/
 
458
                                /* out: capacity in bytes */
 
459
        log_group_t*    group)  /* in: log group */
 
460
{
 
461
        ut_ad(mutex_own(&(log_sys->mutex)));
 
462
 
 
463
        return((group->file_size - LOG_FILE_HDR_SIZE) * group->n_files);
 
464
}
 
465
 
 
466
/**********************************************************
 
467
Calculates the offset within a log group, when the log file headers are not
 
468
included. */
 
469
UNIV_INLINE
 
470
ulint
 
471
log_group_calc_size_offset(
 
472
/*=======================*/
 
473
                                /* out: size offset (<= offset) */
 
474
        ulint           offset, /* in: real offset within the log group */
 
475
        log_group_t*    group)  /* in: log group */
 
476
{
 
477
        ut_ad(mutex_own(&(log_sys->mutex)));
 
478
 
 
479
        return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size));
 
480
}
 
481
 
 
482
/**********************************************************
 
483
Calculates the offset within a log group, when the log file headers are
 
484
included. */
 
485
UNIV_INLINE
 
486
ulint
 
487
log_group_calc_real_offset(
 
488
/*=======================*/
 
489
                                /* out: real offset (>= offset) */
 
490
        ulint           offset, /* in: size offset within the log group */
 
491
        log_group_t*    group)  /* in: log group */
 
492
{
 
493
        ut_ad(mutex_own(&(log_sys->mutex)));
 
494
 
 
495
        return(offset + LOG_FILE_HDR_SIZE
 
496
               * (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE)));
 
497
}
 
498
 
 
499
/**********************************************************
 
500
Calculates the offset of an lsn within a log group. */
 
501
static
 
502
ulint
 
503
log_group_calc_lsn_offset(
 
504
/*======================*/
 
505
                                /* out: offset within the log group */
 
506
        ib_uint64_t     lsn,    /* in: lsn, must be within 4 GB of
 
507
                                group->lsn */
 
508
        log_group_t*    group)  /* in: log group */
 
509
{
 
510
        ib_uint64_t     gr_lsn;
 
511
        ib_int64_t      gr_lsn_size_offset;
 
512
        ib_int64_t      difference;
 
513
        ib_int64_t      group_size;
 
514
        ib_int64_t      offset;
 
515
 
 
516
        ut_ad(mutex_own(&(log_sys->mutex)));
 
517
 
 
518
        /* If total log file size is > 2 GB we can easily get overflows
 
519
        with 32-bit integers. Use 64-bit integers instead. */
 
520
 
 
521
        gr_lsn = group->lsn;
 
522
 
 
523
        gr_lsn_size_offset = (ib_int64_t)
 
524
                log_group_calc_size_offset(group->lsn_offset, group);
 
525
 
 
526
        group_size = (ib_int64_t) log_group_get_capacity(group);
 
527
 
 
528
        if (lsn >= gr_lsn) {
 
529
 
 
530
                difference = (ib_int64_t) (lsn - gr_lsn);
 
531
        } else {
 
532
                difference = (ib_int64_t) (gr_lsn - lsn);
 
533
 
 
534
                difference = difference % group_size;
 
535
 
 
536
                difference = group_size - difference;
 
537
        }
 
538
 
 
539
        offset = (gr_lsn_size_offset + difference) % group_size;
 
540
 
 
541
        ut_a(offset < (((ib_int64_t) 1) << 32)); /* offset must be < 4 GB */
 
542
 
 
543
        /* fprintf(stderr,
 
544
        "Offset is %lu gr_lsn_offset is %lu difference is %lu\n",
 
545
        (ulint)offset,(ulint)gr_lsn_size_offset, (ulint)difference);
 
546
        */
 
547
 
 
548
        return(log_group_calc_real_offset((ulint)offset, group));
 
549
}
 
550
 
 
551
/***********************************************************************
 
552
Calculates where in log files we find a specified lsn. */
 
553
UNIV_INTERN
 
554
ulint
 
555
log_calc_where_lsn_is(
 
556
/*==================*/
 
557
                                                /* out: log file number */
 
558
        ib_int64_t*     log_file_offset,        /* out: offset in that file
 
559
                                                (including the header) */
 
560
        ib_uint64_t     first_header_lsn,       /* in: first log file start
 
561
                                                lsn */
 
562
        ib_uint64_t     lsn,                    /* in: lsn whose position to
 
563
                                                determine */
 
564
        ulint           n_log_files,            /* in: total number of log
 
565
                                                files */
 
566
        ib_int64_t      log_file_size)          /* in: log file size
 
567
                                                (including the header) */
 
568
{
 
569
        ib_int64_t      capacity        = log_file_size - LOG_FILE_HDR_SIZE;
 
570
        ulint           file_no;
 
571
        ib_int64_t      add_this_many;
 
572
 
 
573
        if (lsn < first_header_lsn) {
 
574
                add_this_many = 1 + (first_header_lsn - lsn)
 
575
                        / (capacity * (ib_int64_t)n_log_files);
 
576
                lsn += add_this_many
 
577
                        * capacity * (ib_int64_t)n_log_files;
 
578
        }
 
579
 
 
580
        ut_a(lsn >= first_header_lsn);
 
581
 
 
582
        file_no = ((ulint)((lsn - first_header_lsn) / capacity))
 
583
                % n_log_files;
 
584
        *log_file_offset = (lsn - first_header_lsn) % capacity;
 
585
 
 
586
        *log_file_offset = *log_file_offset + LOG_FILE_HDR_SIZE;
 
587
 
 
588
        return(file_no);
 
589
}
 
590
 
 
591
/************************************************************
 
592
Sets the field values in group to correspond to a given lsn. For this function
 
593
to work, the values must already be correctly initialized to correspond to
 
594
some lsn, for instance, a checkpoint lsn. */
 
595
UNIV_INTERN
 
596
void
 
597
log_group_set_fields(
 
598
/*=================*/
 
599
        log_group_t*    group,  /* in: group */
 
600
        ib_uint64_t     lsn)    /* in: lsn for which the values should be
 
601
                                set */
 
602
{
 
603
        group->lsn_offset = log_group_calc_lsn_offset(lsn, group);
 
604
        group->lsn = lsn;
 
605
}
 
606
 
 
607
/*********************************************************************
 
608
Calculates the recommended highest values for lsn - last_checkpoint_lsn,
 
609
lsn - buf_get_oldest_modification(), and lsn - max_archive_lsn_age. */
 
610
static
 
611
ibool
 
612
log_calc_max_ages(void)
 
613
/*===================*/
 
614
                        /* out: error value FALSE if the smallest log group is
 
615
                        too small to accommodate the number of OS threads in
 
616
                        the database server */
 
617
{
 
618
        log_group_t*    group;
 
619
        ulint           margin;
 
620
        ulint           free;
 
621
        ibool           success         = TRUE;
 
622
        ulint           smallest_capacity;
 
623
        ulint           archive_margin;
 
624
        ulint           smallest_archive_margin;
 
625
 
 
626
        ut_ad(!mutex_own(&(log_sys->mutex)));
 
627
 
 
628
        mutex_enter(&(log_sys->mutex));
 
629
 
 
630
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
631
 
 
632
        ut_ad(group);
 
633
 
 
634
        smallest_capacity = ULINT_MAX;
 
635
        smallest_archive_margin = ULINT_MAX;
 
636
 
 
637
        while (group) {
 
638
                if (log_group_get_capacity(group) < smallest_capacity) {
 
639
 
 
640
                        smallest_capacity = log_group_get_capacity(group);
 
641
                }
 
642
 
 
643
                archive_margin = log_group_get_capacity(group)
 
644
                        - (group->file_size - LOG_FILE_HDR_SIZE)
 
645
                        - LOG_ARCHIVE_EXTRA_MARGIN;
 
646
 
 
647
                if (archive_margin < smallest_archive_margin) {
 
648
 
 
649
                        smallest_archive_margin = archive_margin;
 
650
                }
 
651
 
 
652
                group = UT_LIST_GET_NEXT(log_groups, group);
 
653
        }
 
654
 
 
655
        /* Add extra safety */
 
656
        smallest_capacity = smallest_capacity - smallest_capacity / 10;
 
657
 
 
658
        /* For each OS thread we must reserve so much free space in the
 
659
        smallest log group that it can accommodate the log entries produced
 
660
        by single query steps: running out of free log space is a serious
 
661
        system error which requires rebooting the database. */
 
662
 
 
663
        free = LOG_CHECKPOINT_FREE_PER_THREAD * (10 + srv_thread_concurrency)
 
664
                + LOG_CHECKPOINT_EXTRA_FREE;
 
665
        if (free >= smallest_capacity / 2) {
 
666
                success = FALSE;
 
667
 
 
668
                goto failure;
 
669
        } else {
 
670
                margin = smallest_capacity - free;
 
671
        }
 
672
 
 
673
        margin = ut_min(margin, log_sys->adm_checkpoint_interval);
 
674
 
 
675
        margin = margin - margin / 10;  /* Add still some extra safety */
 
676
 
 
677
        log_sys->log_group_capacity = smallest_capacity;
 
678
 
 
679
        log_sys->max_modified_age_async = margin
 
680
                - margin / LOG_POOL_PREFLUSH_RATIO_ASYNC;
 
681
        log_sys->max_modified_age_sync = margin
 
682
                - margin / LOG_POOL_PREFLUSH_RATIO_SYNC;
 
683
 
 
684
        log_sys->max_checkpoint_age_async = margin - margin
 
685
                / LOG_POOL_CHECKPOINT_RATIO_ASYNC;
 
686
        log_sys->max_checkpoint_age = margin;
 
687
 
 
688
#ifdef UNIV_LOG_ARCHIVE
 
689
        log_sys->max_archived_lsn_age = smallest_archive_margin;
 
690
 
 
691
        log_sys->max_archived_lsn_age_async = smallest_archive_margin
 
692
                - smallest_archive_margin / LOG_ARCHIVE_RATIO_ASYNC;
 
693
#endif /* UNIV_LOG_ARCHIVE */
 
694
failure:
 
695
        mutex_exit(&(log_sys->mutex));
 
696
 
 
697
        if (!success) {
 
698
                fprintf(stderr,
 
699
                        "InnoDB: Error: ib_logfiles are too small"
 
700
                        " for innodb_thread_concurrency %lu.\n"
 
701
                        "InnoDB: The combined size of ib_logfiles"
 
702
                        " should be bigger than\n"
 
703
                        "InnoDB: 200 kB * innodb_thread_concurrency.\n"
 
704
                        "InnoDB: To get mysqld to start up, set"
 
705
                        " innodb_thread_concurrency in my.cnf\n"
 
706
                        "InnoDB: to a lower value, for example, to 8."
 
707
                        " After an ERROR-FREE shutdown\n"
 
708
                        "InnoDB: of mysqld you can adjust the size of"
 
709
                        " ib_logfiles, as explained in\n"
 
710
                        "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
 
711
                        "adding-and-removing.html\n"
 
712
                        "InnoDB: Cannot continue operation."
 
713
                        " Calling exit(1).\n",
 
714
                        (ulong)srv_thread_concurrency);
 
715
 
 
716
                exit(1);
 
717
        }
 
718
 
 
719
        return(success);
 
720
}
 
721
 
 
722
/**********************************************************
 
723
Initializes the log. */
 
724
UNIV_INTERN
 
725
void
 
726
log_init(void)
 
727
/*==========*/
 
728
{
 
729
        byte*   buf;
 
730
 
 
731
        log_sys = mem_alloc(sizeof(log_t));
 
732
 
 
733
        mutex_create(&log_sys->mutex, SYNC_LOG);
 
734
 
 
735
        mutex_enter(&(log_sys->mutex));
 
736
 
 
737
        /* Start the lsn from one log block from zero: this way every
 
738
        log record has a start lsn != zero, a fact which we will use */
 
739
 
 
740
        log_sys->lsn = LOG_START_LSN;
 
741
 
 
742
        ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE);
 
743
        ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);
 
744
 
 
745
        buf = ut_malloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE);
 
746
        log_sys->buf = ut_align(buf, OS_FILE_LOG_BLOCK_SIZE);
 
747
 
 
748
        log_sys->buf_size = LOG_BUFFER_SIZE;
 
749
 
 
750
        memset(log_sys->buf, '\0', LOG_BUFFER_SIZE);
 
751
 
 
752
        log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
 
753
                - LOG_BUF_FLUSH_MARGIN;
 
754
        log_sys->check_flush_or_checkpoint = TRUE;
 
755
        UT_LIST_INIT(log_sys->log_groups);
 
756
 
 
757
        log_sys->n_log_ios = 0;
 
758
 
 
759
        log_sys->n_log_ios_old = log_sys->n_log_ios;
 
760
        log_sys->last_printout_time = time(NULL);
 
761
        /*----------------------------*/
 
762
 
 
763
        log_sys->buf_next_to_write = 0;
 
764
 
 
765
        log_sys->write_lsn = 0;
 
766
        log_sys->current_flush_lsn = 0;
 
767
        log_sys->flushed_to_disk_lsn = 0;
 
768
 
 
769
        log_sys->written_to_some_lsn = log_sys->lsn;
 
770
        log_sys->written_to_all_lsn = log_sys->lsn;
 
771
 
 
772
        log_sys->n_pending_writes = 0;
 
773
 
 
774
        log_sys->no_flush_event = os_event_create(NULL);
 
775
 
 
776
        os_event_set(log_sys->no_flush_event);
 
777
 
 
778
        log_sys->one_flushed_event = os_event_create(NULL);
 
779
 
 
780
        os_event_set(log_sys->one_flushed_event);
 
781
 
 
782
        /*----------------------------*/
 
783
        log_sys->adm_checkpoint_interval = ULINT_MAX;
 
784
 
 
785
        log_sys->next_checkpoint_no = 0;
 
786
        log_sys->last_checkpoint_lsn = log_sys->lsn;
 
787
        log_sys->n_pending_checkpoint_writes = 0;
 
788
 
 
789
        rw_lock_create(&log_sys->checkpoint_lock, SYNC_NO_ORDER_CHECK);
 
790
 
 
791
        log_sys->checkpoint_buf
 
792
                = ut_align(mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE),
 
793
                           OS_FILE_LOG_BLOCK_SIZE);
 
794
        memset(log_sys->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE);
 
795
        /*----------------------------*/
 
796
 
 
797
#ifdef UNIV_LOG_ARCHIVE
 
798
        /* Under MySQL, log archiving is always off */
 
799
        log_sys->archiving_state = LOG_ARCH_OFF;
 
800
        log_sys->archived_lsn = log_sys->lsn;
 
801
        log_sys->next_archived_lsn = 0;
 
802
 
 
803
        log_sys->n_pending_archive_ios = 0;
 
804
 
 
805
        rw_lock_create(&log_sys->archive_lock, SYNC_NO_ORDER_CHECK);
 
806
 
 
807
        log_sys->archive_buf = NULL;
 
808
 
 
809
        /* ut_align(
 
810
        ut_malloc(LOG_ARCHIVE_BUF_SIZE
 
811
        + OS_FILE_LOG_BLOCK_SIZE),
 
812
        OS_FILE_LOG_BLOCK_SIZE); */
 
813
        log_sys->archive_buf_size = 0;
 
814
 
 
815
        /* memset(log_sys->archive_buf, '\0', LOG_ARCHIVE_BUF_SIZE); */
 
816
 
 
817
        log_sys->archiving_on = os_event_create(NULL);
 
818
#endif /* UNIV_LOG_ARCHIVE */
 
819
 
 
820
        /*----------------------------*/
 
821
 
 
822
        log_block_init(log_sys->buf, log_sys->lsn);
 
823
        log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
 
824
 
 
825
        log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
 
826
        log_sys->lsn = LOG_START_LSN + LOG_BLOCK_HDR_SIZE;
 
827
 
 
828
        mutex_exit(&(log_sys->mutex));
 
829
 
 
830
#ifdef UNIV_LOG_DEBUG
 
831
        recv_sys_create();
 
832
        recv_sys_init(FALSE, buf_pool_get_curr_size());
 
833
 
 
834
        recv_sys->parse_start_lsn = log_sys->lsn;
 
835
        recv_sys->scanned_lsn = log_sys->lsn;
 
836
        recv_sys->scanned_checkpoint_no = 0;
 
837
        recv_sys->recovered_lsn = log_sys->lsn;
 
838
        recv_sys->limit_lsn = IB_ULONGLONG_MAX;
 
839
#endif
 
840
}
 
841
 
 
842
/**********************************************************************
 
843
Inits a log group to the log system. */
 
844
UNIV_INTERN
 
845
void
 
846
log_group_init(
 
847
/*===========*/
 
848
        ulint   id,                     /* in: group id */
 
849
        ulint   n_files,                /* in: number of log files */
 
850
        ulint   file_size,              /* in: log file size in bytes */
 
851
        ulint   space_id,               /* in: space id of the file space
 
852
                                        which contains the log files of this
 
853
                                        group */
 
854
        ulint   archive_space_id __attribute__((unused)))
 
855
                                        /* in: space id of the file space
 
856
                                        which contains some archived log
 
857
                                        files for this group; currently, only
 
858
                                        for the first log group this is
 
859
                                        used */
 
860
{
 
861
        ulint   i;
 
862
 
 
863
        log_group_t*    group;
 
864
 
 
865
        group = mem_alloc(sizeof(log_group_t));
 
866
 
 
867
        group->id = id;
 
868
        group->n_files = n_files;
 
869
        group->file_size = file_size;
 
870
        group->space_id = space_id;
 
871
        group->state = LOG_GROUP_OK;
 
872
        group->lsn = LOG_START_LSN;
 
873
        group->lsn_offset = LOG_FILE_HDR_SIZE;
 
874
        group->n_pending_writes = 0;
 
875
 
 
876
        group->file_header_bufs = mem_alloc(sizeof(byte*) * n_files);
 
877
#ifdef UNIV_LOG_ARCHIVE
 
878
        group->archive_file_header_bufs = mem_alloc(sizeof(byte*) * n_files);
 
879
#endif /* UNIV_LOG_ARCHIVE */
 
880
 
 
881
        for (i = 0; i < n_files; i++) {
 
882
                *(group->file_header_bufs + i) = ut_align(
 
883
                        mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE),
 
884
                        OS_FILE_LOG_BLOCK_SIZE);
 
885
 
 
886
                memset(*(group->file_header_bufs + i), '\0',
 
887
                       LOG_FILE_HDR_SIZE);
 
888
 
 
889
#ifdef UNIV_LOG_ARCHIVE
 
890
                *(group->archive_file_header_bufs + i) = ut_align(
 
891
                        mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE),
 
892
                        OS_FILE_LOG_BLOCK_SIZE);
 
893
                memset(*(group->archive_file_header_bufs + i), '\0',
 
894
                       LOG_FILE_HDR_SIZE);
 
895
#endif /* UNIV_LOG_ARCHIVE */
 
896
        }
 
897
 
 
898
#ifdef UNIV_LOG_ARCHIVE
 
899
        group->archive_space_id = archive_space_id;
 
900
 
 
901
        group->archived_file_no = 0;
 
902
        group->archived_offset = 0;
 
903
#endif /* UNIV_LOG_ARCHIVE */
 
904
 
 
905
        group->checkpoint_buf = ut_align(
 
906
                mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE), OS_FILE_LOG_BLOCK_SIZE);
 
907
 
 
908
        memset(group->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE);
 
909
 
 
910
        UT_LIST_ADD_LAST(log_groups, log_sys->log_groups, group);
 
911
 
 
912
        ut_a(log_calc_max_ages());
 
913
}
 
914
 
 
915
/**********************************************************************
 
916
Does the unlockings needed in flush i/o completion. */
 
917
UNIV_INLINE
 
918
void
 
919
log_flush_do_unlocks(
 
920
/*=================*/
 
921
        ulint   code)   /* in: any ORed combination of LOG_UNLOCK_FLUSH_LOCK
 
922
                        and LOG_UNLOCK_NONE_FLUSHED_LOCK */
 
923
{
 
924
        ut_ad(mutex_own(&(log_sys->mutex)));
 
925
 
 
926
        /* NOTE that we must own the log mutex when doing the setting of the
 
927
        events: this is because transactions will wait for these events to
 
928
        be set, and at that moment the log flush they were waiting for must
 
929
        have ended. If the log mutex were not reserved here, the i/o-thread
 
930
        calling this function might be preempted for a while, and when it
 
931
        resumed execution, it might be that a new flush had been started, and
 
932
        this function would erroneously signal the NEW flush as completed.
 
933
        Thus, the changes in the state of these events are performed
 
934
        atomically in conjunction with the changes in the state of
 
935
        log_sys->n_pending_writes etc. */
 
936
 
 
937
        if (code & LOG_UNLOCK_NONE_FLUSHED_LOCK) {
 
938
                os_event_set(log_sys->one_flushed_event);
 
939
        }
 
940
 
 
941
        if (code & LOG_UNLOCK_FLUSH_LOCK) {
 
942
                os_event_set(log_sys->no_flush_event);
 
943
        }
 
944
}
 
945
 
 
946
/**********************************************************************
 
947
Checks if a flush is completed for a log group and does the completion
 
948
routine if yes. */
 
949
UNIV_INLINE
 
950
ulint
 
951
log_group_check_flush_completion(
 
952
/*=============================*/
 
953
                                /* out: LOG_UNLOCK_NONE_FLUSHED_LOCK or 0 */
 
954
        log_group_t*    group)  /* in: log group */
 
955
{
 
956
        ut_ad(mutex_own(&(log_sys->mutex)));
 
957
 
 
958
        if (!log_sys->one_flushed && group->n_pending_writes == 0) {
 
959
#ifdef UNIV_DEBUG
 
960
                if (log_debug_writes) {
 
961
                        fprintf(stderr,
 
962
                                "Log flushed first to group %lu\n",
 
963
                                (ulong) group->id);
 
964
                }
 
965
#endif /* UNIV_DEBUG */
 
966
                log_sys->written_to_some_lsn = log_sys->write_lsn;
 
967
                log_sys->one_flushed = TRUE;
 
968
 
 
969
                return(LOG_UNLOCK_NONE_FLUSHED_LOCK);
 
970
        }
 
971
 
 
972
#ifdef UNIV_DEBUG
 
973
        if (log_debug_writes && (group->n_pending_writes == 0)) {
 
974
 
 
975
                fprintf(stderr, "Log flushed to group %lu\n",
 
976
                        (ulong) group->id);
 
977
        }
 
978
#endif /* UNIV_DEBUG */
 
979
        return(0);
 
980
}
 
981
 
 
982
/**********************************************************
 
983
Checks if a flush is completed and does the completion routine if yes. */
 
984
static
 
985
ulint
 
986
log_sys_check_flush_completion(void)
 
987
/*================================*/
 
988
                        /* out: LOG_UNLOCK_FLUSH_LOCK or 0 */
 
989
{
 
990
        ulint   move_start;
 
991
        ulint   move_end;
 
992
 
 
993
        ut_ad(mutex_own(&(log_sys->mutex)));
 
994
 
 
995
        if (log_sys->n_pending_writes == 0) {
 
996
 
 
997
                log_sys->written_to_all_lsn = log_sys->write_lsn;
 
998
                log_sys->buf_next_to_write = log_sys->write_end_offset;
 
999
 
 
1000
                if (log_sys->write_end_offset > log_sys->max_buf_free / 2) {
 
1001
                        /* Move the log buffer content to the start of the
 
1002
                        buffer */
 
1003
 
 
1004
                        move_start = ut_calc_align_down(
 
1005
                                log_sys->write_end_offset,
 
1006
                                OS_FILE_LOG_BLOCK_SIZE);
 
1007
                        move_end = ut_calc_align(log_sys->buf_free,
 
1008
                                                 OS_FILE_LOG_BLOCK_SIZE);
 
1009
 
 
1010
                        ut_memmove(log_sys->buf, log_sys->buf + move_start,
 
1011
                                   move_end - move_start);
 
1012
                        log_sys->buf_free -= move_start;
 
1013
 
 
1014
                        log_sys->buf_next_to_write -= move_start;
 
1015
                }
 
1016
 
 
1017
                return(LOG_UNLOCK_FLUSH_LOCK);
 
1018
        }
 
1019
 
 
1020
        return(0);
 
1021
}
 
1022
 
 
1023
/**********************************************************
 
1024
Completes an i/o to a log file. */
 
1025
UNIV_INTERN
 
1026
void
 
1027
log_io_complete(
 
1028
/*============*/
 
1029
        log_group_t*    group)  /* in: log group or a dummy pointer */
 
1030
{
 
1031
        ulint   unlock;
 
1032
 
 
1033
#ifdef UNIV_LOG_ARCHIVE
 
1034
        if ((byte*)group == &log_archive_io) {
 
1035
                /* It was an archive write */
 
1036
 
 
1037
                log_io_complete_archive();
 
1038
 
 
1039
                return;
 
1040
        }
 
1041
#endif /* UNIV_LOG_ARCHIVE */
 
1042
 
 
1043
        if ((ulint)group & 0x1UL) {
 
1044
                /* It was a checkpoint write */
 
1045
                group = (log_group_t*)((ulint)group - 1);
 
1046
 
 
1047
                if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
 
1048
                    && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
 
1049
 
 
1050
                        fil_flush(group->space_id);
 
1051
                }
 
1052
 
 
1053
#ifdef UNIV_DEBUG
 
1054
                if (log_debug_writes) {
 
1055
                        fprintf(stderr,
 
1056
                                "Checkpoint info written to group %lu\n",
 
1057
                                group->id);
 
1058
                }
 
1059
#endif /* UNIV_DEBUG */
 
1060
                log_io_complete_checkpoint();
 
1061
 
 
1062
                return;
 
1063
        }
 
1064
 
 
1065
        ut_error;       /* We currently use synchronous writing of the
 
1066
                        logs and cannot end up here! */
 
1067
 
 
1068
        if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
 
1069
            && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
 
1070
            && srv_flush_log_at_trx_commit != 2) {
 
1071
 
 
1072
                fil_flush(group->space_id);
 
1073
        }
 
1074
 
 
1075
        mutex_enter(&(log_sys->mutex));
 
1076
 
 
1077
        ut_a(group->n_pending_writes > 0);
 
1078
        ut_a(log_sys->n_pending_writes > 0);
 
1079
 
 
1080
        group->n_pending_writes--;
 
1081
        log_sys->n_pending_writes--;
 
1082
 
 
1083
        unlock = log_group_check_flush_completion(group);
 
1084
        unlock = unlock | log_sys_check_flush_completion();
 
1085
 
 
1086
        log_flush_do_unlocks(unlock);
 
1087
 
 
1088
        mutex_exit(&(log_sys->mutex));
 
1089
}
 
1090
 
 
1091
/**********************************************************
 
1092
Writes a log file header to a log file space. */
 
1093
static
 
1094
void
 
1095
log_group_file_header_flush(
 
1096
/*========================*/
 
1097
        log_group_t*    group,          /* in: log group */
 
1098
        ulint           nth_file,       /* in: header to the nth file in the
 
1099
                                        log file space */
 
1100
        ib_uint64_t     start_lsn)      /* in: log file data starts at this
 
1101
                                        lsn */
 
1102
{
 
1103
        byte*   buf;
 
1104
        ulint   dest_offset;
 
1105
 
 
1106
        ut_ad(mutex_own(&(log_sys->mutex)));
 
1107
        ut_a(nth_file < group->n_files);
 
1108
 
 
1109
        buf = *(group->file_header_bufs + nth_file);
 
1110
 
 
1111
        mach_write_to_4(buf + LOG_GROUP_ID, group->id);
 
1112
        mach_write_ull(buf + LOG_FILE_START_LSN, start_lsn);
 
1113
 
 
1114
        /* Wipe over possible label of ibbackup --restore */
 
1115
        memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, "    ", 4);
 
1116
 
 
1117
        dest_offset = nth_file * group->file_size;
 
1118
 
 
1119
#ifdef UNIV_DEBUG
 
1120
        if (log_debug_writes) {
 
1121
                fprintf(stderr,
 
1122
                        "Writing log file header to group %lu file %lu\n",
 
1123
                        (ulong) group->id, (ulong) nth_file);
 
1124
        }
 
1125
#endif /* UNIV_DEBUG */
 
1126
        if (log_do_write) {
 
1127
                log_sys->n_log_ios++;
 
1128
 
 
1129
                srv_os_log_pending_writes++;
 
1130
 
 
1131
                fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, 0,
 
1132
                       dest_offset / UNIV_PAGE_SIZE,
 
1133
                       dest_offset % UNIV_PAGE_SIZE,
 
1134
                       OS_FILE_LOG_BLOCK_SIZE,
 
1135
                       buf, group);
 
1136
 
 
1137
                srv_os_log_pending_writes--;
 
1138
        }
 
1139
}
 
1140
 
 
1141
/**********************************************************
 
1142
Stores a 4-byte checksum to the trailer checksum field of a log block
 
1143
before writing it to a log file. This checksum is used in recovery to
 
1144
check the consistency of a log block. */
 
1145
static
 
1146
void
 
1147
log_block_store_checksum(
 
1148
/*=====================*/
 
1149
        byte*   block)  /* in/out: pointer to a log block */
 
1150
{
 
1151
        log_block_set_checksum(block, log_block_calc_checksum(block));
 
1152
}
 
1153
 
 
1154
/**********************************************************
 
1155
Writes a buffer to a log file group. */
 
1156
UNIV_INTERN
 
1157
void
 
1158
log_group_write_buf(
 
1159
/*================*/
 
1160
        log_group_t*    group,          /* in: log group */
 
1161
        byte*           buf,            /* in: buffer */
 
1162
        ulint           len,            /* in: buffer len; must be divisible
 
1163
                                        by OS_FILE_LOG_BLOCK_SIZE */
 
1164
        ib_uint64_t     start_lsn,      /* in: start lsn of the buffer; must
 
1165
                                        be divisible by
 
1166
                                        OS_FILE_LOG_BLOCK_SIZE */
 
1167
        ulint           new_data_offset)/* in: start offset of new data in
 
1168
                                        buf: this parameter is used to decide
 
1169
                                        if we have to write a new log file
 
1170
                                        header */
 
1171
{
 
1172
        ulint   write_len;
 
1173
        ibool   write_header;
 
1174
        ulint   next_offset;
 
1175
        ulint   i;
 
1176
 
 
1177
        ut_ad(mutex_own(&(log_sys->mutex)));
 
1178
        ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0);
 
1179
        ut_a(((ulint) start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
 
1180
 
 
1181
        if (new_data_offset == 0) {
 
1182
                write_header = TRUE;
 
1183
        } else {
 
1184
                write_header = FALSE;
 
1185
        }
 
1186
loop:
 
1187
        if (len == 0) {
 
1188
 
 
1189
                return;
 
1190
        }
 
1191
 
 
1192
        next_offset = log_group_calc_lsn_offset(start_lsn, group);
 
1193
 
 
1194
        if ((next_offset % group->file_size == LOG_FILE_HDR_SIZE)
 
1195
            && write_header) {
 
1196
                /* We start to write a new log file instance in the group */
 
1197
 
 
1198
                log_group_file_header_flush(group,
 
1199
                                            next_offset / group->file_size,
 
1200
                                            start_lsn);
 
1201
                srv_os_log_written+= OS_FILE_LOG_BLOCK_SIZE;
 
1202
                srv_log_writes++;
 
1203
        }
 
1204
 
 
1205
        if ((next_offset % group->file_size) + len > group->file_size) {
 
1206
 
 
1207
                write_len = group->file_size
 
1208
                        - (next_offset % group->file_size);
 
1209
        } else {
 
1210
                write_len = len;
 
1211
        }
 
1212
 
 
1213
#ifdef UNIV_DEBUG
 
1214
        if (log_debug_writes) {
 
1215
 
 
1216
                fprintf(stderr,
 
1217
                        "Writing log file segment to group %lu"
 
1218
                        " offset %lu len %lu\n"
 
1219
                        "start lsn %"PRIu64"\n"
 
1220
                        "First block n:o %lu last block n:o %lu\n",
 
1221
                        (ulong) group->id, (ulong) next_offset,
 
1222
                        (ulong) write_len,
 
1223
                        start_lsn,
 
1224
                        (ulong) log_block_get_hdr_no(buf),
 
1225
                        (ulong) log_block_get_hdr_no(
 
1226
                                buf + write_len - OS_FILE_LOG_BLOCK_SIZE));
 
1227
                ut_a(log_block_get_hdr_no(buf)
 
1228
                     == log_block_convert_lsn_to_no(start_lsn));
 
1229
 
 
1230
                for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
 
1231
 
 
1232
                        ut_a(log_block_get_hdr_no(buf) + i
 
1233
                             == log_block_get_hdr_no(
 
1234
                                     buf + i * OS_FILE_LOG_BLOCK_SIZE));
 
1235
                }
 
1236
        }
 
1237
#endif /* UNIV_DEBUG */
 
1238
        /* Calculate the checksums for each log block and write them to
 
1239
        the trailer fields of the log blocks */
 
1240
 
 
1241
        for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
 
1242
                log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE);
 
1243
        }
 
1244
 
 
1245
        if (log_do_write) {
 
1246
                log_sys->n_log_ios++;
 
1247
 
 
1248
                srv_os_log_pending_writes++;
 
1249
 
 
1250
                fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, 0,
 
1251
                       next_offset / UNIV_PAGE_SIZE,
 
1252
                       next_offset % UNIV_PAGE_SIZE, write_len, buf, group);
 
1253
 
 
1254
                srv_os_log_pending_writes--;
 
1255
 
 
1256
                srv_os_log_written+= write_len;
 
1257
                srv_log_writes++;
 
1258
        }
 
1259
 
 
1260
        if (write_len < len) {
 
1261
                start_lsn += write_len;
 
1262
                len -= write_len;
 
1263
                buf += write_len;
 
1264
 
 
1265
                write_header = TRUE;
 
1266
 
 
1267
                goto loop;
 
1268
        }
 
1269
}
 
1270
 
 
1271
/**********************************************************
 
1272
This function is called, e.g., when a transaction wants to commit. It checks
 
1273
that the log has been written to the log file up to the last log entry written
 
1274
by the transaction. If there is a flush running, it waits and checks if the
 
1275
flush flushed enough. If not, starts a new flush. */
 
1276
UNIV_INTERN
 
1277
void
 
1278
log_write_up_to(
 
1279
/*============*/
 
1280
        ib_uint64_t     lsn,    /* in: log sequence number up to which
 
1281
                                the log should be written,
 
1282
                                IB_ULONGLONG_MAX if not specified */
 
1283
        ulint           wait,   /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
 
1284
                                or LOG_WAIT_ALL_GROUPS */
 
1285
        ibool           flush_to_disk)
 
1286
                                /* in: TRUE if we want the written log
 
1287
                                also to be flushed to disk */
 
1288
{
 
1289
        log_group_t*    group;
 
1290
        ulint           start_offset;
 
1291
        ulint           end_offset;
 
1292
        ulint           area_start;
 
1293
        ulint           area_end;
 
1294
#ifdef UNIV_DEBUG
 
1295
        ulint           loop_count      = 0;
 
1296
#endif /* UNIV_DEBUG */
 
1297
        ulint           unlock;
 
1298
 
 
1299
        if (recv_no_ibuf_operations) {
 
1300
                /* Recovery is running and no operations on the log files are
 
1301
                allowed yet (the variable name .._no_ibuf_.. is misleading) */
 
1302
 
 
1303
                return;
 
1304
        }
 
1305
 
 
1306
loop:
 
1307
#ifdef UNIV_DEBUG
 
1308
        loop_count++;
 
1309
 
 
1310
        ut_ad(loop_count < 5);
 
1311
 
 
1312
# if 0
 
1313
        if (loop_count > 2) {
 
1314
                fprintf(stderr, "Log loop count %lu\n", loop_count);
 
1315
        }
 
1316
# endif
 
1317
#endif
 
1318
 
 
1319
        mutex_enter(&(log_sys->mutex));
 
1320
 
 
1321
        if (flush_to_disk
 
1322
            && log_sys->flushed_to_disk_lsn >= lsn) {
 
1323
 
 
1324
                mutex_exit(&(log_sys->mutex));
 
1325
 
 
1326
                return;
 
1327
        }
 
1328
 
 
1329
        if (!flush_to_disk
 
1330
            && (log_sys->written_to_all_lsn >= lsn
 
1331
                || (log_sys->written_to_some_lsn >= lsn
 
1332
                    && wait != LOG_WAIT_ALL_GROUPS))) {
 
1333
 
 
1334
                mutex_exit(&(log_sys->mutex));
 
1335
 
 
1336
                return;
 
1337
        }
 
1338
 
 
1339
        if (log_sys->n_pending_writes > 0) {
 
1340
                /* A write (+ possibly flush to disk) is running */
 
1341
 
 
1342
                if (flush_to_disk
 
1343
                    && log_sys->current_flush_lsn >= lsn) {
 
1344
                        /* The write + flush will write enough: wait for it to
 
1345
                        complete  */
 
1346
 
 
1347
                        goto do_waits;
 
1348
                }
 
1349
 
 
1350
                if (!flush_to_disk
 
1351
                    && log_sys->write_lsn >= lsn) {
 
1352
                        /* The write will write enough: wait for it to
 
1353
                        complete  */
 
1354
 
 
1355
                        goto do_waits;
 
1356
                }
 
1357
 
 
1358
                mutex_exit(&(log_sys->mutex));
 
1359
 
 
1360
                /* Wait for the write to complete and try to start a new
 
1361
                write */
 
1362
 
 
1363
                os_event_wait(log_sys->no_flush_event);
 
1364
 
 
1365
                goto loop;
 
1366
        }
 
1367
 
 
1368
        if (!flush_to_disk
 
1369
            && log_sys->buf_free == log_sys->buf_next_to_write) {
 
1370
                /* Nothing to write and no flush to disk requested */
 
1371
 
 
1372
                mutex_exit(&(log_sys->mutex));
 
1373
 
 
1374
                return;
 
1375
        }
 
1376
 
 
1377
#ifdef UNIV_DEBUG
 
1378
        if (log_debug_writes) {
 
1379
                fprintf(stderr,
 
1380
                        "Writing log from %"PRIu64" up to lsn %"PRIu64"\n",
 
1381
                        log_sys->written_to_all_lsn,
 
1382
                        log_sys->lsn);
 
1383
        }
 
1384
#endif /* UNIV_DEBUG */
 
1385
        log_sys->n_pending_writes++;
 
1386
 
 
1387
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
1388
        group->n_pending_writes++;      /* We assume here that we have only
 
1389
                                        one log group! */
 
1390
 
 
1391
        os_event_reset(log_sys->no_flush_event);
 
1392
        os_event_reset(log_sys->one_flushed_event);
 
1393
 
 
1394
        start_offset = log_sys->buf_next_to_write;
 
1395
        end_offset = log_sys->buf_free;
 
1396
 
 
1397
        area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE);
 
1398
        area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE);
 
1399
 
 
1400
        ut_ad(area_end - area_start > 0);
 
1401
 
 
1402
        log_sys->write_lsn = log_sys->lsn;
 
1403
 
 
1404
        if (flush_to_disk) {
 
1405
                log_sys->current_flush_lsn = log_sys->lsn;
 
1406
        }
 
1407
 
 
1408
        log_sys->one_flushed = FALSE;
 
1409
 
 
1410
        log_block_set_flush_bit(log_sys->buf + area_start, TRUE);
 
1411
        log_block_set_checkpoint_no(
 
1412
                log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
 
1413
                log_sys->next_checkpoint_no);
 
1414
 
 
1415
        /* Copy the last, incompletely written, log block a log block length
 
1416
        up, so that when the flush operation writes from the log buffer, the
 
1417
        segment to write will not be changed by writers to the log */
 
1418
 
 
1419
        ut_memcpy(log_sys->buf + area_end,
 
1420
                  log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
 
1421
                  OS_FILE_LOG_BLOCK_SIZE);
 
1422
 
 
1423
        log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE;
 
1424
        log_sys->write_end_offset = log_sys->buf_free;
 
1425
 
 
1426
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
1427
 
 
1428
        /* Do the write to the log files */
 
1429
 
 
1430
        while (group) {
 
1431
                log_group_write_buf(
 
1432
                        group, log_sys->buf + area_start,
 
1433
                        area_end - area_start,
 
1434
                        ut_uint64_align_down(log_sys->written_to_all_lsn,
 
1435
                                             OS_FILE_LOG_BLOCK_SIZE),
 
1436
                        start_offset - area_start);
 
1437
 
 
1438
                log_group_set_fields(group, log_sys->write_lsn);
 
1439
 
 
1440
                group = UT_LIST_GET_NEXT(log_groups, group);
 
1441
        }
 
1442
 
 
1443
        mutex_exit(&(log_sys->mutex));
 
1444
 
 
1445
        if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
 
1446
                /* O_DSYNC means the OS did not buffer the log file at all:
 
1447
                so we have also flushed to disk what we have written */
 
1448
 
 
1449
                log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
 
1450
 
 
1451
        } else if (flush_to_disk) {
 
1452
 
 
1453
                group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
1454
 
 
1455
                fil_flush(group->space_id);
 
1456
                log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
 
1457
        }
 
1458
 
 
1459
        mutex_enter(&(log_sys->mutex));
 
1460
 
 
1461
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
1462
 
 
1463
        ut_a(group->n_pending_writes == 1);
 
1464
        ut_a(log_sys->n_pending_writes == 1);
 
1465
 
 
1466
        group->n_pending_writes--;
 
1467
        log_sys->n_pending_writes--;
 
1468
 
 
1469
        unlock = log_group_check_flush_completion(group);
 
1470
        unlock = unlock | log_sys_check_flush_completion();
 
1471
 
 
1472
        log_flush_do_unlocks(unlock);
 
1473
 
 
1474
        mutex_exit(&(log_sys->mutex));
 
1475
 
 
1476
        return;
 
1477
 
 
1478
do_waits:
 
1479
        mutex_exit(&(log_sys->mutex));
 
1480
 
 
1481
        switch (wait) {
 
1482
        case LOG_WAIT_ONE_GROUP:
 
1483
                os_event_wait(log_sys->one_flushed_event);
 
1484
                break;
 
1485
        case LOG_WAIT_ALL_GROUPS:
 
1486
                os_event_wait(log_sys->no_flush_event);
 
1487
                break;
 
1488
#ifdef UNIV_DEBUG
 
1489
        case LOG_NO_WAIT:
 
1490
                break;
 
1491
        default:
 
1492
                ut_error;
 
1493
#endif /* UNIV_DEBUG */
 
1494
        }
 
1495
}
 
1496
 
 
1497
/********************************************************************
 
1498
Does a syncronous flush of the log buffer to disk. */
 
1499
UNIV_INTERN
 
1500
void
 
1501
log_buffer_flush_to_disk(void)
 
1502
/*==========================*/
 
1503
{
 
1504
        ib_uint64_t     lsn;
 
1505
 
 
1506
        mutex_enter(&(log_sys->mutex));
 
1507
 
 
1508
        lsn = log_sys->lsn;
 
1509
 
 
1510
        mutex_exit(&(log_sys->mutex));
 
1511
 
 
1512
        log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE);
 
1513
}
 
1514
 
 
1515
/********************************************************************
 
1516
Tries to establish a big enough margin of free space in the log buffer, such
 
1517
that a new log entry can be catenated without an immediate need for a flush. */
 
1518
static
 
1519
void
 
1520
log_flush_margin(void)
 
1521
/*==================*/
 
1522
{
 
1523
        log_t*          log     = log_sys;
 
1524
        ib_uint64_t     lsn     = 0;
 
1525
 
 
1526
        mutex_enter(&(log->mutex));
 
1527
 
 
1528
        if (log->buf_free > log->max_buf_free) {
 
1529
 
 
1530
                if (log->n_pending_writes > 0) {
 
1531
                        /* A flush is running: hope that it will provide enough
 
1532
                        free space */
 
1533
                } else {
 
1534
                        lsn = log->lsn;
 
1535
                }
 
1536
        }
 
1537
 
 
1538
        mutex_exit(&(log->mutex));
 
1539
 
 
1540
        if (lsn) {
 
1541
                log_write_up_to(lsn, LOG_NO_WAIT, FALSE);
 
1542
        }
 
1543
}
 
1544
 
 
1545
/********************************************************************
 
1546
Advances the smallest lsn for which there are unflushed dirty blocks in the
 
1547
buffer pool. NOTE: this function may only be called if the calling thread owns
 
1548
no synchronization objects! */
 
1549
UNIV_INTERN
 
1550
ibool
 
1551
log_preflush_pool_modified_pages(
 
1552
/*=============================*/
 
1553
                                        /* out: FALSE if there was a
 
1554
                                        flush batch of the same type
 
1555
                                        running, which means that we
 
1556
                                        could not start this flush
 
1557
                                        batch */
 
1558
        ib_uint64_t     new_oldest,     /* in: try to advance
 
1559
                                        oldest_modified_lsn at least
 
1560
                                        to this lsn */
 
1561
        ibool           sync)           /* in: TRUE if synchronous
 
1562
                                        operation is desired */
 
1563
{
 
1564
        ulint   n_pages;
 
1565
 
 
1566
        if (recv_recovery_on) {
 
1567
                /* If the recovery is running, we must first apply all
 
1568
                log records to their respective file pages to get the
 
1569
                right modify lsn values to these pages: otherwise, there
 
1570
                might be pages on disk which are not yet recovered to the
 
1571
                current lsn, and even after calling this function, we could
 
1572
                not know how up-to-date the disk version of the database is,
 
1573
                and we could not make a new checkpoint on the basis of the
 
1574
                info on the buffer pool only. */
 
1575
 
 
1576
                recv_apply_hashed_log_recs(TRUE);
 
1577
        }
 
1578
 
 
1579
        n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX, new_oldest);
 
1580
 
 
1581
        if (sync) {
 
1582
                buf_flush_wait_batch_end(BUF_FLUSH_LIST);
 
1583
        }
 
1584
 
 
1585
        if (n_pages == ULINT_UNDEFINED) {
 
1586
 
 
1587
                return(FALSE);
 
1588
        }
 
1589
 
 
1590
        return(TRUE);
 
1591
}
 
1592
 
 
1593
/**********************************************************
 
1594
Completes a checkpoint. */
 
1595
static
 
1596
void
 
1597
log_complete_checkpoint(void)
 
1598
/*=========================*/
 
1599
{
 
1600
        ut_ad(mutex_own(&(log_sys->mutex)));
 
1601
        ut_ad(log_sys->n_pending_checkpoint_writes == 0);
 
1602
 
 
1603
        log_sys->next_checkpoint_no++;
 
1604
 
 
1605
        log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn;
 
1606
 
 
1607
        rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
 
1608
}
 
1609
 
 
1610
/**********************************************************
 
1611
Completes an asynchronous checkpoint info write i/o to a log file. */
 
1612
static
 
1613
void
 
1614
log_io_complete_checkpoint(void)
 
1615
/*============================*/
 
1616
{
 
1617
        mutex_enter(&(log_sys->mutex));
 
1618
 
 
1619
        ut_ad(log_sys->n_pending_checkpoint_writes > 0);
 
1620
 
 
1621
        log_sys->n_pending_checkpoint_writes--;
 
1622
 
 
1623
        if (log_sys->n_pending_checkpoint_writes == 0) {
 
1624
                log_complete_checkpoint();
 
1625
        }
 
1626
 
 
1627
        mutex_exit(&(log_sys->mutex));
 
1628
}
 
1629
 
 
1630
/***********************************************************************
 
1631
Writes info to a checkpoint about a log group. */
 
1632
static
 
1633
void
 
1634
log_checkpoint_set_nth_group_info(
 
1635
/*==============================*/
 
1636
        byte*   buf,    /* in: buffer for checkpoint info */
 
1637
        ulint   n,      /* in: nth slot */
 
1638
        ulint   file_no,/* in: archived file number */
 
1639
        ulint   offset) /* in: archived file offset */
 
1640
{
 
1641
        ut_ad(n < LOG_MAX_N_GROUPS);
 
1642
 
 
1643
        mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
 
1644
                        + 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO, file_no);
 
1645
        mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
 
1646
                        + 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET, offset);
 
1647
}
 
1648
 
 
1649
/***********************************************************************
 
1650
Gets info from a checkpoint about a log group. */
 
1651
UNIV_INTERN
 
1652
void
 
1653
log_checkpoint_get_nth_group_info(
 
1654
/*==============================*/
 
1655
        byte*   buf,    /* in: buffer containing checkpoint info */
 
1656
        ulint   n,      /* in: nth slot */
 
1657
        ulint*  file_no,/* out: archived file number */
 
1658
        ulint*  offset) /* out: archived file offset */
 
1659
{
 
1660
        ut_ad(n < LOG_MAX_N_GROUPS);
 
1661
 
 
1662
        *file_no = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
 
1663
                                    + 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO);
 
1664
        *offset = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
 
1665
                                   + 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET);
 
1666
}
 
1667
 
 
1668
/**********************************************************
 
1669
Writes the checkpoint info to a log group header. */
 
1670
static
 
1671
void
 
1672
log_group_checkpoint(
 
1673
/*=================*/
 
1674
        log_group_t*    group)  /* in: log group */
 
1675
{
 
1676
        log_group_t*    group2;
 
1677
#ifdef UNIV_LOG_ARCHIVE
 
1678
        ib_uint64_t     archived_lsn;
 
1679
        ib_uint64_t     next_archived_lsn;
 
1680
#endif /* UNIV_LOG_ARCHIVE */
 
1681
        ulint           write_offset;
 
1682
        ulint           fold;
 
1683
        byte*           buf;
 
1684
        ulint           i;
 
1685
 
 
1686
        ut_ad(mutex_own(&(log_sys->mutex)));
 
1687
#if LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE
 
1688
# error "LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE"
 
1689
#endif
 
1690
 
 
1691
        buf = group->checkpoint_buf;
 
1692
 
 
1693
        mach_write_ull(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
 
1694
        mach_write_ull(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
 
1695
 
 
1696
        mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
 
1697
                        log_group_calc_lsn_offset(
 
1698
                                log_sys->next_checkpoint_lsn, group));
 
1699
 
 
1700
        mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);
 
1701
 
 
1702
#ifdef UNIV_LOG_ARCHIVE
 
1703
        if (log_sys->archiving_state == LOG_ARCH_OFF) {
 
1704
                archived_lsn = IB_ULONGLONG_MAX;
 
1705
        } else {
 
1706
                archived_lsn = log_sys->archived_lsn;
 
1707
 
 
1708
                if (archived_lsn != log_sys->next_archived_lsn) {
 
1709
                        next_archived_lsn = log_sys->next_archived_lsn;
 
1710
                        /* For debugging only */
 
1711
                }
 
1712
        }
 
1713
 
 
1714
        mach_write_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn);
 
1715
#else /* UNIV_LOG_ARCHIVE */
 
1716
        mach_write_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN, IB_ULONGLONG_MAX);
 
1717
#endif /* UNIV_LOG_ARCHIVE */
 
1718
 
 
1719
        for (i = 0; i < LOG_MAX_N_GROUPS; i++) {
 
1720
                log_checkpoint_set_nth_group_info(buf, i, 0, 0);
 
1721
        }
 
1722
 
 
1723
        group2 = UT_LIST_GET_FIRST(log_sys->log_groups);
 
1724
 
 
1725
        while (group2) {
 
1726
                log_checkpoint_set_nth_group_info(buf, group2->id,
 
1727
#ifdef UNIV_LOG_ARCHIVE
 
1728
                                                  group2->archived_file_no,
 
1729
                                                  group2->archived_offset
 
1730
#else /* UNIV_LOG_ARCHIVE */
 
1731
                                                  0, 0
 
1732
#endif /* UNIV_LOG_ARCHIVE */
 
1733
                                                  );
 
1734
 
 
1735
                group2 = UT_LIST_GET_NEXT(log_groups, group2);
 
1736
        }
 
1737
 
 
1738
        fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
 
1739
        mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
 
1740
 
 
1741
        fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
 
1742
                              LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
 
1743
        mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
 
1744
 
 
1745
        /* Starting from InnoDB-3.23.50, we also write info on allocated
 
1746
        size in the tablespace */
 
1747
 
 
1748
        mach_write_to_4(buf + LOG_CHECKPOINT_FSP_FREE_LIMIT,
 
1749
                        log_fsp_current_free_limit);
 
1750
 
 
1751
        mach_write_to_4(buf + LOG_CHECKPOINT_FSP_MAGIC_N,
 
1752
                        LOG_CHECKPOINT_FSP_MAGIC_N_VAL);
 
1753
 
 
1754
        /* We alternate the physical place of the checkpoint info in the first
 
1755
        log file */
 
1756
 
 
1757
        if ((log_sys->next_checkpoint_no & 1) == 0) {
 
1758
                write_offset = LOG_CHECKPOINT_1;
 
1759
        } else {
 
1760
                write_offset = LOG_CHECKPOINT_2;
 
1761
        }
 
1762
 
 
1763
        if (log_do_write) {
 
1764
                if (log_sys->n_pending_checkpoint_writes == 0) {
 
1765
 
 
1766
                        rw_lock_x_lock_gen(&(log_sys->checkpoint_lock),
 
1767
                                           LOG_CHECKPOINT);
 
1768
                }
 
1769
 
 
1770
                log_sys->n_pending_checkpoint_writes++;
 
1771
 
 
1772
                log_sys->n_log_ios++;
 
1773
 
 
1774
                /* We send as the last parameter the group machine address
 
1775
                added with 1, as we want to distinguish between a normal log
 
1776
                file write and a checkpoint field write */
 
1777
 
 
1778
                fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->space_id, 0,
 
1779
                       write_offset / UNIV_PAGE_SIZE,
 
1780
                       write_offset % UNIV_PAGE_SIZE,
 
1781
                       OS_FILE_LOG_BLOCK_SIZE,
 
1782
                       buf, ((byte*)group + 1));
 
1783
 
 
1784
                ut_ad(((ulint)group & 0x1UL) == 0);
 
1785
        }
 
1786
}
 
1787
 
 
1788
/**********************************************************
 
1789
Writes info to a buffer of a log group when log files are created in
 
1790
backup restoration. */
 
1791
UNIV_INTERN
 
1792
void
 
1793
log_reset_first_header_and_checkpoint(
 
1794
/*==================================*/
 
1795
        byte*           hdr_buf,/* in: buffer which will be written to the
 
1796
                                start of the first log file */
 
1797
        ib_uint64_t     start)  /* in: lsn of the start of the first log file;
 
1798
                                we pretend that there is a checkpoint at
 
1799
                                start + LOG_BLOCK_HDR_SIZE */
 
1800
{
 
1801
        ulint           fold;
 
1802
        byte*           buf;
 
1803
        ib_uint64_t     lsn;
 
1804
 
 
1805
        mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0);
 
1806
        mach_write_ull(hdr_buf + LOG_FILE_START_LSN, start);
 
1807
 
 
1808
        lsn = start + LOG_BLOCK_HDR_SIZE;
 
1809
 
 
1810
        /* Write the label of ibbackup --restore */
 
1811
        strcpy((char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
 
1812
               "ibbackup ");
 
1813
        ut_sprintf_timestamp((char*) hdr_buf
 
1814
                             + (LOG_FILE_WAS_CREATED_BY_HOT_BACKUP
 
1815
                                + (sizeof "ibbackup ") - 1));
 
1816
        buf = hdr_buf + LOG_CHECKPOINT_1;
 
1817
 
 
1818
        mach_write_ull(buf + LOG_CHECKPOINT_NO, 0);
 
1819
        mach_write_ull(buf + LOG_CHECKPOINT_LSN, lsn);
 
1820
 
 
1821
        mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
 
1822
                        LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE);
 
1823
 
 
1824
        mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, 2 * 1024 * 1024);
 
1825
 
 
1826
        mach_write_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN, IB_ULONGLONG_MAX);
 
1827
 
 
1828
        fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
 
1829
        mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
 
1830
 
 
1831
        fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
 
1832
                              LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
 
1833
        mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
 
1834
 
 
1835
        /* Starting from InnoDB-3.23.50, we should also write info on
 
1836
        allocated size in the tablespace, but unfortunately we do not
 
1837
        know it here */
 
1838
}
 
1839
 
 
1840
/**********************************************************
 
1841
Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */
 
1842
UNIV_INTERN
 
1843
void
 
1844
log_group_read_checkpoint_info(
 
1845
/*===========================*/
 
1846
        log_group_t*    group,  /* in: log group */
 
1847
        ulint           field)  /* in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
 
1848
{
 
1849
        ut_ad(mutex_own(&(log_sys->mutex)));
 
1850
 
 
1851
        log_sys->n_log_ios++;
 
1852
 
 
1853
        fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->space_id, 0,
 
1854
               field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE,
 
1855
               OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
 
1856
}
 
1857
 
 
1858
/**********************************************************
 
1859
Writes checkpoint info to groups. */
 
1860
UNIV_INTERN
 
1861
void
 
1862
log_groups_write_checkpoint_info(void)
 
1863
/*==================================*/
 
1864
{
 
1865
        log_group_t*    group;
 
1866
 
 
1867
        ut_ad(mutex_own(&(log_sys->mutex)));
 
1868
 
 
1869
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
1870
 
 
1871
        while (group) {
 
1872
                log_group_checkpoint(group);
 
1873
 
 
1874
                group = UT_LIST_GET_NEXT(log_groups, group);
 
1875
        }
 
1876
}
 
1877
 
 
1878
/**********************************************************
 
1879
Makes a checkpoint. Note that this function does not flush dirty
 
1880
blocks from the buffer pool: it only checks what is lsn of the oldest
 
1881
modification in the pool, and writes information about the lsn in
 
1882
log files. Use log_make_checkpoint_at to flush also the pool. */
 
1883
UNIV_INTERN
 
1884
ibool
 
1885
log_checkpoint(
 
1886
/*===========*/
 
1887
                                /* out: TRUE if success, FALSE if a checkpoint
 
1888
                                write was already running */
 
1889
        ibool   sync,           /* in: TRUE if synchronous operation is
 
1890
                                desired */
 
1891
        ibool   write_always)   /* in: the function normally checks if the
 
1892
                                the new checkpoint would have a greater
 
1893
                                lsn than the previous one: if not, then no
 
1894
                                physical write is done; by setting this
 
1895
                                parameter TRUE, a physical write will always be
 
1896
                                made to log files */
 
1897
{
 
1898
        ib_uint64_t     oldest_lsn;
 
1899
 
 
1900
        if (recv_recovery_is_on()) {
 
1901
                recv_apply_hashed_log_recs(TRUE);
 
1902
        }
 
1903
 
 
1904
        if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
 
1905
                fil_flush_file_spaces(FIL_TABLESPACE);
 
1906
        }
 
1907
 
 
1908
        mutex_enter(&(log_sys->mutex));
 
1909
 
 
1910
        oldest_lsn = log_buf_pool_get_oldest_modification();
 
1911
 
 
1912
        mutex_exit(&(log_sys->mutex));
 
1913
 
 
1914
        /* Because log also contains headers and dummy log records,
 
1915
        if the buffer pool contains no dirty buffers, oldest_lsn
 
1916
        gets the value log_sys->lsn from the previous function,
 
1917
        and we must make sure that the log is flushed up to that
 
1918
        lsn. If there are dirty buffers in the buffer pool, then our
 
1919
        write-ahead-logging algorithm ensures that the log has been flushed
 
1920
        up to oldest_lsn. */
 
1921
 
 
1922
        log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
 
1923
 
 
1924
        mutex_enter(&(log_sys->mutex));
 
1925
 
 
1926
        if (!write_always
 
1927
            && log_sys->last_checkpoint_lsn >= oldest_lsn) {
 
1928
 
 
1929
                mutex_exit(&(log_sys->mutex));
 
1930
 
 
1931
                return(TRUE);
 
1932
        }
 
1933
 
 
1934
        ut_ad(log_sys->written_to_all_lsn >= oldest_lsn);
 
1935
 
 
1936
        if (log_sys->n_pending_checkpoint_writes > 0) {
 
1937
                /* A checkpoint write is running */
 
1938
 
 
1939
                mutex_exit(&(log_sys->mutex));
 
1940
 
 
1941
                if (sync) {
 
1942
                        /* Wait for the checkpoint write to complete */
 
1943
                        rw_lock_s_lock(&(log_sys->checkpoint_lock));
 
1944
                        rw_lock_s_unlock(&(log_sys->checkpoint_lock));
 
1945
                }
 
1946
 
 
1947
                return(FALSE);
 
1948
        }
 
1949
 
 
1950
        log_sys->next_checkpoint_lsn = oldest_lsn;
 
1951
 
 
1952
#ifdef UNIV_DEBUG
 
1953
        if (log_debug_writes) {
 
1954
                fprintf(stderr, "Making checkpoint no %lu at lsn %"PRIu64"\n",
 
1955
                        (ulong) log_sys->next_checkpoint_no,
 
1956
                        oldest_lsn);
 
1957
        }
 
1958
#endif /* UNIV_DEBUG */
 
1959
 
 
1960
        log_groups_write_checkpoint_info();
 
1961
 
 
1962
        mutex_exit(&(log_sys->mutex));
 
1963
 
 
1964
        if (sync) {
 
1965
                /* Wait for the checkpoint write to complete */
 
1966
                rw_lock_s_lock(&(log_sys->checkpoint_lock));
 
1967
                rw_lock_s_unlock(&(log_sys->checkpoint_lock));
 
1968
        }
 
1969
 
 
1970
        return(TRUE);
 
1971
}
 
1972
 
 
1973
/********************************************************************
 
1974
Makes a checkpoint at a given lsn or later. */
 
1975
UNIV_INTERN
 
1976
void
 
1977
log_make_checkpoint_at(
 
1978
/*===================*/
 
1979
        ib_uint64_t     lsn,            /* in: make a checkpoint at this or a
 
1980
                                        later lsn, if IB_ULONGLONG_MAX, makes
 
1981
                                        a checkpoint at the latest lsn */
 
1982
        ibool           write_always)   /* in: the function normally checks if
 
1983
                                        the the new checkpoint would have a
 
1984
                                        greater lsn than the previous one: if
 
1985
                                        not, then no physical write is done;
 
1986
                                        by setting this parameter TRUE, a
 
1987
                                        physical write will always be made to
 
1988
                                        log files */
 
1989
{
 
1990
        /* Preflush pages synchronously */
 
1991
 
 
1992
        while (!log_preflush_pool_modified_pages(lsn, TRUE));
 
1993
 
 
1994
        while (!log_checkpoint(TRUE, write_always));
 
1995
}
 
1996
 
 
1997
/********************************************************************
 
1998
Tries to establish a big enough margin of free space in the log groups, such
 
1999
that a new log entry can be catenated without an immediate need for a
 
2000
checkpoint. NOTE: this function may only be called if the calling thread
 
2001
owns no synchronization objects! */
 
2002
static
 
2003
void
 
2004
log_checkpoint_margin(void)
 
2005
/*=======================*/
 
2006
{
 
2007
        log_t*          log             = log_sys;
 
2008
        ib_uint64_t     age;
 
2009
        ib_uint64_t     checkpoint_age;
 
2010
        ib_uint64_t     advance;
 
2011
        ib_uint64_t     oldest_lsn;
 
2012
        ibool           sync;
 
2013
        ibool           checkpoint_sync;
 
2014
        ibool           do_checkpoint;
 
2015
        ibool           success;
 
2016
loop:
 
2017
        sync = FALSE;
 
2018
        checkpoint_sync = FALSE;
 
2019
        do_checkpoint = FALSE;
 
2020
 
 
2021
        mutex_enter(&(log->mutex));
 
2022
 
 
2023
        if (log->check_flush_or_checkpoint == FALSE) {
 
2024
                mutex_exit(&(log->mutex));
 
2025
 
 
2026
                return;
 
2027
        }
 
2028
 
 
2029
        oldest_lsn = log_buf_pool_get_oldest_modification();
 
2030
 
 
2031
        age = log->lsn - oldest_lsn;
 
2032
 
 
2033
        if (age > log->max_modified_age_sync) {
 
2034
 
 
2035
                /* A flush is urgent: we have to do a synchronous preflush */
 
2036
 
 
2037
                sync = TRUE;
 
2038
                advance = 2 * (age - log->max_modified_age_sync);
 
2039
        } else if (age > log->max_modified_age_async) {
 
2040
 
 
2041
                /* A flush is not urgent: we do an asynchronous preflush */
 
2042
                advance = age - log->max_modified_age_async;
 
2043
        } else {
 
2044
                advance = 0;
 
2045
        }
 
2046
 
 
2047
        checkpoint_age = log->lsn - log->last_checkpoint_lsn;
 
2048
 
 
2049
        if (checkpoint_age > log->max_checkpoint_age) {
 
2050
                /* A checkpoint is urgent: we do it synchronously */
 
2051
 
 
2052
                checkpoint_sync = TRUE;
 
2053
 
 
2054
                do_checkpoint = TRUE;
 
2055
 
 
2056
        } else if (checkpoint_age > log->max_checkpoint_age_async) {
 
2057
                /* A checkpoint is not urgent: do it asynchronously */
 
2058
 
 
2059
                do_checkpoint = TRUE;
 
2060
 
 
2061
                log->check_flush_or_checkpoint = FALSE;
 
2062
        } else {
 
2063
                log->check_flush_or_checkpoint = FALSE;
 
2064
        }
 
2065
 
 
2066
        mutex_exit(&(log->mutex));
 
2067
 
 
2068
        if (advance) {
 
2069
                ib_uint64_t     new_oldest = oldest_lsn + advance;
 
2070
 
 
2071
                success = log_preflush_pool_modified_pages(new_oldest, sync);
 
2072
 
 
2073
                /* If the flush succeeded, this thread has done its part
 
2074
                and can proceed. If it did not succeed, there was another
 
2075
                thread doing a flush at the same time. If sync was FALSE,
 
2076
                the flush was not urgent, and we let this thread proceed.
 
2077
                Otherwise, we let it start from the beginning again. */
 
2078
 
 
2079
                if (sync && !success) {
 
2080
                        mutex_enter(&(log->mutex));
 
2081
 
 
2082
                        log->check_flush_or_checkpoint = TRUE;
 
2083
 
 
2084
                        mutex_exit(&(log->mutex));
 
2085
                        goto loop;
 
2086
                }
 
2087
        }
 
2088
 
 
2089
        if (do_checkpoint) {
 
2090
                log_checkpoint(checkpoint_sync, FALSE);
 
2091
 
 
2092
                if (checkpoint_sync) {
 
2093
 
 
2094
                        goto loop;
 
2095
                }
 
2096
        }
 
2097
}
 
2098
 
 
2099
/**********************************************************
 
2100
Reads a specified log segment to a buffer. */
 
2101
UNIV_INTERN
 
2102
void
 
2103
log_group_read_log_seg(
 
2104
/*===================*/
 
2105
        ulint           type,           /* in: LOG_ARCHIVE or LOG_RECOVER */
 
2106
        byte*           buf,            /* in: buffer where to read */
 
2107
        log_group_t*    group,          /* in: log group */
 
2108
        ib_uint64_t     start_lsn,      /* in: read area start */
 
2109
        ib_uint64_t     end_lsn)        /* in: read area end */
 
2110
{
 
2111
        ulint   len;
 
2112
        ulint   source_offset;
 
2113
        ibool   sync;
 
2114
 
 
2115
        ut_ad(mutex_own(&(log_sys->mutex)));
 
2116
 
 
2117
        sync = (type == LOG_RECOVER);
 
2118
loop:
 
2119
        source_offset = log_group_calc_lsn_offset(start_lsn, group);
 
2120
 
 
2121
        len = (ulint) (end_lsn - start_lsn);
 
2122
 
 
2123
        ut_ad(len != 0);
 
2124
 
 
2125
        if ((source_offset % group->file_size) + len > group->file_size) {
 
2126
 
 
2127
                len = group->file_size - (source_offset % group->file_size);
 
2128
        }
 
2129
 
 
2130
#ifdef UNIV_LOG_ARCHIVE
 
2131
        if (type == LOG_ARCHIVE) {
 
2132
 
 
2133
                log_sys->n_pending_archive_ios++;
 
2134
        }
 
2135
#endif /* UNIV_LOG_ARCHIVE */
 
2136
 
 
2137
        log_sys->n_log_ios++;
 
2138
 
 
2139
        fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 0,
 
2140
               source_offset / UNIV_PAGE_SIZE, source_offset % UNIV_PAGE_SIZE,
 
2141
               len, buf, NULL);
 
2142
 
 
2143
        start_lsn += len;
 
2144
        buf += len;
 
2145
 
 
2146
        if (start_lsn != end_lsn) {
 
2147
 
 
2148
                goto loop;
 
2149
        }
 
2150
}
 
2151
 
 
2152
#ifdef UNIV_LOG_ARCHIVE
 
2153
/**********************************************************
 
2154
Generates an archived log file name. */
 
2155
UNIV_INTERN
 
2156
void
 
2157
log_archived_file_name_gen(
 
2158
/*=======================*/
 
2159
        char*   buf,    /* in: buffer where to write */
 
2160
        ulint   id __attribute__((unused)),
 
2161
                        /* in: group id;
 
2162
                        currently we only archive the first group */
 
2163
        ulint   file_no)/* in: file number */
 
2164
{
 
2165
        sprintf(buf, "%sib_arch_log_%010lu", srv_arch_dir, (ulong) file_no);
 
2166
}
 
2167
 
 
2168
/**********************************************************
 
2169
Writes a log file header to a log file space. */
 
2170
static
 
2171
void
 
2172
log_group_archive_file_header_write(
 
2173
/*================================*/
 
2174
        log_group_t*    group,          /* in: log group */
 
2175
        ulint           nth_file,       /* in: header to the nth file in the
 
2176
                                        archive log file space */
 
2177
        ulint           file_no,        /* in: archived file number */
 
2178
        ib_uint64_t     start_lsn)      /* in: log file data starts at this
 
2179
                                        lsn */
 
2180
{
 
2181
        byte*   buf;
 
2182
        ulint   dest_offset;
 
2183
 
 
2184
        ut_ad(mutex_own(&(log_sys->mutex)));
 
2185
 
 
2186
        ut_a(nth_file < group->n_files);
 
2187
 
 
2188
        buf = *(group->archive_file_header_bufs + nth_file);
 
2189
 
 
2190
        mach_write_to_4(buf + LOG_GROUP_ID, group->id);
 
2191
        mach_write_ull(buf + LOG_FILE_START_LSN, start_lsn);
 
2192
        mach_write_to_4(buf + LOG_FILE_NO, file_no);
 
2193
 
 
2194
        mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, FALSE);
 
2195
 
 
2196
        dest_offset = nth_file * group->file_size;
 
2197
 
 
2198
        log_sys->n_log_ios++;
 
2199
 
 
2200
        fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id,
 
2201
               dest_offset / UNIV_PAGE_SIZE,
 
2202
               dest_offset % UNIV_PAGE_SIZE,
 
2203
               2 * OS_FILE_LOG_BLOCK_SIZE,
 
2204
               buf, &log_archive_io);
 
2205
}
 
2206
 
 
2207
/**********************************************************
 
2208
Writes a log file header to a completed archived log file. */
 
2209
static
 
2210
void
 
2211
log_group_archive_completed_header_write(
 
2212
/*=====================================*/
 
2213
        log_group_t*    group,          /* in: log group */
 
2214
        ulint           nth_file,       /* in: header to the nth file in the
 
2215
                                        archive log file space */
 
2216
        ib_uint64_t     end_lsn)        /* in: end lsn of the file */
 
2217
{
 
2218
        byte*   buf;
 
2219
        ulint   dest_offset;
 
2220
 
 
2221
        ut_ad(mutex_own(&(log_sys->mutex)));
 
2222
        ut_a(nth_file < group->n_files);
 
2223
 
 
2224
        buf = *(group->archive_file_header_bufs + nth_file);
 
2225
 
 
2226
        mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, TRUE);
 
2227
        mach_write_ull(buf + LOG_FILE_END_LSN, end_lsn);
 
2228
 
 
2229
        dest_offset = nth_file * group->file_size + LOG_FILE_ARCH_COMPLETED;
 
2230
 
 
2231
        log_sys->n_log_ios++;
 
2232
 
 
2233
        fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id,
 
2234
               dest_offset / UNIV_PAGE_SIZE,
 
2235
               dest_offset % UNIV_PAGE_SIZE,
 
2236
               OS_FILE_LOG_BLOCK_SIZE,
 
2237
               buf + LOG_FILE_ARCH_COMPLETED,
 
2238
               &log_archive_io);
 
2239
}
 
2240
 
 
2241
/**********************************************************
 
2242
Does the archive writes for a single log group. */
 
2243
static
 
2244
void
 
2245
log_group_archive(
 
2246
/*==============*/
 
2247
        log_group_t*    group)  /* in: log group */
 
2248
{
 
2249
        os_file_t        file_handle;
 
2250
        ib_uint64_t     start_lsn;
 
2251
        ib_uint64_t     end_lsn;
 
2252
        char            name[1024];
 
2253
        byte*           buf;
 
2254
        ulint           len;
 
2255
        ibool           ret;
 
2256
        ulint           next_offset;
 
2257
        ulint           n_files;
 
2258
        ulint           open_mode;
 
2259
 
 
2260
        ut_ad(mutex_own(&(log_sys->mutex)));
 
2261
 
 
2262
        start_lsn = log_sys->archived_lsn;
 
2263
 
 
2264
        ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
 
2265
 
 
2266
        end_lsn = log_sys->next_archived_lsn;
 
2267
 
 
2268
        ut_a(end_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
 
2269
 
 
2270
        buf = log_sys->archive_buf;
 
2271
 
 
2272
        n_files = 0;
 
2273
 
 
2274
        next_offset = group->archived_offset;
 
2275
loop:
 
2276
        if ((next_offset % group->file_size == 0)
 
2277
            || (fil_space_get_size(group->archive_space_id) == 0)) {
 
2278
 
 
2279
                /* Add the file to the archive file space; create or open the
 
2280
                file */
 
2281
 
 
2282
                if (next_offset % group->file_size == 0) {
 
2283
                        open_mode = OS_FILE_CREATE;
 
2284
                } else {
 
2285
                        open_mode = OS_FILE_OPEN;
 
2286
                }
 
2287
 
 
2288
                log_archived_file_name_gen(name, group->id,
 
2289
                                           group->archived_file_no + n_files);
 
2290
 
 
2291
                file_handle = os_file_create(name, open_mode, OS_FILE_AIO,
 
2292
                                             OS_DATA_FILE, &ret);
 
2293
 
 
2294
                if (!ret && (open_mode == OS_FILE_CREATE)) {
 
2295
                        file_handle = os_file_create(
 
2296
                                name, OS_FILE_OPEN, OS_FILE_AIO,
 
2297
                                OS_DATA_FILE, &ret);
 
2298
                }
 
2299
 
 
2300
                if (!ret) {
 
2301
                        fprintf(stderr,
 
2302
                                "InnoDB: Cannot create or open"
 
2303
                                " archive log file %s.\n"
 
2304
                                "InnoDB: Cannot continue operation.\n"
 
2305
                                "InnoDB: Check that the log archive"
 
2306
                                " directory exists,\n"
 
2307
                                "InnoDB: you have access rights to it, and\n"
 
2308
                                "InnoDB: there is space available.\n", name);
 
2309
                        exit(1);
 
2310
                }
 
2311
 
 
2312
#ifdef UNIV_DEBUG
 
2313
                if (log_debug_writes) {
 
2314
                        fprintf(stderr, "Created archive file %s\n", name);
 
2315
                }
 
2316
#endif /* UNIV_DEBUG */
 
2317
 
 
2318
                ret = os_file_close(file_handle);
 
2319
 
 
2320
                ut_a(ret);
 
2321
 
 
2322
                /* Add the archive file as a node to the space */
 
2323
 
 
2324
                fil_node_create(name, group->file_size / UNIV_PAGE_SIZE,
 
2325
                                group->archive_space_id, FALSE);
 
2326
 
 
2327
                if (next_offset % group->file_size == 0) {
 
2328
                        log_group_archive_file_header_write(
 
2329
                                group, n_files,
 
2330
                                group->archived_file_no + n_files,
 
2331
                                start_lsn);
 
2332
 
 
2333
                        next_offset += LOG_FILE_HDR_SIZE;
 
2334
                }
 
2335
        }
 
2336
 
 
2337
        len = end_lsn - start_lsn;
 
2338
 
 
2339
        if (group->file_size < (next_offset % group->file_size) + len) {
 
2340
 
 
2341
                len = group->file_size - (next_offset % group->file_size);
 
2342
        }
 
2343
 
 
2344
#ifdef UNIV_DEBUG
 
2345
        if (log_debug_writes) {
 
2346
                fprintf(stderr,
 
2347
                        "Archiving starting at lsn %"PRIu64", len %lu"
 
2348
                        " to group %lu\n",
 
2349
                        start_lsn,
 
2350
                        (ulong) len, (ulong) group->id);
 
2351
        }
 
2352
#endif /* UNIV_DEBUG */
 
2353
 
 
2354
        log_sys->n_pending_archive_ios++;
 
2355
 
 
2356
        log_sys->n_log_ios++;
 
2357
 
 
2358
        fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->archive_space_id,
 
2359
               next_offset / UNIV_PAGE_SIZE, next_offset % UNIV_PAGE_SIZE,
 
2360
               ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf,
 
2361
               &log_archive_io);
 
2362
 
 
2363
        start_lsn += len;
 
2364
        next_offset += len;
 
2365
        buf += len;
 
2366
 
 
2367
        if (next_offset % group->file_size == 0) {
 
2368
                n_files++;
 
2369
        }
 
2370
 
 
2371
        if (end_lsn != start_lsn) {
 
2372
 
 
2373
                goto loop;
 
2374
        }
 
2375
 
 
2376
        group->next_archived_file_no = group->archived_file_no + n_files;
 
2377
        group->next_archived_offset = next_offset % group->file_size;
 
2378
 
 
2379
        ut_a(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
 
2380
}
 
2381
 
 
2382
/*********************************************************
 
2383
(Writes to the archive of each log group.) Currently, only the first
 
2384
group is archived. */
 
2385
static
 
2386
void
 
2387
log_archive_groups(void)
 
2388
/*====================*/
 
2389
{
 
2390
        log_group_t*    group;
 
2391
 
 
2392
        ut_ad(mutex_own(&(log_sys->mutex)));
 
2393
 
 
2394
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
2395
 
 
2396
        log_group_archive(group);
 
2397
}
 
2398
 
 
2399
/*********************************************************
 
2400
Completes the archiving write phase for (each log group), currently,
 
2401
the first log group. */
 
2402
static
 
2403
void
 
2404
log_archive_write_complete_groups(void)
 
2405
/*===================================*/
 
2406
{
 
2407
        log_group_t*    group;
 
2408
        ulint           end_offset;
 
2409
        ulint           trunc_files;
 
2410
        ulint           n_files;
 
2411
        ib_uint64_t     start_lsn;
 
2412
        ib_uint64_t     end_lsn;
 
2413
        ulint           i;
 
2414
 
 
2415
        ut_ad(mutex_own(&(log_sys->mutex)));
 
2416
 
 
2417
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
2418
 
 
2419
        group->archived_file_no = group->next_archived_file_no;
 
2420
        group->archived_offset = group->next_archived_offset;
 
2421
 
 
2422
        /* Truncate from the archive file space all but the last
 
2423
        file, or if it has been written full, all files */
 
2424
 
 
2425
        n_files = (UNIV_PAGE_SIZE
 
2426
                   * fil_space_get_size(group->archive_space_id))
 
2427
                / group->file_size;
 
2428
        ut_ad(n_files > 0);
 
2429
 
 
2430
        end_offset = group->archived_offset;
 
2431
 
 
2432
        if (end_offset % group->file_size == 0) {
 
2433
 
 
2434
                trunc_files = n_files;
 
2435
        } else {
 
2436
                trunc_files = n_files - 1;
 
2437
        }
 
2438
 
 
2439
#ifdef UNIV_DEBUG
 
2440
        if (log_debug_writes && trunc_files) {
 
2441
                fprintf(stderr,
 
2442
                        "Complete file(s) archived to group %lu\n",
 
2443
                        (ulong) group->id);
 
2444
        }
 
2445
#endif /* UNIV_DEBUG */
 
2446
 
 
2447
        /* Calculate the archive file space start lsn */
 
2448
        start_lsn = log_sys->next_archived_lsn
 
2449
                - (end_offset - LOG_FILE_HDR_SIZE + trunc_files
 
2450
                   * (group->file_size - LOG_FILE_HDR_SIZE));
 
2451
        end_lsn = start_lsn;
 
2452
 
 
2453
        for (i = 0; i < trunc_files; i++) {
 
2454
 
 
2455
                end_lsn += group->file_size - LOG_FILE_HDR_SIZE;
 
2456
 
 
2457
                /* Write a notice to the headers of archived log
 
2458
                files that the file write has been completed */
 
2459
 
 
2460
                log_group_archive_completed_header_write(group, i, end_lsn);
 
2461
        }
 
2462
 
 
2463
        fil_space_truncate_start(group->archive_space_id,
 
2464
                                 trunc_files * group->file_size);
 
2465
 
 
2466
#ifdef UNIV_DEBUG
 
2467
        if (log_debug_writes) {
 
2468
                fputs("Archiving writes completed\n", stderr);
 
2469
        }
 
2470
#endif /* UNIV_DEBUG */
 
2471
}
 
2472
 
 
2473
/**********************************************************
 
2474
Completes an archiving i/o. */
 
2475
static
 
2476
void
 
2477
log_archive_check_completion_low(void)
 
2478
/*==================================*/
 
2479
{
 
2480
        ut_ad(mutex_own(&(log_sys->mutex)));
 
2481
 
 
2482
        if (log_sys->n_pending_archive_ios == 0
 
2483
            && log_sys->archiving_phase == LOG_ARCHIVE_READ) {
 
2484
 
 
2485
#ifdef UNIV_DEBUG
 
2486
                if (log_debug_writes) {
 
2487
                        fputs("Archiving read completed\n", stderr);
 
2488
                }
 
2489
#endif /* UNIV_DEBUG */
 
2490
 
 
2491
                /* Archive buffer has now been read in: start archive writes */
 
2492
 
 
2493
                log_sys->archiving_phase = LOG_ARCHIVE_WRITE;
 
2494
 
 
2495
                log_archive_groups();
 
2496
        }
 
2497
 
 
2498
        if (log_sys->n_pending_archive_ios == 0
 
2499
            && log_sys->archiving_phase == LOG_ARCHIVE_WRITE) {
 
2500
 
 
2501
                log_archive_write_complete_groups();
 
2502
 
 
2503
                log_sys->archived_lsn = log_sys->next_archived_lsn;
 
2504
 
 
2505
                rw_lock_x_unlock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
 
2506
        }
 
2507
}
 
2508
 
 
2509
/**********************************************************
 
2510
Completes an archiving i/o. */
 
2511
static
 
2512
void
 
2513
log_io_complete_archive(void)
 
2514
/*=========================*/
 
2515
{
 
2516
        log_group_t*    group;
 
2517
 
 
2518
        mutex_enter(&(log_sys->mutex));
 
2519
 
 
2520
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
2521
 
 
2522
        mutex_exit(&(log_sys->mutex));
 
2523
 
 
2524
        fil_flush(group->archive_space_id);
 
2525
 
 
2526
        mutex_enter(&(log_sys->mutex));
 
2527
 
 
2528
        ut_ad(log_sys->n_pending_archive_ios > 0);
 
2529
 
 
2530
        log_sys->n_pending_archive_ios--;
 
2531
 
 
2532
        log_archive_check_completion_low();
 
2533
 
 
2534
        mutex_exit(&(log_sys->mutex));
 
2535
}
 
2536
 
 
2537
/************************************************************************
 
2538
Starts an archiving operation. */
 
2539
UNIV_INTERN
 
2540
ibool
 
2541
log_archive_do(
 
2542
/*===========*/
 
2543
                        /* out: TRUE if succeed, FALSE if an archiving
 
2544
                        operation was already running */
 
2545
        ibool   sync,   /* in: TRUE if synchronous operation is desired */
 
2546
        ulint*  n_bytes)/* out: archive log buffer size, 0 if nothing to
 
2547
                        archive */
 
2548
{
 
2549
        ibool           calc_new_limit;
 
2550
        ib_uint64_t     start_lsn;
 
2551
        ib_uint64_t     limit_lsn;
 
2552
 
 
2553
        calc_new_limit = TRUE;
 
2554
loop:
 
2555
        mutex_enter(&(log_sys->mutex));
 
2556
 
 
2557
        switch (log_sys->archiving_state) {
 
2558
        case LOG_ARCH_OFF:
 
2559
arch_none:
 
2560
                mutex_exit(&(log_sys->mutex));
 
2561
 
 
2562
                *n_bytes = 0;
 
2563
 
 
2564
                return(TRUE);
 
2565
        case LOG_ARCH_STOPPED:
 
2566
        case LOG_ARCH_STOPPING2:
 
2567
                mutex_exit(&(log_sys->mutex));
 
2568
 
 
2569
                os_event_wait(log_sys->archiving_on);
 
2570
 
 
2571
                goto loop;
 
2572
        }
 
2573
 
 
2574
        start_lsn = log_sys->archived_lsn;
 
2575
 
 
2576
        if (calc_new_limit) {
 
2577
                ut_a(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0);
 
2578
                limit_lsn = start_lsn + log_sys->archive_buf_size;
 
2579
 
 
2580
                *n_bytes = log_sys->archive_buf_size;
 
2581
 
 
2582
                if (limit_lsn >= log_sys->lsn) {
 
2583
 
 
2584
                        limit_lsn = ut_uint64_align_down(
 
2585
                                log_sys->lsn, OS_FILE_LOG_BLOCK_SIZE);
 
2586
                }
 
2587
        }
 
2588
 
 
2589
        if (log_sys->archived_lsn >= limit_lsn) {
 
2590
 
 
2591
                goto arch_none;
 
2592
        }
 
2593
 
 
2594
        if (log_sys->written_to_all_lsn < limit_lsn) {
 
2595
 
 
2596
                mutex_exit(&(log_sys->mutex));
 
2597
 
 
2598
                log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
 
2599
 
 
2600
                calc_new_limit = FALSE;
 
2601
 
 
2602
                goto loop;
 
2603
        }
 
2604
 
 
2605
        if (log_sys->n_pending_archive_ios > 0) {
 
2606
                /* An archiving operation is running */
 
2607
 
 
2608
                mutex_exit(&(log_sys->mutex));
 
2609
 
 
2610
                if (sync) {
 
2611
                        rw_lock_s_lock(&(log_sys->archive_lock));
 
2612
                        rw_lock_s_unlock(&(log_sys->archive_lock));
 
2613
                }
 
2614
 
 
2615
                *n_bytes = log_sys->archive_buf_size;
 
2616
 
 
2617
                return(FALSE);
 
2618
        }
 
2619
 
 
2620
        rw_lock_x_lock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
 
2621
 
 
2622
        log_sys->archiving_phase = LOG_ARCHIVE_READ;
 
2623
 
 
2624
        log_sys->next_archived_lsn = limit_lsn;
 
2625
 
 
2626
#ifdef UNIV_DEBUG
 
2627
        if (log_debug_writes) {
 
2628
                fprintf(stderr,
 
2629
                        "Archiving from lsn %"PRIu64" to lsn %"PRIu64"\n",
 
2630
                        log_sys->archived_lsn, limit_lsn);
 
2631
        }
 
2632
#endif /* UNIV_DEBUG */
 
2633
 
 
2634
        /* Read the log segment to the archive buffer */
 
2635
 
 
2636
        log_group_read_log_seg(LOG_ARCHIVE, log_sys->archive_buf,
 
2637
                               UT_LIST_GET_FIRST(log_sys->log_groups),
 
2638
                               start_lsn, limit_lsn);
 
2639
 
 
2640
        mutex_exit(&(log_sys->mutex));
 
2641
 
 
2642
        if (sync) {
 
2643
                rw_lock_s_lock(&(log_sys->archive_lock));
 
2644
                rw_lock_s_unlock(&(log_sys->archive_lock));
 
2645
        }
 
2646
 
 
2647
        *n_bytes = log_sys->archive_buf_size;
 
2648
 
 
2649
        return(TRUE);
 
2650
}
 
2651
 
 
2652
/********************************************************************
 
2653
Writes the log contents to the archive at least up to the lsn when this
 
2654
function was called. */
 
2655
static
 
2656
void
 
2657
log_archive_all(void)
 
2658
/*=================*/
 
2659
{
 
2660
        ib_uint64_t     present_lsn;
 
2661
        ulint           dummy;
 
2662
 
 
2663
        mutex_enter(&(log_sys->mutex));
 
2664
 
 
2665
        if (log_sys->archiving_state == LOG_ARCH_OFF) {
 
2666
                mutex_exit(&(log_sys->mutex));
 
2667
 
 
2668
                return;
 
2669
        }
 
2670
 
 
2671
        present_lsn = log_sys->lsn;
 
2672
 
 
2673
        mutex_exit(&(log_sys->mutex));
 
2674
 
 
2675
        log_pad_current_log_block();
 
2676
 
 
2677
        for (;;) {
 
2678
                mutex_enter(&(log_sys->mutex));
 
2679
 
 
2680
                if (present_lsn <= log_sys->archived_lsn) {
 
2681
 
 
2682
                        mutex_exit(&(log_sys->mutex));
 
2683
 
 
2684
                        return;
 
2685
                }
 
2686
 
 
2687
                mutex_exit(&(log_sys->mutex));
 
2688
 
 
2689
                log_archive_do(TRUE, &dummy);
 
2690
        }
 
2691
}
 
2692
 
 
2693
/*********************************************************
 
2694
Closes the possible open archive log file (for each group) the first group,
 
2695
and if it was open, increments the group file count by 2, if desired. */
 
2696
static
 
2697
void
 
2698
log_archive_close_groups(
 
2699
/*=====================*/
 
2700
        ibool   increment_file_count)   /* in: TRUE if we want to increment
 
2701
                                        the file count */
 
2702
{
 
2703
        log_group_t*    group;
 
2704
        ulint           trunc_len;
 
2705
 
 
2706
        ut_ad(mutex_own(&(log_sys->mutex)));
 
2707
 
 
2708
        if (log_sys->archiving_state == LOG_ARCH_OFF) {
 
2709
 
 
2710
                return;
 
2711
        }
 
2712
 
 
2713
        group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
2714
 
 
2715
        trunc_len = UNIV_PAGE_SIZE
 
2716
                * fil_space_get_size(group->archive_space_id);
 
2717
        if (trunc_len > 0) {
 
2718
                ut_a(trunc_len == group->file_size);
 
2719
 
 
2720
                /* Write a notice to the headers of archived log
 
2721
                files that the file write has been completed */
 
2722
 
 
2723
                log_group_archive_completed_header_write(
 
2724
                        group, 0, log_sys->archived_lsn);
 
2725
 
 
2726
                fil_space_truncate_start(group->archive_space_id,
 
2727
                                         trunc_len);
 
2728
                if (increment_file_count) {
 
2729
                        group->archived_offset = 0;
 
2730
                        group->archived_file_no += 2;
 
2731
                }
 
2732
 
 
2733
#ifdef UNIV_DEBUG
 
2734
                if (log_debug_writes) {
 
2735
                        fprintf(stderr,
 
2736
                                "Incrementing arch file no to %lu"
 
2737
                                " in log group %lu\n",
 
2738
                                (ulong) group->archived_file_no + 2,
 
2739
                                (ulong) group->id);
 
2740
                }
 
2741
#endif /* UNIV_DEBUG */
 
2742
        }
 
2743
}
 
2744
 
 
2745
/********************************************************************
 
2746
Writes the log contents to the archive up to the lsn when this function was
 
2747
called, and stops the archiving. When archiving is started again, the archived
 
2748
log file numbers start from 2 higher, so that the archiving will not write
 
2749
again to the archived log files which exist when this function returns. */
 
2750
UNIV_INTERN
 
2751
ulint
 
2752
log_archive_stop(void)
 
2753
/*==================*/
 
2754
                        /* out: DB_SUCCESS or DB_ERROR */
 
2755
{
 
2756
        ibool   success;
 
2757
 
 
2758
        mutex_enter(&(log_sys->mutex));
 
2759
 
 
2760
        if (log_sys->archiving_state != LOG_ARCH_ON) {
 
2761
 
 
2762
                mutex_exit(&(log_sys->mutex));
 
2763
 
 
2764
                return(DB_ERROR);
 
2765
        }
 
2766
 
 
2767
        log_sys->archiving_state = LOG_ARCH_STOPPING;
 
2768
 
 
2769
        mutex_exit(&(log_sys->mutex));
 
2770
 
 
2771
        log_archive_all();
 
2772
 
 
2773
        mutex_enter(&(log_sys->mutex));
 
2774
 
 
2775
        log_sys->archiving_state = LOG_ARCH_STOPPING2;
 
2776
        os_event_reset(log_sys->archiving_on);
 
2777
 
 
2778
        mutex_exit(&(log_sys->mutex));
 
2779
 
 
2780
        /* Wait for a possible archiving operation to end */
 
2781
 
 
2782
        rw_lock_s_lock(&(log_sys->archive_lock));
 
2783
        rw_lock_s_unlock(&(log_sys->archive_lock));
 
2784
 
 
2785
        mutex_enter(&(log_sys->mutex));
 
2786
 
 
2787
        /* Close all archived log files, incrementing the file count by 2,
 
2788
        if appropriate */
 
2789
 
 
2790
        log_archive_close_groups(TRUE);
 
2791
 
 
2792
        mutex_exit(&(log_sys->mutex));
 
2793
 
 
2794
        /* Make a checkpoint, so that if recovery is needed, the file numbers
 
2795
        of new archived log files will start from the right value */
 
2796
 
 
2797
        success = FALSE;
 
2798
 
 
2799
        while (!success) {
 
2800
                success = log_checkpoint(TRUE, TRUE);
 
2801
        }
 
2802
 
 
2803
        mutex_enter(&(log_sys->mutex));
 
2804
 
 
2805
        log_sys->archiving_state = LOG_ARCH_STOPPED;
 
2806
 
 
2807
        mutex_exit(&(log_sys->mutex));
 
2808
 
 
2809
        return(DB_SUCCESS);
 
2810
}
 
2811
 
 
2812
/********************************************************************
 
2813
Starts again archiving which has been stopped. */
 
2814
UNIV_INTERN
 
2815
ulint
 
2816
log_archive_start(void)
 
2817
/*===================*/
 
2818
                        /* out: DB_SUCCESS or DB_ERROR */
 
2819
{
 
2820
        mutex_enter(&(log_sys->mutex));
 
2821
 
 
2822
        if (log_sys->archiving_state != LOG_ARCH_STOPPED) {
 
2823
 
 
2824
                mutex_exit(&(log_sys->mutex));
 
2825
 
 
2826
                return(DB_ERROR);
 
2827
        }
 
2828
 
 
2829
        log_sys->archiving_state = LOG_ARCH_ON;
 
2830
 
 
2831
        os_event_set(log_sys->archiving_on);
 
2832
 
 
2833
        mutex_exit(&(log_sys->mutex));
 
2834
 
 
2835
        return(DB_SUCCESS);
 
2836
}
 
2837
 
 
2838
/********************************************************************
 
2839
Stop archiving the log so that a gap may occur in the archived log files. */
 
2840
UNIV_INTERN
 
2841
ulint
 
2842
log_archive_noarchivelog(void)
 
2843
/*==========================*/
 
2844
                        /* out: DB_SUCCESS or DB_ERROR */
 
2845
{
 
2846
loop:
 
2847
        mutex_enter(&(log_sys->mutex));
 
2848
 
 
2849
        if (log_sys->archiving_state == LOG_ARCH_STOPPED
 
2850
            || log_sys->archiving_state == LOG_ARCH_OFF) {
 
2851
 
 
2852
                log_sys->archiving_state = LOG_ARCH_OFF;
 
2853
 
 
2854
                os_event_set(log_sys->archiving_on);
 
2855
 
 
2856
                mutex_exit(&(log_sys->mutex));
 
2857
 
 
2858
                return(DB_SUCCESS);
 
2859
        }
 
2860
 
 
2861
        mutex_exit(&(log_sys->mutex));
 
2862
 
 
2863
        log_archive_stop();
 
2864
 
 
2865
        os_thread_sleep(500000);
 
2866
 
 
2867
        goto loop;
 
2868
}
 
2869
 
 
2870
/********************************************************************
 
2871
Start archiving the log so that a gap may occur in the archived log files. */
 
2872
UNIV_INTERN
 
2873
ulint
 
2874
log_archive_archivelog(void)
 
2875
/*========================*/
 
2876
                        /* out: DB_SUCCESS or DB_ERROR */
 
2877
{
 
2878
        mutex_enter(&(log_sys->mutex));
 
2879
 
 
2880
        if (log_sys->archiving_state == LOG_ARCH_OFF) {
 
2881
 
 
2882
                log_sys->archiving_state = LOG_ARCH_ON;
 
2883
 
 
2884
                log_sys->archived_lsn
 
2885
                        = ut_uint64_align_down(log_sys->lsn,
 
2886
                                               OS_FILE_LOG_BLOCK_SIZE);
 
2887
                mutex_exit(&(log_sys->mutex));
 
2888
 
 
2889
                return(DB_SUCCESS);
 
2890
        }
 
2891
 
 
2892
        mutex_exit(&(log_sys->mutex));
 
2893
 
 
2894
        return(DB_ERROR);
 
2895
}
 
2896
 
 
2897
/********************************************************************
 
2898
Tries to establish a big enough margin of free space in the log groups, such
 
2899
that a new log entry can be catenated without an immediate need for
 
2900
archiving. */
 
2901
static
 
2902
void
 
2903
log_archive_margin(void)
 
2904
/*====================*/
 
2905
{
 
2906
        log_t*  log             = log_sys;
 
2907
        ulint   age;
 
2908
        ibool   sync;
 
2909
        ulint   dummy;
 
2910
loop:
 
2911
        mutex_enter(&(log->mutex));
 
2912
 
 
2913
        if (log->archiving_state == LOG_ARCH_OFF) {
 
2914
                mutex_exit(&(log->mutex));
 
2915
 
 
2916
                return;
 
2917
        }
 
2918
 
 
2919
        age = log->lsn - log->archived_lsn;
 
2920
 
 
2921
        if (age > log->max_archived_lsn_age) {
 
2922
 
 
2923
                /* An archiving is urgent: we have to do synchronous i/o */
 
2924
 
 
2925
                sync = TRUE;
 
2926
 
 
2927
        } else if (age > log->max_archived_lsn_age_async) {
 
2928
 
 
2929
                /* An archiving is not urgent: we do asynchronous i/o */
 
2930
 
 
2931
                sync = FALSE;
 
2932
        } else {
 
2933
                /* No archiving required yet */
 
2934
 
 
2935
                mutex_exit(&(log->mutex));
 
2936
 
 
2937
                return;
 
2938
        }
 
2939
 
 
2940
        mutex_exit(&(log->mutex));
 
2941
 
 
2942
        log_archive_do(sync, &dummy);
 
2943
 
 
2944
        if (sync == TRUE) {
 
2945
                /* Check again that enough was written to the archive */
 
2946
 
 
2947
                goto loop;
 
2948
        }
 
2949
}
 
2950
#endif /* UNIV_LOG_ARCHIVE */
 
2951
 
 
2952
/************************************************************************
 
2953
Checks that there is enough free space in the log to start a new query step.
 
2954
Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
 
2955
function may only be called if the calling thread owns no synchronization
 
2956
objects! */
 
2957
UNIV_INTERN
 
2958
void
 
2959
log_check_margins(void)
 
2960
/*===================*/
 
2961
{
 
2962
loop:
 
2963
        log_flush_margin();
 
2964
 
 
2965
        log_checkpoint_margin();
 
2966
 
 
2967
#ifdef UNIV_LOG_ARCHIVE
 
2968
        log_archive_margin();
 
2969
#endif /* UNIV_LOG_ARCHIVE */
 
2970
 
 
2971
        mutex_enter(&(log_sys->mutex));
 
2972
 
 
2973
        if (log_sys->check_flush_or_checkpoint) {
 
2974
 
 
2975
                mutex_exit(&(log_sys->mutex));
 
2976
 
 
2977
                goto loop;
 
2978
        }
 
2979
 
 
2980
        mutex_exit(&(log_sys->mutex));
 
2981
}
 
2982
 
 
2983
/********************************************************************
 
2984
Makes a checkpoint at the latest lsn and writes it to first page of each
 
2985
data file in the database, so that we know that the file spaces contain
 
2986
all modifications up to that lsn. This can only be called at database
 
2987
shutdown. This function also writes all log in log files to the log archive. */
 
2988
UNIV_INTERN
 
2989
void
 
2990
logs_empty_and_mark_files_at_shutdown(void)
 
2991
/*=======================================*/
 
2992
{
 
2993
        ib_uint64_t     lsn;
 
2994
        ulint           arch_log_no;
 
2995
 
 
2996
        if (srv_print_verbose_log) {
 
2997
                ut_print_timestamp(stderr);
 
2998
                fprintf(stderr, "  InnoDB: Starting shutdown...\n");
 
2999
        }
 
3000
        /* Wait until the master thread and all other operations are idle: our
 
3001
        algorithm only works if the server is idle at shutdown */
 
3002
 
 
3003
        srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
 
3004
loop:
 
3005
        os_thread_sleep(100000);
 
3006
 
 
3007
        mutex_enter(&kernel_mutex);
 
3008
 
 
3009
        /* We need the monitor threads to stop before we proceed with a
 
3010
        normal shutdown. In case of very fast shutdown, however, we can
 
3011
        proceed without waiting for monitor threads. */
 
3012
 
 
3013
        if (srv_fast_shutdown < 2
 
3014
           && (srv_error_monitor_active
 
3015
              || srv_lock_timeout_and_monitor_active)) {
 
3016
 
 
3017
                mutex_exit(&kernel_mutex);
 
3018
 
 
3019
                goto loop;
 
3020
        }
 
3021
 
 
3022
        /* Check that there are no longer transactions. We need this wait even
 
3023
        for the 'very fast' shutdown, because the InnoDB layer may have
 
3024
        committed or prepared transactions and we don't want to lose them. */
 
3025
 
 
3026
        if (trx_n_mysql_transactions > 0
 
3027
            || UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
 
3028
 
 
3029
                mutex_exit(&kernel_mutex);
 
3030
 
 
3031
                goto loop;
 
3032
        }
 
3033
 
 
3034
        if (srv_fast_shutdown == 2) {
 
3035
                /* In this fastest shutdown we do not flush the buffer pool:
 
3036
                it is essentially a 'crash' of the InnoDB server. Make sure
 
3037
                that the log is all flushed to disk, so that we can recover
 
3038
                all committed transactions in a crash recovery. We must not
 
3039
                write the lsn stamps to the data files, since at a startup
 
3040
                InnoDB deduces from the stamps if the previous shutdown was
 
3041
                clean. */
 
3042
 
 
3043
                log_buffer_flush_to_disk();
 
3044
 
 
3045
                return; /* We SKIP ALL THE REST !! */
 
3046
        }
 
3047
 
 
3048
        /* Check that the master thread is suspended */
 
3049
 
 
3050
        if (srv_n_threads_active[SRV_MASTER] != 0) {
 
3051
 
 
3052
                mutex_exit(&kernel_mutex);
 
3053
 
 
3054
                goto loop;
 
3055
        }
 
3056
 
 
3057
        mutex_exit(&kernel_mutex);
 
3058
 
 
3059
        mutex_enter(&(log_sys->mutex));
 
3060
 
 
3061
        if (log_sys->n_pending_checkpoint_writes
 
3062
#ifdef UNIV_LOG_ARCHIVE
 
3063
            || log_sys->n_pending_archive_ios
 
3064
#endif /* UNIV_LOG_ARCHIVE */
 
3065
            || log_sys->n_pending_writes) {
 
3066
 
 
3067
                mutex_exit(&(log_sys->mutex));
 
3068
 
 
3069
                goto loop;
 
3070
        }
 
3071
 
 
3072
        mutex_exit(&(log_sys->mutex));
 
3073
 
 
3074
        if (!buf_pool_check_no_pending_io()) {
 
3075
 
 
3076
                goto loop;
 
3077
        }
 
3078
 
 
3079
#ifdef UNIV_LOG_ARCHIVE
 
3080
        log_archive_all();
 
3081
#endif /* UNIV_LOG_ARCHIVE */
 
3082
 
 
3083
        log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
 
3084
 
 
3085
        mutex_enter(&(log_sys->mutex));
 
3086
 
 
3087
        lsn = log_sys->lsn;
 
3088
 
 
3089
        if (lsn != log_sys->last_checkpoint_lsn
 
3090
#ifdef UNIV_LOG_ARCHIVE
 
3091
            || (srv_log_archive_on
 
3092
                && lsn != log_sys->archived_lsn + LOG_BLOCK_HDR_SIZE)
 
3093
#endif /* UNIV_LOG_ARCHIVE */
 
3094
            ) {
 
3095
 
 
3096
                mutex_exit(&(log_sys->mutex));
 
3097
 
 
3098
                goto loop;
 
3099
        }
 
3100
 
 
3101
        arch_log_no = 0;
 
3102
 
 
3103
#ifdef UNIV_LOG_ARCHIVE
 
3104
        UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no;
 
3105
 
 
3106
        if (0 == UT_LIST_GET_FIRST(log_sys->log_groups)->archived_offset) {
 
3107
 
 
3108
                arch_log_no--;
 
3109
        }
 
3110
 
 
3111
        log_archive_close_groups(TRUE);
 
3112
#endif /* UNIV_LOG_ARCHIVE */
 
3113
 
 
3114
        mutex_exit(&(log_sys->mutex));
 
3115
 
 
3116
        mutex_enter(&kernel_mutex);
 
3117
        /* Check that the master thread has stayed suspended */
 
3118
        if (srv_n_threads_active[SRV_MASTER] != 0) {
 
3119
                fprintf(stderr,
 
3120
                        "InnoDB: Warning: the master thread woke up"
 
3121
                        " during shutdown\n");
 
3122
 
 
3123
                mutex_exit(&kernel_mutex);
 
3124
 
 
3125
                goto loop;
 
3126
        }
 
3127
        mutex_exit(&kernel_mutex);
 
3128
 
 
3129
        fil_flush_file_spaces(FIL_TABLESPACE);
 
3130
        fil_flush_file_spaces(FIL_LOG);
 
3131
 
 
3132
        /* The call fil_write_flushed_lsn_to_data_files() will pass the buffer
 
3133
        pool: therefore it is essential that the buffer pool has been
 
3134
        completely flushed to disk! (We do not call fil_write... if the
 
3135
        'very fast' shutdown is enabled.) */
 
3136
 
 
3137
        if (!buf_all_freed()) {
 
3138
 
 
3139
                goto loop;
 
3140
        }
 
3141
 
 
3142
        srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
 
3143
 
 
3144
        /* Make some checks that the server really is quiet */
 
3145
        ut_a(srv_n_threads_active[SRV_MASTER] == 0);
 
3146
        ut_a(buf_all_freed());
 
3147
        ut_a(lsn == log_sys->lsn);
 
3148
 
 
3149
        if (lsn < srv_start_lsn) {
 
3150
                fprintf(stderr,
 
3151
                        "InnoDB: Error: log sequence number"
 
3152
                        " at shutdown %"PRIu64"\n"
 
3153
                        "InnoDB: is lower than at startup %"PRIu64"!\n",
 
3154
                        lsn, srv_start_lsn);
 
3155
        }
 
3156
 
 
3157
        srv_shutdown_lsn = lsn;
 
3158
 
 
3159
        fil_write_flushed_lsn_to_data_files(lsn, arch_log_no);
 
3160
 
 
3161
        fil_flush_file_spaces(FIL_TABLESPACE);
 
3162
 
 
3163
        fil_close_all_files();
 
3164
 
 
3165
        /* Make some checks that the server really is quiet */
 
3166
        ut_a(srv_n_threads_active[SRV_MASTER] == 0);
 
3167
        ut_a(buf_all_freed());
 
3168
        ut_a(lsn == log_sys->lsn);
 
3169
}
 
3170
 
 
3171
/**********************************************************
 
3172
Checks by parsing that the catenated log segment for a single mtr is
 
3173
consistent. */
 
3174
UNIV_INTERN
 
3175
ibool
 
3176
log_check_log_recs(
 
3177
/*===============*/
 
3178
        byte*           buf,            /* in: pointer to the start of
 
3179
                                        the log segment in the
 
3180
                                        log_sys->buf log buffer */
 
3181
        ulint           len,            /* in: segment length in bytes */
 
3182
        ib_uint64_t     buf_start_lsn)  /* in: buffer start lsn */
 
3183
{
 
3184
        ib_uint64_t     contiguous_lsn;
 
3185
        ib_uint64_t     scanned_lsn;
 
3186
        byte*           start;
 
3187
        byte*           end;
 
3188
        byte*           buf1;
 
3189
        byte*           scan_buf;
 
3190
 
 
3191
        ut_ad(mutex_own(&(log_sys->mutex)));
 
3192
 
 
3193
        if (len == 0) {
 
3194
 
 
3195
                return(TRUE);
 
3196
        }
 
3197
 
 
3198
        start = ut_align_down(buf, OS_FILE_LOG_BLOCK_SIZE);
 
3199
        end = ut_align(buf + len, OS_FILE_LOG_BLOCK_SIZE);
 
3200
 
 
3201
        buf1 = mem_alloc((end - start) + OS_FILE_LOG_BLOCK_SIZE);
 
3202
        scan_buf = ut_align(buf1, OS_FILE_LOG_BLOCK_SIZE);
 
3203
 
 
3204
        ut_memcpy(scan_buf, start, end - start);
 
3205
 
 
3206
        recv_scan_log_recs(TRUE,
 
3207
                           (buf_pool->curr_size
 
3208
                            - recv_n_pool_free_frames) * UNIV_PAGE_SIZE,
 
3209
                           FALSE, scan_buf, end - start,
 
3210
                           ut_uint64_align_down(buf_start_lsn,
 
3211
                                                OS_FILE_LOG_BLOCK_SIZE),
 
3212
                           &contiguous_lsn, &scanned_lsn);
 
3213
 
 
3214
        ut_a(scanned_lsn == buf_start_lsn + len);
 
3215
        ut_a(recv_sys->recovered_lsn == scanned_lsn);
 
3216
 
 
3217
        mem_free(buf1);
 
3218
 
 
3219
        return(TRUE);
 
3220
}
 
3221
 
 
3222
/**********************************************************
 
3223
Peeks the current lsn. */
 
3224
UNIV_INTERN
 
3225
ibool
 
3226
log_peek_lsn(
 
3227
/*=========*/
 
3228
                                /* out: TRUE if success, FALSE if
 
3229
                                could not get the log system mutex */
 
3230
        ib_uint64_t*    lsn)    /* out: if returns TRUE, current lsn is here */
 
3231
{
 
3232
        if (0 == mutex_enter_nowait(&(log_sys->mutex))) {
 
3233
                *lsn = log_sys->lsn;
 
3234
 
 
3235
                mutex_exit(&(log_sys->mutex));
 
3236
 
 
3237
                return(TRUE);
 
3238
        }
 
3239
 
 
3240
        return(FALSE);
 
3241
}
 
3242
 
 
3243
/**********************************************************
 
3244
Prints info of the log. */
 
3245
UNIV_INTERN
 
3246
void
 
3247
log_print(
 
3248
/*======*/
 
3249
        FILE*   file)   /* in: file where to print */
 
3250
{
 
3251
        double  time_elapsed;
 
3252
        time_t  current_time;
 
3253
 
 
3254
        mutex_enter(&(log_sys->mutex));
 
3255
 
 
3256
        fprintf(file,
 
3257
                "Log sequence number %"PRIu64"\n"
 
3258
                "Log flushed up to   %"PRIu64"\n"
 
3259
                "Last checkpoint at  %"PRIu64"\n",
 
3260
                log_sys->lsn,
 
3261
                log_sys->flushed_to_disk_lsn,
 
3262
                log_sys->last_checkpoint_lsn);
 
3263
 
 
3264
        current_time = time(NULL);
 
3265
 
 
3266
        time_elapsed = 0.001 + difftime(current_time,
 
3267
                                        log_sys->last_printout_time);
 
3268
        fprintf(file,
 
3269
                "%lu pending log writes, %lu pending chkp writes\n"
 
3270
                "%lu log i/o's done, %.2f log i/o's/second\n",
 
3271
                (ulong) log_sys->n_pending_writes,
 
3272
                (ulong) log_sys->n_pending_checkpoint_writes,
 
3273
                (ulong) log_sys->n_log_ios,
 
3274
                ((log_sys->n_log_ios - log_sys->n_log_ios_old)
 
3275
                 / time_elapsed));
 
3276
 
 
3277
        log_sys->n_log_ios_old = log_sys->n_log_ios;
 
3278
        log_sys->last_printout_time = current_time;
 
3279
 
 
3280
        mutex_exit(&(log_sys->mutex));
 
3281
}
 
3282
 
 
3283
/**************************************************************************
 
3284
Refreshes the statistics used to print per-second averages. */
 
3285
UNIV_INTERN
 
3286
void
 
3287
log_refresh_stats(void)
 
3288
/*===================*/
 
3289
{
 
3290
        log_sys->n_log_ios_old = log_sys->n_log_ios;
 
3291
        log_sys->last_printout_time = time(NULL);
 
3292
}