~ubuntu-branches/ubuntu/trusty/drizzle/trusty


Viewing changes to plugin/innobase/buf/buf0flu.c

  • Committer: Bazaar Package Importer
  • Author(s): Monty Taylor
  • Date: 2010-12-09 06:02:39 UTC
  • mto: This revision was merged to the branch mainline in revision 5.
  • Revision ID: james.westby@ubuntu.com-20101209060239-t0ujftvcvd558yno
Tags: upstream-2010.12.05
Import upstream version 2010.12.05

=== modified file 'plugin/innobase/buf/buf0flu.c'
@@ -83,26 +83,28 @@
 @return TRUE if ok */
 static
 ibool
-buf_flush_validate_low(void);
-/*========================*/
+buf_flush_validate_low(
+/*===================*/
+        buf_pool_t*     buf_pool);      /*!< in: Buffer pool instance */
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
-/********************************************************************//**
+/******************************************************************//**
 Insert a block in the flush_rbt and returns a pointer to its
 predecessor or NULL if no predecessor. The ordering is maintained
 on the basis of the <oldest_modification, space, offset> key.
-@return pointer to the predecessor or NULL if no predecessor. */
+@return pointer to the predecessor or NULL if no predecessor. */
 static
 buf_page_t*
 buf_flush_insert_in_flush_rbt(
 /*==========================*/
-        buf_page_t*     bpage)          /*!< in: bpage to be inserted. */
+        buf_page_t*     bpage)  /*!< in: bpage to be inserted. */
 {
-        buf_page_t*             prev = NULL;
         const ib_rbt_node_t*    c_node;
         const ib_rbt_node_t*    p_node;
+        buf_page_t*             prev = NULL;
+        buf_pool_t*             buf_pool = buf_pool_from_bpage(bpage);
 
-        ut_ad(buf_pool_mutex_own());
+        ut_ad(buf_flush_list_mutex_own(buf_pool));
 
         /* Insert this buffer into the rbt. */
         c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
@@ -112,30 +114,33 @@
         p_node = rbt_prev(buf_pool->flush_rbt, c_node);
 
         if (p_node != NULL) {
-                prev = *rbt_value(buf_page_t*, p_node);
+                buf_page_t**    value;
+                value = rbt_value(buf_page_t*, p_node);
+                prev = *value;
                 ut_a(prev != NULL);
         }
 
         return(prev);
 }
 
-/********************************************************************//**
+/*********************************************************//**
 Delete a bpage from the flush_rbt. */
 static
 void
 buf_flush_delete_from_flush_rbt(
 /*============================*/
-        buf_page_t*     bpage)          /*!< in: bpage to be removed. */
+        buf_page_t*     bpage)  /*!< in: bpage to be removed. */
 {
-
-        ibool   ret = FALSE;
-
-        ut_ad(buf_pool_mutex_own());
+        ibool           ret = FALSE;
+        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+
+        ut_ad(buf_flush_list_mutex_own(buf_pool));
+
         ret = rbt_delete(buf_pool->flush_rbt, &bpage);
         ut_ad(ret);
 }
 
-/********************************************************************//**
+/*****************************************************************//**
 Compare two modified blocks in the buffer pool. The key for comparison
 is:
 key = <oldest_modification, space, offset>
@@ -144,7 +149,7 @@
 Note that for the purpose of flush_rbt, we only need to order blocks
 on the oldest_modification. The other two fields are used to uniquely
 identify the blocks.
-@return < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
+@return  < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
 static
 int
 buf_flush_block_cmp(
@@ -152,29 +157,24 @@
         const void*     p1,             /*!< in: block1 */
         const void*     p2)             /*!< in: block2 */
 {
-        int             ret;
-        const buf_page_t* b1;
-        const buf_page_t* b2;
-
-        ut_ad(p1 != NULL);
-        ut_ad(p2 != NULL);
-
-        b1 = *(const buf_page_t**) p1;
-        b2 = *(const buf_page_t**) p2;
+        int                     ret;
+        const buf_page_t*       b1 = *(const buf_page_t**) p1;
+        const buf_page_t*       b2 = *(const buf_page_t**) p2;
+#ifdef UNIV_DEBUG
+        buf_pool_t*             buf_pool = buf_pool_from_bpage(b1);
+#endif /* UNIV_DEBUG */
 
         ut_ad(b1 != NULL);
         ut_ad(b2 != NULL);
 
+        ut_ad(buf_flush_list_mutex_own(buf_pool));
+
         ut_ad(b1->in_flush_list);
         ut_ad(b2->in_flush_list);
 
-        if (b2->oldest_modification
-            > b1->oldest_modification) {
+        if (b2->oldest_modification > b1->oldest_modification) {
                 return(1);
-        }
-
-        if (b2->oldest_modification
-            < b1->oldest_modification) {
+        } else if (b2->oldest_modification < b1->oldest_modification) {
                 return(-1);
         }
 
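Note: this hunk only shows the oldest_modification legs of the comparator; per the key description above, space and offset break ties so that two distinct blocks never compare equal. A stand-alone sketch of the same ordering convention, using simplified stand-in types rather than Drizzle's buf_page_t or the ib_rbt_t tree:

#include <stdio.h>
#include <stdlib.h>

typedef unsigned long long ib_uint64_t;
typedef unsigned long      ulint;

typedef struct {
        ib_uint64_t     oldest_modification;
        ulint           space;          /* tablespace id (tie-breaker) */
        ulint           offset;         /* page number (tie-breaker) */
} page_key_t;

/* Same convention as buf_flush_block_cmp: the sign of the result
tracks b2 relative to b1, so a standard sort places the largest
oldest_modification first, with <space, offset> breaking ties. */
static int
page_key_cmp(const void* p1, const void* p2)
{
        const page_key_t*       b1 = (const page_key_t*) p1;
        const page_key_t*       b2 = (const page_key_t*) p2;

        if (b2->oldest_modification > b1->oldest_modification) {
                return(1);
        } else if (b2->oldest_modification < b1->oldest_modification) {
                return(-1);
        } else if (b2->space != b1->space) {
                return(b2->space > b1->space ? 1 : -1);
        } else if (b2->offset != b1->offset) {
                return(b2->offset > b1->offset ? 1 : -1);
        }

        return(0);
}

int
main(void)
{
        page_key_t      keys[] = {{7, 0, 3}, {9, 1, 4}, {7, 0, 1}};
        int             i;

        qsort(keys, 3, sizeof(keys[0]), page_key_cmp);

        for (i = 0; i < 3; i++) {
                printf("lsn=%llu space=%lu page=%lu\n",
                       keys[i].oldest_modification,
                       keys[i].space, keys[i].offset);
        }

        return(0);
}

Sorting with this comparator yields newest-first order, which matches the arrangement the flush list maintains.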
@@ -194,12 +194,21 @@
 buf_flush_init_flush_rbt(void)
 /*==========================*/
 {
-        buf_pool_mutex_enter();
-
-        /* Create red black tree for speedy insertions in flush list. */
-        buf_pool->flush_rbt = rbt_create(sizeof(buf_page_t*),
-                                         buf_flush_block_cmp);
-        buf_pool_mutex_exit();
+        ulint   i;
+
+        for (i = 0; i < srv_buf_pool_instances; i++) {
+                buf_pool_t*     buf_pool;
+
+                buf_pool = buf_pool_from_array(i);
+
+                buf_flush_list_mutex_enter(buf_pool);
+
+                /* Create red black tree for speedy insertions in flush list. */
+                buf_pool->flush_rbt = rbt_create(
+                        sizeof(buf_page_t*), buf_flush_block_cmp);
+
+                buf_flush_list_mutex_exit(buf_pool);
+        }
 }
 
 /********************************************************************//**
@@ -209,16 +218,35 @@
 buf_flush_free_flush_rbt(void)
 /*==========================*/
 {
-        buf_pool_mutex_enter();
-
+        ulint   i;
+
+        for (i = 0; i < srv_buf_pool_instances; i++) {
+                buf_pool_t*     buf_pool;
+
+                buf_pool = buf_pool_from_array(i);
+
+                buf_flush_list_mutex_enter(buf_pool);
+
+#ifdef UNIV_DEBUG_VALGRIND
+        {
+                ulint   zip_size = buf_block_get_zip_size(block);
+
+                if (UNIV_UNLIKELY(zip_size)) {
+                        UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
+                } else {
+                        UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
+                }
+        }
+#endif /* UNIV_DEBUG_VALGRIND */
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-        ut_a(buf_flush_validate_low());
+                ut_a(buf_flush_validate_low(buf_pool));
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
-        rbt_free(buf_pool->flush_rbt);
-        buf_pool->flush_rbt = NULL;
+                rbt_free(buf_pool->flush_rbt);
+                buf_pool->flush_rbt = NULL;
 
-        buf_pool_mutex_exit();
+                buf_flush_list_mutex_exit(buf_pool);
+        }
 }
 
 /********************************************************************//**
@@ -227,31 +255,51 @@
 void
 buf_flush_insert_into_flush_list(
 /*=============================*/
-        buf_block_t*    block)  /*!< in/out: block which is modified */
+        buf_pool_t*     buf_pool,       /*!< buffer pool instance */
+        buf_block_t*    block,          /*!< in/out: block which is modified */
+        ib_uint64_t     lsn)            /*!< in: oldest modification */
 {
-        ut_ad(buf_pool_mutex_own());
+        ut_ad(!buf_pool_mutex_own(buf_pool));
+        ut_ad(log_flush_order_mutex_own());
+        ut_ad(mutex_own(&block->mutex));
+
+        buf_flush_list_mutex_enter(buf_pool);
+
         ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
               || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
-                  <= block->page.oldest_modification));
+                  <= lsn));
 
         /* If we are in the recovery then we need to update the flush
         red-black tree as well. */
         if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
-                buf_flush_insert_sorted_into_flush_list(block);
+                buf_flush_list_mutex_exit(buf_pool);
+                buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
                 return;
         }
 
         ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-        ut_ad(block->page.in_LRU_list);
-        ut_ad(block->page.in_page_hash);
-        ut_ad(!block->page.in_zip_hash);
         ut_ad(!block->page.in_flush_list);
+
         ut_d(block->page.in_flush_list = TRUE);
+        block->page.oldest_modification = lsn;
         UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
 
+#ifdef UNIV_DEBUG_VALGRIND
+        {
+                ulint   zip_size = buf_block_get_zip_size(block);
+
+                if (UNIV_UNLIKELY(zip_size)) {
+                        UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
+                } else {
+                        UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
+                }
+        }
+#endif /* UNIV_DEBUG_VALGRIND */
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-        ut_a(buf_flush_validate_low());
+        ut_a(buf_flush_validate_low(buf_pool));
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+        buf_flush_list_mutex_exit(buf_pool);
 }
 
 /********************************************************************//**
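Note: with the per-instance rework the caller now passes the oldest-modification LSN explicitly, and the assertion above enforces the flush-list invariant: entries are kept in descending order of oldest_modification, so the current head may never be newer than an incoming page. A minimal stand-alone illustration of that head-insert invariant (plain linked list, not the UT_LIST machinery):

#include <assert.h>
#include <stdio.h>

typedef unsigned long long ib_uint64_t;

typedef struct node {
        ib_uint64_t     oldest_modification;
        struct node*    next;
} node_t;

static node_t*  flush_list_head = NULL;

/* Mirror of the ut_ad() in buf_flush_insert_into_flush_list: the head
holds the largest oldest_modification, so a newly dirtied page, whose
lsn is the newest, can always go in front. */
static void
flush_list_push_front(node_t* node, ib_uint64_t lsn)
{
        assert(flush_list_head == NULL
               || flush_list_head->oldest_modification <= lsn);

        node->oldest_modification = lsn;
        node->next = flush_list_head;
        flush_list_head = node;
}

int
main(void)
{
        node_t  a, b;

        flush_list_push_front(&a, 100);
        flush_list_push_front(&b, 250);

        printf("head lsn=%llu\n", flush_list_head->oldest_modification);
        return(0);
}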
@@ -262,19 +310,63 @@
 void
 buf_flush_insert_sorted_into_flush_list(
 /*====================================*/
-        buf_block_t*    block)  /*!< in/out: block which is modified */
+        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+        buf_block_t*    block,          /*!< in/out: block which is modified */
+        ib_uint64_t     lsn)            /*!< in: oldest modification */
 {
         buf_page_t*     prev_b;
         buf_page_t*     b;
 
-        ut_ad(buf_pool_mutex_own());
+        ut_ad(!buf_pool_mutex_own(buf_pool));
+        ut_ad(log_flush_order_mutex_own());
+        ut_ad(mutex_own(&block->mutex));
         ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
+        buf_flush_list_mutex_enter(buf_pool);
+
+        /* The field in_LRU_list is protected by buf_pool_mutex, which
+        we are not holding.  However, while a block is in the flush
+        list, it is dirty and cannot be discarded, not from the
+        page_hash or from the LRU list.  At most, the uncompressed
+        page frame of a compressed block may be discarded or created
+        (copying the block->page to or from a buf_page_t that is
+        dynamically allocated from buf_buddy_alloc()).  Because those
+        transitions hold block->mutex and the flush list mutex (via
+        buf_flush_relocate_on_flush_list()), there is no possibility
+        of a race condition in the assertions below. */
         ut_ad(block->page.in_LRU_list);
         ut_ad(block->page.in_page_hash);
+        /* buf_buddy_block_register() will take a block in the
+        BUF_BLOCK_MEMORY state, not a file page. */
         ut_ad(!block->page.in_zip_hash);
+
         ut_ad(!block->page.in_flush_list);
         ut_d(block->page.in_flush_list = TRUE);
+        block->page.oldest_modification = lsn;
+
+#ifdef UNIV_DEBUG_VALGRIND
+        {
+                ulint   zip_size = buf_block_get_zip_size(block);
+
+                if (UNIV_UNLIKELY(zip_size)) {
+                        UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
+                } else {
+                        UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
+                }
+        }
+#endif /* UNIV_DEBUG_VALGRIND */
+
+#ifdef UNIV_DEBUG_VALGRIND
+        {
+                ulint   zip_size = buf_block_get_zip_size(block);
+
+                if (UNIV_UNLIKELY(zip_size)) {
+                        UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
+                } else {
+                        UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
+                }
+        }
+#endif /* UNIV_DEBUG_VALGRIND */
 
         prev_b = NULL;
 
@@ -308,8 +400,10 @@
         }
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-        ut_a(buf_flush_validate_low());
+        ut_a(buf_flush_validate_low(buf_pool));
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+        buf_flush_list_mutex_exit(buf_pool);
 }
 
 /********************************************************************//**
@@ -323,7 +417,10 @@
         buf_page_t*     bpage)  /*!< in: buffer control block, must be
                                 buf_page_in_file(bpage) and in the LRU list */
 {
-        ut_ad(buf_pool_mutex_own());
+#ifdef UNIV_DEBUG
+        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+        ut_ad(buf_pool_mutex_own(buf_pool));
+#endif
         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
         ut_ad(bpage->in_LRU_list);
 
@@ -356,8 +453,11 @@
                                 buf_page_in_file(bpage) */
         enum buf_flush  flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
 {
+#ifdef UNIV_DEBUG
+        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+        ut_ad(buf_pool_mutex_own(buf_pool));
+#endif
         ut_a(buf_page_in_file(bpage));
-        ut_ad(buf_pool_mutex_own());
         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
         ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
 
@@ -390,13 +490,17 @@
 /*=============*/
         buf_page_t*     bpage)  /*!< in: pointer to the block in question */
 {
-        ut_ad(buf_pool_mutex_own());
+        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+
+        ut_ad(buf_pool_mutex_own(buf_pool));
         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
         ut_ad(bpage->in_flush_list);
 
+        buf_flush_list_mutex_enter(buf_pool);
+
         switch (buf_page_get_state(bpage)) {
         case BUF_BLOCK_ZIP_PAGE:
-                /* clean compressed pages should not be on the flush list */
+                /* Clean compressed pages should not be on the flush list */
         case BUF_BLOCK_ZIP_FREE:
         case BUF_BLOCK_NOT_USED:
         case BUF_BLOCK_READY_FOR_USE:
@@ -414,7 +518,7 @@
                 break;
         }
 
-        /* If the flush_rbt is active then delete from it as well. */
+        /* If the flush_rbt is active then delete from there as well. */
         if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
                 buf_flush_delete_from_flush_rbt(bpage);
         }
@@ -425,14 +529,24 @@
 
         bpage->oldest_modification = 0;
 
-        ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
-                              ut_ad(ut_list_node_313->in_flush_list)));
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+        ut_a(buf_flush_validate_low(buf_pool));
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+        buf_flush_list_mutex_exit(buf_pool);
 }
 
-/********************************************************************//**
+/*******************************************************************//**
 Relocates a buffer control block on the flush_list.
-Note that it is assumed that the contents of bpage has already been
-copied to dpage. */
+Note that it is assumed that the contents of bpage have already been
+copied to dpage.
+IMPORTANT: When this function is called bpage and dpage are not
+exact copies of each other. For example, they both will have different
+::state. Also the ::list pointers in dpage may be stale. We need to
+use the current list node (bpage) to do the list manipulation because
+the list pointers could have changed between the time that we copied
+the contents of bpage to the dpage and the flush list manipulation
+below. */
 UNIV_INTERN
 void
 buf_flush_relocate_on_flush_list(
@@ -440,13 +554,25 @@
         buf_page_t*     bpage,  /*!< in/out: control block being moved */
         buf_page_t*     dpage)  /*!< in/out: destination block */
 {
-        buf_page_t* prev;
-        buf_page_t* prev_b = NULL;
+        buf_page_t*     prev;
+        buf_page_t*     prev_b = NULL;
+        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
 
-        ut_ad(buf_pool_mutex_own());
+        ut_ad(buf_pool_mutex_own(buf_pool));
+        /* Must reside in the same buffer pool. */
+        ut_ad(buf_pool == buf_pool_from_bpage(dpage));
 
         ut_ad(mutex_own(buf_page_get_mutex(bpage)));
 
+        buf_flush_list_mutex_enter(buf_pool);
+
+        /* FIXME: At this point we have both buf_pool and flush_list
+        mutexes. Theoretically removal of a block from flush list is
+        only covered by flush_list mutex but currently we do
+        have buf_pool mutex in buf_flush_remove() therefore this block
+        is guaranteed to be in the flush list. We need to check if
+        this will work without the assumption of block removing code
+        having the buf_pool mutex. */
         ut_ad(bpage->in_flush_list);
         ut_ad(dpage->in_flush_list);
 
@@ -482,8 +608,10 @@
         ut_a(!buf_pool->flush_rbt || prev_b == prev);
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-        ut_a(buf_flush_validate_low());
+        ut_a(buf_flush_validate_low(buf_pool));
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+        buf_flush_list_mutex_exit(buf_pool);
 }
 
 /********************************************************************//**
@@ -495,6 +623,7 @@
         buf_page_t*     bpage)  /*!< in: pointer to the block in question */
 {
         enum buf_flush  flush_type;
+        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
 
         ut_ad(bpage);
 
@@ -515,8 +644,8 @@
         /* fprintf(stderr, "n pending flush %lu\n",
         buf_pool->n_flush[flush_type]); */
 
-        if ((buf_pool->n_flush[flush_type] == 0)
-            && (buf_pool->init_flush[flush_type] == FALSE)) {
+        if (buf_pool->n_flush[flush_type] == 0
+            && buf_pool->init_flush[flush_type] == FALSE) {
 
                 /* The running flush batch has ended */
 
@@ -809,6 +938,7 @@
         zip_size = buf_page_get_zip_size(bpage);
 
         if (UNIV_UNLIKELY(zip_size)) {
+                UNIV_MEM_ASSERT_RW(bpage->zip.data, zip_size);
                 /* Copy the compressed page and clear the rest. */
                 memcpy(trx_doublewrite->write_buf
                        + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
@@ -818,6 +948,8 @@
                        + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
         } else {
                 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
+                UNIV_MEM_ASSERT_RW(((buf_block_t*) bpage)->frame,
+                                   UNIV_PAGE_SIZE);
 
                 memcpy(trx_doublewrite->write_buf
                        + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
@@ -873,8 +1005,8 @@
                 case FIL_PAGE_TYPE_ZBLOB:
                 case FIL_PAGE_TYPE_ZBLOB2:
                 case FIL_PAGE_INDEX:
-                        mach_write_ull(page_zip->data
-                                       + FIL_PAGE_LSN, newest_lsn);
+                        mach_write_to_8(page_zip->data
+                                        + FIL_PAGE_LSN, newest_lsn);
                         memset(page_zip->data + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
                         mach_write_to_4(page_zip->data
                                         + FIL_PAGE_SPACE_OR_CHKSUM,
@@ -896,10 +1028,10 @@
         }
 
         /* Write the newest modification lsn to the page header and trailer */
-        mach_write_ull(page + FIL_PAGE_LSN, newest_lsn);
+        mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn);
 
-        mach_write_ull(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
-                       newest_lsn);
+        mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+                        newest_lsn);
 
         /* Store the new formula checksum */
 
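Note: mach_write_ull is replaced by mach_write_to_8 throughout this file; both store a 64-bit LSN in big-endian byte order, once in the FIL page header and once in the page trailer. A stand-alone equivalent of an 8-byte big-endian store, written from scratch here rather than taken from InnoDB's mach0data:

#include <stdio.h>

typedef unsigned char      byte;
typedef unsigned long long ib_uint64_t;

/* Store n at b[0..7], most significant byte first, the layout the
page checksum and recovery code expect for FIL_PAGE_LSN. */
static void
write_to_8(byte* b, ib_uint64_t n)
{
        int     i;

        for (i = 0; i < 8; i++) {
                b[i] = (byte) (n >> (56 - 8 * i));
        }
}

int
main(void)
{
        byte    buf[8];
        int     i;

        write_to_8(buf, 0x0102030405060708ULL);

        for (i = 0; i < 8; i++) {
                printf("%02x ", buf[i]);        /* prints 01 02 ... 08 */
        }
        printf("\n");
        return(0);
}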
@@ -932,6 +1064,12 @@
 {
         ulint   zip_size        = buf_page_get_zip_size(bpage);
         page_t* frame           = NULL;
+
+#ifdef UNIV_DEBUG
+        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+        ut_ad(!buf_pool_mutex_own(buf_pool));
+#endif
+
 #ifdef UNIV_LOG_DEBUG
         static ibool univ_log_debug_warned;
 #endif /* UNIV_LOG_DEBUG */
@@ -943,7 +1081,8 @@
         io_fixed and oldest_modification != 0.  Thus, it cannot be
         relocated in the buffer pool or removed from flush_list or
         LRU_list. */
-        ut_ad(!buf_pool_mutex_own());
+        ut_ad(!buf_pool_mutex_own(buf_pool));
+        ut_ad(!buf_flush_list_mutex_own(buf_pool));
         ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
         ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
         ut_ad(bpage->oldest_modification != 0);
@@ -980,8 +1119,8 @@
                         ut_a(mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM)
                              == page_zip_calc_checksum(frame, zip_size));
                 }
-                mach_write_ull(frame + FIL_PAGE_LSN,
-                               bpage->newest_modification);
+                mach_write_to_8(frame + FIL_PAGE_LSN,
+                                bpage->newest_modification);
                 memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
                 break;
         case BUF_BLOCK_FILE_PAGE:
@@ -1012,13 +1151,14 @@
 Writes a flushable page asynchronously from the buffer pool to a file.
 NOTE: in simulated aio we must call
 os_aio_simulated_wake_handler_threads after we have posted a batch of
-writes! NOTE: buf_pool_mutex and buf_page_get_mutex(bpage) must be
+writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be
 held upon entering this function, and they will be released by this
 function. */
 static
 void
 buf_flush_page(
 /*===========*/
+        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
         buf_page_t*     bpage,          /*!< in: buffer control block */
         enum buf_flush  flush_type)     /*!< in: BUF_FLUSH_LRU
                                         or BUF_FLUSH_LIST */
@@ -1027,7 +1167,7 @@
         ibool           is_uncompressed;
 
         ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
-        ut_ad(buf_pool_mutex_own());
+        ut_ad(buf_pool_mutex_own(buf_pool));
         ut_ad(buf_page_in_file(bpage));
 
         block_mutex = buf_page_get_mutex(bpage);
@@ -1047,7 +1187,7 @@
         buf_pool->n_flush[flush_type]++;
 
         is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
-        ut_ad(is_uncompressed == (block_mutex != &buf_pool_zip_mutex));
+        ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex));
 
         switch (flush_type) {
                 ibool   is_s_latched;
@@ -1063,7 +1203,7 @@
                 }
 
                 mutex_exit(block_mutex);
-                buf_pool_mutex_exit();
+                buf_pool_mutex_exit(buf_pool);
 
                 /* Even though bpage is not protected by any mutex at
                 this point, it is safe to access bpage, because it is
@@ -1100,7 +1240,7 @@
                 immediately. */
 
                 mutex_exit(block_mutex);
-                buf_pool_mutex_exit();
+                buf_pool_mutex_exit(buf_pool);
                 break;
 
         default:
@@ -1131,28 +1271,37 @@
 /*====================*/
         ulint           space,          /*!< in: space id */
         ulint           offset,         /*!< in: page offset */
-        enum buf_flush  flush_type)     /*!< in: BUF_FLUSH_LRU or
+        enum buf_flush  flush_type,     /*!< in: BUF_FLUSH_LRU or
                                         BUF_FLUSH_LIST */
+        ulint           n_flushed,      /*!< in: number of pages
+                                        flushed so far in this batch */
+        ulint           n_to_flush)     /*!< in: maximum number of pages
+                                        we are allowed to flush */
 {
-        buf_page_t*     bpage;
-        ulint           low, high;
-        ulint           count           = 0;
         ulint           i;
+        ulint           low;
+        ulint           high;
+        ulint           count = 0;
+        buf_pool_t*     buf_pool = buf_pool_get(space, offset);
 
         ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
 
         if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
-                /* If there is little space, it is better not to flush any
-                block except from the end of the LRU list */
+                /* If there is little space, it is better not to flush
+                any block except from the end of the LRU list */
 
                 low = offset;
                 high = offset + 1;
         } else {
-                /* When flushed, dirty blocks are searched in neighborhoods of
-                this size, and flushed along with the original page. */
+                /* When flushed, dirty blocks are searched in
+                neighborhoods of this size, and flushed along with the
+                original page. */
 
-                ulint   buf_flush_area  = ut_min(BUF_READ_AHEAD_AREA,
-                                                 buf_pool->curr_size / 16);
+                ulint   buf_flush_area;
+
+                buf_flush_area  = ut_min(
+                        BUF_READ_AHEAD_AREA(buf_pool),
+                        buf_pool->curr_size / 16);
 
                 low = (offset / buf_flush_area) * buf_flush_area;
                 high = (offset / buf_flush_area + 1) * buf_flush_area;
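Note: the [low, high) neighborhood is the buf_flush_area-aligned window around the victim page: low rounds offset down to a multiple of buf_flush_area and high is the next boundary, so with buf_flush_area = 64 and offset = 200 the window is [192, 256). A quick stand-alone check of that arithmetic:

#include <stdio.h>

typedef unsigned long ulint;

int
main(void)
{
        ulint   buf_flush_area = 64;
        ulint   offset;

        for (offset = 0; offset < 300; offset += 100) {
                ulint   low = (offset / buf_flush_area) * buf_flush_area;
                ulint   high = (offset / buf_flush_area + 1) * buf_flush_area;

                /* prints [0, 64), [64, 128), [192, 256) */
                printf("offset %lu -> [%lu, %lu)\n", offset, low, high);
        }

        return(0);
}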
@@ -1164,14 +1313,35 @@
                 high = fil_space_get_size(space);
         }
 
-        buf_pool_mutex_enter();
-
         for (i = low; i < high; i++) {
 
-                bpage = buf_page_hash_get(space, i);
+                buf_page_t*     bpage;
+
+                if ((count + n_flushed) >= n_to_flush) {
+
+                        /* We have already flushed enough pages and
+                        should call it a day. There is, however, one
+                        exception. If the page whose neighbors we
+                        are flushing has not been flushed yet then
+                        we'll try to flush the victim that we
+                        selected originally. */
+                        if (i <= offset) {
+                                i = offset;
+                        } else {
+                                break;
+                        }
+                }
+
+                buf_pool = buf_pool_get(space, i);
+
+                buf_pool_mutex_enter(buf_pool);
+
+                /* We only want to flush pages from this buffer pool. */
+                bpage = buf_page_hash_get(buf_pool, space, i);
 
                 if (!bpage) {
 
+                        buf_pool_mutex_exit(buf_pool);
                         continue;
                 }
 
@@ -1190,25 +1360,237 @@
                         if (buf_flush_ready_for_flush(bpage, flush_type)
                             && (i == offset || !bpage->buf_fix_count)) {
                                 /* We only try to flush those
-                                neighbors != offset where the buf fix count is
-                                zero, as we then know that we probably can
-                                latch the page without a semaphore wait.
-                                Semaphore waits are expensive because we must
-                                flush the doublewrite buffer before we start
+                                neighbors != offset where the buf fix
+                                count is zero, as we then know that we
+                                probably can latch the page without a
+                                semaphore wait. Semaphore waits are
+                                expensive because we must flush the
+                                doublewrite buffer before we start
                                 waiting. */
 
-                                buf_flush_page(bpage, flush_type);
+                                buf_flush_page(buf_pool, bpage, flush_type);
                                 ut_ad(!mutex_own(block_mutex));
+                                ut_ad(!buf_pool_mutex_own(buf_pool));
                                 count++;
-
-                                buf_pool_mutex_enter();
+                                continue;
                         } else {
                                 mutex_exit(block_mutex);
                         }
                 }
-        }
-
-        buf_pool_mutex_exit();
+                buf_pool_mutex_exit(buf_pool);
+        }
+
+        return(count);
+}
+
+/********************************************************************//**
+Check if the block is modified and ready for flushing. If the the block
+is ready to flush then flush the page and try o flush its neighbors.
+
+@return TRUE if buf_pool mutex was not released during this function.
+This does not guarantee that some pages were written as well.
+Number of pages written are incremented to the count. */
+static
+ibool
+buf_flush_page_and_try_neighbors(
+/*=============================*/
+        buf_page_t*     bpage,          /*!< in: buffer control block,
+                                        must be
+                                        buf_page_in_file(bpage) */
+        enum buf_flush  flush_type,     /*!< in: BUF_FLUSH_LRU
+                                        or BUF_FLUSH_LIST */
+        ulint           n_to_flush,     /*!< in: number of pages to
+                                        flush */
+        ulint*          count)          /*!< in/out: number of pages
+                                        flushed */
+{
+        mutex_t*        block_mutex;
+        ibool           flushed = FALSE;
+#ifdef UNIV_DEBUG
+        buf_pool_t*     buf_pool = buf_pool_from_bpage(bpage);
+#endif /* UNIV_DEBUG */
+
+        ut_ad(buf_pool_mutex_own(buf_pool));
+
+        block_mutex = buf_page_get_mutex(bpage);
+        mutex_enter(block_mutex);
+
+        ut_a(buf_page_in_file(bpage));
+
+        if (buf_flush_ready_for_flush(bpage, flush_type)) {
+                ulint           space;
+                ulint           offset;
+                buf_pool_t*     buf_pool;
+
+                buf_pool = buf_pool_from_bpage(bpage);
+
+                buf_pool_mutex_exit(buf_pool);
+
+                /* These fields are protected by both the
+                buffer pool mutex and block mutex. */
+                space = buf_page_get_space(bpage);
+                offset = buf_page_get_page_no(bpage);
+
+                mutex_exit(block_mutex);
+
+                /* Try to flush also all the neighbors */
+                *count += buf_flush_try_neighbors(space,
+                                                  offset,
+                                                  flush_type,
+                                                  *count,
+                                                  n_to_flush);
+
+                buf_pool_mutex_enter(buf_pool);
+                flushed = TRUE;
+        } else {
+                mutex_exit(block_mutex);
+        }
+
+        ut_ad(buf_pool_mutex_own(buf_pool));
+
+        return(flushed);
+}
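Note: the new helper factors out a pattern the old buf_flush_batch loop performed inline: record the page's <space, offset> identity while the mutexes still pin it, release them, flush by identity rather than by pointer (the block may be relocated once unlocked), then reacquire the buffer pool mutex. A generic stand-alone rendering of that idea, with pthread mutexes standing in for the InnoDB ones and the locking order simplified:

#include <pthread.h>
#include <stdio.h>

typedef unsigned long ulint;

typedef struct {
        pthread_mutex_t mutex;
        ulint           space;
        ulint           offset;
} page_t;

static pthread_mutex_t  pool_mutex = PTHREAD_MUTEX_INITIALIZER;

static void
flush_by_id(ulint space, ulint offset)
{
        /* Work happens with no mutex held; the page may even be
        relocated meanwhile, which is why we flush by identity. */
        printf("flushing %lu:%lu\n", space, offset);
}

static void
flush_page_and_release(page_t* page)
{
        ulint   space;
        ulint   offset;

        pthread_mutex_lock(&pool_mutex);
        pthread_mutex_lock(&page->mutex);

        /* Copy the identity while both mutexes pin it down. */
        space = page->space;
        offset = page->offset;

        pthread_mutex_unlock(&page->mutex);
        pthread_mutex_unlock(&pool_mutex);

        flush_by_id(space, offset);

        pthread_mutex_lock(&pool_mutex);
        /* ... continue scanning the list under the pool mutex ... */
        pthread_mutex_unlock(&pool_mutex);
}

int
main(void)
{
        page_t  page = {PTHREAD_MUTEX_INITIALIZER, 5, 42};

        flush_page_and_release(&page);
        return(0);
}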
+
+/*******************************************************************//**
+This utility flushes dirty blocks from the end of the LRU list.
+In the case of an LRU flush the calling thread may own latches to
+pages: to avoid deadlocks, this function must be written so that it
+cannot end up waiting for these latches!
+@return number of blocks for which the write request was queued. */
+static
+ulint
+buf_flush_LRU_list_batch(
+/*=====================*/
+        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+        ulint           max)            /*!< in: max of blocks to flush */
+{
+        buf_page_t*     bpage;
+        ulint           count = 0;
+
+        ut_ad(buf_pool_mutex_own(buf_pool));
+
+        do {
+                /* Start from the end of the list looking for a
+                suitable block to be flushed. */
+                bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+
+                /* Iterate backwards over the flush list till we find
+                a page that isn't ready for flushing. */
+                while (bpage != NULL
+                       && !buf_flush_page_and_try_neighbors(
+                                bpage, BUF_FLUSH_LRU, max, &count)) {
+
+                        bpage = UT_LIST_GET_PREV(LRU, bpage);
+                }
+        } while (bpage != NULL && count < max);
+
+        /* We keep track of all flushes happening as part of LRU
+        flush. When estimating the desired rate at which flush_list
+        should be flushed, we factor in this value. */
+        buf_lru_flush_page_count += count;
+
+        ut_ad(buf_pool_mutex_own(buf_pool));
+
+        return(count);
+}
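Note: because buf_flush_page_and_try_neighbors() can release the buffer pool mutex, any saved list pointer may go stale, so the batch walks from the tail and restarts from the tail after every successful flush, until the quota is met or nothing flushable remains. The shape of that restart loop in a stand-alone toy over an array (the real loop, of course, walks the LRU list and delegates the work):

#include <stdio.h>

#define LIST_LEN 8

int
main(void)
{
        int     dirty[LIST_LEN] = {1, 0, 1, 1, 0, 0, 1, 1};
        int     count = 0;
        int     max = 3;
        int     i;

        do {
                /* Start from the end of the list ... */
                for (i = LIST_LEN - 1; i >= 0; i--) {
                        if (dirty[i]) {
                                dirty[i] = 0;   /* "flush" it */
                                count++;
                                break;          /* restart from the tail */
                        }
                }
        } while (i >= 0 && count < max);

        printf("flushed %d pages\n", count);    /* flushed 3 pages */
        return(0);
}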
+
+/*******************************************************************//**
+This utility flushes dirty blocks from the end of the flush_list.
+the calling thread is not allowed to own any latches on pages!
+@return number of blocks for which the write request was queued;
+ULINT_UNDEFINED if there was a flush of the same type already
+running */
+static
+ulint
+buf_flush_flush_list_batch(
+/*=======================*/
+        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
+        ulint           min_n,          /*!< in: wished minimum mumber
+                                        of blocks flushed (it is not
+                                        guaranteed that the actual
+                                        number is that big, though) */
+        ib_uint64_t     lsn_limit)      /*!< all blocks whose
+                                        oldest_modification is smaller
+                                        than this should be flushed (if
+                                        their number does not exceed
+                                        min_n) */
+{
+        ulint           len;
+        buf_page_t*     bpage;
+        ulint           count = 0;
+
+        ut_ad(buf_pool_mutex_own(buf_pool));
+
+        /* If we have flushed enough, leave the loop */
+        do {
+                /* Start from the end of the list looking for a suitable
+                block to be flushed. */
+
+                buf_flush_list_mutex_enter(buf_pool);
+
+                /* We use len here because theoretically insertions can
+                happen in the flush_list below while we are traversing
+                it for a suitable candidate for flushing. We'd like to
+                set a limit on how farther we are willing to traverse
+                the list. */
+                len = UT_LIST_GET_LEN(buf_pool->flush_list);
+                bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
+
+                if (bpage) {
+                        ut_a(bpage->oldest_modification > 0);
+                }
+
+                if (!bpage || bpage->oldest_modification >= lsn_limit) {
+
+                        /* We have flushed enough */
+                        buf_flush_list_mutex_exit(buf_pool);
+                        break;
+                }
+
+                ut_a(bpage->oldest_modification > 0);
+
+                ut_ad(bpage->in_flush_list);
+
+                buf_flush_list_mutex_exit(buf_pool);
+
+                /* The list may change during the flushing and we cannot
+                safely preserve within this function a pointer to a
+                block in the list! */
+                while (bpage != NULL
+                       && len > 0
+                       && !buf_flush_page_and_try_neighbors(
+                                bpage, BUF_FLUSH_LIST, min_n, &count)) {
+
+                        buf_flush_list_mutex_enter(buf_pool);
+
+                        /* If we are here that means that buf_pool->mutex
+                         was not released in buf_flush_page_and_try_neighbors()
+                        above and this guarantees that bpage didn't get
+                        relocated since we released the flush_list
+                        mutex above. There is a chance, however, that
+                        the bpage got removed from flush_list (not
+                        currently possible because flush_list_remove()
+                        also obtains buf_pool mutex but that may change
+                        in future). To avoid this scenario we check
+                        the oldest_modification and if it is zero
+                        we start all over again. */
+                        if (bpage->oldest_modification == 0) {
+                                buf_flush_list_mutex_exit(buf_pool);
+                                break;
+                        }
+
+                        bpage = UT_LIST_GET_PREV(list, bpage);
+
+                        ut_ad(!bpage || bpage->in_flush_list);
+
+                        buf_flush_list_mutex_exit(buf_pool);
+
+                        --len;
+                }
+
+        } while (count < min_n && bpage != NULL && len > 0);
+
+        ut_ad(buf_pool_mutex_own(buf_pool));
 
         return(count);
 }
@@ -1221,10 +1603,11 @@
 the calling thread is not allowed to own any latches on pages!
 @return number of blocks for which the write request was queued;
 ULINT_UNDEFINED if there was a flush of the same type already running */
-UNIV_INTERN
+static
 ulint
 buf_flush_batch(
 /*============*/
+        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
         enum buf_flush  flush_type,     /*!< in: BUF_FLUSH_LRU or
                                         BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
                                         then the caller must not own any
@@ -1232,133 +1615,68 @@
         ulint           min_n,          /*!< in: wished minimum mumber of blocks
                                         flushed (it is not guaranteed that the
                                         actual number is that big, though) */
-        ib_uint64_t     lsn_limit)      /*!< in the case BUF_FLUSH_LIST all
-                                        blocks whose oldest_modification is
+        ib_uint64_t     lsn_limit)      /*!< in: in the case of BUF_FLUSH_LIST
+                                        all blocks whose oldest_modification is
                                         smaller than this should be flushed
                                         (if their number does not exceed
                                         min_n), otherwise ignored */
 {
-        buf_page_t*     bpage;
-        ulint           page_count      = 0;
-        ulint           old_page_count;
-        ulint           space;
-        ulint           offset;
+        ulint           count   = 0;
 
-        ut_ad((flush_type == BUF_FLUSH_LRU)
-              || (flush_type == BUF_FLUSH_LIST));
+        ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
 #ifdef UNIV_SYNC_DEBUG
         ut_ad((flush_type != BUF_FLUSH_LIST)
               || sync_thread_levels_empty_gen(TRUE));
 #endif /* UNIV_SYNC_DEBUG */
-        buf_pool_mutex_enter();
-
-        if ((buf_pool->n_flush[flush_type] > 0)
-            || (buf_pool->init_flush[flush_type] == TRUE)) {
-
-                /* There is already a flush batch of the same type running */
-
-                buf_pool_mutex_exit();
-
-                return(ULINT_UNDEFINED);
-        }
-
-        buf_pool->init_flush[flush_type] = TRUE;
-
-        bool done_with_loop= false;
-        for (;done_with_loop != true;) {
-flush_next:
-                /* If we have flushed enough, leave the loop */
-                if (page_count >= min_n) {
-
-                        break;
-                }
-
-                /* Start from the end of the list looking for a suitable
-                block to be flushed. */
-
-                if (flush_type == BUF_FLUSH_LRU) {
-                        bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-                } else {
-                        ut_ad(flush_type == BUF_FLUSH_LIST);
-
-                        bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
-                        if (!bpage
-                            || bpage->oldest_modification >= lsn_limit) {
-                                /* We have flushed enough */
-
-                                break;
-                        }
-                        ut_ad(bpage->in_flush_list);
-                }
-
-                /* Note that after finding a single flushable page, we try to
-                flush also all its neighbors, and after that start from the
-                END of the LRU list or flush list again: the list may change
-                during the flushing and we cannot safely preserve within this
-                function a pointer to a block in the list! */
-
-                do {
-                        mutex_t*block_mutex = buf_page_get_mutex(bpage);
-                        ibool   ready;
-
-                        ut_a(buf_page_in_file(bpage));
-
-                        mutex_enter(block_mutex);
-                        ready = buf_flush_ready_for_flush(bpage, flush_type);
-                        mutex_exit(block_mutex);
-
-                        if (ready) {
-                                space = buf_page_get_space(bpage);
-                                offset = buf_page_get_page_no(bpage);
-
-                                buf_pool_mutex_exit();
-
-                                old_page_count = page_count;
-
-                                /* Try to flush also all the neighbors */
-                                page_count += buf_flush_try_neighbors(
-                                        space, offset, flush_type);
-                                /* fprintf(stderr,
-                                "Flush type %lu, page no %lu, neighb %lu\n",
-                                flush_type, offset,
-                                page_count - old_page_count); */
-
-                                buf_pool_mutex_enter();
-                                goto flush_next;
-
-                        } else if (flush_type == BUF_FLUSH_LRU) {
-                                bpage = UT_LIST_GET_PREV(LRU, bpage);
-                        } else {
-                                ut_ad(flush_type == BUF_FLUSH_LIST);
-
-                                bpage = UT_LIST_GET_PREV(list, bpage);
-                                ut_ad(!bpage || bpage->in_flush_list);
-                        }
-                } while (bpage != NULL);
-
-                /* If we could not find anything to flush, leave the loop */
-
-                done_with_loop= true;
-
-        }
-
-        buf_pool->init_flush[flush_type] = FALSE;
-
-        if (buf_pool->n_flush[flush_type] == 0) {
-
-                /* The running flush batch has ended */
-
-                os_event_set(buf_pool->no_flush[flush_type]);
-        }
-
-        buf_pool_mutex_exit();
-
-        buf_flush_buffered_writes();
+
+        buf_pool_mutex_enter(buf_pool);
+
+        /* Note: The buffer pool mutex is released and reacquired within
+        the flush functions. */
+        switch(flush_type) {
+        case BUF_FLUSH_LRU:
+                count = buf_flush_LRU_list_batch(buf_pool, min_n);
+                break;
+        case BUF_FLUSH_LIST:
+                count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
+                break;
+        default:
+                ut_error;
+        }
+
+        buf_pool_mutex_exit(buf_pool);
+
+        buf_flush_buffered_writes();
+
+#ifdef UNIV_DEBUG
+        if (buf_debug_prints && count > 0) {
+                fprintf(stderr, flush_type == BUF_FLUSH_LRU
+                        ? "Flushed %lu pages in LRU flush\n"
+                        : "Flushed %lu pages in flush list flush\n",
+                        (ulong) count);
+        }
+#endif /* UNIV_DEBUG */
+
+        srv_buf_pool_flushed += count;
+
+        return(count);
+}
+
+/******************************************************************//**
+Gather the aggregated stats for both flush list and LRU list flushing */
+static
+void
+buf_flush_common(
+/*=============*/
+        enum buf_flush  flush_type,     /*!< in: type of flush */
+        ulint           page_count)     /*!< in: number of pages flushed */
+{
+        buf_flush_buffered_writes();
+
+        ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
 
 #ifdef UNIV_DEBUG
         if (buf_debug_prints && page_count > 0) {
-                ut_a(flush_type == BUF_FLUSH_LRU
-                     || flush_type == BUF_FLUSH_LIST);
                 fprintf(stderr, flush_type == BUF_FLUSH_LRU
                         ? "Flushed %lu pages in LRU flush\n"
                         : "Flushed %lu pages in flush list flush\n",
@@ -1368,14 +1686,65 @@
 
         srv_buf_pool_flushed += page_count;
 
-        /* We keep track of all flushes happening as part of LRU
-        flush. When estimating the desired rate at which flush_list
-        should be flushed we factor in this value. */
         if (flush_type == BUF_FLUSH_LRU) {
+                /* We keep track of all flushes happening as part of LRU
+                flush. When estimating the desired rate at which flush_list
+                should be flushed we factor in this value. */
                 buf_lru_flush_page_count += page_count;
         }
-
-        return(page_count);
+}
+
+/******************************************************************//**
+Start a buffer flush batch for LRU or flush list */
+static
+ibool
+buf_flush_start(
+/*============*/
+        buf_pool_t*     buf_pool,       /*!< buffer pool instance */
+        enum buf_flush  flush_type)     /*!< in: BUF_FLUSH_LRU
+                                        or BUF_FLUSH_LIST */
+{
+        buf_pool_mutex_enter(buf_pool);
+
+        if (buf_pool->n_flush[flush_type] > 0
+           || buf_pool->init_flush[flush_type] == TRUE) {
+
+                /* There is already a flush batch of the same type running */
+
+                buf_pool_mutex_exit(buf_pool);
+
+                return(FALSE);
+        }
+
+        buf_pool->init_flush[flush_type] = TRUE;
+
+        buf_pool_mutex_exit(buf_pool);
+
+        return(TRUE);
+}
+
+/******************************************************************//**
+End a buffer flush batch for LRU or flush list */
+static
+void
+buf_flush_end(
+/*==========*/
+        buf_pool_t*     buf_pool,       /*!< buffer pool instance */
+        enum buf_flush  flush_type)     /*!< in: BUF_FLUSH_LRU
+                                        or BUF_FLUSH_LIST */
+{
+        buf_pool_mutex_enter(buf_pool);
+
+        buf_pool->init_flush[flush_type] = FALSE;
+
+        if (buf_pool->n_flush[flush_type] == 0) {
+
+                /* The running flush batch has ended */
+
+                os_event_set(buf_pool->no_flush[flush_type]);
+        }
+
+        buf_pool_mutex_exit(buf_pool);
 }
 
 /******************************************************************//**
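Note: buf_flush_start() and buf_flush_end() extract the batch bracketing that the old buf_flush_batch() did inline: init_flush[] marks a batch being set up, n_flush[] counts outstanding writes, and no_flush[] is signalled once both drain so that buf_flush_wait_batch_end() waiters can proceed. A condensed stand-alone model of that handshake, using a pthread condition variable in place of os_event:

#include <pthread.h>
#include <stdio.h>

typedef int ibool;
#define TRUE    1
#define FALSE   0

static pthread_mutex_t  pool_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t   no_flush = PTHREAD_COND_INITIALIZER;
static ibool            init_flush = FALSE;
static int              n_flush = 0;

static ibool
flush_start(void)
{
        ibool   started = FALSE;

        pthread_mutex_lock(&pool_mutex);
        if (n_flush == 0 && init_flush == FALSE) {
                init_flush = TRUE;      /* we own the batch now */
                started = TRUE;
        }
        pthread_mutex_unlock(&pool_mutex);

        return(started);
}

static void
flush_end(void)
{
        pthread_mutex_lock(&pool_mutex);
        init_flush = FALSE;
        if (n_flush == 0) {
                /* The running flush batch has ended. */
                pthread_cond_broadcast(&no_flush);
        }
        pthread_mutex_unlock(&pool_mutex);
}

int
main(void)
{
        if (flush_start()) {
                /* ... queue writes, bumping n_flush per page ... */
                flush_end();
                printf("batch ran\n");
        }
        return(0);
}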
1384
1753
void
1385
1754
buf_flush_wait_batch_end(
1386
1755
/*=====================*/
1387
 
        enum buf_flush  type)   /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
1388
 
{
1389
 
        ut_ad((type == BUF_FLUSH_LRU) || (type == BUF_FLUSH_LIST));
1390
 
 
1391
 
        os_event_wait(buf_pool->no_flush[type]);
1392
 
}
1393
 
 
 
1756
        buf_pool_t*     buf_pool,       /*!< buffer pool instance */
 
1757
        enum buf_flush  type)           /*!< in: BUF_FLUSH_LRU
 
1758
                                        or BUF_FLUSH_LIST */
 
1759
{
 
1760
        ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST);
 
1761
 
 
1762
        if (buf_pool == NULL) {
 
1763
                ulint   i;
 
1764
 
 
1765
                for (i = 0; i < srv_buf_pool_instances; ++i) {
 
1766
                        buf_pool_t*     buf_pool;
 
1767
 
 
1768
                        buf_pool = buf_pool_from_array(i);
 
1769
 
 
1770
                        os_event_wait(buf_pool->no_flush[type]);
 
1771
                }
 
1772
        } else {
 
1773
                os_event_wait(buf_pool->no_flush[type]);
 
1774
        }
 
1775
}
 
1776
 
 
1777
/*******************************************************************//**
 
1778
This utility flushes dirty blocks from the end of the LRU list.
 
1779
NOTE: The calling thread may own latches to pages: to avoid deadlocks,
 
1780
this function must be written so that it cannot end up waiting for these
 
1781
latches!
 
1782
@return number of blocks for which the write request was queued;
 
1783
ULINT_UNDEFINED if there was a flush of the same type already running */
 
1784
UNIV_INTERN
 
1785
ulint
 
1786
buf_flush_LRU(
 
1787
/*==========*/
 
1788
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
 
1789
        ulint           min_n)          /*!< in: wished minimum mumber of blocks
 
1790
                                        flushed (it is not guaranteed that the
 
1791
                                        actual number is that big, though) */
 
1792
{
 
1793
        ulint           page_count;
 
1794
 
 
1795
        if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
 
1796
                return(ULINT_UNDEFINED);
 
1797
        }
 
1798
 
 
1799
        page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0);
 
1800
 
 
1801
        buf_flush_end(buf_pool, BUF_FLUSH_LRU);
 
1802
 
 
1803
        buf_flush_common(BUF_FLUSH_LRU, page_count);
 
1804
 
 
1805
        return(page_count);
 
1806
}
 
1807
 
 
1808
/*******************************************************************//**
 
1809
This utility flushes dirty blocks from the end of the flush list of
 
1810
all buffer pool instances.
 
1811
NOTE: The calling thread is not allowed to own any latches on pages!
 
1812
@return number of blocks for which the write request was queued;
 
1813
ULINT_UNDEFINED if there was a flush of the same type already running */
 
1814
UNIV_INTERN
 
1815
ulint
 
1816
buf_flush_list(
 
1817
/*===========*/
 
1818
        ulint           min_n,          /*!< in: wished minimum mumber of blocks
 
1819
                                        flushed (it is not guaranteed that the
 
1820
                                        actual number is that big, though) */
 
1821
        ib_uint64_t     lsn_limit)      /*!< in: in the case BUF_FLUSH_LIST all
 
1822
                                        blocks whose oldest_modification is
 
1823
                                        smaller than this should be flushed
 
1824
                                        (if their number does not exceed
 
1825
                                        min_n), otherwise ignored */
 
1826
{
 
1827
        ulint           i;
 
1828
        ulint           total_page_count = 0;
 
1829
        ibool           skipped = FALSE;
 
1830
 
 
1831
        if (min_n != ULINT_MAX) {
 
1832
                /* Ensure that flushing is spread evenly amongst the
 
1833
                buffer pool instances. When min_n is ULINT_MAX
 
1834
                we need to flush everything up to the lsn limit
 
1835
                so no limit here. */
 
1836
                min_n = (min_n + srv_buf_pool_instances - 1)
 
1837
                         / srv_buf_pool_instances;
 
1838
        }
 
1839
 
 
1840
        /* Flush to lsn_limit in all buffer pool instances */
 
1841
        for (i = 0; i < srv_buf_pool_instances; i++) {
 
1842
                buf_pool_t*     buf_pool;
 
1843
                ulint           page_count = 0;
 
1844
 
 
1845
                buf_pool = buf_pool_from_array(i);
 
1846
 
 
1847
                if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) {
 
1848
                        /* We have two choices here. If lsn_limit was
 
1849
                        specified then skipping an instance of buffer
 
1850
                        pool means we cannot guarantee that all pages
 
1851
                        up to lsn_limit have been flushed. We can
 
1852
                        return right now with failure or we can try
 
1853
                        to flush remaining buffer pools up to the
 
1854
                        lsn_limit. We attempt to flush other buffer
 
1855
                        pools based on the assumption that it will
 
1856
                        help in the retry which will follow the
 
1857
                        failure. */
 
1858
                        skipped = TRUE;
 
1859
 
 
1860
                        continue;
 
1861
                }
 
1862
 
 
1863
                page_count = buf_flush_batch(
 
1864
                        buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit);
 
1865
 
 
1866
                buf_flush_end(buf_pool, BUF_FLUSH_LIST);
 
1867
 
 
1868
                buf_flush_common(BUF_FLUSH_LIST, page_count);
 
1869
 
 
1870
                total_page_count += page_count;
 
1871
        }
 
1872
 
 
1873
        return(lsn_limit != IB_ULONGLONG_MAX && skipped
 
1874
               ? ULINT_UNDEFINED : total_page_count);
 
1875
}
 
1876
 
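The rounding-up division spreads the quota evenly: a request for
min_n = 100 pages over srv_buf_pool_instances = 8 asks each instance
for (100 + 8 - 1) / 8 = 13 pages, slightly over-requesting rather than
under-requesting. A sketch of the retry a caller might run when an
instance was skipped under an lsn_limit (the loop is illustrative and
assumes the busy instance eventually finishes its batch):

        ulint           n_flushed;
        ib_uint64_t     lsn_limit = log_get_lsn();      /* example target */

        do {
                /* ULINT_MAX disables the per-instance split so that
                everything below lsn_limit is flushed; ULINT_UNDEFINED
                means at least one instance was busy and was skipped. */
                n_flushed = buf_flush_list(ULINT_MAX, lsn_limit);
        } while (n_flushed == ULINT_UNDEFINED);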
1394
1877
/******************************************************************//**
1395
1878
Gives a recommendation of how many blocks should be flushed to establish
1396
1879
a big enough margin of replaceable blocks near the end of the LRU list
1399
1882
LRU list */
1400
1883
static
1401
1884
ulint
1402
 
buf_flush_LRU_recommendation(void)
1403
 
/*==============================*/
 
1885
buf_flush_LRU_recommendation(
 
1886
/*=========================*/
 
1887
        buf_pool_t*     buf_pool)               /*!< in: Buffer pool instance */
1404
1888
{
1405
1889
        buf_page_t*     bpage;
1406
1890
        ulint           n_replaceable;
1407
1891
        ulint           distance        = 0;
1408
1892
 
1409
 
        buf_pool_mutex_enter();
 
1893
        buf_pool_mutex_enter(buf_pool);
1410
1894
 
1411
1895
        n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
1412
1896
 
1413
1897
        bpage = UT_LIST_GET_LAST(buf_pool->LRU);
1414
1898
 
1415
1899
        while ((bpage != NULL)
1416
 
               && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN
1417
 
                   + BUF_FLUSH_EXTRA_MARGIN)
1418
 
               && (distance < BUF_LRU_FREE_SEARCH_LEN)) {
 
1900
               && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)
 
1901
                   + BUF_FLUSH_EXTRA_MARGIN(buf_pool))
 
1902
               && (distance < BUF_LRU_FREE_SEARCH_LEN(buf_pool))) {
1419
1903
 
1420
1904
                mutex_t* block_mutex = buf_page_get_mutex(bpage);
1421
1905
 
1432
1916
                bpage = UT_LIST_GET_PREV(LRU, bpage);
1433
1917
        }
1434
1918
 
1435
 
        buf_pool_mutex_exit();
 
1919
        buf_pool_mutex_exit(buf_pool);
1436
1920
 
1437
 
        if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
 
1921
        if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
1438
1922
 
1439
1923
                return(0);
1440
1924
        }
1441
1925
 
1442
 
        return(BUF_FLUSH_FREE_BLOCK_MARGIN + BUF_FLUSH_EXTRA_MARGIN
 
1926
        return(BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)
 
1927
               + BUF_FLUSH_EXTRA_MARGIN(buf_pool)
1443
1928
               - n_replaceable);
1444
1929
}
1445
1930
 
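A worked example of the return value: if the per-instance margins come
to, say, BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool) = 512 and
BUF_FLUSH_EXTRA_MARGIN(buf_pool) = 64 (illustrative figures; both
macros now scale with the instance) and the scan finds
n_replaceable = 400, the recommendation is 512 + 64 - 400 = 176
blocks; once n_replaceable reaches 512 it drops to 0.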
1451
1936
immediately, without waiting. */
1452
1937
UNIV_INTERN
1453
1938
void
1454
 
buf_flush_free_margin(void)
1455
 
/*=======================*/
 
1939
buf_flush_free_margin(
 
1940
/*==================*/
 
1941
        buf_pool_t*     buf_pool)               /*!< in: Buffer pool instance */
1456
1942
{
1457
1943
        ulint   n_to_flush;
1458
 
        ulint   n_flushed;
1459
1944
 
1460
 
        n_to_flush = buf_flush_LRU_recommendation();
 
1945
        n_to_flush = buf_flush_LRU_recommendation(buf_pool);
1461
1946
 
1462
1947
        if (n_to_flush > 0) {
1463
 
                n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, 0);
 
1948
                ulint   n_flushed;
 
1949
 
 
1950
                n_flushed = buf_flush_LRU(buf_pool, n_to_flush);
 
1951
 
1464
1952
                if (n_flushed == ULINT_UNDEFINED) {
1465
1953
                        /* There was an LRU type flush batch already running;
1466
1954
                        let us wait for it to end */
1467
1955
 
1468
 
                        buf_flush_wait_batch_end(BUF_FLUSH_LRU);
 
1956
                        buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
1469
1957
                }
1470
1958
        }
1471
1959
}
1472
1960
 
 
1961
/*********************************************************************//**
 
1962
Flushes pages from the end of all the LRU lists. */
 
1963
UNIV_INTERN
 
1964
void
 
1965
buf_flush_free_margins(void)
 
1966
/*========================*/
 
1967
{
 
1968
        ulint   i;
 
1969
 
 
1970
        for (i = 0; i < srv_buf_pool_instances; i++) {
 
1971
                buf_pool_t*     buf_pool;
 
1972
 
 
1973
                buf_pool = buf_pool_from_array(i);
 
1974
 
 
1975
                buf_flush_free_margin(buf_pool);
 
1976
        }
 
1977
}
 
1978
 
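A minimal sketch of the two entry points (the calling context and the
choice of instance 0 are assumptions for illustration):

        /* Replenish the free margin of a single instance... */
        buf_flush_free_margin(buf_pool_from_array(0));

        /* ...or of every instance, via the wrapper above. */
        buf_flush_free_margins();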
1473
1979
/*********************************************************************
1474
1980
Update the historical stats that we are collecting for flush rate
1475
1981
heuristics at the end of each interval.
1530
2036
buf_flush_get_desired_flush_rate(void)
1531
2037
/*==================================*/
1532
2038
{
1533
 
        ulint                   redo_avg;
1534
 
        ulint                   lru_flush_avg;
1535
 
        ulint                   n_dirty;
1536
 
        ulint                   n_flush_req;
1537
 
        lint                    rate;
1538
 
        ib_uint64_t             lsn = log_get_lsn();
1539
 
        ulint                   log_capacity = log_get_capacity();
 
2039
        ulint           i;
 
2040
        lint            rate;
 
2041
        ulint           redo_avg;
 
2042
        ulint           n_dirty = 0;
 
2043
        ulint           n_flush_req;
 
2044
        ulint           lru_flush_avg;
 
2045
        ib_uint64_t     lsn = log_get_lsn();
 
2046
        ulint           log_capacity = log_get_capacity();
1540
2047
 
1541
2048
        /* log_capacity should never be zero after the initialization
1542
2049
        of log subsystem. */
1543
2050
        ut_ad(log_capacity != 0);
1544
2051
 
1545
2052
        /* Get total number of dirty pages. It is OK to access
1546
 
        flush_list without holding any mtex as we are using this
 
2053
        flush_list without holding any mutex as we are using this
1547
2054
        only for heuristics. */
1548
 
        n_dirty = UT_LIST_GET_LEN(buf_pool->flush_list);
 
2055
        for (i = 0; i < srv_buf_pool_instances; i++) {
 
2056
                buf_pool_t*     buf_pool;
 
2057
 
 
2058
                buf_pool = buf_pool_from_array(i);
 
2059
                n_dirty += UT_LIST_GET_LEN(buf_pool->flush_list);
 
2060
        }
1549
2061
 
1550
2062
        /* An overflow can happen if we generate more than 2^32 bytes
1551
2063
        of redo in this interval i.e.: 4G of redo in 1 second. We can
1587
2099
@return TRUE if ok */
1588
2100
static
1589
2101
ibool
1590
 
buf_flush_validate_low(void)
1591
 
/*========================*/
 
2102
buf_flush_validate_low(
 
2103
/*===================*/
 
2104
        buf_pool_t*     buf_pool)               /*!< in: Buffer pool instance */
1592
2105
{
1593
2106
        buf_page_t*             bpage;
1594
2107
        const ib_rbt_node_t*    rnode = NULL;
1595
2108
 
 
2109
        ut_ad(buf_flush_list_mutex_own(buf_pool));
 
2110
 
1596
2111
        UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
1597
2112
                         ut_ad(ut_list_node_313->in_flush_list));
1598
2113
 
1607
2122
 
1608
2123
        while (bpage != NULL) {
1609
2124
                const ib_uint64_t om = bpage->oldest_modification;
 
2125
 
 
2126
                ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
 
2127
 
1610
2128
                ut_ad(bpage->in_flush_list);
1611
 
                ut_a(buf_page_in_file(bpage));
 
2129
 
 
2130
                /* A page in flush_list can be in BUF_BLOCK_REMOVE_HASH
 
2131
                state. This happens when a page is in the middle of
 
2132
                being relocated. In that case the original descriptor
 
2133
                can have this state and still be in the flush list
 
2134
                waiting to acquire the flush_list_mutex to complete
 
2135
                the relocation. */
 
2136
                ut_a(buf_page_in_file(bpage)
 
2137
                     || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
1612
2138
                ut_a(om > 0);
1613
2139
 
1614
2140
                if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
 
2141
                        buf_page_t** prpage;
 
2142
 
1615
2143
                        ut_a(rnode);
1616
 
                        buf_page_t* rpage = *rbt_value(buf_page_t*,
1617
 
                                                       rnode);
1618
 
                        ut_a(rpage);
1619
 
                        ut_a(rpage == bpage);
 
2144
                        prpage = rbt_value(buf_page_t*, rnode);
 
2145
 
 
2146
                        ut_a(*prpage);
 
2147
                        ut_a(*prpage == bpage);
1620
2148
                        rnode = rbt_next(buf_pool->flush_rbt, rnode);
1621
2149
                }
1622
2150
 
1637
2165
@return TRUE if ok */
1638
2166
UNIV_INTERN
1639
2167
ibool
1640
 
buf_flush_validate(void)
1641
 
/*====================*/
 
2168
buf_flush_validate(
 
2169
/*===============*/
 
2170
        buf_pool_t*     buf_pool)       /*!< in: buffer pool instance */
1642
2171
{
1643
2172
        ibool   ret;
1644
2173
 
1645
 
        buf_pool_mutex_enter();
1646
 
 
1647
 
        ret = buf_flush_validate_low();
1648
 
 
1649
 
        buf_pool_mutex_exit();
 
2174
        buf_flush_list_mutex_enter(buf_pool);
 
2175
 
 
2176
        ret = buf_flush_validate_low(buf_pool);
 
2177
 
 
2178
        buf_flush_list_mutex_exit(buf_pool);
1650
2179
 
1651
2180
        return(ret);
1652
2181
}
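
A minimal debug-build sketch that validates the flush list of every
instance (the surrounding loop and assertion are illustrative;
buf_flush_validate() takes the flush-list mutex itself, so the caller
holds no buffer pool mutex):

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
        ulint   i;

        for (i = 0; i < srv_buf_pool_instances; i++) {
                ut_a(buf_flush_validate(buf_pool_from_array(i)));
        }
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */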