~linuxjedi/drizzle/trunk-bug-667053

« back to all changes in this revision

Viewing changes to storage/innobase/buf/buf0lru.c

  • Committer: brian
  • Date: 2008-06-25 05:29:13 UTC
  • Revision ID: brian@localhost.localdomain-20080625052913-6upwo0jsrl4lnapl
clean slate

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/******************************************************
 
2
The database buffer replacement algorithm
 
3
 
 
4
(c) 1995 Innobase Oy
 
5
 
 
6
Created 11/5/1995 Heikki Tuuri
 
7
*******************************************************/
 
8
 
 
9
#include "buf0lru.h"
 
10
 
 
11
#ifdef UNIV_NONINL
 
12
#include "buf0lru.ic"
 
13
#include "srv0srv.h"    /* Needed to get srv_print_innodb_monitor */
 
14
#endif
 
15
 
 
16
#include "ut0byte.h"
 
17
#include "ut0lst.h"
 
18
#include "ut0rnd.h"
 
19
#include "sync0sync.h"
 
20
#include "sync0rw.h"
 
21
#include "hash0hash.h"
 
22
#include "os0sync.h"
 
23
#include "fil0fil.h"
 
24
#include "btr0btr.h"
 
25
#include "buf0buf.h"
 
26
#include "buf0flu.h"
 
27
#include "buf0rea.h"
 
28
#include "btr0sea.h"
 
29
#include "os0file.h"
 
30
#include "log0recv.h"
 
31
 
 
32
/* The number of blocks from the LRU_old pointer onward, including the block
pointed to, must be 3/8 of the whole LRU list length, except that the
tolerance defined below is allowed. Note that the tolerance must be small
enough such that for even the BUF_LRU_OLD_MIN_LEN long LRU list, the
LRU_old pointer is not allowed to point to either end of the LRU list.
The tolerance keeps buf_LRU_old_adjust_len() from moving the pointer on
every single LRU insertion/removal. */

#define BUF_LRU_OLD_TOLERANCE	20

/* The whole LRU list length is divided by this number to determine an
initial segment in buf_LRU_get_recent_limit */

#define BUF_LRU_INITIAL_RATIO	8

/* If we switch on the InnoDB monitor because there are too few available
frames in the buffer pool, we set this to TRUE; used in
buf_LRU_get_free_block() to remember to switch the monitor off again
once the shortage has eased */
ibool	buf_lru_switched_on_innodb_mon	= FALSE;
 
48
 
 
49
/**********************************************************************
Takes a block out of the LRU list and page hash table and sets the block
state to BUF_BLOCK_REMOVE_HASH. */
static
void
buf_LRU_block_remove_hashed_page(
/*=============================*/
	buf_block_t*	block);	/* in: block, must contain a file page and
				be in a state where it can be freed; there
				may or may not be a hash index to the page */
/**********************************************************************
Puts a file page which has no hash index to the free list. */
static
void
buf_LRU_block_free_hashed_page(
/*===========================*/
	buf_block_t*	block);	/* in: block, must contain a file page and
				be in a state where it can be freed */
 
67
 
 
68
/**********************************************************************
Invalidates all pages belonging to a given tablespace when we are deleting
the data file(s) of that tablespace. Repeatedly scans the LRU list from
the tail, freeing every page of the given space; pages that are currently
fixed or under i/o are skipped and the whole scan is retried after a short
sleep until no such page remains. */

void
buf_LRU_invalidate_tablespace(
/*==========================*/
	ulint	id)	/* in: space id */
{
	buf_block_t*	block;
	ulint		page_no;
	ibool		all_freed;

scan_again:
	mutex_enter(&(buf_pool->mutex));

	all_freed = TRUE;

	/* Scan backwards from the tail of the LRU list */
	block = UT_LIST_GET_LAST(buf_pool->LRU);

	while (block != NULL) {
		buf_block_t*	prev_block;

		mutex_enter(&block->mutex);
		/* Remember the predecessor before (possibly) freeing
		this block, so the walk can continue */
		prev_block = UT_LIST_GET_PREV(LRU, block);

		ut_a(block->state == BUF_BLOCK_FILE_PAGE);

		if (block->space == id
		    && (block->buf_fix_count > 0 || block->io_fix != 0)) {

			/* We cannot remove this page during this scan yet;
			maybe the system is currently reading it in, or
			flushing the modifications to the file */

			all_freed = FALSE;

			goto next_page;
		}

		if (block->space == id) {
#ifdef UNIV_DEBUG
			if (buf_debug_prints) {
				fprintf(stderr,
					"Dropping space %lu page %lu\n",
					(ulong) block->space,
					(ulong) block->offset);
			}
#endif
			if (block->is_hashed) {
				page_no = block->offset;

				/* Dropping the adaptive hash index
				requires releasing both mutexes, so the
				LRU position may change under us: restart
				the scan from the tail afterwards */

				mutex_exit(&block->mutex);

				mutex_exit(&(buf_pool->mutex));

				/* Note that the following call will acquire
				an S-latch on the page */

				btr_search_drop_page_hash_when_freed(id,
								     page_no);
				goto scan_again;
			}

			if (0 != ut_dulint_cmp(block->oldest_modification,
					       ut_dulint_zero)) {

				/* Remove from the flush list of modified
				blocks; the modifications need not be
				written out since the file is being
				deleted anyway */
				block->oldest_modification = ut_dulint_zero;

				UT_LIST_REMOVE(flush_list,
					       buf_pool->flush_list, block);
			}

			/* Remove from the LRU list */
			buf_LRU_block_remove_hashed_page(block);
			buf_LRU_block_free_hashed_page(block);
		}
next_page:
		mutex_exit(&block->mutex);
		block = prev_block;
	}

	mutex_exit(&(buf_pool->mutex));

	if (!all_freed) {
		/* Some pages were busy: wait 20 ms and rescan */
		os_thread_sleep(20000);

		goto scan_again;
	}
}
 
160
 
 
161
/**********************************************************************
 
162
Gets the minimum LRU_position field for the blocks in an initial segment
 
163
(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
 
164
guaranteed to be precise, because the ulint_clock may wrap around. */
 
165
 
 
166
ulint
 
167
buf_LRU_get_recent_limit(void)
 
168
/*==========================*/
 
169
                        /* out: the limit; zero if could not determine it */
 
170
{
 
171
        buf_block_t*    block;
 
172
        ulint           len;
 
173
        ulint           limit;
 
174
 
 
175
        mutex_enter(&(buf_pool->mutex));
 
176
 
 
177
        len = UT_LIST_GET_LEN(buf_pool->LRU);
 
178
 
 
179
        if (len < BUF_LRU_OLD_MIN_LEN) {
 
180
                /* The LRU list is too short to do read-ahead */
 
181
 
 
182
                mutex_exit(&(buf_pool->mutex));
 
183
 
 
184
                return(0);
 
185
        }
 
186
 
 
187
        block = UT_LIST_GET_FIRST(buf_pool->LRU);
 
188
 
 
189
        limit = block->LRU_position - len / BUF_LRU_INITIAL_RATIO;
 
190
 
 
191
        mutex_exit(&(buf_pool->mutex));
 
192
 
 
193
        return(limit);
 
194
}
 
195
 
 
196
/**********************************************************************
Look for a replaceable block from the end of the LRU list and put it to
the free list if found. */

ibool
buf_LRU_search_and_free_block(
/*==========================*/
				/* out: TRUE if freed */
	ulint	n_iterations)	/* in: how many times this has been called
				repeatedly without result: a high value means
				that we should search farther; if value is
				k < 10, then we only search k/10 * [number
				of pages in the buffer pool] from the end
				of the LRU list */
{
	buf_block_t*	block;
	ulint		distance = 0;	/* how far from the tail we have
					walked so far */
	ibool		freed;

	mutex_enter(&(buf_pool->mutex));

	freed = FALSE;
	block = UT_LIST_GET_LAST(buf_pool->LRU);

	while (block != NULL) {
		ut_a(block->in_LRU_list);

		mutex_enter(&block->mutex);

		if (buf_flush_ready_for_replace(block)) {

#ifdef UNIV_DEBUG
			if (buf_debug_prints) {
				fprintf(stderr,
					"Putting space %lu page %lu"
					" to free list\n",
					(ulong) block->space,
					(ulong) block->offset);
			}
#endif /* UNIV_DEBUG */

			buf_LRU_block_remove_hashed_page(block);

			/* Both mutexes are released before dropping the
			adaptive hash index, because that operation may
			acquire latches of its own */
			mutex_exit(&(buf_pool->mutex));
			mutex_exit(&block->mutex);

			/* Remove possible adaptive hash index built on the
			page; in the case of AWE the block may not have a
			frame at all */

			/* NOTE(review): block->frame is read here after
			both mutexes were released above; presumably the
			BUF_BLOCK_REMOVE_HASH state set by
			buf_LRU_block_remove_hashed_page() keeps other
			threads away from this block — confirm. */
			if (block->frame) {
				/* The page was declared uninitialized
				by buf_LRU_block_remove_hashed_page().
				We need to flag the contents of the
				page valid (which it still is) in
				order to avoid bogus Valgrind
				warnings. */
				UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
				btr_search_drop_page_hash_index(block->frame);
				UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
			}

			ut_a(block->buf_fix_count == 0);

			mutex_enter(&(buf_pool->mutex));
			mutex_enter(&block->mutex);

			buf_LRU_block_free_hashed_page(block);
			freed = TRUE;
			mutex_exit(&block->mutex);

			break;
		}

		mutex_exit(&block->mutex);

		block = UT_LIST_GET_PREV(LRU, block);
		distance++;

		/* Give up early when we have not been asked to search
		far (n_iterations small) and have already walked the
		allowed fraction of the pool */
		if (!freed && n_iterations <= 10
		    && distance > 100 + (n_iterations * buf_pool->curr_size)
		    / 10) {
			buf_pool->LRU_flush_ended = 0;

			mutex_exit(&(buf_pool->mutex));

			return(FALSE);
		}
	}
	if (buf_pool->LRU_flush_ended > 0) {
		buf_pool->LRU_flush_ended--;
	}
	if (!freed) {
		buf_pool->LRU_flush_ended = 0;
	}
	mutex_exit(&(buf_pool->mutex));

	return(freed);
}
 
295
 
 
296
/**********************************************************************
 
297
Tries to remove LRU flushed blocks from the end of the LRU list and put them
 
298
to the free list. This is beneficial for the efficiency of the insert buffer
 
299
operation, as flushed pages from non-unique non-clustered indexes are here
 
300
taken out of the buffer pool, and their inserts redirected to the insert
 
301
buffer. Otherwise, the flushed blocks could get modified again before read
 
302
operations need new buffer blocks, and the i/o work done in flushing would be
 
303
wasted. */
 
304
 
 
305
void
 
306
buf_LRU_try_free_flushed_blocks(void)
 
307
/*=================================*/
 
308
{
 
309
        mutex_enter(&(buf_pool->mutex));
 
310
 
 
311
        while (buf_pool->LRU_flush_ended > 0) {
 
312
 
 
313
                mutex_exit(&(buf_pool->mutex));
 
314
 
 
315
                buf_LRU_search_and_free_block(1);
 
316
 
 
317
                mutex_enter(&(buf_pool->mutex));
 
318
        }
 
319
 
 
320
        mutex_exit(&(buf_pool->mutex));
 
321
}
 
322
 
 
323
/**********************************************************************
 
324
Returns TRUE if less than 25 % of the buffer pool is available. This can be
 
325
used in heuristics to prevent huge transactions eating up the whole buffer
 
326
pool for their locks. */
 
327
 
 
328
ibool
 
329
buf_LRU_buf_pool_running_out(void)
 
330
/*==============================*/
 
331
                                /* out: TRUE if less than 25 % of buffer pool
 
332
                                left */
 
333
{
 
334
        ibool   ret     = FALSE;
 
335
 
 
336
        mutex_enter(&(buf_pool->mutex));
 
337
 
 
338
        if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
 
339
            + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 4) {
 
340
 
 
341
                ret = TRUE;
 
342
        }
 
343
 
 
344
        mutex_exit(&(buf_pool->mutex));
 
345
 
 
346
        return(ret);
 
347
}
 
348
 
 
349
/**********************************************************************
Returns a free block from buf_pool. The block is taken off the free list.
If it is empty, blocks are moved from the end of the LRU list to the free
list. Loops (with flushing, waking i/o handlers, and sleeping) until a
block becomes available; may crash the server intentionally if over 95 %
of the pool is occupied by lock heaps or the adaptive hash index. */

buf_block_t*
buf_LRU_get_free_block(void)
/*========================*/
				/* out: the free control block; also if AWE is
				used, it is guaranteed that the block has its
				page mapped to a frame when we return */
{
	buf_block_t*	block		= NULL;
	ibool		freed;
	ulint		n_iterations	= 1;	/* passed on to the LRU
						search: grows each retry */
	ibool		mon_value_was	= FALSE;
	ibool		started_monitor	= FALSE;
loop:
	mutex_enter(&(buf_pool->mutex));

	if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
	    + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 20) {
		ut_print_timestamp(stderr);

		fprintf(stderr,
			"  InnoDB: ERROR: over 95 percent of the buffer pool"
			" is occupied by\n"
			"InnoDB: lock heaps or the adaptive hash index!"
			" Check that your\n"
			"InnoDB: transactions do not set too many row locks.\n"
			"InnoDB: Your buffer pool size is %lu MB."
			" Maybe you should make\n"
			"InnoDB: the buffer pool bigger?\n"
			"InnoDB: We intentionally generate a seg fault"
			" to print a stack trace\n"
			"InnoDB: on Linux!\n",
			(ulong) (buf_pool->curr_size
				 / (1024 * 1024 / UNIV_PAGE_SIZE)));

		/* Deliberate crash to get a stack trace */
		ut_error;

	} else if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
		   + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 3) {

		if (!buf_lru_switched_on_innodb_mon) {

			/* Over 67 % of the buffer pool is occupied by lock
			heaps or the adaptive hash index. This may be a memory
			leak! */

			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: WARNING: over 67 percent of"
				" the buffer pool is occupied by\n"
				"InnoDB: lock heaps or the adaptive"
				" hash index! Check that your\n"
				"InnoDB: transactions do not set too many"
				" row locks.\n"
				"InnoDB: Your buffer pool size is %lu MB."
				" Maybe you should make\n"
				"InnoDB: the buffer pool bigger?\n"
				"InnoDB: Starting the InnoDB Monitor to print"
				" diagnostics, including\n"
				"InnoDB: lock heap and hash index sizes.\n",
				(ulong) (buf_pool->curr_size
					 / (1024 * 1024 / UNIV_PAGE_SIZE)));

			buf_lru_switched_on_innodb_mon = TRUE;
			srv_print_innodb_monitor = TRUE;
			os_event_set(srv_lock_timeout_thread_event);
		}
	} else if (buf_lru_switched_on_innodb_mon) {

		/* Switch off the InnoDB Monitor; this is a simple way
		to stop the monitor if the situation becomes less urgent,
		but may also surprise users if the user also switched on the
		monitor! */

		buf_lru_switched_on_innodb_mon = FALSE;
		srv_print_innodb_monitor = FALSE;
	}

	/* If there is a block in the free list, take it */
	if (UT_LIST_GET_LEN(buf_pool->free) > 0) {

		block = UT_LIST_GET_FIRST(buf_pool->free);
		ut_a(block->in_free_list);
		UT_LIST_REMOVE(free, buf_pool->free, block);
		block->in_free_list = FALSE;
		ut_a(block->state != BUF_BLOCK_FILE_PAGE);
		ut_a(!block->in_LRU_list);

		if (srv_use_awe) {
			if (block->frame) {
				/* Remove from the list of mapped pages */

				UT_LIST_REMOVE(awe_LRU_free_mapped,
					       buf_pool->awe_LRU_free_mapped,
					       block);
			} else {
				/* We map the page to a frame; second param
				FALSE below because we do not want it to be
				added to the awe_LRU_free_mapped list */

				buf_awe_map_page_to_frame(block, FALSE);
			}
		}

		mutex_enter(&block->mutex);

		block->state = BUF_BLOCK_READY_FOR_USE;
		UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);

		mutex_exit(&block->mutex);

		mutex_exit(&(buf_pool->mutex));

		if (started_monitor) {
			/* Restore the monitor setting we overrode in the
			n_iterations > 30 branch below */
			srv_print_innodb_monitor = mon_value_was;
		}

		return(block);
	}

	/* If no block was in the free list, search from the end of the LRU
	list and try to free a block there */

	mutex_exit(&(buf_pool->mutex));

	freed = buf_LRU_search_and_free_block(n_iterations);

	if (freed > 0) {
		/* A block was moved to the free list: retry taking it */
		goto loop;
	}

	if (n_iterations > 30) {
		ut_print_timestamp(stderr);
		fprintf(stderr,
			"InnoDB: Warning: difficult to find free blocks from\n"
			"InnoDB: the buffer pool (%lu search iterations)!"
			" Consider\n"
			"InnoDB: increasing the buffer pool size.\n"
			"InnoDB: It is also possible that"
			" in your Unix version\n"
			"InnoDB: fsync is very slow, or"
			" completely frozen inside\n"
			"InnoDB: the OS kernel. Then upgrading to"
			" a newer version\n"
			"InnoDB: of your operating system may help."
			" Look at the\n"
			"InnoDB: number of fsyncs in diagnostic info below.\n"
			"InnoDB: Pending flushes (fsync) log: %lu;"
			" buffer pool: %lu\n"
			"InnoDB: %lu OS file reads, %lu OS file writes,"
			" %lu OS fsyncs\n"
			"InnoDB: Starting InnoDB Monitor to print further\n"
			"InnoDB: diagnostics to the standard output.\n",
			(ulong) n_iterations,
			(ulong) fil_n_pending_log_flushes,
			(ulong) fil_n_pending_tablespace_flushes,
			(ulong) os_n_file_reads, (ulong) os_n_file_writes,
			(ulong) os_n_fsyncs);

		mon_value_was = srv_print_innodb_monitor;
		started_monitor = TRUE;
		srv_print_innodb_monitor = TRUE;
		os_event_set(srv_lock_timeout_thread_event);
	}

	/* No free block was found: try to flush the LRU list */

	buf_flush_free_margin();
	++srv_buf_pool_wait_free;

	os_aio_simulated_wake_handler_threads();

	mutex_enter(&(buf_pool->mutex));

	if (buf_pool->LRU_flush_ended > 0) {
		/* We have written pages in an LRU flush. To make the insert
		buffer more efficient, we try to move these pages to the free
		list. */

		mutex_exit(&(buf_pool->mutex));

		buf_LRU_try_free_flushed_blocks();
	} else {
		mutex_exit(&(buf_pool->mutex));
	}

	if (n_iterations > 10) {

		/* Back off for 0.5 s before retrying */
		os_thread_sleep(500000);
	}

	n_iterations++;

	goto loop;
}
 
548
 
 
549
/***********************************************************************
 
550
Moves the LRU_old pointer so that the length of the old blocks list
 
551
is inside the allowed limits. */
 
552
UNIV_INLINE
 
553
void
 
554
buf_LRU_old_adjust_len(void)
 
555
/*========================*/
 
556
{
 
557
        ulint   old_len;
 
558
        ulint   new_len;
 
559
 
 
560
        ut_a(buf_pool->LRU_old);
 
561
        ut_ad(mutex_own(&(buf_pool->mutex)));
 
562
        ut_ad(3 * (BUF_LRU_OLD_MIN_LEN / 8) > BUF_LRU_OLD_TOLERANCE + 5);
 
563
 
 
564
        for (;;) {
 
565
                old_len = buf_pool->LRU_old_len;
 
566
                new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
 
567
 
 
568
                ut_a(buf_pool->LRU_old->in_LRU_list);
 
569
 
 
570
                /* Update the LRU_old pointer if necessary */
 
571
 
 
572
                if (old_len < new_len - BUF_LRU_OLD_TOLERANCE) {
 
573
 
 
574
                        buf_pool->LRU_old = UT_LIST_GET_PREV(
 
575
                                LRU, buf_pool->LRU_old);
 
576
                        (buf_pool->LRU_old)->old = TRUE;
 
577
                        buf_pool->LRU_old_len++;
 
578
 
 
579
                } else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) {
 
580
 
 
581
                        (buf_pool->LRU_old)->old = FALSE;
 
582
                        buf_pool->LRU_old = UT_LIST_GET_NEXT(
 
583
                                LRU, buf_pool->LRU_old);
 
584
                        buf_pool->LRU_old_len--;
 
585
                } else {
 
586
                        ut_a(buf_pool->LRU_old); /* Check that we did not
 
587
                                                 fall out of the LRU list */
 
588
                        return;
 
589
                }
 
590
        }
 
591
}
 
592
 
 
593
/***********************************************************************
 
594
Initializes the old blocks pointer in the LRU list. This function should be
 
595
called when the LRU list grows to BUF_LRU_OLD_MIN_LEN length. */
 
596
static
 
597
void
 
598
buf_LRU_old_init(void)
 
599
/*==================*/
 
600
{
 
601
        buf_block_t*    block;
 
602
 
 
603
        ut_ad(mutex_own(&(buf_pool->mutex)));
 
604
        ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
 
605
 
 
606
        /* We first initialize all blocks in the LRU list as old and then use
 
607
        the adjust function to move the LRU_old pointer to the right
 
608
        position */
 
609
 
 
610
        block = UT_LIST_GET_FIRST(buf_pool->LRU);
 
611
 
 
612
        while (block != NULL) {
 
613
                ut_a(block->state == BUF_BLOCK_FILE_PAGE);
 
614
                ut_a(block->in_LRU_list);
 
615
                block->old = TRUE;
 
616
                block = UT_LIST_GET_NEXT(LRU, block);
 
617
        }
 
618
 
 
619
        buf_pool->LRU_old = UT_LIST_GET_FIRST(buf_pool->LRU);
 
620
        buf_pool->LRU_old_len = UT_LIST_GET_LEN(buf_pool->LRU);
 
621
 
 
622
        buf_LRU_old_adjust_len();
 
623
}
 
624
 
 
625
/**********************************************************************
Removes a block from the LRU list. The caller must hold buf_pool->mutex
(asserted below). Maintains the LRU_old pointer, LRU_old_len counter,
and the AWE mapped-pages list. */
UNIV_INLINE
void
buf_LRU_remove_block(
/*=================*/
	buf_block_t*	block)	/* in: control block */
{
	ut_ad(buf_pool);
	ut_ad(block);
	ut_ad(mutex_own(&(buf_pool->mutex)));

	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
	ut_a(block->in_LRU_list);

	/* If the LRU_old pointer is defined and points to just this block,
	move it backward one step */

	if (block == buf_pool->LRU_old) {

		/* Below: the previous block is guaranteed to exist, because
		the LRU_old pointer is only allowed to differ by the
		tolerance value from strict 3/8 of the LRU list length. */

		buf_pool->LRU_old = UT_LIST_GET_PREV(LRU, block);
		(buf_pool->LRU_old)->old = TRUE;

		buf_pool->LRU_old_len++;
		ut_a(buf_pool->LRU_old);
	}

	/* Remove the block from the LRU list */
	UT_LIST_REMOVE(LRU, buf_pool->LRU, block);
	block->in_LRU_list = FALSE;

	if (srv_use_awe && block->frame) {
		/* Remove from the list of mapped pages */

		UT_LIST_REMOVE(awe_LRU_free_mapped,
			       buf_pool->awe_LRU_free_mapped, block);
	}

	/* If the LRU list is so short that LRU_old not defined, return */
	if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {

		buf_pool->LRU_old = NULL;

		return;
	}

	ut_ad(buf_pool->LRU_old);

	/* Update the LRU_old_len field if necessary: the removed block
	was part of the old segment */
	if (block->old) {

		buf_pool->LRU_old_len--;
	}

	/* Adjust the length of the old block list if necessary */
	buf_LRU_old_adjust_len();
}
 
686
 
 
687
/**********************************************************************
 
688
Adds a block to the LRU list end. */
 
689
UNIV_INLINE
 
690
void
 
691
buf_LRU_add_block_to_end_low(
 
692
/*=========================*/
 
693
        buf_block_t*    block)  /* in: control block */
 
694
{
 
695
        buf_block_t*    last_block;
 
696
 
 
697
        ut_ad(buf_pool);
 
698
        ut_ad(block);
 
699
        ut_ad(mutex_own(&(buf_pool->mutex)));
 
700
 
 
701
        ut_a(block->state == BUF_BLOCK_FILE_PAGE);
 
702
 
 
703
        block->old = TRUE;
 
704
 
 
705
        last_block = UT_LIST_GET_LAST(buf_pool->LRU);
 
706
 
 
707
        if (last_block) {
 
708
                block->LRU_position = last_block->LRU_position;
 
709
        } else {
 
710
                block->LRU_position = buf_pool_clock_tic();
 
711
        }
 
712
 
 
713
        ut_a(!block->in_LRU_list);
 
714
        UT_LIST_ADD_LAST(LRU, buf_pool->LRU, block);
 
715
        block->in_LRU_list = TRUE;
 
716
 
 
717
        if (srv_use_awe && block->frame) {
 
718
                /* Add to the list of mapped pages */
 
719
 
 
720
                UT_LIST_ADD_LAST(awe_LRU_free_mapped,
 
721
                                 buf_pool->awe_LRU_free_mapped, block);
 
722
        }
 
723
 
 
724
        if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
 
725
 
 
726
                buf_pool->LRU_old_len++;
 
727
        }
 
728
 
 
729
        if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
 
730
 
 
731
                ut_ad(buf_pool->LRU_old);
 
732
 
 
733
                /* Adjust the length of the old block list if necessary */
 
734
 
 
735
                buf_LRU_old_adjust_len();
 
736
 
 
737
        } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
 
738
 
 
739
                /* The LRU list is now long enough for LRU_old to become
 
740
                defined: init it */
 
741
 
 
742
                buf_LRU_old_init();
 
743
        }
 
744
}
 
745
 
 
746
/**********************************************************************
 
747
Adds a block to the LRU list. */
 
748
UNIV_INLINE
 
749
void
 
750
buf_LRU_add_block_low(
 
751
/*==================*/
 
752
        buf_block_t*    block,  /* in: control block */
 
753
        ibool           old)    /* in: TRUE if should be put to the old blocks
 
754
                                in the LRU list, else put to the start; if the
 
755
                                LRU list is very short, the block is added to
 
756
                                the start, regardless of this parameter */
 
757
{
 
758
        ulint   cl;
 
759
 
 
760
        ut_ad(buf_pool);
 
761
        ut_ad(block);
 
762
        ut_ad(mutex_own(&(buf_pool->mutex)));
 
763
 
 
764
        ut_a(block->state == BUF_BLOCK_FILE_PAGE);
 
765
        ut_a(!block->in_LRU_list);
 
766
 
 
767
        block->old = old;
 
768
        cl = buf_pool_clock_tic();
 
769
 
 
770
        if (srv_use_awe && block->frame) {
 
771
                /* Add to the list of mapped pages; for simplicity we always
 
772
                add to the start, even if the user would have set 'old'
 
773
                TRUE */
 
774
 
 
775
                UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
 
776
                                  buf_pool->awe_LRU_free_mapped, block);
 
777
        }
 
778
 
 
779
        if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
 
780
 
 
781
                UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, block);
 
782
 
 
783
                block->LRU_position = cl;
 
784
                block->freed_page_clock = buf_pool->freed_page_clock;
 
785
        } else {
 
786
                UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old,
 
787
                                     block);
 
788
                buf_pool->LRU_old_len++;
 
789
 
 
790
                /* We copy the LRU position field of the previous block
 
791
                to the new block */
 
792
 
 
793
                block->LRU_position = (buf_pool->LRU_old)->LRU_position;
 
794
        }
 
795
 
 
796
        block->in_LRU_list = TRUE;
 
797
 
 
798
        if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
 
799
 
 
800
                ut_ad(buf_pool->LRU_old);
 
801
 
 
802
                /* Adjust the length of the old block list if necessary */
 
803
 
 
804
                buf_LRU_old_adjust_len();
 
805
 
 
806
        } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
 
807
 
 
808
                /* The LRU list is now long enough for LRU_old to become
 
809
                defined: init it */
 
810
 
 
811
                buf_LRU_old_init();
 
812
        }
 
813
}
 
814
 
 
815
/**********************************************************************
 
816
Adds a block to the LRU list. */
 
817
 
 
818
void
 
819
buf_LRU_add_block(
 
820
/*==============*/
 
821
        buf_block_t*    block,  /* in: control block */
 
822
        ibool           old)    /* in: TRUE if should be put to the old
 
823
                                blocks in the LRU list, else put to the start;
 
824
                                if the LRU list is very short, the block is
 
825
                                added to the start, regardless of this
 
826
                                parameter */
 
827
{
 
828
        buf_LRU_add_block_low(block, old);
 
829
}
 
830
 
 
831
/**********************************************************************
 
832
Moves a block to the start of the LRU list. */
 
833
 
 
834
void
 
835
buf_LRU_make_block_young(
 
836
/*=====================*/
 
837
        buf_block_t*    block)  /* in: control block */
 
838
{
 
839
        buf_LRU_remove_block(block);
 
840
        buf_LRU_add_block_low(block, FALSE);
 
841
}
 
842
 
 
843
/**********************************************************************
 
844
Moves a block to the end of the LRU list. */
 
845
 
 
846
void
 
847
buf_LRU_make_block_old(
 
848
/*===================*/
 
849
        buf_block_t*    block)  /* in: control block */
 
850
{
 
851
        buf_LRU_remove_block(block);
 
852
        buf_LRU_add_block_to_end_low(block);
 
853
}
 
854
 
 
855
/**********************************************************************
 
856
Puts a block back to the free list. */
 
857
 
 
858
void
 
859
buf_LRU_block_free_non_file_page(
 
860
/*=============================*/
 
861
        buf_block_t*    block)  /* in: block, must not contain a file page */
 
862
{
 
863
 
 
864
        ut_ad(mutex_own(&(buf_pool->mutex)));
 
865
        ut_ad(mutex_own(&block->mutex));
 
866
        ut_ad(block);
 
867
 
 
868
        ut_a((block->state == BUF_BLOCK_MEMORY)
 
869
             || (block->state == BUF_BLOCK_READY_FOR_USE));
 
870
 
 
871
        ut_a(block->n_pointers == 0);
 
872
        ut_a(!block->in_free_list);
 
873
 
 
874
        block->state = BUF_BLOCK_NOT_USED;
 
875
 
 
876
        UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
 
877
#ifdef UNIV_DEBUG
 
878
        /* Wipe contents of page to reveal possible stale pointers to it */
 
879
        memset(block->frame, '\0', UNIV_PAGE_SIZE);
 
880
#endif
 
881
        UT_LIST_ADD_FIRST(free, buf_pool->free, block);
 
882
        block->in_free_list = TRUE;
 
883
 
 
884
        UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
 
885
 
 
886
        if (srv_use_awe && block->frame) {
 
887
                /* Add to the list of mapped pages */
 
888
 
 
889
                UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
 
890
                                  buf_pool->awe_LRU_free_mapped, block);
 
891
        }
 
892
}
 
893
 
 
894
/**********************************************************************
 
895
Takes a block out of the LRU list and page hash table and sets the block
 
896
state to BUF_BLOCK_REMOVE_HASH. */
 
897
static
 
898
void
 
899
buf_LRU_block_remove_hashed_page(
 
900
/*=============================*/
 
901
        buf_block_t*    block)  /* in: block, must contain a file page and
 
902
                                be in a state where it can be freed; there
 
903
                                may or may not be a hash index to the page */
 
904
{
 
905
        ut_ad(mutex_own(&(buf_pool->mutex)));
 
906
        ut_ad(mutex_own(&block->mutex));
 
907
        ut_ad(block);
 
908
 
 
909
        ut_a(block->state == BUF_BLOCK_FILE_PAGE);
 
910
        ut_a(block->io_fix == 0);
 
911
        ut_a(block->buf_fix_count == 0);
 
912
        ut_a(ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) == 0);
 
913
 
 
914
        buf_LRU_remove_block(block);
 
915
 
 
916
        buf_pool->freed_page_clock += 1;
 
917
 
 
918
        /* Note that if AWE is enabled the block may not have a frame at all */
 
919
 
 
920
        buf_block_modify_clock_inc(block);
 
921
 
 
922
        if (block != buf_page_hash_get(block->space, block->offset)) {
 
923
                fprintf(stderr,
 
924
                        "InnoDB: Error: page %lu %lu not found"
 
925
                        " in the hash table\n",
 
926
                        (ulong) block->space,
 
927
                        (ulong) block->offset);
 
928
                if (buf_page_hash_get(block->space, block->offset)) {
 
929
                        fprintf(stderr,
 
930
                                "InnoDB: In hash table we find block"
 
931
                                " %p of %lu %lu which is not %p\n",
 
932
                                (void*) buf_page_hash_get
 
933
                                (block->space, block->offset),
 
934
                                (ulong) buf_page_hash_get
 
935
                                (block->space, block->offset)->space,
 
936
                                (ulong) buf_page_hash_get
 
937
                                (block->space, block->offset)->offset,
 
938
                                (void*) block);
 
939
                }
 
940
 
 
941
#ifdef UNIV_DEBUG
 
942
                buf_print();
 
943
                buf_LRU_print();
 
944
                buf_validate();
 
945
                buf_LRU_validate();
 
946
#endif
 
947
                ut_a(0);
 
948
        }
 
949
 
 
950
        HASH_DELETE(buf_block_t, hash, buf_pool->page_hash,
 
951
                    buf_page_address_fold(block->space, block->offset),
 
952
                    block);
 
953
 
 
954
        UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
 
955
        block->state = BUF_BLOCK_REMOVE_HASH;
 
956
}
 
957
 
 
958
/**********************************************************************
 
959
Puts a file page whose has no hash index to the free list. */
 
960
static
 
961
void
 
962
buf_LRU_block_free_hashed_page(
 
963
/*===========================*/
 
964
        buf_block_t*    block)  /* in: block, must contain a file page and
 
965
                                be in a state where it can be freed */
 
966
{
 
967
        ut_ad(mutex_own(&(buf_pool->mutex)));
 
968
        ut_ad(mutex_own(&block->mutex));
 
969
 
 
970
        ut_a(block->state == BUF_BLOCK_REMOVE_HASH);
 
971
 
 
972
        block->state = BUF_BLOCK_MEMORY;
 
973
 
 
974
        buf_LRU_block_free_non_file_page(block);
 
975
}
 
976
 
 
977
#ifdef UNIV_DEBUG
/**************************************************************************
Validates the LRU list. */

ibool
buf_LRU_validate(void)
/*==================*/
{
	buf_block_t*	b;
	ulint		old_len;
	ulint		expected;
	ulint		prev_pos;

	ut_ad(buf_pool);
	mutex_enter(&(buf_pool->mutex));

	if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {

		ut_a(buf_pool->LRU_old);
		old_len = buf_pool->LRU_old_len;
		/* The old segment should be about 3/8 of the whole list.
		NOTE(review): expected - BUF_LRU_OLD_TOLERANCE is unsigned
		arithmetic; presumably BUF_LRU_OLD_MIN_LEN guarantees
		expected >= BUF_LRU_OLD_TOLERANCE so no wrap occurs --
		confirm against the constant's definition */
		expected = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
		ut_a(old_len >= expected - BUF_LRU_OLD_TOLERANCE);
		ut_a(old_len <= expected + BUF_LRU_OLD_TOLERANCE);
	}

	UT_LIST_VALIDATE(LRU, buf_block_t, buf_pool->LRU);

	old_len = 0;

	for (b = UT_LIST_GET_FIRST(buf_pool->LRU); b != NULL; ) {
		buf_block_t*	next;

		ut_a(b->state == BUF_BLOCK_FILE_PAGE);

		if (b->old) {
			old_len++;
		}

		/* The first old block must be exactly where LRU_old
		points */
		if (buf_pool->LRU_old && (old_len == 1)) {
			ut_a(buf_pool->LRU_old == b);
		}

		prev_pos = b->LRU_position;

		next = UT_LIST_GET_NEXT(LRU, b);

		if (next) {
			/* If the following assert fails, it may
			not be an error: just the buf_pool clock
			has wrapped around */
			ut_a(prev_pos >= next->LRU_position);
		}

		b = next;
	}

	if (buf_pool->LRU_old) {
		ut_a(buf_pool->LRU_old_len == old_len);
	}

	UT_LIST_VALIDATE(free, buf_block_t, buf_pool->free);

	for (b = UT_LIST_GET_FIRST(buf_pool->free);
	     b != NULL;
	     b = UT_LIST_GET_NEXT(free, b)) {

		ut_a(b->state == BUF_BLOCK_NOT_USED);
	}

	mutex_exit(&(buf_pool->mutex));
	return(TRUE);
}
 
1049
 
 
1050
/**************************************************************************
 
1051
Prints the LRU list. */
 
1052
 
 
1053
void
 
1054
buf_LRU_print(void)
 
1055
/*===============*/
 
1056
{
 
1057
        buf_block_t*    block;
 
1058
        buf_frame_t*    frame;
 
1059
        ulint           len;
 
1060
 
 
1061
        ut_ad(buf_pool);
 
1062
        mutex_enter(&(buf_pool->mutex));
 
1063
 
 
1064
        fprintf(stderr, "Pool ulint clock %lu\n",
 
1065
                (ulong) buf_pool->ulint_clock);
 
1066
 
 
1067
        block = UT_LIST_GET_FIRST(buf_pool->LRU);
 
1068
 
 
1069
        len = 0;
 
1070
 
 
1071
        while (block != NULL) {
 
1072
 
 
1073
                fprintf(stderr, "BLOCK %lu ", (ulong) block->offset);
 
1074
 
 
1075
                if (block->old) {
 
1076
                        fputs("old ", stderr);
 
1077
                }
 
1078
 
 
1079
                if (block->buf_fix_count) {
 
1080
                        fprintf(stderr, "buffix count %lu ",
 
1081
                                (ulong) block->buf_fix_count);
 
1082
                }
 
1083
 
 
1084
                if (block->io_fix) {
 
1085
                        fprintf(stderr, "io_fix %lu ", (ulong) block->io_fix);
 
1086
                }
 
1087
 
 
1088
                if (ut_dulint_cmp(block->oldest_modification,
 
1089
                                  ut_dulint_zero) > 0) {
 
1090
                        fputs("modif. ", stderr);
 
1091
                }
 
1092
 
 
1093
                frame = buf_block_get_frame(block);
 
1094
 
 
1095
                fprintf(stderr, "LRU pos %lu type %lu index id %lu ",
 
1096
                        (ulong) block->LRU_position,
 
1097
                        (ulong) fil_page_get_type(frame),
 
1098
                        (ulong) ut_dulint_get_low
 
1099
                        (btr_page_get_index_id(frame)));
 
1100
 
 
1101
                block = UT_LIST_GET_NEXT(LRU, block);
 
1102
                if (++len == 10) {
 
1103
                        len = 0;
 
1104
                        putc('\n', stderr);
 
1105
                }
 
1106
        }
 
1107
 
 
1108
        mutex_exit(&(buf_pool->mutex));
 
1109
}
 
1110
#endif /* UNIV_DEBUG */