~ubuntu-branches/ubuntu/maverick/drizzle/maverick

« back to all changes in this revision

Viewing changes to plugin/innobase/buf/buf0buf.c

  • Committer: Bazaar Package Importer
  • Author(s): Monty Taylor
  • Date: 2010-03-18 12:12:31 UTC
  • Revision ID: james.westby@ubuntu.com-20100318121231-k6g1xe6cshbwa0f8
Tags: upstream-2010.03.1347
Import upstream version 2010.03.1347

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*****************************************************************************
 
2
 
 
3
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
 
4
Copyright (c) 2008, Google Inc.
 
5
 
 
6
Portions of this file contain modifications contributed and copyrighted by
 
7
Google, Inc. Those modifications are gratefully acknowledged and are described
 
8
briefly in the InnoDB documentation. The contributions by Google are
 
9
incorporated with their permission, and subject to the conditions contained in
 
10
the file COPYING.Google.
 
11
 
 
12
This program is free software; you can redistribute it and/or modify it under
 
13
the terms of the GNU General Public License as published by the Free Software
 
14
Foundation; version 2 of the License.
 
15
 
 
16
This program is distributed in the hope that it will be useful, but WITHOUT
 
17
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 
18
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 
19
 
 
20
You should have received a copy of the GNU General Public License along with
 
21
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 
22
Place, Suite 330, Boston, MA 02111-1307 USA
 
23
 
 
24
*****************************************************************************/
 
25
 
 
26
/**************************************************//**
 
27
@file buf/buf0buf.c
 
28
The database buffer buf_pool
 
29
 
 
30
Created 11/5/1995 Heikki Tuuri
 
31
*******************************************************/
 
32
 
 
33
#include "buf0buf.h"
 
34
 
 
35
#ifdef UNIV_NONINL
 
36
#include "buf0buf.ic"
 
37
#endif
 
38
 
 
39
#include "mem0mem.h"
 
40
#include "btr0btr.h"
 
41
#include "fil0fil.h"
 
42
#ifndef UNIV_HOTBACKUP
 
43
#include "buf0buddy.h"
 
44
#include "lock0lock.h"
 
45
#include "btr0sea.h"
 
46
#include "ibuf0ibuf.h"
 
47
#include "trx0undo.h"
 
48
#include "log0log.h"
 
49
#endif /* !UNIV_HOTBACKUP */
 
50
#include "srv0srv.h"
 
51
#include "dict0dict.h"
 
52
#include "log0recv.h"
 
53
#include "page0zip.h"
 
54
 
 
55
/*
 
56
                IMPLEMENTATION OF THE BUFFER POOL
 
57
                =================================
 
58
 
 
59
Performance improvement:
 
60
------------------------
 
61
Thread scheduling in NT may be so slow that the OS wait mechanism should
 
62
not be used even in waiting for disk reads to complete.
 
63
Rather, we should put waiting query threads to the queue of
 
64
waiting jobs, and let the OS thread do something useful while the i/o
 
65
is processed. In this way we could remove most OS thread switches in
 
66
an i/o-intensive benchmark like TPC-C.
 
67
 
 
68
A possibility is to put a user space thread library between the database
 
69
and NT. User space thread libraries might be very fast.
 
70
 
 
71
SQL Server 7.0 can be configured to use 'fibers' which are lightweight
 
72
threads in NT. These should be studied.
 
73
 
 
74
                Buffer frames and blocks
 
75
                ------------------------
 
76
Following the terminology of Gray and Reuter, we call the memory
 
77
blocks where file pages are loaded buffer frames. For each buffer
 
78
frame there is a control block, or shortly, a block, in the buffer
 
79
control array. The control info which does not need to be stored
 
80
in the file along with the file page, resides in the control block.
 
81
 
 
82
                Buffer pool struct
 
83
                ------------------
 
84
The buffer buf_pool contains a single mutex which protects all the
 
85
control data structures of the buf_pool. The content of a buffer frame is
 
86
protected by a separate read-write lock in its control block, though.
 
87
These locks can be locked and unlocked without owning the buf_pool mutex.
 
88
The OS events in the buf_pool struct can be waited for without owning the
 
89
buf_pool mutex.
 
90
 
 
91
The buf_pool mutex is a hot-spot in main memory, causing a lot of
 
92
memory bus traffic on multiprocessor systems when processors
 
93
alternately access the mutex. On our Pentium, the mutex is accessed
 
94
maybe every 10 microseconds. We gave up the solution to have mutexes
 
95
for each control block, for instance, because it seemed to be
 
96
complicated.
 
97
 
 
98
A solution to reduce mutex contention of the buf_pool mutex is to
 
99
create a separate mutex for the page hash table. On Pentium,
 
100
accessing the hash table takes 2 microseconds, about half
 
101
of the total buf_pool mutex hold time.
 
102
 
 
103
                Control blocks
 
104
                --------------
 
105
 
 
106
The control block contains, for instance, the bufferfix count
 
107
which is incremented when a thread wants a file page to be fixed
 
108
in a buffer frame. The bufferfix operation does not lock the
 
109
contents of the frame, however. For this purpose, the control
 
110
block contains a read-write lock.
 
111
 
 
112
The buffer frames have to be aligned so that the start memory
 
113
address of a frame is divisible by the universal page size, which
 
114
is a power of two.
 
115
 
 
116
We intend to make the buffer buf_pool size on-line reconfigurable,
 
117
that is, the buf_pool size can be changed without closing the database.
 
118
Then the database administrator may adjust it to be bigger
 
119
at night, for example. The control block array must
 
120
contain enough control blocks for the maximum buffer buf_pool size
 
121
which is used in the particular database.
 
122
If the buf_pool size is cut, we exploit the virtual memory mechanism of
 
123
the OS, and just refrain from using frames at high addresses. Then the OS
 
124
can swap them to disk.
 
125
 
 
126
The control blocks containing file pages are put to a hash table
 
127
according to the file address of the page.
 
128
We could speed up the access to an individual page by using
 
129
"pointer swizzling": we could replace the page references on
 
130
non-leaf index pages by direct pointers to the page, if it exists
 
131
in the buf_pool. We could make a separate hash table where we could
 
132
chain all the page references in non-leaf pages residing in the buf_pool,
 
133
using the page reference as the hash key,
 
134
and at the time of reading of a page update the pointers accordingly.
 
135
Drawbacks of this solution are added complexity and,
 
136
possibly, extra space required on non-leaf pages for memory pointers.
 
137
A simpler solution is just to speed up the hash table mechanism
 
138
in the database, using tables whose size is a power of 2.
 
139
 
 
140
                Lists of blocks
 
141
                ---------------
 
142
 
 
143
There are several lists of control blocks.
 
144
 
 
145
The free list (buf_pool->free) contains blocks which are currently not
 
146
used.
 
147
 
 
148
The common LRU list contains all the blocks holding a file page
 
149
except those for which the bufferfix count is non-zero.
 
150
The pages are in the LRU list roughly in the order of the last
 
151
access to the page, so that the oldest pages are at the end of the
 
152
list. We also keep a pointer to near the end of the LRU list,
 
153
which we can use when we want to artificially age a page in the
 
154
buf_pool. This is used if we know that some page is not needed
 
155
again for some time: we insert the block right after the pointer,
 
156
causing it to be replaced sooner than would normally be the case.
 
157
Currently this aging mechanism is used for read-ahead mechanism
 
158
of pages, and it can also be used when there is a scan of a full
 
159
table which cannot fit in the memory. Putting the pages near the
 
160
end of the LRU list, we make sure that most of the buf_pool stays in the
 
161
main memory, undisturbed.
 
162
 
 
163
The unzip_LRU list contains a subset of the common LRU list.  The
 
164
blocks on the unzip_LRU list hold a compressed file page and the
 
165
corresponding uncompressed page frame.  A block is in unzip_LRU if and
 
166
only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
 
167
holds.  The blocks in unzip_LRU will be in same order as they are in
 
168
the common LRU list.  That is, each manipulation of the common LRU
 
169
list will result in the same manipulation of the unzip_LRU list.
 
170
 
 
171
The chain of modified blocks (buf_pool->flush_list) contains the blocks
 
172
holding file pages that have been modified in the memory
 
173
but not written to disk yet. The block with the oldest modification
 
174
which has not yet been written to disk is at the end of the chain.
 
175
 
 
176
The chain of unmodified compressed blocks (buf_pool->zip_clean)
 
177
contains the control blocks (buf_page_t) of those compressed pages
 
178
that are not in buf_pool->flush_list and for which no uncompressed
 
179
page has been allocated in the buffer pool.  The control blocks for
 
180
uncompressed pages are accessible via buf_block_t objects that are
 
181
reachable via buf_pool->chunks[].
 
182
 
 
183
The chains of free memory blocks (buf_pool->zip_free[]) are used by
 
184
the buddy allocator (buf0buddy.c) to keep track of currently unused
 
185
memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2.  These
 
186
blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
 
187
BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
 
188
pool.  The buddy allocator is solely used for allocating control
 
189
blocks for compressed pages (buf_page_t) and compressed page frames.
 
190
 
 
191
                Loading a file page
 
192
                -------------------
 
193
 
 
194
First, a victim block for replacement has to be found in the
 
195
buf_pool. It is taken from the free list or searched for from the
 
196
end of the LRU-list. An exclusive lock is reserved for the frame,
 
197
the io_fix field is set in the block fixing the block in buf_pool,
 
198
and the io-operation for loading the page is queued. The io-handler thread
 
199
releases the X-lock on the frame and resets the io_fix field
 
200
when the io operation completes.
 
201
 
 
202
A thread may request the above operation using the function
 
203
buf_page_get(). It may then continue to request a lock on the frame.
 
204
The lock is granted when the io-handler releases the x-lock.
 
205
 
 
206
                Read-ahead
 
207
                ----------
 
208
 
 
209
The read-ahead mechanism is intended to be intelligent and
 
210
isolated from the semantically higher levels of the database
 
211
index management. From the higher level we only need the
 
212
information if a file page has a natural successor or
 
213
predecessor page. On the leaf level of a B-tree index,
 
214
these are the next and previous pages in the natural
 
215
order of the pages.
 
216
 
 
217
Let us first explain the read-ahead mechanism when the leafs
 
218
of a B-tree are scanned in an ascending or descending order.
 
219
When a read page is the first time referenced in the buf_pool,
 
220
the buffer manager checks if it is at the border of a so-called
 
221
linear read-ahead area. The tablespace is divided into these
 
222
areas of size 64 blocks, for example. So if the page is at the
 
223
border of such an area, the read-ahead mechanism checks if
 
224
all the other blocks in the area have been accessed in an
 
225
ascending or descending order. If this is the case, the system
 
226
looks at the natural successor or predecessor of the page,
 
227
checks if that is at the border of another area, and in this case
 
228
issues read-requests for all the pages in that area. Maybe
 
229
we could relax the condition that all the pages in the area
 
230
have to be accessed: if data is deleted from a table, there may
 
231
appear holes of unused pages in the area.
 
232
 
 
233
A different read-ahead mechanism is used when there appears
 
234
to be a random access pattern to a file.
 
235
If a new page is referenced in the buf_pool, and several pages
 
236
of its random access area (for instance, 32 consecutive pages
 
237
in a tablespace) have recently been referenced, we may predict
 
238
that the whole area may be needed in the near future, and issue
 
239
the read requests for the whole area.
 
240
*/
 
241
 
 
242
#ifndef UNIV_HOTBACKUP
/** Value in microseconds; how long a thread waits between checks
while a page read into the buffer pool is in progress */
static const int WAIT_FOR_READ	= 5000;

/** The buffer buf_pool of the database */
UNIV_INTERN buf_pool_t*	buf_pool = NULL;

/** mutex protecting the buffer pool struct and control blocks, except the
read-write lock in them */
UNIV_INTERN mutex_t		buf_pool_mutex;
/** mutex protecting the control blocks of compressed-only pages
(of type buf_page_t, not buf_block_t) */
UNIV_INTERN mutex_t		buf_pool_zip_mutex;

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
static ulint	buf_dbg_counter	= 0; /*!< This is used to insert validation
					operations in execution in the
					debug version */
/** Flag to forbid the release of the buffer pool mutex.
Protected by buf_pool_mutex. */
UNIV_INTERN ulint		buf_pool_mutex_exit_forbidden = 0;
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG
/** If this is set TRUE, the program prints info whenever
read-ahead or flush occurs */
UNIV_INTERN ibool		buf_debug_prints = FALSE;
#endif /* UNIV_DEBUG */

/** A chunk of buffers.  The buffer pool is allocated in chunks. */
struct buf_chunk_struct{
	ulint		mem_size;	/*!< allocated size of the chunk */
	ulint		size;		/*!< size of frames[] and blocks[] */
	void*		mem;		/*!< pointer to the memory area which
					was allocated for the frames */
	buf_block_t*	blocks;		/*!< array of buffer control blocks */
};
#endif /* !UNIV_HOTBACKUP */
 
279
 
 
280
/********************************************************************//**
 
281
Calculates a page checksum which is stored to the page when it is written
 
282
to a file. Note that we must be careful to calculate the same value on
 
283
32-bit and 64-bit architectures.
 
284
@return checksum */
 
285
UNIV_INTERN
 
286
ulint
 
287
buf_calc_page_new_checksum(
 
288
/*=======================*/
 
289
        const byte*     page)   /*!< in: buffer page */
 
290
{
 
291
        ulint checksum;
 
292
 
 
293
        /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
 
294
        ..._ARCH_LOG_NO, are written outside the buffer pool to the first
 
295
        pages of data files, we have to skip them in the page checksum
 
296
        calculation.
 
297
        We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
 
298
        checksum is stored, and also the last 8 bytes of page because
 
299
        there we store the old formula checksum. */
 
300
 
 
301
        checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
 
302
                                  FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
 
303
                + ut_fold_binary(page + FIL_PAGE_DATA,
 
304
                                 UNIV_PAGE_SIZE - FIL_PAGE_DATA
 
305
                                 - FIL_PAGE_END_LSN_OLD_CHKSUM);
 
306
        checksum = checksum & 0xFFFFFFFFUL;
 
307
 
 
308
        return(checksum);
 
309
}
 
310
 
 
311
/********************************************************************//**
 
312
In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
 
313
looked at the first few bytes of the page. This calculates that old
 
314
checksum.
 
315
NOTE: we must first store the new formula checksum to
 
316
FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
 
317
because this takes that field as an input!
 
318
@return checksum */
 
319
UNIV_INTERN
 
320
ulint
 
321
buf_calc_page_old_checksum(
 
322
/*=======================*/
 
323
        const byte*     page)   /*!< in: buffer page */
 
324
{
 
325
        ulint checksum;
 
326
 
 
327
        checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
 
328
 
 
329
        checksum = checksum & 0xFFFFFFFFUL;
 
330
 
 
331
        return(checksum);
 
332
}
 
333
 
 
334
/********************************************************************//**
Checks if a page is corrupt.
@return TRUE if corrupted */
UNIV_INTERN
ibool
buf_page_is_corrupted(
/*==================*/
	const byte*	read_buf,	/*!< in: a database page */
	ulint		zip_size)	/*!< in: size of compressed page;
					0 for uncompressed pages */
{
	ulint		checksum_field;
	ulint		old_checksum_field;

	/* Cheapest check first: an uncompressed page stores the low
	4 bytes of its lsn both in the header and in the trailer; a
	mismatch means the page write was torn or garbled. */
	if (UNIV_LIKELY(!zip_size)
	    && memcmp(read_buf + FIL_PAGE_LSN + 4,
		      read_buf + UNIV_PAGE_SIZE
		      - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {

		/* Stored log sequence numbers at the start and the end
		of page do not match */

		return(TRUE);
	}

#ifndef UNIV_HOTBACKUP
	if (recv_lsn_checks_on) {
		ib_uint64_t	current_lsn;

		/* A page lsn ahead of the current system lsn suggests
		the tablespace was copied without the matching log
		files.  This only warns; it does not mark the page
		corrupt, so execution falls through to the checksum
		checks below. */
		if (log_peek_lsn(&current_lsn)
		    && current_lsn < mach_read_ull(read_buf + FIL_PAGE_LSN)) {
			ut_print_timestamp(stderr);

			fprintf(stderr,
				"  InnoDB: Error: page %lu log sequence number"
				" %"PRIu64"\n"
				"InnoDB: is in the future! Current system "
				"log sequence number %"PRIu64".\n"
				"InnoDB: Your database may be corrupt or "
				"you may have copied the InnoDB\n"
				"InnoDB: tablespace but not the InnoDB "
				"log files. See\n"
				"InnoDB: " REFMAN "forcing-recovery.html\n"
				"InnoDB: for more information.\n",
				(ulong) mach_read_from_4(read_buf
							 + FIL_PAGE_OFFSET),
				mach_read_ull(read_buf + FIL_PAGE_LSN),
				current_lsn);
		}
	}
#endif

	/* If we use checksums validation, make additional check before
	returning TRUE to ensure that the checksum is not equal to
	BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
	disabled. Otherwise, skip checksum calculation and return FALSE */

	if (UNIV_LIKELY(srv_use_checksums)) {
		checksum_field = mach_read_from_4(read_buf
						  + FIL_PAGE_SPACE_OR_CHKSUM);

		/* Compressed pages carry only the single zip checksum;
		there is no old-formula trailer field to cross-check. */
		if (UNIV_UNLIKELY(zip_size)) {
			return(checksum_field != BUF_NO_CHECKSUM_MAGIC
			       && checksum_field
			       != page_zip_calc_checksum(read_buf, zip_size));
		}

		old_checksum_field = mach_read_from_4(
			read_buf + UNIV_PAGE_SIZE
			- FIL_PAGE_END_LSN_OLD_CHKSUM);

		/* There are 2 valid formulas for old_checksum_field:

		1. Very old versions of InnoDB only stored 8 byte lsn to the
		start and the end of the page.

		2. Newer InnoDB versions store the old formula checksum
		there. */

		if (old_checksum_field != mach_read_from_4(read_buf
							   + FIL_PAGE_LSN)
		    && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
		    && old_checksum_field
		    != buf_calc_page_old_checksum(read_buf)) {

			return(TRUE);
		}

		/* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
		(always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */

		if (checksum_field != 0
		    && checksum_field != BUF_NO_CHECKSUM_MAGIC
		    && checksum_field
		    != buf_calc_page_new_checksum(read_buf)) {

			return(TRUE);
		}
	}

	return(FALSE);
}
 
436
 
 
437
/********************************************************************//**
Prints a page to stderr. */
UNIV_INTERN
void
buf_page_print(
/*===========*/
	const byte*	read_buf,	/*!< in: a database page */
	ulint		zip_size)	/*!< in: compressed page size, or
				0 for uncompressed pages */
{
#ifndef UNIV_HOTBACKUP
	dict_index_t*	index;
#endif /* !UNIV_HOTBACKUP */
	ulint		checksum;
	ulint		old_checksum;
	ulint		size	= zip_size;

	if (!size) {
		size = UNIV_PAGE_SIZE;
	}

	/* Dump the raw page contents before any interpretation, so the
	bytes are preserved even if the decoding below is misleading. */
	ut_print_timestamp(stderr);
	fprintf(stderr, "  InnoDB: Page dump in ascii and hex (%lu bytes):\n",
		(ulong) size);
	ut_print_buf(stderr, read_buf, size);
	fputs("\nInnoDB: End of page dump\n", stderr);

	if (zip_size) {
		/* Print compressed page. */

		switch (fil_page_get_type(read_buf)) {
		case FIL_PAGE_TYPE_ZBLOB:
		case FIL_PAGE_TYPE_ZBLOB2:
			checksum = srv_use_checksums
				? page_zip_calc_checksum(read_buf, zip_size)
				: BUF_NO_CHECKSUM_MAGIC;
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: Compressed BLOB page"
				" checksum %lu, stored %lu\n"
				"InnoDB: Page lsn %lu %lu\n"
				"InnoDB: Page number (if stored"
				" to page already) %lu,\n"
				"InnoDB: space id (if stored"
				" to page already) %lu\n",
				(ulong) checksum,
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_LSN),
				(ulong) mach_read_from_4(
					read_buf + (FIL_PAGE_LSN + 4)),
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_OFFSET),
				(ulong) mach_read_from_4(
					read_buf
					+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
			/* Compressed BLOB pages are fully reported here;
			do not fall through to the uncompressed decoding. */
			return;
		default:
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: unknown page type %lu,"
				" assuming FIL_PAGE_INDEX\n",
				fil_page_get_type(read_buf));
			/* fall through */
		case FIL_PAGE_INDEX:
			checksum = srv_use_checksums
				? page_zip_calc_checksum(read_buf, zip_size)
				: BUF_NO_CHECKSUM_MAGIC;

			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: Compressed page checksum %lu,"
				" stored %lu\n"
				"InnoDB: Page lsn %lu %lu\n"
				"InnoDB: Page number (if stored"
				" to page already) %lu,\n"
				"InnoDB: space id (if stored"
				" to page already) %lu\n",
				(ulong) checksum,
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_LSN),
				(ulong) mach_read_from_4(
					read_buf + (FIL_PAGE_LSN + 4)),
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_OFFSET),
				(ulong) mach_read_from_4(
					read_buf
					+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
			return;
		case FIL_PAGE_TYPE_XDES:
			/* This is an uncompressed page. */
			break;
		}
	}

	/* Uncompressed page: report both the current-formula and the
	prior-to-4.0.14-formula checksums alongside the stored values. */
	checksum = srv_use_checksums
		? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
	old_checksum = srv_use_checksums
		? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;

	ut_print_timestamp(stderr);
	fprintf(stderr,
		"  InnoDB: Page checksum %lu, prior-to-4.0.14-form"
		" checksum %lu\n"
		"InnoDB: stored checksum %lu, prior-to-4.0.14-form"
		" stored checksum %lu\n"
		"InnoDB: Page lsn %lu %lu, low 4 bytes of lsn"
		" at page end %lu\n"
		"InnoDB: Page number (if stored to page already) %lu,\n"
		"InnoDB: space id (if created with >= MySQL-4.1.1"
		" and stored already) %lu\n",
		(ulong) checksum, (ulong) old_checksum,
		(ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
		(ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
					 - FIL_PAGE_END_LSN_OLD_CHKSUM),
		(ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN),
		(ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
		(ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
					 - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
		(ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
		(ulong) mach_read_from_4(read_buf
					 + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));

#ifndef UNIV_HOTBACKUP
	/* Heuristic: the undo-log page-type field may by chance contain
	these values on other page types, hence "may be". */
	if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
	    == TRX_UNDO_INSERT) {
		fprintf(stderr,
			"InnoDB: Page may be an insert undo log page\n");
	} else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
				    + TRX_UNDO_PAGE_TYPE)
		   == TRX_UNDO_UPDATE) {
		fprintf(stderr,
			"InnoDB: Page may be an update undo log page\n");
	}
#endif /* !UNIV_HOTBACKUP */

	/* Describe the page according to its stored type field. */
	switch (fil_page_get_type(read_buf)) {
	case FIL_PAGE_INDEX:
		fprintf(stderr,
			"InnoDB: Page may be an index page where"
			" index id is %lu %lu\n",
			(ulong) ut_dulint_get_high(
				btr_page_get_index_id(read_buf)),
			(ulong) ut_dulint_get_low(
				btr_page_get_index_id(read_buf)));
#ifndef UNIV_HOTBACKUP
		/* Also print the index name if the id resolves in the
		data dictionary cache. */
		index = dict_index_find_on_id_low(
			btr_page_get_index_id(read_buf));
		if (index) {
			fputs("InnoDB: (", stderr);
			dict_index_name_print(stderr, NULL, index);
			fputs(")\n", stderr);
		}
#endif /* !UNIV_HOTBACKUP */
		break;
	case FIL_PAGE_INODE:
		fputs("InnoDB: Page may be an 'inode' page\n", stderr);
		break;
	case FIL_PAGE_IBUF_FREE_LIST:
		fputs("InnoDB: Page may be an insert buffer free list page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_ALLOCATED:
		fputs("InnoDB: Page may be a freshly allocated page\n",
		      stderr);
		break;
	case FIL_PAGE_IBUF_BITMAP:
		fputs("InnoDB: Page may be an insert buffer bitmap page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_SYS:
		fputs("InnoDB: Page may be a system page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_TRX_SYS:
		fputs("InnoDB: Page may be a transaction system page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_FSP_HDR:
		fputs("InnoDB: Page may be a file space header page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_XDES:
		fputs("InnoDB: Page may be an extent descriptor page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_BLOB:
		fputs("InnoDB: Page may be a BLOB page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_ZBLOB:
	case FIL_PAGE_TYPE_ZBLOB2:
		fputs("InnoDB: Page may be a compressed BLOB page\n",
		      stderr);
		break;
	}
}
 
637
 
 
638
#ifndef UNIV_HOTBACKUP
 
639
/********************************************************************//**
Initializes a buffer control block when the buf_pool is created. */
static
void
buf_block_init(
/*===========*/
        buf_block_t*    block,  /*!< in: pointer to control block */
        byte*           frame)  /*!< in: pointer to buffer frame */
{
        /* Tell the memory-debugging facilities that this frame's
        memory belongs to this control block. */
        UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);

        block->frame = frame;

        /* The block starts out free: not mapped to any file page,
        not buffer-fixed, and with no I/O in progress. */
        block->page.state = BUF_BLOCK_NOT_USED;
        block->page.buf_fix_count = 0;
        block->page.io_fix = BUF_IO_NONE;

        block->modify_clock = 0;

#ifdef UNIV_DEBUG_FILE_ACCESSES
        block->page.file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */

        block->check_index_page_at_flush = FALSE;
        block->index = NULL;

#ifdef UNIV_DEBUG
        /* Debug-only membership flags: the new block is not yet on
        any buffer pool hash table or list. */
        block->page.in_page_hash = FALSE;
        block->page.in_zip_hash = FALSE;
        block->page.in_flush_list = FALSE;
        block->page.in_free_list = FALSE;
        block->page.in_LRU_list = FALSE;
        block->in_unzip_LRU_list = FALSE;
#endif /* UNIV_DEBUG */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
        /* No adaptive hash index entries point into this block yet. */
        block->n_pointers = 0;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
        page_zip_des_init(&block->page.zip);

        /* Create the per-block latches last, after all plain fields
        are initialized. */
        mutex_create(&block->mutex, SYNC_BUF_BLOCK);

        rw_lock_create(&block->lock, SYNC_LEVEL_VARYING);
        ut_ad(rw_lock_validate(&(block->lock)));

#ifdef UNIV_SYNC_DEBUG
        rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
}
 
687
 
 
688
/********************************************************************//**
Allocates a chunk of buffer frames.  The chunk memory holds the block
descriptor array at its start, followed by the page-aligned frames.
@return chunk, or NULL on failure */
static
buf_chunk_t*
buf_chunk_init(
/*===========*/
        buf_chunk_t*    chunk,          /*!< out: chunk of buffers */
        ulint           mem_size)       /*!< in: requested size in bytes */
{
        buf_block_t*    block;
        byte*           frame;
        ulint           i;

        /* Round down to a multiple of page size,
        although it already should be. */
        mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
        /* Reserve space for the block descriptors. */
        mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
                                  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);

        /* os_mem_alloc_large() may adjust chunk->mem_size
        (e.g. rounding to the large page size). */
        chunk->mem_size = mem_size;
        chunk->mem = os_mem_alloc_large(&chunk->mem_size);

        if (UNIV_UNLIKELY(chunk->mem == NULL)) {

                return(NULL);
        }

        /* Allocate the block descriptors from
        the start of the memory block. */
        chunk->blocks = chunk->mem;

        /* Align a pointer to the first frame.  Note that when
        os_large_page_size is smaller than UNIV_PAGE_SIZE,
        we may allocate one fewer block than requested.  When
        it is bigger, we may allocate more blocks than requested. */

        frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
        chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
                - (frame != chunk->mem);

        /* Subtract the space needed for block descriptors. */
        {
                ulint   size = chunk->size;

                /* Advance the first frame past the descriptor array,
                one page at a time; each skipped page costs one block. */
                while (frame < (byte*) (chunk->blocks + size)) {
                        frame += UNIV_PAGE_SIZE;
                        size--;
                }

                chunk->size = size;
        }

        /* Init block structs and assign frames for them. Then we
        assign the frames to the first blocks (we already mapped the
        memory above). */

        block = chunk->blocks;

        for (i = chunk->size; i--; ) {

                buf_block_init(block, frame);

#ifdef HAVE_purify
                /* Wipe contents of frame to eliminate a Purify warning */
                memset(block->frame, '\0', UNIV_PAGE_SIZE);
#endif
                /* Add the block to the free list */
                UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
                ut_d(block->page.in_free_list = TRUE);

                block++;
                frame += UNIV_PAGE_SIZE;
        }

        return(chunk);
}
 
766
 
 
767
#ifdef UNIV_DEBUG
 
768
/*********************************************************************//**
 
769
Finds a block in the given buffer chunk that points to a
 
770
given compressed page.
 
771
@return buffer block pointing to the compressed page, or NULL */
 
772
static
 
773
buf_block_t*
 
774
buf_chunk_contains_zip(
 
775
/*===================*/
 
776
        buf_chunk_t*    chunk,  /*!< in: chunk being checked */
 
777
        const void*     data)   /*!< in: pointer to compressed page */
 
778
{
 
779
        buf_block_t*    block;
 
780
        ulint           i;
 
781
 
 
782
        ut_ad(buf_pool);
 
783
        ut_ad(buf_pool_mutex_own());
 
784
 
 
785
        block = chunk->blocks;
 
786
 
 
787
        for (i = chunk->size; i--; block++) {
 
788
                if (block->page.zip.data == data) {
 
789
 
 
790
                        return(block);
 
791
                }
 
792
        }
 
793
 
 
794
        return(NULL);
 
795
}
 
796
 
 
797
/*********************************************************************//**
 
798
Finds a block in the buffer pool that points to a
 
799
given compressed page.
 
800
@return buffer block pointing to the compressed page, or NULL */
 
801
UNIV_INTERN
 
802
buf_block_t*
 
803
buf_pool_contains_zip(
 
804
/*==================*/
 
805
        const void*     data)   /*!< in: pointer to compressed page */
 
806
{
 
807
        ulint           n;
 
808
        buf_chunk_t*    chunk = buf_pool->chunks;
 
809
 
 
810
        for (n = buf_pool->n_chunks; n--; chunk++) {
 
811
                buf_block_t* block = buf_chunk_contains_zip(chunk, data);
 
812
 
 
813
                if (block) {
 
814
                        return(block);
 
815
                }
 
816
        }
 
817
 
 
818
        return(NULL);
 
819
}
 
820
#endif /* UNIV_DEBUG */
 
821
 
 
822
/*********************************************************************//**
 
823
Checks that all file pages in the buffer chunk are in a replaceable state.
 
824
@return address of a non-free block, or NULL if all freed */
 
825
static
 
826
const buf_block_t*
 
827
buf_chunk_not_freed(
 
828
/*================*/
 
829
        buf_chunk_t*    chunk)  /*!< in: chunk being checked */
 
830
{
 
831
        buf_block_t*    block;
 
832
        ulint           i;
 
833
 
 
834
        ut_ad(buf_pool);
 
835
        ut_ad(buf_pool_mutex_own());
 
836
 
 
837
        block = chunk->blocks;
 
838
 
 
839
        for (i = chunk->size; i--; block++) {
 
840
                mutex_enter(&block->mutex);
 
841
 
 
842
                if (buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
 
843
                    && !buf_flush_ready_for_replace(&block->page)) {
 
844
 
 
845
                        mutex_exit(&block->mutex);
 
846
                        return(block);
 
847
                }
 
848
 
 
849
                mutex_exit(&block->mutex);
 
850
        }
 
851
 
 
852
        return(NULL);
 
853
}
 
854
 
 
855
/*********************************************************************//**
 
856
Checks that all blocks in the buffer chunk are in BUF_BLOCK_NOT_USED state.
 
857
@return TRUE if all freed */
 
858
static
 
859
ibool
 
860
buf_chunk_all_free(
 
861
/*===============*/
 
862
        const buf_chunk_t*      chunk)  /*!< in: chunk being checked */
 
863
{
 
864
        const buf_block_t*      block;
 
865
        ulint                   i;
 
866
 
 
867
        ut_ad(buf_pool);
 
868
        ut_ad(buf_pool_mutex_own());
 
869
 
 
870
        block = chunk->blocks;
 
871
 
 
872
        for (i = chunk->size; i--; block++) {
 
873
 
 
874
                if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {
 
875
 
 
876
                        return(FALSE);
 
877
                }
 
878
        }
 
879
 
 
880
        return(TRUE);
 
881
}
 
882
 
 
883
/********************************************************************//**
 
884
Frees a chunk of buffer frames. */
 
885
static
 
886
void
 
887
buf_chunk_free(
 
888
/*===========*/
 
889
        buf_chunk_t*    chunk)          /*!< out: chunk of buffers */
 
890
{
 
891
        buf_block_t*            block;
 
892
        const buf_block_t*      block_end;
 
893
 
 
894
        ut_ad(buf_pool_mutex_own());
 
895
 
 
896
        block_end = chunk->blocks + chunk->size;
 
897
 
 
898
        for (block = chunk->blocks; block < block_end; block++) {
 
899
                ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
 
900
                ut_a(!block->page.zip.data);
 
901
 
 
902
                ut_ad(!block->page.in_LRU_list);
 
903
                ut_ad(!block->in_unzip_LRU_list);
 
904
                ut_ad(!block->page.in_flush_list);
 
905
                /* Remove the block from the free list. */
 
906
                ut_ad(block->page.in_free_list);
 
907
                UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
 
908
 
 
909
                /* Free the latches. */
 
910
                mutex_free(&block->mutex);
 
911
                rw_lock_free(&block->lock);
 
912
#ifdef UNIV_SYNC_DEBUG
 
913
                rw_lock_free(&block->debug_latch);
 
914
#endif /* UNIV_SYNC_DEBUG */
 
915
                UNIV_MEM_UNDESC(block);
 
916
        }
 
917
 
 
918
        os_mem_free_large(chunk->mem, chunk->mem_size);
 
919
}
 
920
 
 
921
/********************************************************************//**
 
922
Creates the buffer pool.
 
923
@return own: buf_pool object, NULL if not enough memory or error */
 
924
UNIV_INTERN
 
925
buf_pool_t*
 
926
buf_pool_init(void)
 
927
/*===============*/
 
928
{
 
929
        buf_chunk_t*    chunk;
 
930
        ulint           i;
 
931
 
 
932
        buf_pool = mem_zalloc(sizeof(buf_pool_t));
 
933
 
 
934
        /* 1. Initialize general fields
 
935
        ------------------------------- */
 
936
        mutex_create(&buf_pool_mutex, SYNC_BUF_POOL);
 
937
        mutex_create(&buf_pool_zip_mutex, SYNC_BUF_BLOCK);
 
938
 
 
939
        buf_pool_mutex_enter();
 
940
 
 
941
        buf_pool->n_chunks = 1;
 
942
        buf_pool->chunks = chunk = mem_alloc(sizeof *chunk);
 
943
 
 
944
        UT_LIST_INIT(buf_pool->free);
 
945
 
 
946
        if (!buf_chunk_init(chunk, srv_buf_pool_size)) {
 
947
                mem_free(chunk);
 
948
                mem_free(buf_pool);
 
949
                buf_pool = NULL;
 
950
                return(NULL);
 
951
        }
 
952
 
 
953
        srv_buf_pool_old_size = srv_buf_pool_size;
 
954
        buf_pool->curr_size = chunk->size;
 
955
        srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
 
956
 
 
957
        buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
 
958
        buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
 
959
 
 
960
        buf_pool->last_printout_time = time(NULL);
 
961
 
 
962
        /* 2. Initialize flushing fields
 
963
        -------------------------------- */
 
964
 
 
965
        for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
 
966
                buf_pool->no_flush[i] = os_event_create(NULL);
 
967
        }
 
968
 
 
969
        buf_pool->ulint_clock = 1;
 
970
 
 
971
        /* 3. Initialize LRU fields
 
972
        --------------------------- */
 
973
        /* All fields are initialized by mem_zalloc(). */
 
974
 
 
975
        buf_pool_mutex_exit();
 
976
 
 
977
        btr_search_sys_create(buf_pool->curr_size
 
978
                              * UNIV_PAGE_SIZE / sizeof(void*) / 64);
 
979
 
 
980
        /* 4. Initialize the buddy allocator fields */
 
981
        /* All fields are initialized by mem_zalloc(). */
 
982
 
 
983
        return(buf_pool);
 
984
}
 
985
 
 
986
/********************************************************************//**
 
987
Frees the buffer pool at shutdown.  This must not be invoked before
 
988
freeing all mutexes. */
 
989
UNIV_INTERN
 
990
void
 
991
buf_pool_free(void)
 
992
/*===============*/
 
993
{
 
994
        buf_chunk_t*    chunk;
 
995
        buf_chunk_t*    chunks;
 
996
 
 
997
        chunks = buf_pool->chunks;
 
998
        chunk = chunks + buf_pool->n_chunks;
 
999
 
 
1000
        while (--chunk >= chunks) {
 
1001
                /* Bypass the checks of buf_chunk_free(), since they
 
1002
                would fail at shutdown. */
 
1003
                os_mem_free_large(chunk->mem, chunk->mem_size);
 
1004
        }
 
1005
 
 
1006
        buf_pool->n_chunks = 0;
 
1007
}
 
1008
 
 
1009
/********************************************************************//**
Drops the adaptive hash index.  To prevent a livelock, this function
is only to be called while holding btr_search_latch and while
btr_search_enabled == FALSE. */
UNIV_INTERN
void
buf_pool_drop_hash_index(void)
/*==========================*/
{
        ibool           released_search_latch;

#ifdef UNIV_SYNC_DEBUG
        ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
        ut_ad(!btr_search_enabled);

        /* Keep rescanning the whole pool until one full pass completes
        without btr_search_latch ever having been released; only then
        can no block have become hashed again behind our back. */
        do {
                buf_chunk_t*    chunks  = buf_pool->chunks;
                buf_chunk_t*    chunk   = chunks + buf_pool->n_chunks;

                released_search_latch = FALSE;

                while (--chunk >= chunks) {
                        buf_block_t*    block   = chunk->blocks;
                        ulint           i       = chunk->size;

                        for (; i--; block++) {
                                /* block->is_hashed cannot be modified
                                when we have an x-latch on btr_search_latch;
                                see the comment in buf0buf.h */

                                if (!block->is_hashed) {
                                        continue;
                                }

                                /* To follow the latching order, we
                                have to release btr_search_latch
                                before acquiring block->latch. */
                                rw_lock_x_unlock(&btr_search_latch);
                                /* When we release the search latch,
                                we must rescan all blocks, because
                                some may become hashed again. */
                                released_search_latch = TRUE;

                                rw_lock_x_lock(&block->lock);

                                /* This should be guaranteed by the
                                callers, which will be holding
                                btr_search_enabled_mutex. */
                                ut_ad(!btr_search_enabled);

                                /* Because we did not buffer-fix the
                                block by calling buf_block_get_gen(),
                                it is possible that the block has been
                                allocated for some other use after
                                btr_search_latch was released above.
                                We do not care which file page the
                                block is mapped to.  All we want to do
                                is to drop any hash entries referring
                                to the page. */

                                /* It is possible that
                                block->page.state != BUF_FILE_PAGE.
                                Even that does not matter, because
                                btr_search_drop_page_hash_index() will
                                check block->is_hashed before doing
                                anything.  block->is_hashed can only
                                be set on uncompressed file pages. */

                                btr_search_drop_page_hash_index(block);

                                rw_lock_x_unlock(&block->lock);

                                /* Re-acquire the search latch before
                                continuing the scan. */
                                rw_lock_x_lock(&btr_search_latch);

                                ut_ad(!btr_search_enabled);
                        }
                }
        } while (released_search_latch);
}
 
1089
 
 
1090
/********************************************************************//**
Relocate a buffer control block.  Relocates the block on the LRU list
and in buf_pool->page_hash.  Does not relocate bpage->list.
The caller must take care of relocating bpage->list. */
UNIV_INTERN
void
buf_relocate(
/*=========*/
        buf_page_t*     bpage,  /*!< in/out: control block being relocated;
                                buf_page_get_state(bpage) must be
                                BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
        buf_page_t*     dpage)  /*!< in/out: destination control block */
{
        buf_page_t*     b;
        ulint           fold;

        /* The caller must hold the buffer pool mutex and the block
        mutex of bpage; the block must not be fixed or under I/O. */
        ut_ad(buf_pool_mutex_own());
        ut_ad(mutex_own(buf_page_get_mutex(bpage)));
        ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
        ut_a(bpage->buf_fix_count == 0);
        ut_ad(bpage->in_LRU_list);
        ut_ad(!bpage->in_zip_hash);
        ut_ad(bpage->in_page_hash);
        ut_ad(bpage == buf_page_hash_get(bpage->space, bpage->offset));
#ifdef UNIV_DEBUG
        /* Only compressed-only pages may be relocated. */
        switch (buf_page_get_state(bpage)) {
        case BUF_BLOCK_ZIP_FREE:
        case BUF_BLOCK_NOT_USED:
        case BUF_BLOCK_READY_FOR_USE:
        case BUF_BLOCK_FILE_PAGE:
        case BUF_BLOCK_MEMORY:
        case BUF_BLOCK_REMOVE_HASH:
                ut_error;
        case BUF_BLOCK_ZIP_DIRTY:
        case BUF_BLOCK_ZIP_PAGE:
                break;
        }
#endif /* UNIV_DEBUG */

        /* Clone the whole descriptor into the destination. */
        memcpy(dpage, bpage, sizeof *dpage);

        ut_d(bpage->in_LRU_list = FALSE);
        ut_d(bpage->in_page_hash = FALSE);

        /* relocate buf_pool->LRU: splice dpage into the exact LRU
        position that bpage occupied. */
        b = UT_LIST_GET_PREV(LRU, bpage);
        UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);

        if (b) {
                UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
        } else {
                UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
        }

        if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
                buf_pool->LRU_old = dpage;
#ifdef UNIV_LRU_DEBUG
                /* buf_pool->LRU_old must be the first item in the LRU list
                whose "old" flag is set. */
                ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
                     || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
                ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
                     || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
#endif /* UNIV_LRU_DEBUG */
        }

        ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU,
                              ut_ad(ut_list_node_313->in_LRU_list)));

        /* relocate buf_pool->page_hash: remove the old descriptor and
        insert the new one under the same fold value. */
        fold = buf_page_address_fold(bpage->space, bpage->offset);

        HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
        HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);

        /* The old descriptor must no longer be read. */
        UNIV_MEM_INVALID(bpage, sizeof *bpage);
}
 
1167
 
 
1168
/********************************************************************//**
 
1169
Shrinks the buffer pool. */
 
1170
static
 
1171
void
 
1172
buf_pool_shrink(
 
1173
/*============*/
 
1174
        ulint   chunk_size)     /*!< in: number of pages to remove */
 
1175
{
 
1176
        buf_chunk_t*    chunks;
 
1177
        buf_chunk_t*    chunk;
 
1178
        ulint           max_size;
 
1179
        ulint           max_free_size;
 
1180
        buf_chunk_t*    max_chunk;
 
1181
        buf_chunk_t*    max_free_chunk;
 
1182
 
 
1183
        ut_ad(!buf_pool_mutex_own());
 
1184
 
 
1185
try_again:
 
1186
        btr_search_disable(); /* Empty the adaptive hash index again */
 
1187
        buf_pool_mutex_enter();
 
1188
 
 
1189
shrink_again:
 
1190
        if (buf_pool->n_chunks <= 1) {
 
1191
 
 
1192
                /* Cannot shrink if there is only one chunk */
 
1193
                goto func_done;
 
1194
        }
 
1195
 
 
1196
        /* Search for the largest free chunk
 
1197
        not larger than the size difference */
 
1198
        chunks = buf_pool->chunks;
 
1199
        chunk = chunks + buf_pool->n_chunks;
 
1200
        max_size = max_free_size = 0;
 
1201
        max_chunk = max_free_chunk = NULL;
 
1202
 
 
1203
        while (--chunk >= chunks) {
 
1204
                if (chunk->size <= chunk_size
 
1205
                    && chunk->size > max_free_size) {
 
1206
                        if (chunk->size > max_size) {
 
1207
                                max_size = chunk->size;
 
1208
                                max_chunk = chunk;
 
1209
                        }
 
1210
 
 
1211
                        if (buf_chunk_all_free(chunk)) {
 
1212
                                max_free_size = chunk->size;
 
1213
                                max_free_chunk = chunk;
 
1214
                        }
 
1215
                }
 
1216
        }
 
1217
 
 
1218
        if (!max_free_size) {
 
1219
 
 
1220
                ulint           dirty   = 0;
 
1221
                ulint           nonfree = 0;
 
1222
                buf_block_t*    block;
 
1223
                buf_block_t*    bend;
 
1224
 
 
1225
                /* Cannot shrink: try again later
 
1226
                (do not assign srv_buf_pool_old_size) */
 
1227
                if (!max_chunk) {
 
1228
 
 
1229
                        goto func_exit;
 
1230
                }
 
1231
 
 
1232
                block = max_chunk->blocks;
 
1233
                bend = block + max_chunk->size;
 
1234
 
 
1235
                /* Move the blocks of chunk to the end of the
 
1236
                LRU list and try to flush them. */
 
1237
                for (; block < bend; block++) {
 
1238
                        switch (buf_block_get_state(block)) {
 
1239
                        case BUF_BLOCK_NOT_USED:
 
1240
                                continue;
 
1241
                        case BUF_BLOCK_FILE_PAGE:
 
1242
                                break;
 
1243
                        default:
 
1244
                                nonfree++;
 
1245
                                continue;
 
1246
                        }
 
1247
 
 
1248
                        mutex_enter(&block->mutex);
 
1249
                        /* The following calls will temporarily
 
1250
                        release block->mutex and buf_pool_mutex.
 
1251
                        Therefore, we have to always retry,
 
1252
                        even if !dirty && !nonfree. */
 
1253
 
 
1254
                        if (!buf_flush_ready_for_replace(&block->page)) {
 
1255
 
 
1256
                                buf_LRU_make_block_old(&block->page);
 
1257
                                dirty++;
 
1258
                        } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
 
1259
                                   != BUF_LRU_FREED) {
 
1260
                                nonfree++;
 
1261
                        }
 
1262
 
 
1263
                        mutex_exit(&block->mutex);
 
1264
                }
 
1265
 
 
1266
                buf_pool_mutex_exit();
 
1267
 
 
1268
                /* Request for a flush of the chunk if it helps.
 
1269
                Do not flush if there are non-free blocks, since
 
1270
                flushing will not make the chunk freeable. */
 
1271
                if (nonfree) {
 
1272
                        /* Avoid busy-waiting. */
 
1273
                        os_thread_sleep(100000);
 
1274
                } else if (dirty
 
1275
                           && buf_flush_batch(BUF_FLUSH_LRU, dirty, 0)
 
1276
                           == ULINT_UNDEFINED) {
 
1277
 
 
1278
                        buf_flush_wait_batch_end(BUF_FLUSH_LRU);
 
1279
                }
 
1280
 
 
1281
                goto try_again;
 
1282
        }
 
1283
 
 
1284
        max_size = max_free_size;
 
1285
        max_chunk = max_free_chunk;
 
1286
 
 
1287
        srv_buf_pool_old_size = srv_buf_pool_size;
 
1288
 
 
1289
        /* Rewrite buf_pool->chunks.  Copy everything but max_chunk. */
 
1290
        chunks = mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks);
 
1291
        memcpy(chunks, buf_pool->chunks,
 
1292
               (max_chunk - buf_pool->chunks) * sizeof *chunks);
 
1293
        memcpy(chunks + (max_chunk - buf_pool->chunks),
 
1294
               max_chunk + 1,
 
1295
               buf_pool->chunks + buf_pool->n_chunks
 
1296
               - (max_chunk + 1));
 
1297
        ut_a(buf_pool->curr_size > max_chunk->size);
 
1298
        buf_pool->curr_size -= max_chunk->size;
 
1299
        srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
 
1300
        chunk_size -= max_chunk->size;
 
1301
        buf_chunk_free(max_chunk);
 
1302
        mem_free(buf_pool->chunks);
 
1303
        buf_pool->chunks = chunks;
 
1304
        buf_pool->n_chunks--;
 
1305
 
 
1306
        /* Allow a slack of one megabyte. */
 
1307
        if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {
 
1308
 
 
1309
                goto shrink_again;
 
1310
        }
 
1311
 
 
1312
func_done:
 
1313
        srv_buf_pool_old_size = srv_buf_pool_size;
 
1314
func_exit:
 
1315
        buf_pool_mutex_exit();
 
1316
        btr_search_enable();
 
1317
}
 
1318
 
 
1319
/********************************************************************//**
Rebuild buf_pool->page_hash. */
static
void
buf_pool_page_hash_rebuild(void)
/*============================*/
{
        ulint           i;
        ulint           n_chunks;
        buf_chunk_t*    chunk;
        hash_table_t*   page_hash;
        hash_table_t*   zip_hash;
        buf_page_t*     b;

        buf_pool_mutex_enter();

        /* Free, create, and populate the hash table. */
        hash_table_free(buf_pool->page_hash);
        buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
        zip_hash = hash_create(2 * buf_pool->curr_size);

        /* Migrate the zip_hash entries into a freshly created table
        sized for the current pool. */
        HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
                     BUF_POOL_ZIP_FOLD_BPAGE);

        hash_table_free(buf_pool->zip_hash);
        buf_pool->zip_hash = zip_hash;

        /* Insert the uncompressed file pages to buf_pool->page_hash. */

        chunk = buf_pool->chunks;
        n_chunks = buf_pool->n_chunks;

        for (i = 0; i < n_chunks; i++, chunk++) {
                ulint           j;
                buf_block_t*    block = chunk->blocks;

                for (j = 0; j < chunk->size; j++, block++) {
                        if (buf_block_get_state(block)
                            == BUF_BLOCK_FILE_PAGE) {
                                ut_ad(!block->page.in_zip_hash);
                                ut_ad(block->page.in_page_hash);

                                HASH_INSERT(buf_page_t, hash, page_hash,
                                            buf_page_address_fold(
                                                    block->page.space,
                                                    block->page.offset),
                                            &block->page);
                        }
                }
        }

        /* Insert the compressed-only pages to buf_pool->page_hash.
        All such blocks are either in buf_pool->zip_clean or
        in buf_pool->flush_list. */

        for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
             b = UT_LIST_GET_NEXT(list, b)) {
                ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
                ut_ad(!b->in_flush_list);
                ut_ad(b->in_LRU_list);
                ut_ad(b->in_page_hash);
                ut_ad(!b->in_zip_hash);

                HASH_INSERT(buf_page_t, hash, page_hash,
                            buf_page_address_fold(b->space, b->offset), b);
        }

        /* The flush list may contain both compressed-only dirty pages
        (to be inserted here) and uncompressed file pages (already
        inserted in the chunk scan above). */
        for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
             b = UT_LIST_GET_NEXT(list, b)) {
                ut_ad(b->in_flush_list);
                ut_ad(b->in_LRU_list);
                ut_ad(b->in_page_hash);
                ut_ad(!b->in_zip_hash);

                switch (buf_page_get_state(b)) {
                case BUF_BLOCK_ZIP_DIRTY:
                        HASH_INSERT(buf_page_t, hash, page_hash,
                                    buf_page_address_fold(b->space,
                                                          b->offset), b);
                        break;
                case BUF_BLOCK_FILE_PAGE:
                        /* uncompressed page */
                        break;
                case BUF_BLOCK_ZIP_FREE:
                case BUF_BLOCK_ZIP_PAGE:
                case BUF_BLOCK_NOT_USED:
                case BUF_BLOCK_READY_FOR_USE:
                case BUF_BLOCK_MEMORY:
                case BUF_BLOCK_REMOVE_HASH:
                        /* These states cannot appear on the flush list. */
                        ut_error;
                        break;
                }
        }

        buf_pool_mutex_exit();
}
 
1415
 
 
1416
/********************************************************************//**
 
1417
Resizes the buffer pool. */
 
1418
UNIV_INTERN
 
1419
void
 
1420
buf_pool_resize(void)
 
1421
/*=================*/
 
1422
{
 
1423
        buf_pool_mutex_enter();
 
1424
 
 
1425
        if (srv_buf_pool_old_size == srv_buf_pool_size) {
 
1426
 
 
1427
                buf_pool_mutex_exit();
 
1428
                return;
 
1429
        }
 
1430
 
 
1431
        if (srv_buf_pool_curr_size + 1048576 > srv_buf_pool_size) {
 
1432
 
 
1433
                buf_pool_mutex_exit();
 
1434
 
 
1435
                /* Disable adaptive hash indexes and empty the index
 
1436
                in order to free up memory in the buffer pool chunks. */
 
1437
                buf_pool_shrink((srv_buf_pool_curr_size - srv_buf_pool_size)
 
1438
                                / UNIV_PAGE_SIZE);
 
1439
        } else if (srv_buf_pool_curr_size + 1048576 < srv_buf_pool_size) {
 
1440
 
 
1441
                /* Enlarge the buffer pool by at least one megabyte */
 
1442
 
 
1443
                ulint           mem_size
 
1444
                        = srv_buf_pool_size - srv_buf_pool_curr_size;
 
1445
                buf_chunk_t*    chunks;
 
1446
                buf_chunk_t*    chunk;
 
1447
 
 
1448
                chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
 
1449
 
 
1450
                memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks
 
1451
                       * sizeof *chunks);
 
1452
 
 
1453
                chunk = &chunks[buf_pool->n_chunks];
 
1454
 
 
1455
                if (!buf_chunk_init(chunk, mem_size)) {
 
1456
                        mem_free(chunks);
 
1457
                } else {
 
1458
                        buf_pool->curr_size += chunk->size;
 
1459
                        srv_buf_pool_curr_size = buf_pool->curr_size
 
1460
                                * UNIV_PAGE_SIZE;
 
1461
                        mem_free(buf_pool->chunks);
 
1462
                        buf_pool->chunks = chunks;
 
1463
                        buf_pool->n_chunks++;
 
1464
                }
 
1465
 
 
1466
                srv_buf_pool_old_size = srv_buf_pool_size;
 
1467
                buf_pool_mutex_exit();
 
1468
        }
 
1469
 
 
1470
        buf_pool_page_hash_rebuild();
 
1471
}
 
1472
 
 
1473
/********************************************************************//**
 
1474
Moves the block to the start of the LRU list if there is a danger
 
1475
that the block would drift out of the buffer pool. */
 
1476
UNIV_INLINE
 
1477
void
 
1478
buf_block_make_young(
 
1479
/*=================*/
 
1480
        buf_page_t*     bpage)  /*!< in: block to make younger */
 
1481
{
 
1482
        ut_ad(!buf_pool_mutex_own());
 
1483
 
 
1484
        /* Note that we read freed_page_clock's without holding any mutex:
 
1485
        this is allowed since the result is used only in heuristics */
 
1486
 
 
1487
        if (buf_page_peek_if_too_old(bpage)) {
 
1488
 
 
1489
                buf_pool_mutex_enter();
 
1490
                /* There has been freeing activity in the LRU list:
 
1491
                best to move to the head of the LRU list */
 
1492
 
 
1493
                buf_LRU_make_block_young(bpage);
 
1494
                buf_pool_mutex_exit();
 
1495
        }
 
1496
}
 
1497
 
 
1498
/********************************************************************//**
 
1499
Moves a page to the start of the buffer pool LRU list. This high-level
 
1500
function can be used to prevent an important page from from slipping out of
 
1501
the buffer pool. */
 
1502
UNIV_INTERN
 
1503
void
 
1504
buf_page_make_young(
 
1505
/*================*/
 
1506
        buf_page_t*     bpage)  /*!< in: buffer block of a file page */
 
1507
{
 
1508
        buf_pool_mutex_enter();
 
1509
 
 
1510
        ut_a(buf_page_in_file(bpage));
 
1511
 
 
1512
        buf_LRU_make_block_young(bpage);
 
1513
 
 
1514
        buf_pool_mutex_exit();
 
1515
}
 
1516
 
 
1517
/********************************************************************//**
 
1518
Resets the check_index_page_at_flush field of a page if found in the buffer
 
1519
pool. */
 
1520
UNIV_INTERN
 
1521
void
 
1522
buf_reset_check_index_page_at_flush(
 
1523
/*================================*/
 
1524
        ulint   space,  /*!< in: space id */
 
1525
        ulint   offset) /*!< in: page number */
 
1526
{
 
1527
        buf_block_t*    block;
 
1528
 
 
1529
        buf_pool_mutex_enter();
 
1530
 
 
1531
        block = (buf_block_t*) buf_page_hash_get(space, offset);
 
1532
 
 
1533
        if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
 
1534
                block->check_index_page_at_flush = FALSE;
 
1535
        }
 
1536
 
 
1537
        buf_pool_mutex_exit();
 
1538
}
 
1539
 
 
1540
/********************************************************************//**
 
1541
Returns the current state of is_hashed of a page. FALSE if the page is
 
1542
not in the pool. NOTE that this operation does not fix the page in the
 
1543
pool if it is found there.
 
1544
@return TRUE if page hash index is built in search system */
 
1545
UNIV_INTERN
 
1546
ibool
 
1547
buf_page_peek_if_search_hashed(
 
1548
/*===========================*/
 
1549
        ulint   space,  /*!< in: space id */
 
1550
        ulint   offset) /*!< in: page number */
 
1551
{
 
1552
        buf_block_t*    block;
 
1553
        ibool           is_hashed;
 
1554
 
 
1555
        buf_pool_mutex_enter();
 
1556
 
 
1557
        block = (buf_block_t*) buf_page_hash_get(space, offset);
 
1558
 
 
1559
        if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
 
1560
                is_hashed = FALSE;
 
1561
        } else {
 
1562
                is_hashed = block->is_hashed;
 
1563
        }
 
1564
 
 
1565
        buf_pool_mutex_exit();
 
1566
 
 
1567
        return(is_hashed);
 
1568
}
 
1569
 
 
1570
#ifdef UNIV_DEBUG_FILE_ACCESSES
 
1571
/********************************************************************//**
 
1572
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
 
1573
This function should be called when we free a file page and want the
 
1574
debug version to check that it is not accessed any more unless
 
1575
reallocated.
 
1576
@return control block if found in page hash table, otherwise NULL */
 
1577
UNIV_INTERN
 
1578
buf_page_t*
 
1579
buf_page_set_file_page_was_freed(
 
1580
/*=============================*/
 
1581
        ulint   space,  /*!< in: space id */
 
1582
        ulint   offset) /*!< in: page number */
 
1583
{
 
1584
        buf_page_t*     bpage;
 
1585
 
 
1586
        buf_pool_mutex_enter();
 
1587
 
 
1588
        bpage = buf_page_hash_get(space, offset);
 
1589
 
 
1590
        if (bpage) {
 
1591
                bpage->file_page_was_freed = TRUE;
 
1592
        }
 
1593
 
 
1594
        buf_pool_mutex_exit();
 
1595
 
 
1596
        return(bpage);
 
1597
}
 
1598
 
 
1599
/********************************************************************//**
 
1600
Sets file_page_was_freed FALSE if the page is found in the buffer pool.
 
1601
This function should be called when we free a file page and want the
 
1602
debug version to check that it is not accessed any more unless
 
1603
reallocated.
 
1604
@return control block if found in page hash table, otherwise NULL */
 
1605
UNIV_INTERN
 
1606
buf_page_t*
 
1607
buf_page_reset_file_page_was_freed(
 
1608
/*===============================*/
 
1609
        ulint   space,  /*!< in: space id */
 
1610
        ulint   offset) /*!< in: page number */
 
1611
{
 
1612
        buf_page_t*     bpage;
 
1613
 
 
1614
        buf_pool_mutex_enter();
 
1615
 
 
1616
        bpage = buf_page_hash_get(space, offset);
 
1617
 
 
1618
        if (bpage) {
 
1619
                bpage->file_page_was_freed = FALSE;
 
1620
        }
 
1621
 
 
1622
        buf_pool_mutex_exit();
 
1623
 
 
1624
        return(bpage);
 
1625
}
 
1626
#endif /* UNIV_DEBUG_FILE_ACCESSES */
 
1627
 
 
1628
/********************************************************************//**
Get read access to a compressed page (usually of type
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
The page must be released with buf_page_release_zip().
NOTE: the page is not protected by any latch.  Mutual exclusion has to
be implemented at a higher level.  In other words, all possible
accesses to a given page through this function must be protected by
the same set of mutexes or latches.
@return pointer to the block */
UNIV_INTERN
buf_page_t*
buf_page_get_zip(
/*=============*/
	ulint		space,	/*!< in: space id */
	ulint		zip_size,/*!< in: compressed page size */
	ulint		offset)	/*!< in: page number */
{
	buf_page_t*	bpage;
	mutex_t*	block_mutex;
	ibool		must_read;

#ifndef UNIV_LOG_DEBUG
	ut_ad(!ibuf_inside());
#endif
	buf_pool->n_page_gets++;

	/* Loop until the page is found in the buffer pool, triggering
	a read from file on each miss.  We leave the loop holding
	buf_pool_mutex with bpage != NULL. */
	for (;;) {
		buf_pool_mutex_enter();
lookup:
		bpage = buf_page_hash_get(space, offset);
		if (bpage) {
			break;
		}

		/* Page not in buf_pool: needs to be read from file */

		buf_pool_mutex_exit();

		buf_read_page(space, zip_size, offset);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
		ut_a(++buf_dbg_counter % 37 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
	}

	if (UNIV_UNLIKELY(!bpage->zip.data)) {
		/* There is no compressed page. */
err_exit:
		buf_pool_mutex_exit();
		return(NULL);
	}

	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
	case BUF_BLOCK_ZIP_FREE:
		/* Impossible states for a page found in the page hash;
		fall through to the ut_error below. */
		break;
	case BUF_BLOCK_ZIP_PAGE:
	case BUF_BLOCK_ZIP_DIRTY:
		/* Compressed-only pages are all protected by the
		single buf_pool_zip_mutex. */
		block_mutex = &buf_pool_zip_mutex;
		mutex_enter(block_mutex);
		bpage->buf_fix_count++;
		goto got_block;
	case BUF_BLOCK_FILE_PAGE:
		/* An uncompressed copy exists; it has its own mutex. */
		block_mutex = &((buf_block_t*) bpage)->mutex;
		mutex_enter(block_mutex);

		/* Discard the uncompressed page frame if possible. */
		if (buf_LRU_free_block(bpage, FALSE, NULL)
		    == BUF_LRU_FREED) {

			/* The descriptor was freed along with the
			frame; look the page up again (still holding
			buf_pool_mutex). */
			mutex_exit(block_mutex);
			goto lookup;
		}

		buf_block_buf_fix_inc((buf_block_t*) bpage,
				      __FILE__, __LINE__);
		goto got_block;
	}

	ut_error;
	goto err_exit;

got_block:
	/* At this point both buf_pool_mutex and block_mutex are held
	and the page is buffer-fixed, so it cannot be evicted. */
	must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;

	buf_pool_mutex_exit();

	buf_page_set_accessed(bpage, TRUE);

	mutex_exit(block_mutex);

	buf_block_make_young(bpage);

#ifdef UNIV_DEBUG_FILE_ACCESSES
	ut_a(!bpage->file_page_was_freed);
#endif

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(bpage->buf_fix_count > 0);
	ut_a(buf_page_in_file(bpage));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	if (must_read) {
		/* Let us wait until the read operation
		completes */

		/* Poll the io_fix state under block_mutex, sleeping
		between polls, until the read I/O has finished. */
		for (;;) {
			enum buf_io_fix	io_fix;

			mutex_enter(block_mutex);
			io_fix = buf_page_get_io_fix(bpage);
			mutex_exit(block_mutex);

			if (io_fix == BUF_IO_READ) {

				os_thread_sleep(WAIT_FOR_READ);
			} else {
				break;
			}
		}
	}

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_page_get_space(bpage),
			    buf_page_get_page_no(bpage)) == 0);
#endif
	return(bpage);
}
 
1760
 
 
1761
/********************************************************************//**
 
1762
Initialize some fields of a control block. */
 
1763
UNIV_INLINE
 
1764
void
 
1765
buf_block_init_low(
 
1766
/*===============*/
 
1767
        buf_block_t*    block)  /*!< in: block to init */
 
1768
{
 
1769
        block->check_index_page_at_flush = FALSE;
 
1770
        block->index            = NULL;
 
1771
 
 
1772
        block->n_hash_helps     = 0;
 
1773
        block->is_hashed        = FALSE;
 
1774
        block->n_fields         = 1;
 
1775
        block->n_bytes          = 0;
 
1776
        block->left_side        = TRUE;
 
1777
}
 
1778
#endif /* !UNIV_HOTBACKUP */
 
1779
 
 
1780
/********************************************************************//**
Decompress a block.
@return TRUE if successful */
UNIV_INTERN
ibool
buf_zip_decompress(
/*===============*/
	buf_block_t*	block,	/*!< in/out: block */
	ibool		check)	/*!< in: TRUE=verify the page checksum */
{
	const byte* frame = block->page.zip.data;

	/* Only compressed tablespace pages may be decompressed, and
	space 0 (the system tablespace) is never compressed. */
	ut_ad(buf_block_get_zip_size(block));
	ut_a(buf_block_get_space(block) != 0);

	if (UNIV_LIKELY(check)) {
		/* Compare the checksum stamped on the page with one
		computed over the compressed frame. */
		ulint	stamp_checksum	= mach_read_from_4(
			frame + FIL_PAGE_SPACE_OR_CHKSUM);
		ulint	calc_checksum	= page_zip_calc_checksum(
			frame, page_zip_get_size(&block->page.zip));

		if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: compressed page checksum mismatch"
				" (space %u page %u): %lu != %lu\n",
				block->page.space, block->page.offset,
				stamp_checksum, calc_checksum);
			return(FALSE);
		}
	}

	switch (fil_page_get_type(frame)) {
	case FIL_PAGE_INDEX:
		/* Index pages are the only truly compressed pages;
		inflate into the uncompressed frame. */
		if (page_zip_decompress(&block->page.zip,
					block->frame)) {
			return(TRUE);
		}

		fprintf(stderr,
			"InnoDB: unable to decompress space %lu page %lu\n",
			(ulong) block->page.space,
			(ulong) block->page.offset);
		return(FALSE);

	case FIL_PAGE_TYPE_ALLOCATED:
	case FIL_PAGE_INODE:
	case FIL_PAGE_IBUF_BITMAP:
	case FIL_PAGE_TYPE_FSP_HDR:
	case FIL_PAGE_TYPE_XDES:
	case FIL_PAGE_TYPE_ZBLOB:
	case FIL_PAGE_TYPE_ZBLOB2:
		/* Copy to uncompressed storage. */
		/* These page types are stored uncompressed inside the
		zip frame, so a plain copy of zip_size bytes suffices. */
		memcpy(block->frame, frame,
		       buf_block_get_zip_size(block));
		return(TRUE);
	}

	/* Unrecognized page type: report and fail. */
	ut_print_timestamp(stderr);
	fprintf(stderr,
		"  InnoDB: unknown compressed page"
		" type %lu\n",
		fil_page_get_type(frame));
	return(FALSE);
}
 
1845
 
 
1846
#ifndef UNIV_HOTBACKUP
 
1847
/*******************************************************************//**
Gets the block to whose frame the pointer is pointing to.
@return pointer to block, never NULL */
UNIV_INTERN
buf_block_t*
buf_block_align(
/*============*/
	const byte*	ptr)	/*!< in: pointer to a frame */
{
	buf_chunk_t*	chunk;
	ulint		i;

	/* TODO: protect buf_pool->chunks with a mutex (it will
	currently remain constant after buf_pool_init()) */
	for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
		/* Byte offset of ptr from the start of this chunk's
		frame array; negative means ptr is below this chunk. */
		lint	offs = ptr - chunk->blocks->frame;

		if (UNIV_UNLIKELY(offs < 0)) {

			continue;
		}

		/* Convert the byte offset into a page index. */
		offs >>= UNIV_PAGE_SIZE_SHIFT;

		if (UNIV_LIKELY((ulint) offs < chunk->size)) {
			buf_block_t*	block = &chunk->blocks[offs];

			/* The function buf_chunk_init() invokes
			buf_block_init() so that block[n].frame ==
			block->frame + n * UNIV_PAGE_SIZE.  Check it. */
			ut_ad(block->frame == page_align(ptr));
#ifdef UNIV_DEBUG
			/* A thread that updates these fields must
			hold buf_pool_mutex and block->mutex.  Acquire
			only the latter. */
			mutex_enter(&block->mutex);

			switch (buf_block_get_state(block)) {
			case BUF_BLOCK_ZIP_FREE:
			case BUF_BLOCK_ZIP_PAGE:
			case BUF_BLOCK_ZIP_DIRTY:
				/* These types should only be used in
				the compressed buffer pool, whose
				memory is allocated from
				buf_pool->chunks, in UNIV_PAGE_SIZE
				blocks flagged as BUF_BLOCK_MEMORY. */
				ut_error;
				break;
			case BUF_BLOCK_NOT_USED:
			case BUF_BLOCK_READY_FOR_USE:
			case BUF_BLOCK_MEMORY:
				/* Some data structures contain
				"guess" pointers to file pages.  The
				file pages may have been freed and
				reused.  Do not complain. */
				break;
			case BUF_BLOCK_REMOVE_HASH:
				/* buf_LRU_block_remove_hashed_page()
				will overwrite the FIL_PAGE_OFFSET and
				FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
				0xff and set the state to
				BUF_BLOCK_REMOVE_HASH. */
				ut_ad(page_get_space_id(page_align(ptr))
				      == 0xffffffff);
				ut_ad(page_get_page_no(page_align(ptr))
				      == 0xffffffff);
				break;
			case BUF_BLOCK_FILE_PAGE:
				/* The space id and page number in the
				frame must agree with the descriptor. */
				ut_ad(block->page.space
				      == page_get_space_id(page_align(ptr)));
				ut_ad(block->page.offset
				      == page_get_page_no(page_align(ptr)));
				break;
			}

			mutex_exit(&block->mutex);
#endif /* UNIV_DEBUG */

			return(block);
		}
	}

	/* The block should always be found. */
	ut_error;
	return(NULL);
}
 
1933
 
 
1934
/********************************************************************//**
 
1935
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
 
1936
the buf_block_t itself or a member of it
 
1937
@return TRUE if ptr belongs to a buf_block_t struct */
 
1938
UNIV_INTERN
 
1939
ibool
 
1940
buf_pointer_is_block_field(
 
1941
/*=======================*/
 
1942
        const void*             ptr)    /*!< in: pointer not
 
1943
                                        dereferenced */
 
1944
{
 
1945
        const buf_chunk_t*              chunk   = buf_pool->chunks;
 
1946
        const buf_chunk_t* const        echunk  = chunk + buf_pool->n_chunks;
 
1947
 
 
1948
        /* TODO: protect buf_pool->chunks with a mutex (it will
 
1949
        currently remain constant after buf_pool_init()) */
 
1950
        while (chunk < echunk) {
 
1951
                if (ptr >= (void *)chunk->blocks
 
1952
                    && ptr < (void *)(chunk->blocks + chunk->size)) {
 
1953
 
 
1954
                        return(TRUE);
 
1955
                }
 
1956
 
 
1957
                chunk++;
 
1958
        }
 
1959
 
 
1960
        return(FALSE);
 
1961
}
 
1962
 
 
1963
/********************************************************************//**
 
1964
Find out if a buffer block was created by buf_chunk_init().
 
1965
@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
 
1966
static
 
1967
ibool
 
1968
buf_block_is_uncompressed(
 
1969
/*======================*/
 
1970
        const buf_block_t*      block)  /*!< in: pointer to block,
 
1971
                                        not dereferenced */
 
1972
{
 
1973
        ut_ad(buf_pool_mutex_own());
 
1974
 
 
1975
        if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
 
1976
                /* The pointer should be aligned. */
 
1977
                return(FALSE);
 
1978
        }
 
1979
 
 
1980
        return(buf_pointer_is_block_field((void *)block));
 
1981
}
 
1982
 
 
1983
/********************************************************************//**
 
1984
This is the general function used to get access to a database page.
 
1985
@return pointer to the block or NULL */
 
1986
UNIV_INTERN
 
1987
buf_block_t*
 
1988
buf_page_get_gen(
 
1989
/*=============*/
 
1990
        ulint           space,  /*!< in: space id */
 
1991
        ulint           zip_size,/*!< in: compressed page size in bytes
 
1992
                                or 0 for uncompressed pages */
 
1993
        ulint           offset, /*!< in: page number */
 
1994
        ulint           rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
 
1995
        buf_block_t*    guess,  /*!< in: guessed block or NULL */
 
1996
        ulint           mode,   /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
 
1997
                                BUF_GET_NO_LATCH */
 
1998
        const char*     file,   /*!< in: file name */
 
1999
        ulint           line,   /*!< in: line where called */
 
2000
        mtr_t*          mtr)    /*!< in: mini-transaction */
 
2001
{
 
2002
        buf_block_t*    block;
 
2003
        ibool           accessed;
 
2004
        ulint           fix_type;
 
2005
        ibool           must_read;
 
2006
 
 
2007
        ut_ad(mtr);
 
2008
        ut_ad((rw_latch == RW_S_LATCH)
 
2009
              || (rw_latch == RW_X_LATCH)
 
2010
              || (rw_latch == RW_NO_LATCH));
 
2011
        ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
 
2012
        ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL)
 
2013
              || (mode == BUF_GET_NO_LATCH));
 
2014
        ut_ad(zip_size == fil_space_get_zip_size(space));
 
2015
        ut_ad(ut_is_2pow(zip_size));
 
2016
#ifndef UNIV_LOG_DEBUG
 
2017
        ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
 
2018
#endif
 
2019
        buf_pool->n_page_gets++;
 
2020
loop:
 
2021
        block = guess;
 
2022
        buf_pool_mutex_enter();
 
2023
 
 
2024
        if (block) {
 
2025
                /* If the guess is a compressed page descriptor that
 
2026
                has been allocated by buf_buddy_alloc(), it may have
 
2027
                been invalidated by buf_buddy_relocate().  In that
 
2028
                case, block could point to something that happens to
 
2029
                contain the expected bits in block->page.  Similarly,
 
2030
                the guess may be pointing to a buffer pool chunk that
 
2031
                has been released when resizing the buffer pool. */
 
2032
 
 
2033
                if (!buf_block_is_uncompressed(block)
 
2034
                    || offset != block->page.offset
 
2035
                    || space != block->page.space
 
2036
                    || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
 
2037
 
 
2038
                        block = guess = NULL;
 
2039
                } else {
 
2040
                        ut_ad(!block->page.in_zip_hash);
 
2041
                        ut_ad(block->page.in_page_hash);
 
2042
                }
 
2043
        }
 
2044
 
 
2045
        if (block == NULL) {
 
2046
                block = (buf_block_t*) buf_page_hash_get(space, offset);
 
2047
        }
 
2048
 
 
2049
loop2:
 
2050
        if (block == NULL) {
 
2051
                /* Page not in buf_pool: needs to be read from file */
 
2052
 
 
2053
                buf_pool_mutex_exit();
 
2054
 
 
2055
                if (mode == BUF_GET_IF_IN_POOL) {
 
2056
 
 
2057
                        return(NULL);
 
2058
                }
 
2059
 
 
2060
                buf_read_page(space, zip_size, offset);
 
2061
 
 
2062
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
2063
                ut_a(++buf_dbg_counter % 37 || buf_validate());
 
2064
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
2065
                goto loop;
 
2066
        }
 
2067
 
 
2068
        ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
 
2069
 
 
2070
        must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
 
2071
 
 
2072
        if (must_read && mode == BUF_GET_IF_IN_POOL) {
 
2073
                /* The page is only being read to buffer */
 
2074
                buf_pool_mutex_exit();
 
2075
 
 
2076
                return(NULL);
 
2077
        }
 
2078
 
 
2079
        switch (buf_block_get_state(block)) {
 
2080
                buf_page_t*     bpage;
 
2081
                ibool           success;
 
2082
 
 
2083
        case BUF_BLOCK_FILE_PAGE:
 
2084
                break;
 
2085
 
 
2086
        case BUF_BLOCK_ZIP_PAGE:
 
2087
        case BUF_BLOCK_ZIP_DIRTY:
 
2088
                bpage = &block->page;
 
2089
                /* Protect bpage->buf_fix_count. */
 
2090
                mutex_enter(&buf_pool_zip_mutex);
 
2091
 
 
2092
                if (bpage->buf_fix_count
 
2093
                    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
 
2094
                        /* This condition often occurs when the buffer
 
2095
                        is not buffer-fixed, but I/O-fixed by
 
2096
                        buf_page_init_for_read(). */
 
2097
                        mutex_exit(&buf_pool_zip_mutex);
 
2098
wait_until_unfixed:
 
2099
                        /* The block is buffer-fixed or I/O-fixed.
 
2100
                        Try again later. */
 
2101
                        buf_pool_mutex_exit();
 
2102
                        os_thread_sleep(WAIT_FOR_READ);
 
2103
 
 
2104
                        goto loop;
 
2105
                }
 
2106
 
 
2107
                /* Allocate an uncompressed page. */
 
2108
                buf_pool_mutex_exit();
 
2109
                mutex_exit(&buf_pool_zip_mutex);
 
2110
 
 
2111
                block = buf_LRU_get_free_block(0);
 
2112
                ut_a(block);
 
2113
 
 
2114
                buf_pool_mutex_enter();
 
2115
                mutex_enter(&block->mutex);
 
2116
 
 
2117
                {
 
2118
                        buf_page_t*     hash_bpage
 
2119
                                = buf_page_hash_get(space, offset);
 
2120
 
 
2121
                        if (UNIV_UNLIKELY(bpage != hash_bpage)) {
 
2122
                                /* The buf_pool->page_hash was modified
 
2123
                                while buf_pool_mutex was released.
 
2124
                                Free the block that was allocated. */
 
2125
 
 
2126
                                buf_LRU_block_free_non_file_page(block);
 
2127
                                mutex_exit(&block->mutex);
 
2128
 
 
2129
                                block = (buf_block_t*) hash_bpage;
 
2130
                                goto loop2;
 
2131
                        }
 
2132
                }
 
2133
 
 
2134
                if (UNIV_UNLIKELY
 
2135
                    (bpage->buf_fix_count
 
2136
                     || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
 
2137
 
 
2138
                        /* The block was buffer-fixed or I/O-fixed
 
2139
                        while buf_pool_mutex was not held by this thread.
 
2140
                        Free the block that was allocated and try again.
 
2141
                        This should be extremely unlikely. */
 
2142
 
 
2143
                        buf_LRU_block_free_non_file_page(block);
 
2144
                        mutex_exit(&block->mutex);
 
2145
 
 
2146
                        goto wait_until_unfixed;
 
2147
                }
 
2148
 
 
2149
                /* Move the compressed page from bpage to block,
 
2150
                and uncompress it. */
 
2151
 
 
2152
                mutex_enter(&buf_pool_zip_mutex);
 
2153
 
 
2154
                buf_relocate(bpage, &block->page);
 
2155
                buf_block_init_low(block);
 
2156
                block->lock_hash_val = lock_rec_hash(space, offset);
 
2157
 
 
2158
                UNIV_MEM_DESC(&block->page.zip.data,
 
2159
                              page_zip_get_size(&block->page.zip), block);
 
2160
 
 
2161
                if (buf_page_get_state(&block->page)
 
2162
                    == BUF_BLOCK_ZIP_PAGE) {
 
2163
                        UT_LIST_REMOVE(list, buf_pool->zip_clean,
 
2164
                                       &block->page);
 
2165
                        ut_ad(!block->page.in_flush_list);
 
2166
                } else {
 
2167
                        /* Relocate buf_pool->flush_list. */
 
2168
                        buf_page_t*     b;
 
2169
 
 
2170
                        b = UT_LIST_GET_PREV(list, &block->page);
 
2171
                        ut_ad(block->page.in_flush_list);
 
2172
                        UT_LIST_REMOVE(list, buf_pool->flush_list,
 
2173
                                       &block->page);
 
2174
 
 
2175
                        if (b) {
 
2176
                                UT_LIST_INSERT_AFTER(
 
2177
                                        list, buf_pool->flush_list, b,
 
2178
                                        &block->page);
 
2179
                        } else {
 
2180
                                UT_LIST_ADD_FIRST(
 
2181
                                        list, buf_pool->flush_list,
 
2182
                                        &block->page);
 
2183
                        }
 
2184
                }
 
2185
 
 
2186
                /* Buffer-fix, I/O-fix, and X-latch the block
 
2187
                for the duration of the decompression.
 
2188
                Also add the block to the unzip_LRU list. */
 
2189
                block->page.state = BUF_BLOCK_FILE_PAGE;
 
2190
 
 
2191
                /* Insert at the front of unzip_LRU list */
 
2192
                buf_unzip_LRU_add_block(block, FALSE);
 
2193
 
 
2194
                block->page.buf_fix_count = 1;
 
2195
                buf_block_set_io_fix(block, BUF_IO_READ);
 
2196
                rw_lock_x_lock(&block->lock);
 
2197
                mutex_exit(&block->mutex);
 
2198
                mutex_exit(&buf_pool_zip_mutex);
 
2199
                buf_pool->n_pend_unzip++;
 
2200
 
 
2201
                buf_buddy_free(bpage, sizeof *bpage);
 
2202
 
 
2203
                buf_pool_mutex_exit();
 
2204
 
 
2205
                /* Decompress the page and apply buffered operations
 
2206
                while not holding buf_pool_mutex or block->mutex. */
 
2207
                success = buf_zip_decompress(block, srv_use_checksums);
 
2208
 
 
2209
                if (UNIV_LIKELY(success)) {
 
2210
                        ibuf_merge_or_delete_for_page(block, space, offset,
 
2211
                                                      zip_size, TRUE);
 
2212
                }
 
2213
 
 
2214
                /* Unfix and unlatch the block. */
 
2215
                buf_pool_mutex_enter();
 
2216
                mutex_enter(&block->mutex);
 
2217
                block->page.buf_fix_count--;
 
2218
                buf_block_set_io_fix(block, BUF_IO_NONE);
 
2219
                mutex_exit(&block->mutex);
 
2220
                buf_pool->n_pend_unzip--;
 
2221
                rw_lock_x_unlock(&block->lock);
 
2222
 
 
2223
                if (UNIV_UNLIKELY(!success)) {
 
2224
 
 
2225
                        buf_pool_mutex_exit();
 
2226
                        return(NULL);
 
2227
                }
 
2228
 
 
2229
                break;
 
2230
 
 
2231
        case BUF_BLOCK_ZIP_FREE:
 
2232
        case BUF_BLOCK_NOT_USED:
 
2233
        case BUF_BLOCK_READY_FOR_USE:
 
2234
        case BUF_BLOCK_MEMORY:
 
2235
        case BUF_BLOCK_REMOVE_HASH:
 
2236
                ut_error;
 
2237
                break;
 
2238
        }
 
2239
 
 
2240
        ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
2241
 
 
2242
        mutex_enter(&block->mutex);
 
2243
        UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
 
2244
 
 
2245
        buf_block_buf_fix_inc(block, file, line);
 
2246
        buf_pool_mutex_exit();
 
2247
 
 
2248
        /* Check if this is the first access to the page */
 
2249
 
 
2250
        accessed = buf_page_is_accessed(&block->page);
 
2251
 
 
2252
        buf_page_set_accessed(&block->page, TRUE);
 
2253
 
 
2254
        mutex_exit(&block->mutex);
 
2255
 
 
2256
        buf_block_make_young(&block->page);
 
2257
 
 
2258
#ifdef UNIV_DEBUG_FILE_ACCESSES
 
2259
        ut_a(!block->page.file_page_was_freed);
 
2260
#endif
 
2261
 
 
2262
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
2263
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
 
2264
        ut_a(block->page.buf_fix_count > 0);
 
2265
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
2266
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
2267
 
 
2268
        switch (rw_latch) {
 
2269
        case RW_NO_LATCH:
 
2270
                if (must_read) {
 
2271
                        /* Let us wait until the read operation
 
2272
                        completes */
 
2273
 
 
2274
                        for (;;) {
 
2275
                                enum buf_io_fix io_fix;
 
2276
 
 
2277
                                mutex_enter(&block->mutex);
 
2278
                                io_fix = buf_block_get_io_fix(block);
 
2279
                                mutex_exit(&block->mutex);
 
2280
 
 
2281
                                if (io_fix == BUF_IO_READ) {
 
2282
 
 
2283
                                        os_thread_sleep(WAIT_FOR_READ);
 
2284
                                } else {
 
2285
                                        break;
 
2286
                                }
 
2287
                        }
 
2288
                }
 
2289
 
 
2290
                fix_type = MTR_MEMO_BUF_FIX;
 
2291
                break;
 
2292
 
 
2293
        case RW_S_LATCH:
 
2294
                rw_lock_s_lock_func(&(block->lock), 0, file, line);
 
2295
 
 
2296
                fix_type = MTR_MEMO_PAGE_S_FIX;
 
2297
                break;
 
2298
 
 
2299
        default:
 
2300
                ut_ad(rw_latch == RW_X_LATCH);
 
2301
                rw_lock_x_lock_func(&(block->lock), 0, file, line);
 
2302
 
 
2303
                fix_type = MTR_MEMO_PAGE_X_FIX;
 
2304
                break;
 
2305
        }
 
2306
 
 
2307
        mtr_memo_push(mtr, block, fix_type);
 
2308
 
 
2309
        if (!accessed) {
 
2310
                /* In the case of a first access, try to apply linear
 
2311
                read-ahead */
 
2312
 
 
2313
                buf_read_ahead_linear(space, zip_size, offset);
 
2314
        }
 
2315
 
 
2316
#ifdef UNIV_IBUF_COUNT_DEBUG
 
2317
        ut_a(ibuf_count_get(buf_block_get_space(block),
 
2318
                            buf_block_get_page_no(block)) == 0);
 
2319
#endif
 
2320
        return(block);
 
2321
}
 
2322
 
 
2323
/********************************************************************//**
 
2324
This is the general function used to get optimistic access to a database
 
2325
page.
 
2326
@return TRUE if success */
 
2327
UNIV_INTERN
 
2328
ibool
 
2329
buf_page_optimistic_get_func(
 
2330
/*=========================*/
 
2331
        ulint           rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
 
2332
        buf_block_t*    block,  /*!< in: guessed buffer block */
 
2333
        ib_uint64_t     modify_clock,/*!< in: modify clock value if mode is
 
2334
                                ..._GUESS_ON_CLOCK */
 
2335
        const char*     file,   /*!< in: file name */
 
2336
        ulint           line,   /*!< in: line where called */
 
2337
        mtr_t*          mtr)    /*!< in: mini-transaction */
 
2338
{
 
2339
        ibool           accessed;
 
2340
        ibool           success;
 
2341
        ulint           fix_type;
 
2342
 
 
2343
        ut_ad(mtr && block);
 
2344
        ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
 
2345
 
 
2346
        mutex_enter(&block->mutex);
 
2347
 
 
2348
        if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
 
2349
 
 
2350
                mutex_exit(&block->mutex);
 
2351
 
 
2352
                return(FALSE);
 
2353
        }
 
2354
 
 
2355
        buf_block_buf_fix_inc(block, file, line);
 
2356
        accessed = buf_page_is_accessed(&block->page);
 
2357
        buf_page_set_accessed(&block->page, TRUE);
 
2358
 
 
2359
        mutex_exit(&block->mutex);
 
2360
 
 
2361
        buf_block_make_young(&block->page);
 
2362
 
 
2363
        /* Check if this is the first access to the page */
 
2364
 
 
2365
        ut_ad(!ibuf_inside()
 
2366
              || ibuf_page(buf_block_get_space(block),
 
2367
                           buf_block_get_zip_size(block),
 
2368
                           buf_block_get_page_no(block), NULL));
 
2369
 
 
2370
        if (rw_latch == RW_S_LATCH) {
 
2371
                success = rw_lock_s_lock_nowait(&(block->lock),
 
2372
                                                file, line);
 
2373
                fix_type = MTR_MEMO_PAGE_S_FIX;
 
2374
        } else {
 
2375
                success = rw_lock_x_lock_func_nowait(&(block->lock),
 
2376
                                                     file, line);
 
2377
                fix_type = MTR_MEMO_PAGE_X_FIX;
 
2378
        }
 
2379
 
 
2380
        if (UNIV_UNLIKELY(!success)) {
 
2381
                mutex_enter(&block->mutex);
 
2382
                buf_block_buf_fix_dec(block);
 
2383
                mutex_exit(&block->mutex);
 
2384
 
 
2385
                return(FALSE);
 
2386
        }
 
2387
 
 
2388
        if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
 
2389
                buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
 
2390
 
 
2391
                if (rw_latch == RW_S_LATCH) {
 
2392
                        rw_lock_s_unlock(&(block->lock));
 
2393
                } else {
 
2394
                        rw_lock_x_unlock(&(block->lock));
 
2395
                }
 
2396
 
 
2397
                mutex_enter(&block->mutex);
 
2398
                buf_block_buf_fix_dec(block);
 
2399
                mutex_exit(&block->mutex);
 
2400
 
 
2401
                return(FALSE);
 
2402
        }
 
2403
 
 
2404
        mtr_memo_push(mtr, block, fix_type);
 
2405
 
 
2406
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
2407
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
 
2408
        ut_a(block->page.buf_fix_count > 0);
 
2409
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
2410
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
2411
 
 
2412
#ifdef UNIV_DEBUG_FILE_ACCESSES
 
2413
        ut_a(block->page.file_page_was_freed == FALSE);
 
2414
#endif
 
2415
        if (UNIV_UNLIKELY(!accessed)) {
 
2416
                /* In the case of a first access, try to apply linear
 
2417
                read-ahead */
 
2418
 
 
2419
                buf_read_ahead_linear(buf_block_get_space(block),
 
2420
                                      buf_block_get_zip_size(block),
 
2421
                                      buf_block_get_page_no(block));
 
2422
        }
 
2423
 
 
2424
#ifdef UNIV_IBUF_COUNT_DEBUG
 
2425
        ut_a(ibuf_count_get(buf_block_get_space(block),
 
2426
                            buf_block_get_page_no(block)) == 0);
 
2427
#endif
 
2428
        buf_pool->n_page_gets++;
 
2429
 
 
2430
        return(TRUE);
 
2431
}
 
2432
 
 
2433
/********************************************************************//**
 
2434
This is used to get access to a known database page, when no waiting can be
 
2435
done. For example, if a search in an adaptive hash index leads us to this
 
2436
frame.
 
2437
@return TRUE if success */
 
2438
UNIV_INTERN
 
2439
ibool
 
2440
buf_page_get_known_nowait(
 
2441
/*======================*/
 
2442
        ulint           rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
 
2443
        buf_block_t*    block,  /*!< in: the known page */
 
2444
        ulint           mode,   /*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
 
2445
        const char*     file,   /*!< in: file name */
 
2446
        ulint           line,   /*!< in: line where called */
 
2447
        mtr_t*          mtr)    /*!< in: mini-transaction */
 
2448
{
 
2449
        ibool           success;
 
2450
        ulint           fix_type;
 
2451
 
 
2452
        ut_ad(mtr);
 
2453
        ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
 
2454
 
 
2455
        mutex_enter(&block->mutex);
 
2456
 
 
2457
        if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
 
2458
                /* Another thread is just freeing the block from the LRU list
 
2459
                of the buffer pool: do not try to access this page; this
 
2460
                attempt to access the page can only come through the hash
 
2461
                index because when the buffer block state is ..._REMOVE_HASH,
 
2462
                we have already removed it from the page address hash table
 
2463
                of the buffer pool. */
 
2464
 
 
2465
                mutex_exit(&block->mutex);
 
2466
 
 
2467
                return(FALSE);
 
2468
        }
 
2469
 
 
2470
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
2471
 
 
2472
        buf_block_buf_fix_inc(block, file, line);
 
2473
 
 
2474
        mutex_exit(&block->mutex);
 
2475
 
 
2476
        if (mode == BUF_MAKE_YOUNG) {
 
2477
                buf_block_make_young(&block->page);
 
2478
        }
 
2479
 
 
2480
        ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
 
2481
 
 
2482
        if (rw_latch == RW_S_LATCH) {
 
2483
                success = rw_lock_s_lock_nowait(&(block->lock),
 
2484
                                                file, line);
 
2485
                fix_type = MTR_MEMO_PAGE_S_FIX;
 
2486
        } else {
 
2487
                success = rw_lock_x_lock_func_nowait(&(block->lock),
 
2488
                                                     file, line);
 
2489
                fix_type = MTR_MEMO_PAGE_X_FIX;
 
2490
        }
 
2491
 
 
2492
        if (!success) {
 
2493
                mutex_enter(&block->mutex);
 
2494
                buf_block_buf_fix_dec(block);
 
2495
                mutex_exit(&block->mutex);
 
2496
 
 
2497
                return(FALSE);
 
2498
        }
 
2499
 
 
2500
        mtr_memo_push(mtr, block, fix_type);
 
2501
 
 
2502
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
2503
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
 
2504
        ut_a(block->page.buf_fix_count > 0);
 
2505
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
2506
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
2507
#ifdef UNIV_DEBUG_FILE_ACCESSES
 
2508
        ut_a(block->page.file_page_was_freed == FALSE);
 
2509
#endif
 
2510
 
 
2511
#ifdef UNIV_IBUF_COUNT_DEBUG
 
2512
        ut_a((mode == BUF_KEEP_OLD)
 
2513
             || (ibuf_count_get(buf_block_get_space(block),
 
2514
                                buf_block_get_page_no(block)) == 0));
 
2515
#endif
 
2516
        buf_pool->n_page_gets++;
 
2517
 
 
2518
        return(TRUE);
 
2519
}
 
2520
 
 
2521
/*******************************************************************//**
 
2522
Given a tablespace id and page number tries to get that page. If the
 
2523
page is not in the buffer pool it is not loaded and NULL is returned.
 
2524
Suitable for using when holding the kernel mutex.
 
2525
@return pointer to a page or NULL */
 
2526
UNIV_INTERN
 
2527
const buf_block_t*
 
2528
buf_page_try_get_func(
 
2529
/*==================*/
 
2530
        ulint           space_id,/*!< in: tablespace id */
 
2531
        ulint           page_no,/*!< in: page number */
 
2532
        const char*     file,   /*!< in: file name */
 
2533
        ulint           line,   /*!< in: line where called */
 
2534
        mtr_t*          mtr)    /*!< in: mini-transaction */
 
2535
{
 
2536
        buf_block_t*    block;
 
2537
        ibool           success;
 
2538
        ulint           fix_type;
 
2539
 
 
2540
        buf_pool_mutex_enter();
 
2541
        block = buf_block_hash_get(space_id, page_no);
 
2542
 
 
2543
        if (!block) {
 
2544
                buf_pool_mutex_exit();
 
2545
                return(NULL);
 
2546
        }
 
2547
 
 
2548
        mutex_enter(&block->mutex);
 
2549
        buf_pool_mutex_exit();
 
2550
 
 
2551
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
2552
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
2553
        ut_a(buf_block_get_space(block) == space_id);
 
2554
        ut_a(buf_block_get_page_no(block) == page_no);
 
2555
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
2556
 
 
2557
        buf_block_buf_fix_inc(block, file, line);
 
2558
        mutex_exit(&block->mutex);
 
2559
 
 
2560
        fix_type = MTR_MEMO_PAGE_S_FIX;
 
2561
        success = rw_lock_s_lock_nowait(&block->lock, file, line);
 
2562
 
 
2563
        if (!success) {
 
2564
                /* Let us try to get an X-latch. If the current thread
 
2565
                is holding an X-latch on the page, we cannot get an
 
2566
                S-latch. */
 
2567
 
 
2568
                fix_type = MTR_MEMO_PAGE_X_FIX;
 
2569
                success = rw_lock_x_lock_func_nowait(&block->lock,
 
2570
                                                     file, line);
 
2571
        }
 
2572
 
 
2573
        if (!success) {
 
2574
                mutex_enter(&block->mutex);
 
2575
                buf_block_buf_fix_dec(block);
 
2576
                mutex_exit(&block->mutex);
 
2577
 
 
2578
                return(NULL);
 
2579
        }
 
2580
 
 
2581
        mtr_memo_push(mtr, block, fix_type);
 
2582
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
2583
        ut_a(++buf_dbg_counter % 5771 || buf_validate());
 
2584
        ut_a(block->page.buf_fix_count > 0);
 
2585
        ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
2586
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
2587
#ifdef UNIV_DEBUG_FILE_ACCESSES
 
2588
        ut_a(block->page.file_page_was_freed == FALSE);
 
2589
#endif /* UNIV_DEBUG_FILE_ACCESSES */
 
2590
        buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
 
2591
 
 
2592
        buf_pool->n_page_gets++;
 
2593
 
 
2594
#ifdef UNIV_IBUF_COUNT_DEBUG
 
2595
        ut_a(ibuf_count_get(buf_block_get_space(block),
 
2596
                            buf_block_get_page_no(block)) == 0);
 
2597
#endif
 
2598
 
 
2599
        return(block);
 
2600
}
 
2601
 
 
2602
/********************************************************************//**
 
2603
Initialize some fields of a control block. */
 
2604
UNIV_INLINE
 
2605
void
 
2606
buf_page_init_low(
 
2607
/*==============*/
 
2608
        buf_page_t*     bpage)  /*!< in: block to init */
 
2609
{
 
2610
        bpage->flush_type = BUF_FLUSH_LRU;
 
2611
        bpage->accessed = FALSE;
 
2612
        bpage->io_fix = BUF_IO_NONE;
 
2613
        bpage->buf_fix_count = 0;
 
2614
        bpage->freed_page_clock = 0;
 
2615
        bpage->newest_modification = 0;
 
2616
        bpage->oldest_modification = 0;
 
2617
        HASH_INVALIDATE(bpage, hash);
 
2618
#ifdef UNIV_DEBUG_FILE_ACCESSES
 
2619
        bpage->file_page_was_freed = FALSE;
 
2620
#endif /* UNIV_DEBUG_FILE_ACCESSES */
 
2621
}
 
2622
 
 
2623
/********************************************************************//**
 
2624
Inits a page to the buffer buf_pool. */
 
2625
static
 
2626
void
 
2627
buf_page_init(
 
2628
/*==========*/
 
2629
        ulint           space,  /*!< in: space id */
 
2630
        ulint           offset, /*!< in: offset of the page within space
 
2631
                                in units of a page */
 
2632
        buf_block_t*    block)  /*!< in: block to init */
 
2633
{
 
2634
        buf_page_t*     hash_page;
 
2635
 
 
2636
        ut_ad(buf_pool_mutex_own());
 
2637
        ut_ad(mutex_own(&(block->mutex)));
 
2638
        ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
 
2639
 
 
2640
        /* Set the state of the block */
 
2641
        buf_block_set_file_page(block, space, offset);
 
2642
 
 
2643
#ifdef UNIV_DEBUG_VALGRIND
 
2644
        if (!space) {
 
2645
                /* Silence valid Valgrind warnings about uninitialized
 
2646
                data being written to data files.  There are some unused
 
2647
                bytes on some pages that InnoDB does not initialize. */
 
2648
                UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
 
2649
        }
 
2650
#endif /* UNIV_DEBUG_VALGRIND */
 
2651
 
 
2652
        buf_block_init_low(block);
 
2653
 
 
2654
        block->lock_hash_val    = lock_rec_hash(space, offset);
 
2655
 
 
2656
        /* Insert into the hash table of file pages */
 
2657
 
 
2658
        hash_page = buf_page_hash_get(space, offset);
 
2659
 
 
2660
        if (UNIV_LIKELY_NULL(hash_page)) {
 
2661
                fprintf(stderr,
 
2662
                        "InnoDB: Error: page %lu %lu already found"
 
2663
                        " in the hash table: %p, %p\n",
 
2664
                        (ulong) space,
 
2665
                        (ulong) offset,
 
2666
                        (const void*) hash_page, (const void*) block);
 
2667
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
2668
                mutex_exit(&block->mutex);
 
2669
                buf_pool_mutex_exit();
 
2670
                buf_print();
 
2671
                buf_LRU_print();
 
2672
                buf_validate();
 
2673
                buf_LRU_validate();
 
2674
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
2675
                ut_error;
 
2676
        }
 
2677
 
 
2678
        buf_page_init_low(&block->page);
 
2679
 
 
2680
        ut_ad(!block->page.in_zip_hash);
 
2681
        ut_ad(!block->page.in_page_hash);
 
2682
        ut_d(block->page.in_page_hash = TRUE);
 
2683
        HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
 
2684
                    buf_page_address_fold(space, offset), &block->page);
 
2685
}
 
2686
 
 
2687
/********************************************************************//**
 
2688
Function which inits a page for read to the buffer buf_pool. If the page is
 
2689
(1) already in buf_pool, or
 
2690
(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
 
2691
(3) if the space is deleted or being deleted,
 
2692
then this function does nothing.
 
2693
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
 
2694
on the buffer frame. The io-handler must take care that the flag is cleared
 
2695
and the lock released later.
 
2696
@return pointer to the block or NULL */
 
2697
UNIV_INTERN
 
2698
buf_page_t*
 
2699
buf_page_init_for_read(
 
2700
/*===================*/
 
2701
        ulint*          err,    /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
 
2702
        ulint           mode,   /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
 
2703
        ulint           space,  /*!< in: space id */
 
2704
        ulint           zip_size,/*!< in: compressed page size, or 0 */
 
2705
        ibool           unzip,  /*!< in: TRUE=request uncompressed page */
 
2706
        ib_int64_t      tablespace_version,/*!< in: prevents reading from a wrong
 
2707
                                version of the tablespace in case we have done
 
2708
                                DISCARD + IMPORT */
 
2709
        ulint           offset) /*!< in: page number */
 
2710
{
 
2711
        buf_block_t*    block;
 
2712
        buf_page_t*     bpage;
 
2713
        mtr_t           mtr;
 
2714
        ibool           lru     = FALSE;
 
2715
        void*           data;
 
2716
 
 
2717
        ut_ad(buf_pool);
 
2718
 
 
2719
        *err = DB_SUCCESS;
 
2720
 
 
2721
        if (mode == BUF_READ_IBUF_PAGES_ONLY) {
 
2722
                /* It is a read-ahead within an ibuf routine */
 
2723
 
 
2724
                ut_ad(!ibuf_bitmap_page(zip_size, offset));
 
2725
                ut_ad(ibuf_inside());
 
2726
 
 
2727
                mtr_start(&mtr);
 
2728
 
 
2729
                if (!recv_no_ibuf_operations
 
2730
                    && !ibuf_page(space, zip_size, offset, &mtr)) {
 
2731
 
 
2732
                        mtr_commit(&mtr);
 
2733
 
 
2734
                        return(NULL);
 
2735
                }
 
2736
        } else {
 
2737
                ut_ad(mode == BUF_READ_ANY_PAGE);
 
2738
        }
 
2739
 
 
2740
        if (zip_size && UNIV_LIKELY(!unzip)
 
2741
            && UNIV_LIKELY(!recv_recovery_is_on())) {
 
2742
                block = NULL;
 
2743
        } else {
 
2744
                block = buf_LRU_get_free_block(0);
 
2745
                ut_ad(block);
 
2746
        }
 
2747
 
 
2748
        buf_pool_mutex_enter();
 
2749
 
 
2750
        if (buf_page_hash_get(space, offset)) {
 
2751
                /* The page is already in the buffer pool. */
 
2752
err_exit:
 
2753
                if (block) {
 
2754
                        mutex_enter(&block->mutex);
 
2755
                        buf_LRU_block_free_non_file_page(block);
 
2756
                        mutex_exit(&block->mutex);
 
2757
                }
 
2758
 
 
2759
                bpage = NULL;
 
2760
                goto func_exit;
 
2761
        }
 
2762
 
 
2763
        if (fil_tablespace_deleted_or_being_deleted_in_mem(
 
2764
                    space, tablespace_version)) {
 
2765
                /* The page belongs to a space which has been
 
2766
                deleted or is being deleted. */
 
2767
                *err = DB_TABLESPACE_DELETED;
 
2768
 
 
2769
                goto err_exit;
 
2770
        }
 
2771
 
 
2772
        if (block) {
 
2773
                bpage = &block->page;
 
2774
                mutex_enter(&block->mutex);
 
2775
                buf_page_init(space, offset, block);
 
2776
 
 
2777
                /* The block must be put to the LRU list, to the old blocks */
 
2778
                buf_LRU_add_block(bpage, TRUE/* to old blocks */);
 
2779
 
 
2780
                /* We set a pass-type x-lock on the frame because then
 
2781
                the same thread which called for the read operation
 
2782
                (and is running now at this point of code) can wait
 
2783
                for the read to complete by waiting for the x-lock on
 
2784
                the frame; if the x-lock were recursive, the same
 
2785
                thread would illegally get the x-lock before the page
 
2786
                read is completed.  The x-lock is cleared by the
 
2787
                io-handler thread. */
 
2788
 
 
2789
                rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
 
2790
                buf_page_set_io_fix(bpage, BUF_IO_READ);
 
2791
 
 
2792
                if (UNIV_UNLIKELY(zip_size)) {
 
2793
                        page_zip_set_size(&block->page.zip, zip_size);
 
2794
 
 
2795
                        /* buf_pool_mutex may be released and
 
2796
                        reacquired by buf_buddy_alloc().  Thus, we
 
2797
                        must release block->mutex in order not to
 
2798
                        break the latching order in the reacquisition
 
2799
                        of buf_pool_mutex.  We also must defer this
 
2800
                        operation until after the block descriptor has
 
2801
                        been added to buf_pool->LRU and
 
2802
                        buf_pool->page_hash. */
 
2803
                        mutex_exit(&block->mutex);
 
2804
                        data = buf_buddy_alloc(zip_size, &lru);
 
2805
                        mutex_enter(&block->mutex);
 
2806
                        block->page.zip.data = data;
 
2807
 
 
2808
                        /* To maintain the invariant
 
2809
                        block->in_unzip_LRU_list
 
2810
                        == buf_page_belongs_to_unzip_LRU(&block->page)
 
2811
                        we have to add this block to unzip_LRU
 
2812
                        after block->page.zip.data is set. */
 
2813
                        ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
 
2814
                        buf_unzip_LRU_add_block(block, TRUE);
 
2815
                }
 
2816
 
 
2817
                mutex_exit(&block->mutex);
 
2818
        } else {
 
2819
                /* Defer buf_buddy_alloc() until after the block has
 
2820
                been found not to exist.  The buf_buddy_alloc() and
 
2821
                buf_buddy_free() calls may be expensive because of
 
2822
                buf_buddy_relocate(). */
 
2823
 
 
2824
                /* The compressed page must be allocated before the
 
2825
                control block (bpage), in order to avoid the
 
2826
                invocation of buf_buddy_relocate_block() on
 
2827
                uninitialized data. */
 
2828
                data = buf_buddy_alloc(zip_size, &lru);
 
2829
                bpage = buf_buddy_alloc(sizeof *bpage, &lru);
 
2830
 
 
2831
                /* If buf_buddy_alloc() allocated storage from the LRU list,
 
2832
                it released and reacquired buf_pool_mutex.  Thus, we must
 
2833
                check the page_hash again, as it may have been modified. */
 
2834
                if (UNIV_UNLIKELY(lru)
 
2835
                    && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
 
2836
 
 
2837
                        /* The block was added by some other thread. */
 
2838
                        buf_buddy_free(bpage, sizeof *bpage);
 
2839
                        buf_buddy_free(data, zip_size);
 
2840
 
 
2841
                        bpage = NULL;
 
2842
                        goto func_exit;
 
2843
                }
 
2844
 
 
2845
                page_zip_des_init(&bpage->zip);
 
2846
                page_zip_set_size(&bpage->zip, zip_size);
 
2847
                bpage->zip.data = data;
 
2848
 
 
2849
                mutex_enter(&buf_pool_zip_mutex);
 
2850
                UNIV_MEM_DESC(bpage->zip.data,
 
2851
                              page_zip_get_size(&bpage->zip), bpage);
 
2852
                buf_page_init_low(bpage);
 
2853
                bpage->state    = BUF_BLOCK_ZIP_PAGE;
 
2854
                bpage->space    = space;
 
2855
                bpage->offset   = offset;
 
2856
 
 
2857
#ifdef UNIV_DEBUG
 
2858
                bpage->in_page_hash = FALSE;
 
2859
                bpage->in_zip_hash = FALSE;
 
2860
                bpage->in_flush_list = FALSE;
 
2861
                bpage->in_free_list = FALSE;
 
2862
                bpage->in_LRU_list = FALSE;
 
2863
#endif /* UNIV_DEBUG */
 
2864
 
 
2865
                ut_d(bpage->in_page_hash = TRUE);
 
2866
                HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
 
2867
                            buf_page_address_fold(space, offset), bpage);
 
2868
 
 
2869
                /* The block must be put to the LRU list, to the old blocks */
 
2870
                buf_LRU_add_block(bpage, TRUE/* to old blocks */);
 
2871
                buf_LRU_insert_zip_clean(bpage);
 
2872
 
 
2873
                buf_page_set_io_fix(bpage, BUF_IO_READ);
 
2874
 
 
2875
                mutex_exit(&buf_pool_zip_mutex);
 
2876
        }
 
2877
 
 
2878
        buf_pool->n_pend_reads++;
 
2879
func_exit:
 
2880
        buf_pool_mutex_exit();
 
2881
 
 
2882
        if (mode == BUF_READ_IBUF_PAGES_ONLY) {
 
2883
 
 
2884
                mtr_commit(&mtr);
 
2885
        }
 
2886
 
 
2887
        ut_ad(!bpage || buf_page_in_file(bpage));
 
2888
        return(bpage);
 
2889
}
 
2890
 
 
2891
/********************************************************************//**
 
2892
Initializes a page to the buffer buf_pool. The page is usually not read
 
2893
from a file even if it cannot be found in the buffer buf_pool. This is one
 
2894
of the functions which perform to a block a state transition NOT_USED =>
 
2895
FILE_PAGE (the other is buf_page_get_gen).
 
2896
@return pointer to the block, page bufferfixed */
 
2897
UNIV_INTERN
 
2898
buf_block_t*
 
2899
buf_page_create(
 
2900
/*============*/
 
2901
        ulint   space,  /*!< in: space id */
 
2902
        ulint   offset, /*!< in: offset of the page within space in units of
 
2903
                        a page */
 
2904
        ulint   zip_size,/*!< in: compressed page size, or 0 */
 
2905
        mtr_t*  mtr)    /*!< in: mini-transaction handle */
 
2906
{
 
2907
        buf_frame_t*    frame;
 
2908
        buf_block_t*    block;
 
2909
        buf_block_t*    free_block      = NULL;
 
2910
 
 
2911
        ut_ad(mtr);
 
2912
        ut_ad(space || !zip_size);
 
2913
 
 
2914
        free_block = buf_LRU_get_free_block(0);
 
2915
 
 
2916
        buf_pool_mutex_enter();
 
2917
 
 
2918
        block = (buf_block_t*) buf_page_hash_get(space, offset);
 
2919
 
 
2920
        if (block && buf_page_in_file(&block->page)) {
 
2921
#ifdef UNIV_IBUF_COUNT_DEBUG
 
2922
                ut_a(ibuf_count_get(space, offset) == 0);
 
2923
#endif
 
2924
#ifdef UNIV_DEBUG_FILE_ACCESSES
 
2925
                block->page.file_page_was_freed = FALSE;
 
2926
#endif /* UNIV_DEBUG_FILE_ACCESSES */
 
2927
 
 
2928
                /* Page can be found in buf_pool */
 
2929
                buf_pool_mutex_exit();
 
2930
 
 
2931
                buf_block_free(free_block);
 
2932
 
 
2933
                return(buf_page_get_with_no_latch(space, zip_size,
 
2934
                                                  offset, mtr));
 
2935
        }
 
2936
 
 
2937
        /* If we get here, the page was not in buf_pool: init it there */
 
2938
 
 
2939
#ifdef UNIV_DEBUG
 
2940
        if (buf_debug_prints) {
 
2941
                fprintf(stderr, "Creating space %lu page %lu to buffer\n",
 
2942
                        (ulong) space, (ulong) offset);
 
2943
        }
 
2944
#endif /* UNIV_DEBUG */
 
2945
 
 
2946
        block = free_block;
 
2947
 
 
2948
        mutex_enter(&block->mutex);
 
2949
 
 
2950
        buf_page_init(space, offset, block);
 
2951
 
 
2952
        /* The block must be put to the LRU list */
 
2953
        buf_LRU_add_block(&block->page, FALSE);
 
2954
 
 
2955
        buf_block_buf_fix_inc(block, __FILE__, __LINE__);
 
2956
        buf_pool->n_pages_created++;
 
2957
 
 
2958
        if (zip_size) {
 
2959
                void*   data;
 
2960
                ibool   lru;
 
2961
 
 
2962
                /* Prevent race conditions during buf_buddy_alloc(),
 
2963
                which may release and reacquire buf_pool_mutex,
 
2964
                by IO-fixing and X-latching the block. */
 
2965
 
 
2966
                buf_page_set_io_fix(&block->page, BUF_IO_READ);
 
2967
                rw_lock_x_lock(&block->lock);
 
2968
 
 
2969
                page_zip_set_size(&block->page.zip, zip_size);
 
2970
                mutex_exit(&block->mutex);
 
2971
                /* buf_pool_mutex may be released and reacquired by
 
2972
                buf_buddy_alloc().  Thus, we must release block->mutex
 
2973
                in order not to break the latching order in
 
2974
                the reacquisition of buf_pool_mutex.  We also must
 
2975
                defer this operation until after the block descriptor
 
2976
                has been added to buf_pool->LRU and buf_pool->page_hash. */
 
2977
                data = buf_buddy_alloc(zip_size, &lru);
 
2978
                mutex_enter(&block->mutex);
 
2979
                block->page.zip.data = data;
 
2980
 
 
2981
                /* To maintain the invariant
 
2982
                block->in_unzip_LRU_list
 
2983
                == buf_page_belongs_to_unzip_LRU(&block->page)
 
2984
                we have to add this block to unzip_LRU after
 
2985
                block->page.zip.data is set. */
 
2986
                ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
 
2987
                buf_unzip_LRU_add_block(block, FALSE);
 
2988
 
 
2989
                buf_page_set_io_fix(&block->page, BUF_IO_NONE);
 
2990
                rw_lock_x_unlock(&block->lock);
 
2991
        }
 
2992
 
 
2993
        buf_pool_mutex_exit();
 
2994
 
 
2995
        mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
 
2996
 
 
2997
        buf_page_set_accessed(&block->page, TRUE);
 
2998
 
 
2999
        mutex_exit(&block->mutex);
 
3000
 
 
3001
        /* Delete possible entries for the page from the insert buffer:
 
3002
        such can exist if the page belonged to an index which was dropped */
 
3003
 
 
3004
        ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
 
3005
 
 
3006
        /* Flush pages from the end of the LRU list if necessary */
 
3007
        buf_flush_free_margin();
 
3008
 
 
3009
        frame = block->frame;
 
3010
 
 
3011
        memset(frame + FIL_PAGE_PREV, 0xff, 4);
 
3012
        memset(frame + FIL_PAGE_NEXT, 0xff, 4);
 
3013
        mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);
 
3014
 
 
3015
        /* Reset to zero the file flush lsn field in the page; if the first
 
3016
        page of an ibdata file is 'created' in this function into the buffer
 
3017
        pool then we lose the original contents of the file flush lsn stamp.
 
3018
        Then InnoDB could in a crash recovery print a big, false, corruption
 
3019
        warning if the stamp contains an lsn bigger than the ib_logfile lsn. */
 
3020
 
 
3021
        memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
 
3022
 
 
3023
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 
3024
        ut_a(++buf_dbg_counter % 357 || buf_validate());
 
3025
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
3026
#ifdef UNIV_IBUF_COUNT_DEBUG
 
3027
        ut_a(ibuf_count_get(buf_block_get_space(block),
 
3028
                            buf_block_get_page_no(block)) == 0);
 
3029
#endif
 
3030
        return(block);
 
3031
}
 
/********************************************************************//**
 
3034
Completes an asynchronous read or write request of a file page to or from
 
3035
the buffer pool. */
 
3036
UNIV_INTERN
 
3037
void
 
3038
buf_page_io_complete(
 
3039
/*=================*/
 
3040
        buf_page_t*     bpage)  /*!< in: pointer to the block in question */
 
3041
{
 
3042
        enum buf_io_fix io_type;
 
3043
        const ibool     uncompressed = (buf_page_get_state(bpage)
 
3044
                                        == BUF_BLOCK_FILE_PAGE);
 
3045
 
 
3046
        ut_a(buf_page_in_file(bpage));
 
3047
 
 
3048
        /* We do not need protect io_fix here by mutex to read
 
3049
        it because this is the only function where we can change the value
 
3050
        from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
 
3051
        ensures that this is the only thread that handles the i/o for this
 
3052
        block. */
 
3053
 
 
3054
        io_type = buf_page_get_io_fix(bpage);
 
3055
        ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
 
3056
 
 
3057
        if (io_type == BUF_IO_READ) {
 
3058
                ulint   read_page_no;
 
3059
                ulint   read_space_id;
 
3060
                byte*   frame;
 
3061
 
 
3062
                if (buf_page_get_zip_size(bpage)) {
 
3063
                        frame = bpage->zip.data;
 
3064
                        buf_pool->n_pend_unzip++;
 
3065
                        if (uncompressed
 
3066
                            && !buf_zip_decompress((buf_block_t*) bpage,
 
3067
                                                   FALSE)) {
 
3068
 
 
3069
                                buf_pool->n_pend_unzip--;
 
3070
                                goto corrupt;
 
3071
                        }
 
3072
                        buf_pool->n_pend_unzip--;
 
3073
                } else {
 
3074
                        ut_a(uncompressed);
 
3075
                        frame = ((buf_block_t*) bpage)->frame;
 
3076
                }
 
3077
 
 
3078
                /* If this page is not uninitialized and not in the
 
3079
                doublewrite buffer, then the page number and space id
 
3080
                should be the same as in block. */
 
3081
                read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
 
3082
                read_space_id = mach_read_from_4(
 
3083
                        frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
 
3084
 
 
3085
                if (bpage->space == TRX_SYS_SPACE
 
3086
                    && trx_doublewrite_page_inside(bpage->offset)) {
 
3087
 
 
3088
                        ut_print_timestamp(stderr);
 
3089
                        fprintf(stderr,
 
3090
                                "  InnoDB: Error: reading page %lu\n"
 
3091
                                "InnoDB: which is in the"
 
3092
                                " doublewrite buffer!\n",
 
3093
                                (ulong) bpage->offset);
 
3094
                } else if (!read_space_id && !read_page_no) {
 
3095
                        /* This is likely an uninitialized page. */
 
3096
                } else if ((bpage->space
 
3097
                            && bpage->space != read_space_id)
 
3098
                           || bpage->offset != read_page_no) {
 
3099
                        /* We did not compare space_id to read_space_id
 
3100
                        if bpage->space == 0, because the field on the
 
3101
                        page may contain garbage in MySQL < 4.1.1,
 
3102
                        which only supported bpage->space == 0. */
 
3103
 
 
3104
                        ut_print_timestamp(stderr);
 
3105
                        fprintf(stderr,
 
3106
                                "  InnoDB: Error: space id and page n:o"
 
3107
                                " stored in the page\n"
 
3108
                                "InnoDB: read in are %lu:%lu,"
 
3109
                                " should be %lu:%lu!\n",
 
3110
                                (ulong) read_space_id, (ulong) read_page_no,
 
3111
                                (ulong) bpage->space,
 
3112
                                (ulong) bpage->offset);
 
3113
                }
 
3114
 
 
3115
                /* From version 3.23.38 up we store the page checksum
 
3116
                to the 4 first bytes of the page end lsn field */
 
3117
 
 
3118
                if (buf_page_is_corrupted(frame,
 
3119
                                          buf_page_get_zip_size(bpage))) {
 
3120
corrupt:
 
3121
                        fprintf(stderr,
 
3122
                                "InnoDB: Database page corruption on disk"
 
3123
                                " or a failed\n"
 
3124
                                "InnoDB: file read of page %lu.\n"
 
3125
                                "InnoDB: You may have to recover"
 
3126
                                " from a backup.\n",
 
3127
                                (ulong) bpage->offset);
 
3128
                        buf_page_print(frame, buf_page_get_zip_size(bpage));
 
3129
                        fprintf(stderr,
 
3130
                                "InnoDB: Database page corruption on disk"
 
3131
                                " or a failed\n"
 
3132
                                "InnoDB: file read of page %lu.\n"
 
3133
                                "InnoDB: You may have to recover"
 
3134
                                " from a backup.\n",
 
3135
                                (ulong) bpage->offset);
 
3136
                        fputs("InnoDB: It is also possible that"
 
3137
                              " your operating\n"
 
3138
                              "InnoDB: system has corrupted its"
 
3139
                              " own file cache\n"
 
3140
                              "InnoDB: and rebooting your computer"
 
3141
                              " removes the\n"
 
3142
                              "InnoDB: error.\n"
 
3143
                              "InnoDB: If the corrupt page is an index page\n"
 
3144
                              "InnoDB: you can also try to"
 
3145
                              " fix the corruption\n"
 
3146
                              "InnoDB: by dumping, dropping,"
 
3147
                              " and reimporting\n"
 
3148
                              "InnoDB: the corrupt table."
 
3149
                              " You can use CHECK\n"
 
3150
                              "InnoDB: TABLE to scan your"
 
3151
                              " table for corruption.\n"
 
3152
                              "InnoDB: See also "
 
3153
                              REFMAN "forcing-recovery.html\n"
 
3154
                              "InnoDB: about forcing recovery.\n", stderr);
 
3155
 
 
3156
                        if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
 
3157
                                fputs("InnoDB: Ending processing because of"
 
3158
                                      " a corrupt database page.\n",
 
3159
                                      stderr);
 
3160
                                exit(1);
 
3161
                        }
 
3162
                }
 
3163
 
 
3164
                if (recv_recovery_is_on()) {
 
3165
                        /* Pages must be uncompressed for crash recovery. */
 
3166
                        ut_a(uncompressed);
 
3167
                        recv_recover_page(TRUE, (buf_block_t*) bpage);
 
3168
                }
 
3169
 
 
3170
                if (uncompressed && !recv_no_ibuf_operations) {
 
3171
                        ibuf_merge_or_delete_for_page(
 
3172
                                (buf_block_t*) bpage, bpage->space,
 
3173
                                bpage->offset, buf_page_get_zip_size(bpage),
 
3174
                                TRUE);
 
3175
                }
 
3176
        }
 
3177
 
 
3178
        buf_pool_mutex_enter();
 
3179
        mutex_enter(buf_page_get_mutex(bpage));
 
3180
 
 
3181
#ifdef UNIV_IBUF_COUNT_DEBUG
 
3182
        if (io_type == BUF_IO_WRITE || uncompressed) {
 
3183
                /* For BUF_IO_READ of compressed-only blocks, the
 
3184
                buffered operations will be merged by buf_page_get_gen()
 
3185
                after the block has been uncompressed. */
 
3186
                ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
 
3187
        }
 
3188
#endif
 
3189
        /* Because this thread which does the unlocking is not the same that
 
3190
        did the locking, we use a pass value != 0 in unlock, which simply
 
3191
        removes the newest lock debug record, without checking the thread
 
3192
        id. */
 
3193
 
 
3194
        buf_page_set_io_fix(bpage, BUF_IO_NONE);
 
3195
 
 
3196
        switch (io_type) {
 
3197
        case BUF_IO_READ:
 
3198
                /* NOTE that the call to ibuf may have moved the ownership of
 
3199
                the x-latch to this OS thread: do not let this confuse you in
 
3200
                debugging! */
 
3201
 
 
3202
                ut_ad(buf_pool->n_pend_reads > 0);
 
3203
                buf_pool->n_pend_reads--;
 
3204
                buf_pool->n_pages_read++;
 
3205
 
 
3206
                if (uncompressed) {
 
3207
                        rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
 
3208
                                             BUF_IO_READ);
 
3209
                }
 
3210
 
 
3211
                break;
 
3212
 
 
3213
        case BUF_IO_WRITE:
 
3214
                /* Write means a flush operation: call the completion
 
3215
                routine in the flush system */
 
3216
 
 
3217
                buf_flush_write_complete(bpage);
 
3218
 
 
3219
                if (uncompressed) {
 
3220
                        rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
 
3221
                                             BUF_IO_WRITE);
 
3222
                }
 
3223
 
 
3224
                buf_pool->n_pages_written++;
 
3225
 
 
3226
                break;
 
3227
 
 
3228
        default:
 
3229
                ut_error;
 
3230
        }
 
3231
 
 
3232
#ifdef UNIV_DEBUG
 
3233
        if (buf_debug_prints) {
 
3234
                fprintf(stderr, "Has %s page space %lu page no %lu\n",
 
3235
                        io_type == BUF_IO_READ ? "read" : "written",
 
3236
                        (ulong) buf_page_get_space(bpage),
 
3237
                        (ulong) buf_page_get_page_no(bpage));
 
3238
        }
 
3239
#endif /* UNIV_DEBUG */
 
3240
 
 
3241
        mutex_exit(buf_page_get_mutex(bpage));
 
3242
        buf_pool_mutex_exit();
 
3243
}
 
/*********************************************************************//**
 
3246
Invalidates the file pages in the buffer pool when an archive recovery is
 
3247
completed. All the file pages buffered must be in a replaceable state when
 
3248
this function is called: not latched and not modified. */
 
3249
UNIV_INTERN
 
3250
void
 
3251
buf_pool_invalidate(void)
 
3252
/*=====================*/
 
3253
{
 
3254
        ibool   freed;
 
3255
 
 
3256
        ut_ad(buf_all_freed());
 
3257
 
 
3258
        freed = TRUE;
 
3259
 
 
3260
        while (freed) {
 
3261
                freed = buf_LRU_search_and_free_block(100);
 
3262
        }
 
3263
 
 
3264
        buf_pool_mutex_enter();
 
3265
 
 
3266
        ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
 
3267
        ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
 
3268
 
 
3269
        buf_pool_mutex_exit();
 
3270
}
 
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/*********************************************************************//**
Validates the buffer buf_pool data structure.
@return	TRUE */
UNIV_INTERN
ibool
buf_validate(void)
/*==============*/
{
	buf_page_t*	bpage;
	buf_chunk_t*	chunk;
	ulint		i;
	/* Tallies gathered while walking the pool; each is compared
	against the corresponding buf_pool counter at the end. */
	ulint		n_single_flush	= 0;
	ulint		n_lru_flush	= 0;
	ulint		n_list_flush	= 0;
	ulint		n_lru		= 0;
	ulint		n_flush		= 0;
	ulint		n_free		= 0;
	ulint		n_zip		= 0;

	ut_ad(buf_pool);

	buf_pool_mutex_enter();

	/* Pass 1: the uncompressed blocks, chunk by chunk. */

	chunk = buf_pool->chunks;

	for (i = 0; i < buf_pool->n_chunks; i++, chunk++) {

		ulint		j;
		buf_block_t*	blk = chunk->blocks;

		for (j = 0; j < chunk->size; j++, blk++) {

			mutex_enter(&blk->mutex);

			switch (buf_block_get_state(blk)) {
			case BUF_BLOCK_ZIP_FREE:
			case BUF_BLOCK_ZIP_PAGE:
			case BUF_BLOCK_ZIP_DIRTY:
				/* These states live only on zip_clean,
				zip_free[], or the flush_list — never in
				a chunk. */
				ut_error;
				break;

			case BUF_BLOCK_FILE_PAGE:
				/* The page hash must map back to this
				very control block. */
				ut_a(buf_page_hash_get(buf_block_get_space(
							       blk),
						       buf_block_get_page_no(
							       blk))
				     == &blk->page);

#ifdef UNIV_IBUF_COUNT_DEBUG
				ut_a(buf_page_get_io_fix(&blk->page)
				     == BUF_IO_READ
				     || !ibuf_count_get(buf_block_get_space(
								blk),
							buf_block_get_page_no(
								blk)));
#endif
				switch (buf_page_get_io_fix(&blk->page)) {
				case BUF_IO_NONE:
					break;

				case BUF_IO_WRITE:
					switch (buf_page_get_flush_type(
							&blk->page)) {
					case BUF_FLUSH_LRU:
						n_lru_flush++;
						ut_a(rw_lock_is_locked(
							     &blk->lock,
							     RW_LOCK_SHARED));
						break;
					case BUF_FLUSH_LIST:
						n_list_flush++;
						break;
					case BUF_FLUSH_SINGLE_PAGE:
						n_single_flush++;
						break;
					default:
						ut_error;
					}

					break;

				case BUF_IO_READ:

					ut_a(rw_lock_is_locked(&blk->lock,
							       RW_LOCK_EX));
					break;
				}

				n_lru++;

				if (blk->page.oldest_modification > 0) {
					n_flush++;
				}

				break;

			case BUF_BLOCK_NOT_USED:
				n_free++;
				break;

			case BUF_BLOCK_READY_FOR_USE:
			case BUF_BLOCK_MEMORY:
			case BUF_BLOCK_REMOVE_HASH:
				/* do nothing */
				break;
			}

			mutex_exit(&blk->mutex);
		}
	}

	mutex_enter(&buf_pool_zip_mutex);

	/* Pass 2: the clean compressed-only blocks. */

	for (bpage = UT_LIST_GET_FIRST(buf_pool->zip_clean); bpage;
	     bpage = UT_LIST_GET_NEXT(list, bpage)) {
		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
		switch (buf_page_get_io_fix(bpage)) {
		case BUF_IO_NONE:
			/* All clean blocks should be I/O-unfixed. */
			break;
		case BUF_IO_READ:
			/* buf_LRU_free_block() temporarily sets
			io_fix = BUF_IO_READ on a newly allocated
			control block to keep buf_page_get_gen() from
			decompressing the block. */
			break;
		default:
			ut_error;
			break;
		}
		ut_a(!bpage->oldest_modification);
		ut_a(buf_page_hash_get(bpage->space, bpage->offset) == bpage);

		n_lru++;
		n_zip++;
	}

	/* Pass 3: the dirty blocks on the flush list. */

	for (bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); bpage;
	     bpage = UT_LIST_GET_NEXT(list, bpage)) {
		ut_ad(bpage->in_flush_list);

		switch (buf_page_get_state(bpage)) {
		case BUF_BLOCK_ZIP_DIRTY:
			ut_a(bpage->oldest_modification);
			n_lru++;
			n_flush++;
			n_zip++;
			switch (buf_page_get_io_fix(bpage)) {
			case BUF_IO_NONE:
			case BUF_IO_READ:
				break;

			case BUF_IO_WRITE:
				switch (buf_page_get_flush_type(bpage)) {
				case BUF_FLUSH_LRU:
					n_lru_flush++;
					break;
				case BUF_FLUSH_LIST:
					n_list_flush++;
					break;
				case BUF_FLUSH_SINGLE_PAGE:
					n_single_flush++;
					break;
				default:
					ut_error;
				}
				break;
			}
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page */
			break;
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			ut_error;
			break;
		}
		ut_a(buf_page_hash_get(bpage->space, bpage->offset) == bpage);
	}

	mutex_exit(&buf_pool_zip_mutex);

	/* Cross-check the tallies against the pool bookkeeping. */

	if (n_lru + n_free > buf_pool->curr_size + n_zip) {
		fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
			(ulong) n_lru, (ulong) n_free,
			(ulong) buf_pool->curr_size, (ulong) n_zip);
		ut_error;
	}

	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
	if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
		fprintf(stderr, "Free list len %lu, free blocks %lu\n",
			(ulong) UT_LIST_GET_LEN(buf_pool->free),
			(ulong) n_free);
		ut_error;
	}
	ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);

	ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
	ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
	ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);

	buf_pool_mutex_exit();

	ut_a(buf_LRU_validate());
	ut_a(buf_flush_validate());

	return(TRUE);
}
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/*********************************************************************//**
Prints info of the buffer buf_pool data structure to stderr: pool-wide
counters, followed by an approximate per-index block count. */
UNIV_INTERN
void
buf_print(void)
/*===========*/
{
	dulint*		index_ids;	/* distinct index ids seen so far */
	ulint*		counts;		/* block count per index_ids[] entry */
	ulint		size;
	ulint		i;
	ulint		j;
	dulint		id;
	ulint		n_found;
	buf_chunk_t*	chunk;
	dict_index_t*	index;

	ut_ad(buf_pool);

	size = buf_pool->curr_size;

	/* At most one distinct index per buffer page, so curr_size
	entries always suffice. */
	index_ids = mem_alloc(sizeof(dulint) * size);
	counts = mem_alloc(sizeof(ulint) * size);

	buf_pool_mutex_enter();

	fprintf(stderr,
		"buf_pool size %lu\n"
		"database pages %lu\n"
		"free pages %lu\n"
		"modified database pages %lu\n"
		"n pending decompressions %lu\n"
		"n pending reads %lu\n"
		"n pending flush LRU %lu list %lu single page %lu\n"
		"pages read %lu, created %lu, written %lu\n",
		(ulong) size,
		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
		(ulong) UT_LIST_GET_LEN(buf_pool->free),
		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
		(ulong) buf_pool->n_pend_unzip,
		(ulong) buf_pool->n_pend_reads,
		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
		(ulong) buf_pool->n_pages_read,
		/* BUG FIX: n_pages_created lacked the (ulong) cast that
		every other %lu argument here has; on platforms where
		ulint != unsigned long that is a format/argument
		mismatch (undefined behavior). */
		(ulong) buf_pool->n_pages_created,
		(ulong) buf_pool->n_pages_written);

	/* Count the number of blocks belonging to each index in the buffer */

	n_found = 0;

	chunk = buf_pool->chunks;

	for (i = buf_pool->n_chunks; i--; chunk++) {
		buf_block_t*	block		= chunk->blocks;
		ulint		n_blocks	= chunk->size;

		for (; n_blocks--; block++) {
			const buf_frame_t* frame = block->frame;

			if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {

				id = btr_page_get_index_id(frame);

				/* Linear search for the id in the
				index_ids array. */
				j = 0;

				while (j < n_found) {

					if (ut_dulint_cmp(index_ids[j],
							  id) == 0) {
						counts[j]++;

						break;
					}
					j++;
				}

				/* Not seen before: append a new entry. */
				if (j == n_found) {
					n_found++;
					index_ids[j] = id;
					counts[j] = 1;
				}
			}
		}
	}

	buf_pool_mutex_exit();

	/* Report the per-index counts, with the index name when it is
	still in the dictionary cache. */
	for (i = 0; i < n_found; i++) {
		index = dict_index_get_if_in_cache(index_ids[i]);

		fprintf(stderr,
			"Block count for index %lu in buffer is about %lu",
			(ulong) ut_dulint_get_low(index_ids[i]),
			(ulong) counts[i]);

		if (index) {
			putc(' ', stderr);
			dict_index_name_print(stderr, NULL, index);
		}

		putc('\n', stderr);
	}

	mem_free(index_ids);
	mem_free(counts);

	ut_a(buf_validate());
}
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
 
#ifdef UNIV_DEBUG
/*********************************************************************//**
Returns the number of latched pages in the buffer pool.
@return	number of latched pages */
UNIV_INTERN
ulint
buf_get_latched_pages_number(void)
/*==============================*/
{
	buf_chunk_t*	chunk;
	buf_page_t*	bpage;
	ulint		i;
	ulint		fixed_pages_number = 0;

	buf_pool_mutex_enter();

	/* Pass 1: the uncompressed file pages in the chunks. */

	chunk = buf_pool->chunks;

	for (i = 0; i < buf_pool->n_chunks; i++, chunk++) {
		buf_block_t*	blk;
		ulint		j;

		blk = chunk->blocks;

		for (j = 0; j < chunk->size; j++, blk++) {
			if (buf_block_get_state(blk)
			    != BUF_BLOCK_FILE_PAGE) {

				continue;
			}

			mutex_enter(&blk->mutex);

			/* Either buffer-fixed or I/O-fixed counts as
			latched. */
			if (blk->page.buf_fix_count != 0
			    || buf_page_get_io_fix(&blk->page)
			    != BUF_IO_NONE) {
				fixed_pages_number++;
			}

			mutex_exit(&blk->mutex);
		}
	}

	mutex_enter(&buf_pool_zip_mutex);

	/* Pass 2: the clean and the dirty compressed-only blocks. */

	for (bpage = UT_LIST_GET_FIRST(buf_pool->zip_clean); bpage;
	     bpage = UT_LIST_GET_NEXT(list, bpage)) {
		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
		ut_a(buf_page_get_io_fix(bpage) != BUF_IO_WRITE);

		if (bpage->buf_fix_count != 0
		    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
			fixed_pages_number++;
		}
	}

	for (bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); bpage;
	     bpage = UT_LIST_GET_NEXT(list, bpage)) {
		ut_ad(bpage->in_flush_list);

		switch (buf_page_get_state(bpage)) {
		case BUF_BLOCK_ZIP_DIRTY:
			if (bpage->buf_fix_count != 0
			    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
				fixed_pages_number++;
			}
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page; counted in pass 1 */
			break;
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			/* None of these states may appear on the
			flush list. */
			ut_error;
			break;
		}
	}

	mutex_exit(&buf_pool_zip_mutex);
	buf_pool_mutex_exit();

	return(fixed_pages_number);
}
#endif /* UNIV_DEBUG */
 
/*********************************************************************//**
 
3700
Returns the number of pending buf pool ios.
 
3701
@return number of pending I/O operations */
 
3702
UNIV_INTERN
 
3703
ulint
 
3704
buf_get_n_pending_ios(void)
 
3705
/*=======================*/
 
3706
{
 
3707
        return(buf_pool->n_pend_reads
 
3708
               + buf_pool->n_flush[BUF_FLUSH_LRU]
 
3709
               + buf_pool->n_flush[BUF_FLUSH_LIST]
 
3710
               + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
 
3711
}
 
/*********************************************************************//**
 
3714
Returns the ratio in percents of modified pages in the buffer pool /
 
3715
database pages in the buffer pool.
 
3716
@return modified page percentage ratio */
 
3717
UNIV_INTERN
 
3718
ulint
 
3719
buf_get_modified_ratio_pct(void)
 
3720
/*============================*/
 
3721
{
 
3722
        ulint   ratio;
 
3723
 
 
3724
        buf_pool_mutex_enter();
 
3725
 
 
3726
        ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list))
 
3727
                / (1 + UT_LIST_GET_LEN(buf_pool->LRU)
 
3728
                   + UT_LIST_GET_LEN(buf_pool->free));
 
3729
 
 
3730
        /* 1 + is there to avoid division by zero */
 
3731
 
 
3732
        buf_pool_mutex_exit();
 
3733
 
 
3734
        return(ratio);
 
3735
}
 
3736
 
 
3737
/*********************************************************************//**
 
3738
Prints info of the buffer i/o. */
 
3739
UNIV_INTERN
 
3740
void
 
3741
buf_print_io(
 
3742
/*=========*/
 
3743
        FILE*   file)   /*!< in/out: buffer where to print */
 
3744
{
 
3745
        time_t  current_time;
 
3746
        double  time_elapsed;
 
3747
        ulint   size;
 
3748
 
 
3749
        ut_ad(buf_pool);
 
3750
        size = buf_pool->curr_size;
 
3751
 
 
3752
        buf_pool_mutex_enter();
 
3753
 
 
3754
        fprintf(file,
 
3755
                "Buffer pool size   %lu\n"
 
3756
                "Free buffers       %lu\n"
 
3757
                "Database pages     %lu\n"
 
3758
                "Modified db pages  %lu\n"
 
3759
                "Pending reads %lu\n"
 
3760
                "Pending writes: LRU %lu, flush list %lu, single page %lu\n",
 
3761
                (ulong) size,
 
3762
                (ulong) UT_LIST_GET_LEN(buf_pool->free),
 
3763
                (ulong) UT_LIST_GET_LEN(buf_pool->LRU),
 
3764
                (ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
 
3765
                (ulong) buf_pool->n_pend_reads,
 
3766
                (ulong) buf_pool->n_flush[BUF_FLUSH_LRU]
 
3767
                + buf_pool->init_flush[BUF_FLUSH_LRU],
 
3768
                (ulong) buf_pool->n_flush[BUF_FLUSH_LIST]
 
3769
                + buf_pool->init_flush[BUF_FLUSH_LIST],
 
3770
                (ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
 
3771
 
 
3772
        current_time = time(NULL);
 
3773
        time_elapsed = 0.001 + difftime(current_time,
 
3774
                                        buf_pool->last_printout_time);
 
3775
        buf_pool->last_printout_time = current_time;
 
3776
 
 
3777
        fprintf(file,
 
3778
                "Pages read %lu, created %lu, written %lu\n"
 
3779
                "%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
 
3780
                (ulong) buf_pool->n_pages_read,
 
3781
                (ulong) buf_pool->n_pages_created,
 
3782
                (ulong) buf_pool->n_pages_written,
 
3783
                (buf_pool->n_pages_read - buf_pool->n_pages_read_old)
 
3784
                / time_elapsed,
 
3785
                (buf_pool->n_pages_created - buf_pool->n_pages_created_old)
 
3786
                / time_elapsed,
 
3787
                (buf_pool->n_pages_written - buf_pool->n_pages_written_old)
 
3788
                / time_elapsed);
 
3789
 
 
3790
        if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) {
 
3791
                fprintf(file, "Buffer pool hit rate %lu / 1000\n",
 
3792
                        (ulong)
 
3793
                        (1000 - ((1000 * (buf_pool->n_pages_read
 
3794
                                          - buf_pool->n_pages_read_old))
 
3795
                                 / (buf_pool->n_page_gets
 
3796
                                    - buf_pool->n_page_gets_old))));
 
3797
        } else {
 
3798
                fputs("No buffer pool page gets since the last printout\n",
 
3799
                      file);
 
3800
        }
 
3801
 
 
3802
        buf_pool->n_page_gets_old = buf_pool->n_page_gets;
 
3803
        buf_pool->n_pages_read_old = buf_pool->n_pages_read;
 
3804
        buf_pool->n_pages_created_old = buf_pool->n_pages_created;
 
3805
        buf_pool->n_pages_written_old = buf_pool->n_pages_written;
 
3806
 
 
3807
        /* Print some values to help us with visualizing what is
 
3808
        happening with LRU eviction. */
 
3809
        fprintf(file,
 
3810
                "LRU len: %lu, unzip_LRU len: %lu\n"
 
3811
                "I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
 
3812
                UT_LIST_GET_LEN(buf_pool->LRU),
 
3813
                UT_LIST_GET_LEN(buf_pool->unzip_LRU),
 
3814
                buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
 
3815
                buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
 
3816
 
 
3817
        buf_pool_mutex_exit();
 
3818
}
 
3819
 
 
3820
/**********************************************************************//**
 
3821
Refreshes the statistics used to print per-second averages. */
 
3822
UNIV_INTERN
 
3823
void
 
3824
buf_refresh_io_stats(void)
 
3825
/*======================*/
 
3826
{
 
3827
        buf_pool->last_printout_time = time(NULL);
 
3828
        buf_pool->n_page_gets_old = buf_pool->n_page_gets;
 
3829
        buf_pool->n_pages_read_old = buf_pool->n_pages_read;
 
3830
        buf_pool->n_pages_created_old = buf_pool->n_pages_created;
 
3831
        buf_pool->n_pages_written_old = buf_pool->n_pages_written;
 
3832
}
 
3833
 
 
3834
/*********************************************************************//**
 
3835
Asserts that all file pages in the buffer are in a replaceable state.
 
3836
@return TRUE */
 
3837
UNIV_INTERN
 
3838
ibool
 
3839
buf_all_freed(void)
 
3840
/*===============*/
 
3841
{
 
3842
        buf_chunk_t*    chunk;
 
3843
        ulint           i;
 
3844
 
 
3845
        ut_ad(buf_pool);
 
3846
 
 
3847
        buf_pool_mutex_enter();
 
3848
 
 
3849
        chunk = buf_pool->chunks;
 
3850
 
 
3851
        for (i = buf_pool->n_chunks; i--; chunk++) {
 
3852
 
 
3853
                const buf_block_t* block = buf_chunk_not_freed(chunk);
 
3854
 
 
3855
                if (UNIV_LIKELY_NULL(block)) {
 
3856
                        fprintf(stderr,
 
3857
                                "Page %lu %lu still fixed or dirty\n",
 
3858
                                (ulong) block->page.space,
 
3859
                                (ulong) block->page.offset);
 
3860
                        ut_error;
 
3861
                }
 
3862
        }
 
3863
 
 
3864
        buf_pool_mutex_exit();
 
3865
 
 
3866
        return(TRUE);
 
3867
}
 
3868
 
 
3869
/*********************************************************************//**
 
3870
Checks that there currently are no pending i/o-operations for the buffer
 
3871
pool.
 
3872
@return TRUE if there is no pending i/o */
 
3873
UNIV_INTERN
 
3874
ibool
 
3875
buf_pool_check_no_pending_io(void)
 
3876
/*==============================*/
 
3877
{
 
3878
        ibool   ret;
 
3879
 
 
3880
        buf_pool_mutex_enter();
 
3881
 
 
3882
        if (buf_pool->n_pend_reads + buf_pool->n_flush[BUF_FLUSH_LRU]
 
3883
            + buf_pool->n_flush[BUF_FLUSH_LIST]
 
3884
            + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) {
 
3885
                ret = FALSE;
 
3886
        } else {
 
3887
                ret = TRUE;
 
3888
        }
 
3889
 
 
3890
        buf_pool_mutex_exit();
 
3891
 
 
3892
        return(ret);
 
3893
}
 
3894
 
 
3895
/*********************************************************************//**
 
3896
Gets the current length of the free list of buffer blocks.
 
3897
@return length of the free list */
 
3898
UNIV_INTERN
 
3899
ulint
 
3900
buf_get_free_list_len(void)
 
3901
/*=======================*/
 
3902
{
 
3903
        ulint   len;
 
3904
 
 
3905
        buf_pool_mutex_enter();
 
3906
 
 
3907
        len = UT_LIST_GET_LEN(buf_pool->free);
 
3908
 
 
3909
        buf_pool_mutex_exit();
 
3910
 
 
3911
        return(len);
 
3912
}
 
3913
#else /* !UNIV_HOTBACKUP */
 
3914
/********************************************************************//**
 
3915
Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
 
3916
UNIV_INTERN
 
3917
void
 
3918
buf_page_init_for_backup_restore(
 
3919
/*=============================*/
 
3920
        ulint           space,  /*!< in: space id */
 
3921
        ulint           offset, /*!< in: offset of the page within space
 
3922
                                in units of a page */
 
3923
        ulint           zip_size,/*!< in: compressed page size in bytes
 
3924
                                or 0 for uncompressed pages */
 
3925
        buf_block_t*    block)  /*!< in: block to init */
 
3926
{
 
3927
        block->page.state       = BUF_BLOCK_FILE_PAGE;
 
3928
        block->page.space       = space;
 
3929
        block->page.offset      = offset;
 
3930
 
 
3931
        page_zip_des_init(&block->page.zip);
 
3932
 
 
3933
        /* We assume that block->page.data has been allocated
 
3934
        with zip_size == UNIV_PAGE_SIZE. */
 
3935
        ut_ad(zip_size <= UNIV_PAGE_SIZE);
 
3936
        ut_ad(ut_is_2pow(zip_size));
 
3937
        page_zip_set_size(&block->page.zip, zip_size);
 
3938
        if (zip_size) {
 
3939
                block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
 
3940
        }
 
3941
}
 
3942
#endif /* !UNIV_HOTBACKUP */