1.4.1
by Marc Deslauriers
Import upstream version 5.1.61 |
1 |
/*
|
2 |
Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
1
by Norbert Tretkowski
Import upstream version 5.1.45 |
3 |
|
4 |
This program is free software; you can redistribute it and/or modify
|
|
5 |
it under the terms of the GNU General Public License as published by
|
|
6 |
the Free Software Foundation; version 2 of the License.
|
|
7 |
||
8 |
This program is distributed in the hope that it will be useful,
|
|
9 |
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
GNU General Public License for more details.
|
|
12 |
||
13 |
You should have received a copy of the GNU General Public License
|
|
14 |
along with this program; if not, write to the Free Software
|
|
1.4.1
by Marc Deslauriers
Import upstream version 5.1.61 |
15 |
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
16 |
*/
|
|
1
by Norbert Tretkowski
Import upstream version 5.1.45 |
17 |
|
18 |
/**
|
|
19 |
@file
|
|
20 |
These functions handle keyblock cacheing for ISAM and MyISAM tables.
|
|
21 |
||
22 |
One cache can handle many files.
|
|
23 |
It must contain buffers of the same blocksize.
|
|
24 |
init_key_cache() should be used to init cache handler.
|
|
25 |
||
26 |
The free list (free_block_list) is a stack like structure.
|
|
27 |
When a block is freed by free_block(), it is pushed onto the stack.
|
|
28 |
When a new block is required it is first tried to pop one from the stack.
|
|
29 |
If the stack is empty, it is tried to get a never-used block from the pool.
|
|
30 |
If this is empty too, then a block is taken from the LRU ring, flushing it
|
|
31 |
to disk, if neccessary. This is handled in find_key_block().
|
|
32 |
With the new free list, the blocks can have three temperatures:
|
|
33 |
hot, warm and cold (which is free). This is remembered in the block header
|
|
34 |
by the enum BLOCK_TEMPERATURE temperature variable. Remembering the
|
|
35 |
temperature is neccessary to correctly count the number of warm blocks,
|
|
36 |
which is required to decide when blocks are allowed to become hot. Whenever
|
|
37 |
a block is inserted to another (sub-)chain, we take the old and new
|
|
38 |
temperature into account to decide if we got one more or less warm block.
|
|
39 |
blocks_unused is the sum of never used blocks in the pool and of currently
|
|
40 |
free blocks. blocks_used is the number of blocks fetched from the pool and
|
|
41 |
as such gives the maximum number of in-use blocks at any time.
|
|
42 |
*/
|
|
43 |
||
44 |
/*
|
|
45 |
Key Cache Locking
|
|
46 |
=================
|
|
47 |
||
48 |
All key cache locking is done with a single mutex per key cache:
|
|
49 |
keycache->cache_lock. This mutex is locked almost all the time
|
|
50 |
when executing code in this file (mf_keycache.c).
|
|
51 |
However it is released for I/O and some copy operations.
|
|
52 |
||
53 |
The cache_lock is also released when waiting for some event. Waiting
|
|
54 |
and signalling is done via condition variables. In most cases the
|
|
55 |
thread waits on its thread->suspend condition variable. Every thread
|
|
56 |
has a my_thread_var structure, which contains this variable and a
|
|
57 |
'*next' and '**prev' pointer. These pointers are used to insert the
|
|
58 |
thread into a wait queue.
|
|
59 |
||
60 |
A thread can wait for one block and thus be in one wait queue at a
|
|
61 |
time only.
|
|
62 |
||
63 |
Before starting to wait on its condition variable with
|
|
64 |
pthread_cond_wait(), the thread enters itself to a specific wait queue
|
|
65 |
with link_into_queue() (double linked with '*next' + '**prev') or
|
|
66 |
wait_on_queue() (single linked with '*next').
|
|
67 |
||
68 |
Another thread, when releasing a resource, looks up the waiting thread
|
|
69 |
in the related wait queue. It sends a signal with
|
|
70 |
pthread_cond_signal() to the waiting thread.
|
|
71 |
||
72 |
NOTE: Depending on the particular wait situation, either the sending
|
|
73 |
thread removes the waiting thread from the wait queue with
|
|
74 |
unlink_from_queue() or release_whole_queue() respectively, or the waiting
|
|
75 |
thread removes itself.
|
|
76 |
||
77 |
There is one exception from this locking scheme when one thread wants
|
|
78 |
to reuse a block for some other address. This works by first marking
|
|
79 |
the block reserved (status= BLOCK_IN_SWITCH) and then waiting for all
|
|
80 |
threads that are reading the block to finish. Each block has a
|
|
81 |
reference to a condition variable (condvar). It holds a reference to
|
|
82 |
the thread->suspend condition variable for the waiting thread (if such
|
|
83 |
a thread exists). When that thread is signaled, the reference is
|
|
84 |
cleared. The number of readers of a block is registered in
|
|
85 |
block->hash_link->requests. See wait_for_readers() / remove_reader()
|
|
86 |
for details. This is similar to the above, but it clearly means that
|
|
87 |
only one thread can wait for a particular block. There is no queue in
|
|
88 |
this case. Strangely enough block->convar is used for waiting for the
|
|
89 |
assigned hash_link only. More precisely it is used to wait for all
|
|
90 |
requests to be unregistered from the assigned hash_link.
|
|
91 |
||
92 |
The resize_queue serves two purposes:
|
|
93 |
1. Threads that want to do a resize wait there if in_resize is set.
|
|
94 |
This is not used in the server. The server refuses a second resize
|
|
95 |
request if one is already active. keycache->in_init is used for the
|
|
96 |
synchronization. See set_var.cc.
|
|
97 |
2. Threads that want to access blocks during resize wait here during
|
|
98 |
the re-initialization phase.
|
|
99 |
When the resize is done, all threads on the queue are signalled.
|
|
100 |
Hypothetical resizers can compete for resizing, and read/write
|
|
101 |
requests will restart to request blocks from the freshly resized
|
|
102 |
cache. If the cache has been resized too small, it is disabled and
|
|
103 |
'can_be_used' is false. In this case read/write requests bypass the
|
|
104 |
cache. Since they increment and decrement 'cnt_for_resize_op', the
|
|
105 |
next resizer can wait on the queue 'waiting_for_resize_cnt' until all
|
|
106 |
I/O finished.
|
|
107 |
*/
|
|
108 |
||
109 |
#include "mysys_priv.h" |
|
110 |
#include "mysys_err.h" |
|
111 |
#include <keycache.h> |
|
112 |
#include "my_static.h" |
|
113 |
#include <m_string.h> |
|
114 |
#include <my_bit.h> |
|
115 |
#include <errno.h> |
|
116 |
#include <stdarg.h> |
|
117 |
||
118 |
/*
|
|
119 |
Some compilation flags have been added specifically for this module
|
|
120 |
to control the following:
|
|
121 |
- not to let a thread to yield the control when reading directly
|
|
122 |
from key cache, which might improve performance in many cases;
|
|
123 |
to enable this add:
|
|
124 |
#define SERIALIZED_READ_FROM_CACHE
|
|
125 |
- to set an upper bound for number of threads simultaneously
|
|
126 |
using the key cache; this setting helps to determine an optimal
|
|
127 |
size for hash table and improve performance when the number of
|
|
128 |
blocks in the key cache much less than the number of threads
|
|
129 |
accessing it;
|
|
130 |
to set this number equal to <N> add
|
|
131 |
#define MAX_THREADS <N>
|
|
132 |
- to substitute calls of pthread_cond_wait for calls of
|
|
133 |
pthread_cond_timedwait (wait with timeout set up);
|
|
134 |
this setting should be used only when you want to trap a deadlock
|
|
135 |
situation, which theoretically should not happen;
|
|
136 |
to set timeout equal to <T> seconds add
|
|
137 |
#define KEYCACHE_TIMEOUT <T>
|
|
138 |
- to enable the module traps and to send debug information from
|
|
139 |
key cache module to a special debug log add:
|
|
140 |
#define KEYCACHE_DEBUG
|
|
141 |
the name of this debug log file <LOG NAME> can be set through:
|
|
142 |
#define KEYCACHE_DEBUG_LOG <LOG NAME>
|
|
143 |
if the name is not defined, it's set by default;
|
|
144 |
if the KEYCACHE_DEBUG flag is not set up and we are in a debug
|
|
145 |
mode, i.e. when ! defined(DBUG_OFF), the debug information from the
|
|
146 |
module is sent to the regular debug log.
|
|
147 |
||
148 |
Example of the settings:
|
|
149 |
#define SERIALIZED_READ_FROM_CACHE
|
|
150 |
#define MAX_THREADS 100
|
|
151 |
#define KEYCACHE_TIMEOUT 1
|
|
152 |
#define KEYCACHE_DEBUG
|
|
153 |
#define KEYCACHE_DEBUG_LOG "my_key_cache_debug.log"
|
|
154 |
*/
|
|
155 |
||
156 |
#define STRUCT_PTR(TYPE, MEMBER, a) \
|
|
157 |
(TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER))
|
|
158 |
||
159 |
/* types of condition variables */
|
|
160 |
#define COND_FOR_REQUESTED 0
|
|
161 |
#define COND_FOR_SAVED 1
|
|
162 |
#define COND_FOR_READERS 2
|
|
163 |
||
164 |
typedef pthread_cond_t KEYCACHE_CONDVAR; |
|
165 |
||
166 |
/* descriptor of the page in the key cache block buffer */
|
|
167 |
struct st_keycache_page |
|
168 |
{
|
|
169 |
int file; /* file to which the page belongs to */ |
|
170 |
my_off_t filepos; /* position of the page in the file */ |
|
171 |
};
|
|
172 |
||
173 |
/* element in the chain of a hash table bucket */
|
|
174 |
struct st_hash_link |
|
175 |
{
|
|
176 |
struct st_hash_link *next, **prev; /* to connect links in the same bucket */ |
|
177 |
struct st_block_link *block; /* reference to the block for the page: */ |
|
178 |
File file; /* from such a file */ |
|
179 |
my_off_t diskpos; /* with such an offset */ |
|
180 |
uint requests; /* number of requests for the page */ |
|
181 |
};
|
|
182 |
||
183 |
/* simple states of a block */
|
|
184 |
#define BLOCK_ERROR 1 /* an error occured when performing file i/o */ |
|
185 |
#define BLOCK_READ 2 /* file block is in the block buffer */ |
|
186 |
#define BLOCK_IN_SWITCH 4 /* block is preparing to read new page */ |
|
187 |
#define BLOCK_REASSIGNED 8 /* blk does not accept requests for old page */ |
|
188 |
#define BLOCK_IN_FLUSH 16 /* block is selected for flush */ |
|
189 |
#define BLOCK_CHANGED 32 /* block buffer contains a dirty page */ |
|
190 |
#define BLOCK_IN_USE 64 /* block is not free */ |
|
191 |
#define BLOCK_IN_EVICTION 128 /* block is selected for eviction */ |
|
192 |
#define BLOCK_IN_FLUSHWRITE 256 /* block is in write to file */ |
|
193 |
#define BLOCK_FOR_UPDATE 512 /* block is selected for buffer modification */ |
|
194 |
||
195 |
/* page status, returned by find_key_block */
|
|
196 |
#define PAGE_READ 0
|
|
197 |
#define PAGE_TO_BE_READ 1
|
|
198 |
#define PAGE_WAIT_TO_BE_READ 2
|
|
199 |
||
200 |
/* block temperature determines in which (sub-)chain the block currently is */
|
|
201 |
enum BLOCK_TEMPERATURE { BLOCK_COLD /*free*/ , BLOCK_WARM , BLOCK_HOT }; |
|
202 |
||
203 |
/* key cache block */
|
|
204 |
struct st_block_link |
|
205 |
{
|
|
206 |
struct st_block_link |
|
207 |
*next_used, **prev_used; /* to connect links in the LRU chain (ring) */ |
|
208 |
struct st_block_link |
|
209 |
*next_changed, **prev_changed; /* for lists of file dirty/clean blocks */ |
|
210 |
struct st_hash_link *hash_link; /* backward ptr to referring hash_link */ |
|
211 |
KEYCACHE_WQUEUE wqueue[2]; /* queues on waiting requests for new/old pages */ |
|
212 |
uint requests; /* number of requests for the block */ |
|
213 |
uchar *buffer; /* buffer for the block page */ |
|
214 |
uint offset; /* beginning of modified data in the buffer */ |
|
215 |
uint length; /* end of data in the buffer */ |
|
216 |
uint status; /* state of the block */ |
|
217 |
enum BLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot */ |
|
218 |
uint hits_left; /* number of hits left until promotion */ |
|
219 |
ulonglong last_hit_time; /* timestamp of the last hit */ |
|
220 |
KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */ |
|
221 |
};
|
|
222 |
||
223 |
KEY_CACHE dflt_key_cache_var; |
|
224 |
KEY_CACHE *dflt_key_cache= &dflt_key_cache_var; |
|
225 |
||
226 |
#define FLUSH_CACHE 2000 /* sort this many blocks at once */ |
|
227 |
||
228 |
static int flush_all_key_blocks(KEY_CACHE *keycache); |
|
229 |
#ifdef THREAD
|
|
230 |
static void wait_on_queue(KEYCACHE_WQUEUE *wqueue, |
|
231 |
pthread_mutex_t *mutex); |
|
232 |
static void release_whole_queue(KEYCACHE_WQUEUE *wqueue); |
|
233 |
#else
|
|
234 |
#define wait_on_queue(wqueue, mutex) do {} while (0)
|
|
235 |
#define release_whole_queue(wqueue) do {} while (0)
|
|
236 |
#endif
|
|
237 |
static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block); |
|
238 |
#if !defined(DBUG_OFF)
|
|
239 |
static void test_key_cache(KEY_CACHE *keycache, |
|
240 |
const char *where, my_bool lock); |
|
241 |
#endif
|
|
242 |
||
243 |
#define KEYCACHE_HASH(f, pos) \
|
|
244 |
(((ulong) ((pos) / keycache->key_cache_block_size) + \
|
|
245 |
(ulong) (f)) & (keycache->hash_entries-1))
|
|
246 |
#define FILE_HASH(f) ((uint) (f) & (CHANGED_BLOCKS_HASH-1))
|
|
247 |
||
248 |
#define DEFAULT_KEYCACHE_DEBUG_LOG "keycache_debug.log"
|
|
249 |
||
250 |
#if defined(KEYCACHE_DEBUG) && ! defined(KEYCACHE_DEBUG_LOG)
|
|
251 |
#define KEYCACHE_DEBUG_LOG DEFAULT_KEYCACHE_DEBUG_LOG
|
|
252 |
#endif
|
|
253 |
||
254 |
#if defined(KEYCACHE_DEBUG_LOG)
|
|
255 |
static FILE *keycache_debug_log=NULL; |
|
256 |
static void keycache_debug_print _VARARGS((const char *fmt,...)); |
|
257 |
#define KEYCACHE_DEBUG_OPEN \
|
|
258 |
if (!keycache_debug_log) \
|
|
259 |
{ \
|
|
260 |
keycache_debug_log= fopen(KEYCACHE_DEBUG_LOG, "w"); \
|
|
261 |
(void) setvbuf(keycache_debug_log, NULL, _IOLBF, BUFSIZ); \
|
|
262 |
}
|
|
263 |
||
264 |
#define KEYCACHE_DEBUG_CLOSE \
|
|
265 |
if (keycache_debug_log) \
|
|
266 |
{ \
|
|
267 |
fclose(keycache_debug_log); \
|
|
268 |
keycache_debug_log= 0; \
|
|
269 |
}
|
|
270 |
#else
|
|
271 |
#define KEYCACHE_DEBUG_OPEN
|
|
272 |
#define KEYCACHE_DEBUG_CLOSE
|
|
273 |
#endif /* defined(KEYCACHE_DEBUG_LOG) */ |
|
274 |
||
275 |
#if defined(KEYCACHE_DEBUG_LOG) && defined(KEYCACHE_DEBUG)
|
|
276 |
#define KEYCACHE_DBUG_PRINT(l, m) \
|
|
277 |
{ if (keycache_debug_log) fprintf(keycache_debug_log, "%s: ", l); \
|
|
278 |
keycache_debug_print m; }
|
|
279 |
||
280 |
#define KEYCACHE_DBUG_ASSERT(a) \
|
|
281 |
{ if (! (a) && keycache_debug_log) fclose(keycache_debug_log); \
|
|
282 |
assert(a); }
|
|
283 |
#else
|
|
284 |
#define KEYCACHE_DBUG_PRINT(l, m) DBUG_PRINT(l, m)
|
|
285 |
#define KEYCACHE_DBUG_ASSERT(a) DBUG_ASSERT(a)
|
|
286 |
#endif /* defined(KEYCACHE_DEBUG_LOG) && defined(KEYCACHE_DEBUG) */ |
|
287 |
||
288 |
#if defined(KEYCACHE_DEBUG) || !defined(DBUG_OFF)
|
|
289 |
#ifdef THREAD
|
|
290 |
static long keycache_thread_id; |
|
291 |
#define KEYCACHE_THREAD_TRACE(l) \
|
|
292 |
KEYCACHE_DBUG_PRINT(l,("|thread %ld",keycache_thread_id))
|
|
293 |
||
294 |
#define KEYCACHE_THREAD_TRACE_BEGIN(l) \
|
|
295 |
{ struct st_my_thread_var *thread_var= my_thread_var; \
|
|
296 |
keycache_thread_id= thread_var->id; \
|
|
297 |
KEYCACHE_DBUG_PRINT(l,("[thread %ld",keycache_thread_id)) }
|
|
298 |
||
299 |
#define KEYCACHE_THREAD_TRACE_END(l) \
|
|
300 |
KEYCACHE_DBUG_PRINT(l,("]thread %ld",keycache_thread_id))
|
|
301 |
#else /* THREAD */ |
|
302 |
#define KEYCACHE_THREAD_TRACE(l) KEYCACHE_DBUG_PRINT(l,(""))
|
|
303 |
#define KEYCACHE_THREAD_TRACE_BEGIN(l) KEYCACHE_DBUG_PRINT(l,(""))
|
|
304 |
#define KEYCACHE_THREAD_TRACE_END(l) KEYCACHE_DBUG_PRINT(l,(""))
|
|
305 |
#endif /* THREAD */ |
|
306 |
#else
|
|
307 |
#define KEYCACHE_THREAD_TRACE_BEGIN(l)
|
|
308 |
#define KEYCACHE_THREAD_TRACE_END(l)
|
|
309 |
#define KEYCACHE_THREAD_TRACE(l)
|
|
310 |
#endif /* defined(KEYCACHE_DEBUG) || !defined(DBUG_OFF) */ |
|
311 |
||
312 |
#define BLOCK_NUMBER(b) \
|
|
313 |
((uint) (((char*)(b)-(char *) keycache->block_root)/sizeof(BLOCK_LINK)))
|
|
314 |
#define HASH_LINK_NUMBER(h) \
|
|
315 |
((uint) (((char*)(h)-(char *) keycache->hash_link_root)/sizeof(HASH_LINK)))
|
|
316 |
||
317 |
#if (defined(KEYCACHE_TIMEOUT) && !defined(__WIN__)) || defined(KEYCACHE_DEBUG)
|
|
318 |
static int keycache_pthread_cond_wait(pthread_cond_t *cond, |
|
319 |
pthread_mutex_t *mutex); |
|
320 |
#else
|
|
321 |
#define keycache_pthread_cond_wait pthread_cond_wait
|
|
322 |
#endif
|
|
323 |
||
324 |
#if defined(KEYCACHE_DEBUG)
|
|
325 |
static int keycache_pthread_mutex_lock(pthread_mutex_t *mutex); |
|
326 |
static void keycache_pthread_mutex_unlock(pthread_mutex_t *mutex); |
|
327 |
static int keycache_pthread_cond_signal(pthread_cond_t *cond); |
|
328 |
#else
|
|
329 |
#define keycache_pthread_mutex_lock pthread_mutex_lock
|
|
330 |
#define keycache_pthread_mutex_unlock pthread_mutex_unlock
|
|
331 |
#define keycache_pthread_cond_signal pthread_cond_signal
|
|
332 |
#endif /* defined(KEYCACHE_DEBUG) */ |
|
333 |
||
334 |
#if !defined(DBUG_OFF)
|
|
335 |
#if defined(inline)
|
|
336 |
#undef inline
|
|
337 |
#endif
|
|
338 |
#define inline /* disabled inline for easier debugging */ |
|
339 |
static int fail_block(BLOCK_LINK *block); |
|
340 |
static int fail_hlink(HASH_LINK *hlink); |
|
341 |
static int cache_empty(KEY_CACHE *keycache); |
|
342 |
#endif
|
|
343 |
||
344 |
/*
  Return a power of two strictly greater than the rounded-up value:
  my_round_up_to_next_power() rounds 'value' up to a power of two and
  the shift doubles it once more.
*/
static inline uint next_power(uint value)
{
  uint rounded= (uint) my_round_up_to_next_power((uint32) value);
  return rounded << 1;
}
|
|
348 |
||
349 |
||
350 |
/*
|
|
351 |
Initialize a key cache
|
|
352 |
||
353 |
SYNOPSIS
|
|
354 |
init_key_cache()
|
|
355 |
keycache pointer to a key cache data structure
|
|
356 |
key_cache_block_size size of blocks to keep cached data
|
|
357 |
use_mem total memory to use for the key cache
|
|
358 |
division_limit division limit (may be zero)
|
|
359 |
age_threshold age threshold (may be zero)
|
|
360 |
||
361 |
RETURN VALUE
|
|
362 |
number of blocks in the key cache, if successful,
|
|
363 |
0 - otherwise.
|
|
364 |
||
365 |
NOTES.
|
|
366 |
if keycache->key_cache_inited != 0 we assume that the key cache
|
|
367 |
is already initialized. This is for now used by myisamchk, but shouldn't
|
|
368 |
be something that a program should rely on!
|
|
369 |
||
370 |
It's assumed that no two threads call this function simultaneously
|
|
371 |
referring to the same key cache handle.
|
|
372 |
||
373 |
*/
|
|
374 |
||
375 |
int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
                   size_t use_mem, uint division_limit,
                   uint age_threshold)
{
  ulong blocks, hash_links;
  size_t length;
  int error;
  DBUG_ENTER("init_key_cache");
  DBUG_ASSERT(key_cache_block_size >= 512);

  KEYCACHE_DEBUG_OPEN;
  /* A cache that is inited and has blocks is already in active use. */
  if (keycache->key_cache_inited && keycache->disk_blocks > 0)
  {
    DBUG_PRINT("warning",("key cache already in use"));
    DBUG_RETURN(0);
  }

  keycache->global_cache_w_requests= keycache->global_cache_r_requests= 0;
  keycache->global_cache_read= keycache->global_cache_write= 0;
  keycache->disk_blocks= -1;
  if (! keycache->key_cache_inited)
  {
    keycache->key_cache_inited= 1;
    /*
      Initialize these variables once only.
      Their value must survive re-initialization during resizing.
    */
    keycache->in_resize= 0;
    keycache->resize_in_flush= 0;
    keycache->cnt_for_resize_op= 0;
    keycache->waiting_for_resize_cnt.last_thread= NULL;
    keycache->in_init= 0;
    pthread_mutex_init(&keycache->cache_lock, MY_MUTEX_INIT_FAST);
    keycache->resize_queue.last_thread= NULL;
  }

  keycache->key_cache_mem_size= use_mem;
  keycache->key_cache_block_size= key_cache_block_size;
  DBUG_PRINT("info", ("key_cache_block_size: %u",
                      key_cache_block_size));

  /*
    First estimate of the block count: divide the budget by the per-block
    cost (one BLOCK_LINK, two HASH_LINKs, 5/4 hash-table pointer, and the
    page buffer itself).
  */
  blocks= (ulong) (use_mem / (sizeof(BLOCK_LINK) + 2 * sizeof(HASH_LINK) +
                              sizeof(HASH_LINK*) * 5/4 + key_cache_block_size));
  /* It doesn't make sense to have too few blocks (less than 8) */
  if (blocks >= 8)
  {
    for ( ; ; )
    {
      /* Set my_hash_entries to the next bigger 2 power */
      if ((keycache->hash_entries= next_power(blocks)) < blocks * 5/4)
        keycache->hash_entries<<= 1;
      hash_links= 2 * blocks;
#if defined(MAX_THREADS)
      if (hash_links < MAX_THREADS + blocks - 1)
        hash_links= MAX_THREADS + blocks - 1;
#endif
      /*
        Shrink 'blocks' until the administrative structures plus the page
        buffers fit into use_mem.
      */
      while ((length= (ALIGN_SIZE(blocks * sizeof(BLOCK_LINK)) +
                       ALIGN_SIZE(hash_links * sizeof(HASH_LINK)) +
                       ALIGN_SIZE(sizeof(HASH_LINK*) *
                                  keycache->hash_entries))) +
             ((size_t) blocks * keycache->key_cache_block_size) > use_mem)
        blocks--;
      /* Allocate memory for cache page buffers */
      if ((keycache->block_mem=
           my_large_malloc((size_t) blocks * keycache->key_cache_block_size,
                          MYF(0))))
      {
        /*
          Allocate memory for blocks, hash_links and hash entries;
          For each block 2 hash links are allocated
        */
        if ((keycache->block_root= (BLOCK_LINK*) my_malloc(length,
                                                           MYF(0))))
          break;
        my_large_free(keycache->block_mem, MYF(0));
        keycache->block_mem= 0;
      }
      /* Allocation failed: give up once we are below the 8-block minimum. */
      if (blocks < 8)
      {
        my_errno= ENOMEM;
        my_error(EE_OUTOFMEMORY, MYF(0), blocks * keycache->key_cache_block_size);
        goto err;
      }
      /* Retry with 3/4 of the previous block count. */
      blocks= blocks / 4*3;
    }
    keycache->blocks_unused= blocks;
    keycache->disk_blocks= (int) blocks;
    keycache->hash_links= hash_links;
    /*
      block_root is one contiguous chunk laid out as:
      BLOCK_LINK array, then the hash table (HASH_LINK*), then the
      HASH_LINK array.
    */
    keycache->hash_root= (HASH_LINK**) ((char*) keycache->block_root +
                                        ALIGN_SIZE(blocks*sizeof(BLOCK_LINK)));
    keycache->hash_link_root= (HASH_LINK*) ((char*) keycache->hash_root +
                                            ALIGN_SIZE((sizeof(HASH_LINK*) *
                                                        keycache->hash_entries)));
    bzero((uchar*) keycache->block_root,
          keycache->disk_blocks * sizeof(BLOCK_LINK));
    bzero((uchar*) keycache->hash_root,
          keycache->hash_entries * sizeof(HASH_LINK*));
    bzero((uchar*) keycache->hash_link_root,
          keycache->hash_links * sizeof(HASH_LINK));
    keycache->hash_links_used= 0;
    keycache->free_hash_list= NULL;
    keycache->blocks_used= keycache->blocks_changed= 0;

    keycache->global_blocks_changed= 0;
    keycache->blocks_available=0;		/* For debugging */

    /* The LRU chain is empty after initialization */
    keycache->used_last= NULL;
    keycache->used_ins= NULL;
    keycache->free_block_list= NULL;
    keycache->keycache_time= 0;
    keycache->warm_blocks= 0;
    /* division_limit/age_threshold are percentages of the block count. */
    keycache->min_warm_blocks= (division_limit ?
                                blocks * division_limit / 100 + 1 :
                                blocks);
    keycache->age_threshold= (age_threshold ?
                              blocks * age_threshold / 100 :
                              blocks);

    keycache->can_be_used= 1;

    keycache->waiting_for_hash_link.last_thread= NULL;
    keycache->waiting_for_block.last_thread= NULL;
    DBUG_PRINT("exit",
               ("disk_blocks: %d  block_root: 0x%lx  hash_entries: %d\
 hash_root: 0x%lx  hash_links: %d  hash_link_root: 0x%lx",
                keycache->disk_blocks, (long) keycache->block_root,
                keycache->hash_entries, (long) keycache->hash_root,
                keycache->hash_links, (long) keycache->hash_link_root));
    bzero((uchar*) keycache->changed_blocks,
          sizeof(keycache->changed_blocks[0]) * CHANGED_BLOCKS_HASH);
    bzero((uchar*) keycache->file_blocks,
          sizeof(keycache->file_blocks[0]) * CHANGED_BLOCKS_HASH);
  }
  else
  {
    /* key_buffer_size is specified too small. Disable the cache. */
    keycache->can_be_used= 0;
  }

  keycache->blocks= keycache->disk_blocks > 0 ? keycache->disk_blocks : 0;
  DBUG_RETURN((int) keycache->disk_blocks);

err:
  /* Preserve my_errno across the cleanup calls below. */
  error= my_errno;
  keycache->disk_blocks= 0;
  keycache->blocks= 0;
  if (keycache->block_mem)
  {
    my_large_free((uchar*) keycache->block_mem, MYF(0));
    keycache->block_mem= NULL;
  }
  if (keycache->block_root)
  {
    my_free((uchar*) keycache->block_root, MYF(0));
    keycache->block_root= NULL;
  }
  my_errno= error;
  keycache->can_be_used= 0;
  DBUG_RETURN(0);
}
|
|
536 |
||
537 |
||
538 |
/*
|
|
539 |
Resize a key cache
|
|
540 |
||
541 |
SYNOPSIS
|
|
542 |
resize_key_cache()
|
|
543 |
keycache pointer to a key cache data structure
|
|
544 |
key_cache_block_size size of blocks to keep cached data
|
|
545 |
use_mem total memory to use for the new key cache
|
|
546 |
division_limit new division limit (if not zero)
|
|
547 |
age_threshold new age threshold (if not zero)
|
|
548 |
||
549 |
RETURN VALUE
|
|
550 |
number of blocks in the key cache, if successful,
|
|
551 |
0 - otherwise.
|
|
552 |
||
553 |
NOTES.
|
|
554 |
The function first compares the memory size and the block size parameters
|
|
555 |
with the key cache values.
|
|
556 |
||
557 |
If they differ the function free the the memory allocated for the
|
|
558 |
old key cache blocks by calling the end_key_cache function and
|
|
559 |
then rebuilds the key cache with new blocks by calling
|
|
560 |
init_key_cache.
|
|
561 |
||
562 |
The function starts the operation only when all other threads
|
|
563 |
performing operations with the key cache let her to proceed
|
|
564 |
(when cnt_for_resize=0).
|
|
565 |
*/
|
|
566 |
||
567 |
int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
                     size_t use_mem, uint division_limit,
                     uint age_threshold)
{
  int blocks;
  DBUG_ENTER("resize_key_cache");

  if (!keycache->key_cache_inited)
    DBUG_RETURN(keycache->disk_blocks);

  /*
    Same geometry requested: only the midpoint-insertion parameters can
    change, no reallocation needed.
  */
  if(key_cache_block_size == keycache->key_cache_block_size &&
     use_mem == keycache->key_cache_mem_size)
  {
    change_key_cache_param(keycache, division_limit, age_threshold);
    DBUG_RETURN(keycache->disk_blocks);
  }

  keycache_pthread_mutex_lock(&keycache->cache_lock);

#ifdef THREAD
  /*
    We may need to wait for another thread which is doing a resize
    already. This cannot happen in the MySQL server though. It allows
    one resizer only. In set_var.cc keycache->in_init is used to block
    multiple attempts.
  */
  while (keycache->in_resize)
  {
    /* purecov: begin inspected */
    wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
    /* purecov: end */
  }
#endif

  /*
    Mark the operation in progress. This blocks other threads from doing
    a resize in parallel. It prohibits new blocks to enter the cache.
    Read/write requests can bypass the cache during the flush phase.
  */
  keycache->in_resize= 1;

  /* Need to flush only if keycache is enabled. */
  if (keycache->can_be_used)
  {
    /* Start the flush phase. */
    keycache->resize_in_flush= 1;

    if (flush_all_key_blocks(keycache))
    {
      /* TODO: if this happens, we should write a warning in the log file ! */
      keycache->resize_in_flush= 0;
      blocks= 0;
      keycache->can_be_used= 0;
      goto finish;
    }
    DBUG_ASSERT(cache_empty(keycache));

    /* End the flush phase. */
    keycache->resize_in_flush= 0;
  }

#ifdef THREAD
  /*
    Some direct read/write operations (bypassing the cache) may still be
    unfinished. Wait until they are done. If the key cache can be used,
    direct I/O is done in increments of key_cache_block_size. That is,
    every block is checked if it is in the cache. We need to wait for
    pending I/O before re-initializing the cache, because we may change
    the block size. Otherwise they could check for blocks at file
    positions where the new block division has none. We do also want to
    wait for I/O done when (if) the cache was disabled. It must not
    run in parallel with normal cache operation.
  */
  while (keycache->cnt_for_resize_op)
    wait_on_queue(&keycache->waiting_for_resize_cnt, &keycache->cache_lock);
#else
  KEYCACHE_DBUG_ASSERT(keycache->cnt_for_resize_op == 0);
#endif

  /*
    Free old cache structures, allocate new structures, and initialize
    them. Note that the cache_lock mutex and the resize_queue are left
    untouched. We do not lose the cache_lock and will release it only at
    the end of this function.
  */
  end_key_cache(keycache, 0);			/* Don't free mutex */
  /* The following will work even if use_mem is 0 */
  blocks= init_key_cache(keycache, key_cache_block_size, use_mem,
			 division_limit, age_threshold);

finish:
  /*
    Mark the resize finished. This allows other threads to start a
    resize or to request new cache blocks.
  */
  keycache->in_resize= 0;

  /* Signal waiting threads. */
  release_whole_queue(&keycache->resize_queue);

  keycache_pthread_mutex_unlock(&keycache->cache_lock);
  DBUG_RETURN(blocks);
}
|
|
670 |
||
671 |
||
672 |
/*
|
|
673 |
Increment counter blocking resize key cache operation
|
|
674 |
*/
|
|
675 |
/*
  Register one more in-flight operation that a pending resize of the
  key cache must wait for (see dec_counter_for_resize_op()).
*/
static inline void inc_counter_for_resize_op(KEY_CACHE *keycache)
{
  keycache->cnt_for_resize_op+= 1;
}
|
|
679 |
||
680 |
||
681 |
/*
|
|
682 |
Decrement counter blocking resize key cache operation;
|
|
683 |
Signal the operation to proceed when counter becomes equal zero
|
|
684 |
*/
|
|
685 |
/*
  Unregister one in-flight operation blocking a resize of the key cache.
  When the counter drops to zero, wake every thread waiting on the
  'waiting_for_resize_cnt' queue so the resize may proceed.
*/
static inline void dec_counter_for_resize_op(KEY_CACHE *keycache)
{
  keycache->cnt_for_resize_op--;
  if (keycache->cnt_for_resize_op == 0)
    release_whole_queue(&keycache->waiting_for_resize_cnt);
}
|
|
690 |
||
691 |
/*
|
|
692 |
Change the key cache parameters
|
|
693 |
||
694 |
SYNOPSIS
|
|
695 |
change_key_cache_param()
|
|
696 |
keycache pointer to a key cache data structure
|
|
697 |
division_limit new division limit (if not zero)
|
|
698 |
age_threshold new age threshold (if not zero)
|
|
699 |
||
700 |
RETURN VALUE
|
|
701 |
none
|
|
702 |
||
703 |
NOTES.
|
|
704 |
Presently the function resets the key cache parameters
|
|
705 |
concerning midpoint insertion strategy - division_limit and
|
|
706 |
age_threshold.
|
|
707 |
*/
|
|
708 |
||
709 |
void change_key_cache_param(KEY_CACHE *keycache, uint division_limit,
                            uint age_threshold)
{
  DBUG_ENTER("change_key_cache_param");

  keycache_pthread_mutex_lock(&keycache->cache_lock);
  /* A zero argument means "leave the current setting unchanged". */
  if (division_limit != 0)
  {
    keycache->min_warm_blocks= keycache->disk_blocks *
                               division_limit / 100 + 1;
  }
  if (age_threshold != 0)
  {
    keycache->age_threshold= keycache->disk_blocks *
                             age_threshold / 100;
  }
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
  DBUG_VOID_RETURN;
}
|
|
724 |
||
725 |
||
726 |
/*
  Remove key_cache from memory

  SYNOPSIS
    end_key_cache()
    keycache		key cache handle
    cleanup		Complete free (Free also mutex for key cache)

  RETURN VALUE
    none

  NOTES.
    With cleanup == FALSE only the block buffers and block headers are
    released (e.g. before a resize); the cache mutex and the inited flag
    survive so the cache can be re-initialized later.
*/

void end_key_cache(KEY_CACHE *keycache, my_bool cleanup)
{
  DBUG_ENTER("end_key_cache");
  DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) keycache));

  if (!keycache->key_cache_inited)
    DBUG_VOID_RETURN;

  if (keycache->disk_blocks > 0)
  {
    if (keycache->block_mem)
    {
      /* block_mem is the large page buffer; block_root the header array. */
      my_large_free((uchar*) keycache->block_mem, MYF(0));
      keycache->block_mem= NULL;
      my_free((uchar*) keycache->block_root, MYF(0));
      keycache->block_root= NULL;
    }
    keycache->disk_blocks= -1;
    /* Reset blocks_changed to be safe if flush_all_key_blocks is called */
    keycache->blocks_changed= 0;
  }

  DBUG_PRINT("status", ("used: %lu changed: %lu w_requests: %lu "
                        "writes: %lu r_requests: %lu reads: %lu",
                        keycache->blocks_used, keycache->global_blocks_changed,
                        (ulong) keycache->global_cache_w_requests,
                        (ulong) keycache->global_cache_write,
                        (ulong) keycache->global_cache_r_requests,
                        (ulong) keycache->global_cache_read));

  /*
    Reset these values to be able to detect a disabled key cache.
    See Bug#44068 (RESTORE can disable the MyISAM Key Cache).
  */
  keycache->blocks_used= 0;
  keycache->blocks_unused= 0;

  if (cleanup)
  {
    pthread_mutex_destroy(&keycache->cache_lock);
    keycache->key_cache_inited= keycache->can_be_used= 0;
    KEYCACHE_DEBUG_CLOSE;
  }
  DBUG_VOID_RETURN;
} /* end_key_cache */
|
783 |
||
784 |
||
785 |
#ifdef THREAD
|
|
786 |
||
787 |
/*
  Link a thread into double-linked queue of waiting threads.

  SYNOPSIS
    link_into_queue()
      wqueue              pointer to the queue structure
      thread              pointer to the thread to be added to the queue

  RETURN VALUE
    none

  NOTES.
    Queue is represented by a circular list of the thread structures
    The list is double-linked of the type (**prev,*next), accessed by
    a pointer to the last element.
*/

static void link_into_queue(KEYCACHE_WQUEUE *wqueue,
                            struct st_my_thread_var *thread)
{
  struct st_my_thread_var *last;

  /* The thread must not already be linked into any queue. */
  DBUG_ASSERT(!thread->next && !thread->prev);
  if (! (last= wqueue->last_thread))
  {
    /* Queue is empty */
    thread->next= thread;
    thread->prev= &thread->next;
  }
  else
  {
    /*
      Insert the new thread between the last element and the first
      (last->next), keeping the (**prev,*next) invariants intact.
    */
    thread->prev= last->next->prev;
    last->next->prev= &thread->next;
    thread->next= last->next;
    last->next= thread;
  }
  /* The new thread becomes the last element of the ring. */
  wqueue->last_thread= thread;
}
|
|
825 |
||
826 |
/*
  Unlink a thread from double-linked queue of waiting threads

  SYNOPSIS
    unlink_from_queue()
      wqueue              pointer to the queue structure
      thread              pointer to the thread to be removed from the queue

  RETURN VALUE
    none

  NOTES.
    See NOTES for link_into_queue
*/

static void unlink_from_queue(KEYCACHE_WQUEUE *wqueue,
                              struct st_my_thread_var *thread)
{
  KEYCACHE_DBUG_PRINT("unlink_from_queue", ("thread %ld", thread->id));
  /* The thread must currently be linked into this queue. */
  DBUG_ASSERT(thread->next && thread->prev);
  if (thread->next == thread)
    /* The queue contains only one member */
    wqueue->last_thread= NULL;
  else
  {
    thread->next->prev= thread->prev;
    *thread->prev=thread->next;
    if (wqueue->last_thread == thread)
      /* Recover the predecessor from its embedded next-pointer address. */
      wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next,
                                      thread->prev);
  }
  thread->next= NULL;
#if !defined(DBUG_OFF)
  /*
    This makes it easier to see it's not in a chain during debugging.
    And some DBUG_ASSERT() rely on it.
  */
  thread->prev= NULL;
#endif
}
|
|
866 |
||
867 |
||
868 |
/*
  Add a thread to single-linked queue of waiting threads

  SYNOPSIS
    wait_on_queue()
      wqueue            Pointer to the queue structure.
      mutex             Cache_lock to acquire after awake.

  RETURN VALUE
    none

  NOTES.
    Queue is represented by a circular list of the thread structures
    The list is single-linked of the type (*next), accessed by a pointer
    to the last element.

    The function protects against stray signals by verifying that the
    current thread is unlinked from the queue when awaking. However,
    since several threads can wait for the same event, it might be
    necessary for the caller of the function to check again if the
    condition for awake is indeed matched.
*/

static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
                          pthread_mutex_t *mutex)
{
  struct st_my_thread_var *last;
  struct st_my_thread_var *thread= my_thread_var;

  /* Add to queue. */
  DBUG_ASSERT(!thread->next);
  DBUG_ASSERT(!thread->prev); /* Not required, but must be true anyway. */
  if (! (last= wqueue->last_thread))
    thread->next= thread;
  else
  {
    thread->next= last->next;
    last->next= thread;
  }
  wqueue->last_thread= thread;

  /*
    Wait until thread is removed from queue by the signalling thread.
    The loop protects against stray signals: release_whole_queue()
    clears thread->next, so a non-NULL next means the wakeup was spurious.
  */
  do
  {
    KEYCACHE_DBUG_PRINT("wait", ("suspend thread %ld", thread->id));
    keycache_pthread_cond_wait(&thread->suspend, mutex);
  }
  while (thread->next);
}
|
|
920 |
||
921 |
||
922 |
/*
  Remove all threads from queue signaling them to proceed

  SYNOPSIS
    release_whole_queue()
      wqueue            pointer to the queue structure

  RETURN VALUE
    none

  NOTES.
    See notes for wait_on_queue().
    When removed from the queue each thread is signaled via condition
    variable thread->suspend.
*/

static void release_whole_queue(KEYCACHE_WQUEUE *wqueue)
{
  struct st_my_thread_var *last;
  struct st_my_thread_var *next;
  struct st_my_thread_var *thread;

  /* Queue may be empty. */
  if (!(last= wqueue->last_thread))
    return;

  next= last->next;
  do
  {
    thread=next;
    KEYCACHE_DBUG_PRINT("release_whole_queue: signal",
                        ("thread %ld", thread->id));
    /* Signal the thread. */
    keycache_pthread_cond_signal(&thread->suspend);
    /*
      Take thread from queue. Clearing thread->next is what tells the
      woken thread in wait_on_queue() that the wakeup is genuine, so it
      must be saved in 'next' before being reset.
    */
    next=thread->next;
    thread->next= NULL;
  }
  while (thread != last);

  /* Now queue is definitely empty. */
  wqueue->last_thread= NULL;
}
|
|
965 |
||
966 |
#endif /* THREAD */ |
|
967 |
||
968 |
||
969 |
/*
  Unlink a block from the chain of dirty/clean blocks
*/

static inline void unlink_changed(BLOCK_LINK *block)
{
  /* The block must currently be linked into a changed/clean chain. */
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  if (block->next_changed)
    block->next_changed->prev_changed= block->prev_changed;
  *block->prev_changed= block->next_changed;

#if !defined(DBUG_OFF)
  /*
    This makes it easier to see it's not in a chain during debugging.
    And some DBUG_ASSERT() rely on it.
  */
  block->next_changed= NULL;
  block->prev_changed= NULL;
#endif
}
|
|
989 |
||
990 |
||
991 |
/*
|
|
992 |
Link a block into the chain of dirty/clean blocks
|
|
993 |
*/
|
|
994 |
||
995 |
static inline void link_changed(BLOCK_LINK *block, BLOCK_LINK **phead) |
|
996 |
{
|
|
997 |
DBUG_ASSERT(!block->next_changed); |
|
998 |
DBUG_ASSERT(!block->prev_changed); |
|
999 |
block->prev_changed= phead; |
|
1000 |
if ((block->next_changed= *phead)) |
|
1001 |
(*phead)->prev_changed= &block->next_changed; |
|
1002 |
*phead= block; |
|
1003 |
}
|
|
1004 |
||
1005 |
||
1006 |
/*
  Link a block in a chain of clean blocks of a file.

  SYNOPSIS
    link_to_file_list()
      keycache		Key cache handle
      block             Block to relink
      file              File to be linked to
      unlink            If to unlink first

  DESCRIPTION
    Unlink a block from whichever chain it is linked in, if it's
    asked for, and link it to the chain of clean blocks of the
    specified file.

  NOTE
    Please do never set/clear BLOCK_CHANGED outside of
    link_to_file_list() or link_to_changed_list().
    You would risk to damage correct counting of changed blocks
    and to find blocks in the wrong hash.

  RETURN
    void
*/

static void link_to_file_list(KEY_CACHE *keycache,
                              BLOCK_LINK *block, int file,
                              my_bool unlink_block)
{
  DBUG_ASSERT(block->status & BLOCK_IN_USE);
  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
  DBUG_ASSERT(block->hash_link->file == file);
  if (unlink_block)
    unlink_changed(block);
  link_changed(block, &keycache->file_blocks[FILE_HASH(file)]);
  if (block->status & BLOCK_CHANGED)
  {
    /* The block becomes clean: keep the changed-block counters in sync. */
    block->status&= ~BLOCK_CHANGED;
    keycache->blocks_changed--;
    keycache->global_blocks_changed--;
  }
}
|
|
1048 |
||
1049 |
||
1050 |
/*
  Re-link a block from the clean chain to the dirty chain of a file.

  SYNOPSIS
    link_to_changed_list()
      keycache		key cache handle
      block             block to relink

  DESCRIPTION
    Unlink a block from the chain of clean blocks of a file
    and link it to the chain of dirty blocks of the same file.

  NOTE
    Please do never set/clear BLOCK_CHANGED outside of
    link_to_file_list() or link_to_changed_list().
    You would risk to damage correct counting of changed blocks
    and to find blocks in the wrong hash.

  RETURN
    void
*/

static void link_to_changed_list(KEY_CACHE *keycache,
                                 BLOCK_LINK *block)
{
  DBUG_ASSERT(block->status & BLOCK_IN_USE);
  DBUG_ASSERT(!(block->status & BLOCK_CHANGED));
  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);

  unlink_changed(block);
  link_changed(block,
               &keycache->changed_blocks[FILE_HASH(block->hash_link->file)]);
  /* The block becomes dirty: keep the changed-block counters in sync. */
  block->status|=BLOCK_CHANGED;
  keycache->blocks_changed++;
  keycache->global_blocks_changed++;
}
|
|
1086 |
||
1087 |
||
1088 |
/*
  Link a block to the LRU chain at the beginning or at the end of
  one of two parts.

  SYNOPSIS
    link_block()
      keycache            pointer to a key cache data structure
      block               pointer to the block to link to the LRU chain
      hot                 <-> to link the block into the hot subchain
      at_end              <-> to link the block at the end of the subchain

  RETURN VALUE
    none

  NOTES.
    The LRU ring is represented by a circular list of block structures.
    The list is double-linked of the type (**prev,*next) type.
    The LRU ring is divided into two parts - hot and warm.
    There are two pointers to access the last blocks of these two
    parts. The beginning of the warm part follows right after the
    end of the hot part.
    Only blocks of the warm part can be used for eviction.
    The first block from the beginning of this subchain is always
    taken for eviction (keycache->last_used->next)

    LRU chain:       +------+   H O T    +------+
                     +----| end  |----...<----| beg  |----+
                     |    +------+last        +------+    |
                     v<-link in latest hot (new end)      |
                     |     link in latest warm (new end)->^
                     |    +------+  W A R M   +------+    |
                     +----| beg  |---->...----| end  |----+
                          +------+            +------+ins
                       first for eviction

    It is also possible that the block is selected for eviction and thus
    not linked in the LRU ring.
*/

static void link_block(KEY_CACHE *keycache, BLOCK_LINK *block, my_bool hot,
                       my_bool at_end)
{
  BLOCK_LINK *ins;
  BLOCK_LINK **pins;

  DBUG_ASSERT((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
  DBUG_ASSERT(!block->requests);
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);
#ifdef THREAD
  if (!hot && keycache->waiting_for_block.last_thread)
  {
    /* Signal that in the LRU warm sub-chain an available block has appeared */
    struct st_my_thread_var *last_thread=
      keycache->waiting_for_block.last_thread;
    struct st_my_thread_var *first_thread= last_thread->next;
    struct st_my_thread_var *next_thread= first_thread;
    HASH_LINK *hash_link= (HASH_LINK *) first_thread->opt_info;
    struct st_my_thread_var *thread;
    do
    {
      thread= next_thread;
      next_thread= thread->next;
      /*
         We notify about the event all threads that ask
         for the same page as the first thread in the queue
      */
      if ((HASH_LINK *) thread->opt_info == hash_link)
      {
        KEYCACHE_DBUG_PRINT("link_block: signal", ("thread %ld", thread->id));
        keycache_pthread_cond_signal(&thread->suspend);
        unlink_from_queue(&keycache->waiting_for_block, thread);
        /* Each woken requester holds a request on the block. */
        block->requests++;
      }
    }
    while (thread != last_thread);
    hash_link->block= block;
    /*
      NOTE: We assigned the block to the hash_link and signalled the
      requesting thread(s). But it is possible that other threads runs
      first. These threads see the hash_link assigned to a block which
      is assigned to another hash_link and not marked BLOCK_IN_SWITCH.
      This can be a problem for functions that do not select the block
      via its hash_link: flush and free. They do only see a block which
      is in a "normal" state and don't know that it will be evicted soon.

      We cannot set BLOCK_IN_SWITCH here because only one of the
      requesting threads must handle the eviction. All others must wait
      for it to complete. If we set the flag here, the threads would not
      know who is in charge of the eviction. Without the flag, the first
      thread takes the stick and sets the flag.

      But we need to note in the block that is has been selected for
      eviction. It must not be freed. The evicting thread will not
      expect the block in the free list. Before freeing we could also
      check if block->requests > 1. But I think including another flag
      in the check of block->status is slightly more efficient and
      probably easier to read.
    */
    block->status|= BLOCK_IN_EVICTION;
    KEYCACHE_THREAD_TRACE("link_block: after signaling");
#if defined(KEYCACHE_DEBUG)
    KEYCACHE_DBUG_PRINT("link_block",
        ("linked,unlinked block %u status=%x #requests=%u #available=%u",
         BLOCK_NUMBER(block), block->status,
         block->requests, keycache->blocks_available));
#endif
    /* The block was handed to a waiting evicter; do not enter the ring. */
    return;
  }
#else /* THREAD */
  KEYCACHE_DBUG_ASSERT(! (!hot && keycache->waiting_for_block.last_thread));
  /* Condition not transformed using DeMorgan, to keep the text identical */
#endif /* THREAD */
  pins= hot ? &keycache->used_ins : &keycache->used_last;
  ins= *pins;
  if (ins)
  {
    ins->next_used->prev_used= &block->next_used;
    block->next_used= ins->next_used;
    block->prev_used= &ins->next_used;
    ins->next_used= block;
    if (at_end)
      *pins= block;
  }
  else
  {
    /* The LRU ring is empty. Let the block point to itself. */
    keycache->used_last= keycache->used_ins= block->next_used= block;
    block->prev_used= &block->next_used;
  }
  KEYCACHE_THREAD_TRACE("link_block");
#if defined(KEYCACHE_DEBUG)
  keycache->blocks_available++;
  KEYCACHE_DBUG_PRINT("link_block",
      ("linked block %u:%1u status=%x #requests=%u #available=%u",
       BLOCK_NUMBER(block), at_end, block->status,
       block->requests, keycache->blocks_available));
  KEYCACHE_DBUG_ASSERT((ulong) keycache->blocks_available <=
                       keycache->blocks_used);
#endif
}
|
|
1231 |
||
1232 |
||
1233 |
/*
  Unlink a block from the LRU chain

  SYNOPSIS
    unlink_block()
      keycache            pointer to a key cache data structure
      block               pointer to the block to unlink from the LRU chain

  RETURN VALUE
    none

  NOTES.
    See NOTES for link_block
*/

static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block)
{
  DBUG_ASSERT((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
  DBUG_ASSERT(!block->requests);
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  /* The block must be properly linked into the LRU ring. */
  DBUG_ASSERT(block->next_used && block->prev_used &&
              (block->next_used->prev_used == &block->next_used) &&
              (*block->prev_used == block));
  if (block->next_used == block)
    /* The list contains only one member */
    keycache->used_last= keycache->used_ins= NULL;
  else
  {
    block->next_used->prev_used= block->prev_used;
    *block->prev_used= block->next_used;
    /* Repair the subchain end pointers if they pointed at this block. */
    if (keycache->used_last == block)
      keycache->used_last= STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
    if (keycache->used_ins == block)
      keycache->used_ins=STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
  }
  block->next_used= NULL;
#if !defined(DBUG_OFF)
  /*
    This makes it easier to see it's not in a chain during debugging.
    And some DBUG_ASSERT() rely on it.
  */
  block->prev_used= NULL;
#endif

  KEYCACHE_THREAD_TRACE("unlink_block");
#if defined(KEYCACHE_DEBUG)
  KEYCACHE_DBUG_ASSERT(keycache->blocks_available != 0);
  keycache->blocks_available--;
  KEYCACHE_DBUG_PRINT("unlink_block",
    ("unlinked block %u status=%x   #requests=%u  #available=%u",
     BLOCK_NUMBER(block), block->status,
     block->requests, keycache->blocks_available));
#endif
}
|
|
1288 |
||
1289 |
||
1290 |
/*
  Register requests for a block.

  SYNOPSIS
    reg_requests()
      keycache          Pointer to a key cache data structure.
      block             Pointer to the block to register a request on.
      count             Number of requests. Always 1.

  NOTE
    The first request unlinks the block from the LRU ring. This means
    that it is protected against eviction.

  RETURN
    void
*/
static void reg_requests(KEY_CACHE *keycache, BLOCK_LINK *block, int count)
{
  DBUG_ASSERT(block->status & BLOCK_IN_USE);
  DBUG_ASSERT(block->hash_link);

  /* Only a block with no outstanding requests sits in the LRU ring. */
  if (!block->requests)
    unlink_block(keycache, block);
  block->requests+=count;
}
|
|
1315 |
||
1316 |
||
1317 |
/*
  Unregister request for a block
  linking it to the LRU chain if it's the last request

  SYNOPSIS
    unreg_request()
    keycache            pointer to a key cache data structure
    block               pointer to the block to link to the LRU chain
    at_end              <-> to link the block at the end of the LRU chain

  RETURN VALUE
    none

  NOTES.
    Every linking to the LRU ring decrements by one a special block
    counter (if it's positive). If the at_end parameter is TRUE the block is
    added either at the end of warm sub-chain or at the end of hot sub-chain.
    It is added to the hot subchain if its counter is zero and number of
    blocks in warm sub-chain is not less than some low limit (determined by
    the division_limit parameter). Otherwise the block is added to the warm
    sub-chain. If the at_end parameter is FALSE the block is always added
    at beginning of the warm sub-chain.
    Thus a warm block can be promoted to the hot sub-chain when its counter
    becomes zero for the first time.
    At the same time the block at the very beginning of the hot subchain
    might be moved to the beginning of the warm subchain if it stays untouched
    for a too long time (this time is determined by parameter age_threshold).

    It is also possible that the block is selected for eviction and thus
    not linked in the LRU ring.
*/

static void unreg_request(KEY_CACHE *keycache,
                          BLOCK_LINK *block, int at_end)
{
  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
  DBUG_ASSERT(block->requests);
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);
  /*
    Unregister the request, but do not link erroneous blocks into the
    LRU ring.
  */
  if (!--block->requests && !(block->status & BLOCK_ERROR))
  {
    my_bool hot;
    /* hits_left counts down to zero before a block may be promoted. */
    if (block->hits_left)
      block->hits_left--;
    hot= !block->hits_left && at_end &&
      keycache->warm_blocks > keycache->min_warm_blocks;
    if (hot)
    {
      if (block->temperature == BLOCK_WARM)
        keycache->warm_blocks--;
      block->temperature= BLOCK_HOT;
      KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
                           keycache->warm_blocks));
    }
    link_block(keycache, block, hot, (my_bool)at_end);
    block->last_hit_time= keycache->keycache_time;
    keycache->keycache_time++;
    /*
      At this place, the block might be in the LRU ring or not. If an
      evicter was waiting for a block, it was selected for eviction and
      not linked in the LRU ring.
    */

    /*
      Check if we should link a hot block to the warm block sub-chain.
      It is possible that we select the same block as above. But it can
      also be another block. In any case a block from the LRU ring is
      selected. In other words it works even if the above block was
      selected for eviction and not linked in the LRU ring. Since this
      happens only if the LRU ring is empty, the block selected below
      would be NULL and the rest of the function skipped.
    */
    block= keycache->used_ins;
    if (block && keycache->keycache_time - block->last_hit_time >
	keycache->age_threshold)
    {
      /* Demote the aged block from the hot end to the warm beginning. */
      unlink_block(keycache, block);
      link_block(keycache, block, 0, 0);
      if (block->temperature != BLOCK_WARM)
      {
        keycache->warm_blocks++;
        block->temperature= BLOCK_WARM;
      }
      KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
                           keycache->warm_blocks));
    }
  }
}
|
|
1411 |
||
1412 |
/*
  Remove a reader of the page in block
*/

static void remove_reader(BLOCK_LINK *block)
{
  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);
  DBUG_ASSERT(block->hash_link->requests);
#ifdef THREAD
  /*
    If this was the last reader and a thread is waiting for readers to
    finish (see wait_for_readers()), wake it up.
  */
  if (! --block->hash_link->requests && block->condvar)
    keycache_pthread_cond_signal(block->condvar);
#else
  --block->hash_link->requests;
#endif
}
|
|
1431 |
||
1432 |
||
1433 |
/*
  Wait until the last reader of the page in block
  signals on its termination
*/

static void wait_for_readers(KEY_CACHE *keycache,
                             BLOCK_LINK *block)
{
#ifdef THREAD
  struct st_my_thread_var *thread= my_thread_var;
  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(!(block->status & (BLOCK_IN_FLUSH | BLOCK_CHANGED)));
  DBUG_ASSERT(block->hash_link);
  DBUG_ASSERT(block->hash_link->block == block);
  /* Linked in file_blocks or changed_blocks hash. */
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  /* Not linked in LRU ring. */
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);
  while (block->hash_link->requests)
  {
    KEYCACHE_DBUG_PRINT("wait_for_readers: wait",
                        ("suspend thread %ld  block %u",
                         thread->id, BLOCK_NUMBER(block)));
    /* There must be no other waiter. We have no queue here. */
    DBUG_ASSERT(!block->condvar);
    /*
      Publish our condition variable on the block so the last reader
      (remove_reader()) can signal us, then suspend on it.
    */
    block->condvar= &thread->suspend;
    keycache_pthread_cond_wait(&thread->suspend, &keycache->cache_lock);
    block->condvar= NULL;
  }
#else
  KEYCACHE_DBUG_ASSERT(block->hash_link->requests == 0);
#endif
}
|
|
1467 |
||
1468 |
||
1469 |
/*
|
|
1470 |
Add a hash link to a bucket in the hash_table
|
|
1471 |
*/
|
|
1472 |
||
1473 |
static inline void link_hash(HASH_LINK **start, HASH_LINK *hash_link) |
|
1474 |
{
|
|
1475 |
if (*start) |
|
1476 |
(*start)->prev= &hash_link->next; |
|
1477 |
hash_link->next= *start; |
|
1478 |
hash_link->prev= start; |
|
1479 |
*start= hash_link; |
|
1480 |
}
|
|
1481 |
||
1482 |
||
1483 |
/*
  Remove a hash link from the hash table
*/

static void unlink_hash(KEY_CACHE *keycache, HASH_LINK *hash_link)
{
  KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u  pos_ %lu  #requests=%u",
      (uint) hash_link->file,(ulong) hash_link->diskpos, hash_link->requests));
  KEYCACHE_DBUG_ASSERT(hash_link->requests == 0);
  if ((*hash_link->prev= hash_link->next))
    hash_link->next->prev= hash_link->prev;
  hash_link->block= NULL;
#ifdef THREAD
  if (keycache->waiting_for_hash_link.last_thread)
  {
    /* Signal that a free hash link has appeared */
    struct st_my_thread_var *last_thread=
      keycache->waiting_for_hash_link.last_thread;
    struct st_my_thread_var *first_thread= last_thread->next;
    struct st_my_thread_var *next_thread= first_thread;
    KEYCACHE_PAGE *first_page= (KEYCACHE_PAGE *) (first_thread->opt_info);
    struct st_my_thread_var *thread;

    /*
      Re-purpose the freed link for the page requested by the first
      waiter instead of returning it to the free list.
    */
    hash_link->file= first_page->file;
    hash_link->diskpos= first_page->filepos;
    do
    {
      KEYCACHE_PAGE *page;
      thread= next_thread;
      page= (KEYCACHE_PAGE *) thread->opt_info;
      next_thread= thread->next;
      /*
         We notify about the event all threads that ask
         for the same page as the first thread in the queue
      */
      if (page->file == hash_link->file && page->filepos == hash_link->diskpos)
      {
        KEYCACHE_DBUG_PRINT("unlink_hash: signal", ("thread %ld", thread->id));
        keycache_pthread_cond_signal(&thread->suspend);
        unlink_from_queue(&keycache->waiting_for_hash_link, thread);
      }
    }
    while (thread != last_thread);
    /* Link it into the bucket for its new (file, diskpos) pair. */
    link_hash(&keycache->hash_root[KEYCACHE_HASH(hash_link->file,
					         hash_link->diskpos)],
              hash_link);
    return;
  }
#else /* THREAD */
  KEYCACHE_DBUG_ASSERT(! (keycache->waiting_for_hash_link.last_thread));
#endif /* THREAD */
  /* Nobody is waiting: return the link to the free list. */
  hash_link->next= keycache->free_hash_list;
  keycache->free_hash_list= hash_link;
}
|
|
1537 |
||
1538 |
||
1539 |
/*
  Get the hash link for a page.

  Looks up (file, filepos) in the hash table; if absent, allocates a
  hash link from the free list / never-used pool, or waits for one to
  be freed. Always registers one request on the returned link.
*/

static HASH_LINK *get_hash_link(KEY_CACHE *keycache,
                                int file, my_off_t filepos)
{
  reg1 HASH_LINK *hash_link, **start;
#if defined(KEYCACHE_DEBUG)
  int cnt;
#endif

  KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u  pos: %lu",
                      (uint) file,(ulong) filepos));

restart:
  /*
    Find the bucket in the hash table for the pair (file, filepos);
    start contains the head of the bucket list,
    hash_link points to the first member of the list
  */
  hash_link= *(start= &keycache->hash_root[KEYCACHE_HASH(file, filepos)]);
#if defined(KEYCACHE_DEBUG)
  cnt= 0;
#endif
  /* Look for an element for the pair (file, filepos) in the bucket chain */
  while (hash_link &&
         (hash_link->diskpos != filepos || hash_link->file != file))
  {
    hash_link= hash_link->next;
#if defined(KEYCACHE_DEBUG)
    cnt++;
    if (! (cnt <= keycache->hash_links_used))
    {
      int i;
      for (i=0, hash_link= *start ;
           i < cnt ; i++, hash_link= hash_link->next)
      {
        KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u  pos: %lu",
            (uint) hash_link->file,(ulong) hash_link->diskpos));
      }
    }
    KEYCACHE_DBUG_ASSERT(cnt <= keycache->hash_links_used);
#endif
  }
  if (! hash_link)
  {
    /* There is no hash link in the hash table for the pair (file, filepos) */
    if (keycache->free_hash_list)
    {
      /* Reuse a previously freed link. */
      hash_link= keycache->free_hash_list;
      keycache->free_hash_list= hash_link->next;
    }
    else if (keycache->hash_links_used < keycache->hash_links)
    {
      /* Take a never-used link from the preallocated pool. */
      hash_link= &keycache->hash_link_root[keycache->hash_links_used++];
    }
    else
    {
#ifdef THREAD
      /* Wait for a free hash link */
      struct st_my_thread_var *thread= my_thread_var;
      KEYCACHE_PAGE page;
      KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting"));
      /*
        Publish the requested page via opt_info so unlink_hash() can
        hand us a matching link directly.
      */
      page.file= file;
      page.filepos= filepos;
      thread->opt_info= (void *) &page;
      link_into_queue(&keycache->waiting_for_hash_link, thread);
      KEYCACHE_DBUG_PRINT("get_hash_link: wait",
                        ("suspend thread %ld", thread->id));
      keycache_pthread_cond_wait(&thread->suspend,
                                 &keycache->cache_lock);
      thread->opt_info= NULL;
#else
      KEYCACHE_DBUG_ASSERT(0);
#endif
      /* The lock was released while waiting; redo the lookup. */
      goto restart;
    }
    hash_link->file= file;
    hash_link->diskpos= filepos;
    link_hash(start, hash_link);
  }
  /* Register the request for the page */
  hash_link->requests++;

  return hash_link;
}
|
|
1626 |
||
1627 |
||
1628 |
/*
|
|
1629 |
Get a block for the file page requested by a keycache read/write operation;
|
|
1630 |
If the page is not in the cache return a free block, if there is none
|
|
1631 |
return the lru block after saving its buffer if the page is dirty.
|
|
1632 |
||
1633 |
SYNOPSIS
|
|
1634 |
||
1635 |
find_key_block()
|
|
1636 |
keycache pointer to a key cache data structure
|
|
1637 |
file handler for the file to read page from
|
|
1638 |
filepos position of the page in the file
|
|
1639 |
init_hits_left how initialize the block counter for the page
|
|
1640 |
wrmode <-> get for writing
|
|
1641 |
page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}
|
|
1642 |
||
1643 |
RETURN VALUE
|
|
1644 |
Pointer to the found block if successful, 0 - otherwise
|
|
1645 |
||
1646 |
NOTES.
|
|
1647 |
For the page from file positioned at filepos the function checks whether
|
|
1648 |
the page is in the key cache specified by the first parameter.
|
|
1649 |
If this is the case it immediately returns the block.
|
|
1650 |
If not, the function first chooses a block for this page. If there is
|
|
1651 |
no not used blocks in the key cache yet, the function takes the block
|
|
1652 |
at the very beginning of the warm sub-chain. It saves the page in that
|
|
1653 |
block if it's dirty before returning the pointer to it.
|
|
1654 |
The function returns in the page_st parameter the following values:
|
|
1655 |
PAGE_READ - if page already in the block,
|
|
1656 |
PAGE_TO_BE_READ - if it is to be read yet by the current thread
|
|
1657 |
WAIT_TO_BE_READ - if it is to be read by another thread
|
|
1658 |
If an error occurs THE BLOCK_ERROR bit is set in the block status.
|
|
1659 |
It might happen that there are no blocks in LRU chain (in warm part) -
|
|
1660 |
all blocks are unlinked for some read/write operations. Then the function
|
|
1661 |
waits until first of this operations links any block back.
|
|
1662 |
*/
|
|
1663 |
||
1664 |
static BLOCK_LINK *find_key_block(KEY_CACHE *keycache, |
|
1665 |
File file, my_off_t filepos, |
|
1666 |
int init_hits_left, |
|
1667 |
int wrmode, int *page_st) |
|
1668 |
{
|
|
1669 |
HASH_LINK *hash_link; |
|
1670 |
BLOCK_LINK *block; |
|
1671 |
int error= 0; |
|
1672 |
int page_status; |
|
1673 |
||
1674 |
DBUG_ENTER("find_key_block"); |
|
1675 |
KEYCACHE_THREAD_TRACE("find_key_block:begin"); |
|
1676 |
DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d", |
|
1677 |
file, (ulong) filepos, wrmode)); |
|
1678 |
KEYCACHE_DBUG_PRINT("find_key_block", ("fd: %d pos: %lu wrmode: %d", |
|
1679 |
file, (ulong) filepos, |
|
1680 |
wrmode)); |
|
1681 |
#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
|
|
1682 |
DBUG_EXECUTE("check_keycache2", |
|
1683 |
test_key_cache(keycache, "start of find_key_block", 0);); |
|
1684 |
#endif
|
|
1685 |
||
1686 |
restart: |
|
1687 |
/*
|
|
1688 |
If the flush phase of a resize operation fails, the cache is left
|
|
1689 |
unusable. This will be detected only after "goto restart".
|
|
1690 |
*/
|
|
1691 |
if (!keycache->can_be_used) |
|
1692 |
DBUG_RETURN(0); |
|
1693 |
||
1694 |
/*
|
|
1695 |
Find the hash_link for the requested file block (file, filepos). We
|
|
1696 |
do always get a hash_link here. It has registered our request so
|
|
1697 |
that no other thread can use it for another file block until we
|
|
1698 |
release the request (which is done by remove_reader() usually). The
|
|
1699 |
hash_link can have a block assigned to it or not. If there is a
|
|
1700 |
block, it may be assigned to this hash_link or not. In cases where a
|
|
1701 |
block is evicted from the cache, it is taken from the LRU ring and
|
|
1702 |
referenced by the new hash_link. But the block can still be assigned
|
|
1703 |
to its old hash_link for some time if it needs to be flushed first,
|
|
1704 |
or if there are other threads still reading it.
|
|
1705 |
||
1706 |
Summary:
|
|
1707 |
hash_link is always returned.
|
|
1708 |
hash_link->block can be:
|
|
1709 |
- NULL or
|
|
1710 |
- not assigned to this hash_link or
|
|
1711 |
- assigned to this hash_link. If assigned, the block can have
|
|
1712 |
- invalid data (when freshly assigned) or
|
|
1713 |
- valid data. Valid data can be
|
|
1714 |
- changed over the file contents (dirty) or
|
|
1715 |
- not changed (clean).
|
|
1716 |
*/
|
|
1717 |
hash_link= get_hash_link(keycache, file, filepos); |
|
1718 |
DBUG_ASSERT((hash_link->file == file) && (hash_link->diskpos == filepos)); |
|
1719 |
||
1720 |
page_status= -1; |
|
1721 |
if ((block= hash_link->block) && |
|
1722 |
block->hash_link == hash_link && (block->status & BLOCK_READ)) |
|
1723 |
{
|
|
1724 |
/* Assigned block with valid (changed or unchanged) contents. */
|
|
1725 |
page_status= PAGE_READ; |
|
1726 |
}
|
|
1727 |
/*
|
|
1728 |
else (page_status == -1)
|
|
1729 |
- block == NULL or
|
|
1730 |
- block not assigned to this hash_link or
|
|
1731 |
- block assigned but not yet read from file (invalid data).
|
|
1732 |
*/
|
|
1733 |
||
1734 |
#ifdef THREAD
|
|
1735 |
if (keycache->in_resize) |
|
1736 |
{
|
|
1737 |
/* This is a request during a resize operation */
|
|
1738 |
||
1739 |
if (!block) |
|
1740 |
{
|
|
1741 |
struct st_my_thread_var *thread; |
|
1742 |
||
1743 |
/*
|
|
1744 |
The file block is not in the cache. We don't need it in the
|
|
1745 |
cache: we are going to read or write directly to file. Cancel
|
|
1746 |
the request. We can simply decrement hash_link->requests because
|
|
1747 |
we did not release cache_lock since increasing it. So no other
|
|
1748 |
thread can wait for our request to become released.
|
|
1749 |
*/
|
|
1750 |
if (hash_link->requests == 1) |
|
1751 |
{
|
|
1752 |
/*
|
|
1753 |
We are the only one to request this hash_link (this file/pos).
|
|
1754 |
Free the hash_link.
|
|
1755 |
*/
|
|
1756 |
hash_link->requests--; |
|
1757 |
unlink_hash(keycache, hash_link); |
|
1758 |
DBUG_RETURN(0); |
|
1759 |
}
|
|
1760 |
||
1761 |
/*
|
|
1762 |
More requests on the hash_link. Someone tries to evict a block
|
|
1763 |
for this hash_link (could have started before resizing started).
|
|
1764 |
This means that the LRU ring is empty. Otherwise a block could
|
|
1765 |
be assigned immediately. Behave like a thread that wants to
|
|
1766 |
evict a block for this file/pos. Add to the queue of threads
|
|
1767 |
waiting for a block. Wait until there is one assigned.
|
|
1768 |
||
1769 |
Refresh the request on the hash-link so that it cannot be reused
|
|
1770 |
for another file/pos.
|
|
1771 |
*/
|
|
1772 |
thread= my_thread_var; |
|
1773 |
thread->opt_info= (void *) hash_link; |
|
1774 |
link_into_queue(&keycache->waiting_for_block, thread); |
|
1775 |
do
|
|
1776 |
{
|
|
1777 |
KEYCACHE_DBUG_PRINT("find_key_block: wait", |
|
1778 |
("suspend thread %ld", thread->id)); |
|
1779 |
keycache_pthread_cond_wait(&thread->suspend, |
|
1780 |
&keycache->cache_lock); |
|
1781 |
} while (thread->next); |
|
1782 |
thread->opt_info= NULL; |
|
1783 |
/*
|
|
1784 |
A block should now be assigned to the hash_link. But it may
|
|
1785 |
still need to be evicted. Anyway, we should re-check the
|
|
1786 |
situation. page_status must be set correctly.
|
|
1787 |
*/
|
|
1788 |
hash_link->requests--; |
|
1789 |
goto restart; |
|
1790 |
} /* end of if (!block) */ |
|
1791 |
||
1792 |
/*
|
|
1793 |
There is a block for this file/pos in the cache. Register a
|
|
1794 |
request on it. This unlinks it from the LRU ring (if it is there)
|
|
1795 |
and hence protects it against eviction (if not already in
|
|
1796 |
eviction). We need this for returning the block to the caller, for
|
|
1797 |
calling remove_reader() (for debugging purposes), and for calling
|
|
1798 |
free_block(). The only case where we don't need the request is if
|
|
1799 |
the block is in eviction. In that case we have to unregister the
|
|
1800 |
request later.
|
|
1801 |
*/
|
|
1802 |
reg_requests(keycache, block, 1); |
|
1803 |
||
1804 |
if (page_status != PAGE_READ) |
|
1805 |
{
|
|
1806 |
/*
|
|
1807 |
- block not assigned to this hash_link or
|
|
1808 |
- block assigned but not yet read from file (invalid data).
|
|
1809 |
||
1810 |
This must be a block in eviction. It will be read soon. We need
|
|
1811 |
to wait here until this happened. Otherwise the caller could
|
|
1812 |
access a wrong block or a block which is in read. While waiting
|
|
1813 |
we cannot lose hash_link nor block. We have registered a request
|
|
1814 |
on the hash_link. Everything can happen to the block but changes
|
|
1815 |
in the hash_link -> block relationship. In other words:
|
|
1816 |
everything can happen to the block but free or another completed
|
|
1817 |
eviction.
|
|
1818 |
||
1819 |
Note that we bahave like a secondary requestor here. We just
|
|
1820 |
cannot return with PAGE_WAIT_TO_BE_READ. This would work for
|
|
1821 |
read requests and writes on dirty blocks that are not in flush
|
|
1822 |
only. Waiting here on COND_FOR_REQUESTED works in all
|
|
1823 |
situations.
|
|
1824 |
*/
|
|
1825 |
DBUG_ASSERT(((block->hash_link != hash_link) && |
|
1826 |
(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) || |
|
1827 |
((block->hash_link == hash_link) && |
|
1828 |
!(block->status & BLOCK_READ))); |
|
1829 |
wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock); |
|
1830 |
/*
|
|
1831 |
Here we can trust that the block has been assigned to this
|
|
1832 |
hash_link (block->hash_link == hash_link) and read into the
|
|
1833 |
buffer (BLOCK_READ). The worst things possible here are that the
|
|
1834 |
block is in free (BLOCK_REASSIGNED). But the block is still
|
|
1835 |
assigned to the hash_link. The freeing thread waits until we
|
|
1836 |
release our request on the hash_link. The block must not be
|
|
1837 |
again in eviction because we registered an request on it before
|
|
1838 |
starting to wait.
|
|
1839 |
*/
|
|
1840 |
DBUG_ASSERT(block->hash_link == hash_link); |
|
1841 |
DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE)); |
|
1842 |
DBUG_ASSERT(!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))); |
|
1843 |
}
|
|
1844 |
/*
|
|
1845 |
The block is in the cache. Assigned to the hash_link. Valid data.
|
|
1846 |
Note that in case of page_st == PAGE_READ, the block can be marked
|
|
1847 |
for eviction. In any case it can be marked for freeing.
|
|
1848 |
*/
|
|
1849 |
||
1850 |
if (!wrmode) |
|
1851 |
{
|
|
1852 |
/* A reader can just read the block. */
|
|
1853 |
*page_st= PAGE_READ; |
|
1854 |
DBUG_ASSERT((hash_link->file == file) && |
|
1855 |
(hash_link->diskpos == filepos) && |
|
1856 |
(block->hash_link == hash_link)); |
|
1857 |
DBUG_RETURN(block); |
|
1858 |
}
|
|
1859 |
||
1860 |
/*
|
|
1861 |
This is a writer. No two writers for the same block can exist.
|
|
1862 |
This must be assured by locks outside of the key cache.
|
|
1863 |
*/
|
|
1864 |
DBUG_ASSERT(!(block->status & BLOCK_FOR_UPDATE) || fail_block(block)); |
|
1865 |
||
1866 |
while (block->status & BLOCK_IN_FLUSH) |
|
1867 |
{
|
|
1868 |
/*
|
|
1869 |
Wait until the block is flushed to file. Do not release the
|
|
1870 |
request on the hash_link yet to prevent that the block is freed
|
|
1871 |
or reassigned while we wait. While we wait, several things can
|
|
1872 |
happen to the block, including another flush. But the block
|
|
1873 |
cannot be reassigned to another hash_link until we release our
|
|
1874 |
request on it. But it can be marked BLOCK_REASSIGNED from free
|
|
1875 |
or eviction, while they wait for us to release the hash_link.
|
|
1876 |
*/
|
|
1877 |
wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock); |
|
1878 |
/*
|
|
1879 |
If the flush phase failed, the resize could have finished while
|
|
1880 |
we waited here.
|
|
1881 |
*/
|
|
1882 |
if (!keycache->in_resize) |
|
1883 |
{
|
|
1884 |
remove_reader(block); |
|
1885 |
unreg_request(keycache, block, 1); |
|
1886 |
goto restart; |
|
1887 |
}
|
|
1888 |
DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE)); |
|
1889 |
DBUG_ASSERT(!(block->status & BLOCK_FOR_UPDATE) || fail_block(block)); |
|
1890 |
DBUG_ASSERT(block->hash_link == hash_link); |
|
1891 |
}
|
|
1892 |
||
1893 |
if (block->status & BLOCK_CHANGED) |
|
1894 |
{
|
|
1895 |
/*
|
|
1896 |
We want to write a block with changed contents. If the cache
|
|
1897 |
block size is bigger than the callers block size (e.g. MyISAM),
|
|
1898 |
the caller may replace part of the block only. Changes of the
|
|
1899 |
other part of the block must be preserved. Since the block has
|
|
1900 |
not yet been selected for flush, we can still add our changes.
|
|
1901 |
*/
|
|
1902 |
*page_st= PAGE_READ; |
|
1903 |
DBUG_ASSERT((hash_link->file == file) && |
|
1904 |
(hash_link->diskpos == filepos) && |
|
1905 |
(block->hash_link == hash_link)); |
|
1906 |
DBUG_RETURN(block); |
|
1907 |
}
|
|
1908 |
||
1909 |
/*
|
|
1910 |
This is a write request for a clean block. We do not want to have
|
|
1911 |
new dirty blocks in the cache while resizing. We will free the
|
|
1912 |
block and write directly to file. If the block is in eviction or
|
|
1913 |
in free, we just let it go.
|
|
1914 |
||
1915 |
Unregister from the hash_link. This must be done before freeing
|
|
1916 |
the block. And it must be done if not freeing the block. Because
|
|
1917 |
we could have waited above, we need to call remove_reader(). Other
|
|
1918 |
threads could wait for us to release our request on the hash_link.
|
|
1919 |
*/
|
|
1920 |
remove_reader(block); |
|
1921 |
||
1922 |
/* If the block is not in eviction and not in free, we can free it. */
|
|
1923 |
if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH | |
|
1924 |
BLOCK_REASSIGNED))) |
|
1925 |
{
|
|
1926 |
/*
|
|
1927 |
Free block as we are going to write directly to file.
|
|
1928 |
Although we have an exlusive lock for the updated key part,
|
|
1929 |
the control can be yielded by the current thread as we might
|
|
1930 |
have unfinished readers of other key parts in the block
|
|
1931 |
buffer. Still we are guaranteed not to have any readers
|
|
1932 |
of the key part we are writing into until the block is
|
|
1933 |
removed from the cache as we set the BLOCK_REASSIGNED
|
|
1934 |
flag (see the code below that handles reading requests).
|
|
1935 |
*/
|
|
1936 |
free_block(keycache, block); |
|
1937 |
}
|
|
1938 |
else
|
|
1939 |
{
|
|
1940 |
/*
|
|
1941 |
The block will be evicted/freed soon. Don't touch it in any way.
|
|
1942 |
Unregister the request that we registered above.
|
|
1943 |
*/
|
|
1944 |
unreg_request(keycache, block, 1); |
|
1945 |
||
1946 |
/*
|
|
1947 |
The block is still assigned to the hash_link (the file/pos that
|
|
1948 |
we are going to write to). Wait until the eviction/free is
|
|
1949 |
complete. Otherwise the direct write could complete before all
|
|
1950 |
readers are done with the block. So they could read outdated
|
|
1951 |
data.
|
|
1952 |
||
1953 |
Since we released our request on the hash_link, it can be reused
|
|
1954 |
for another file/pos. Hence we cannot just check for
|
|
1955 |
block->hash_link == hash_link. As long as the resize is
|
|
1956 |
proceeding the block cannot be reassigned to the same file/pos
|
|
1957 |
again. So we can terminate the loop when the block is no longer
|
|
1958 |
assigned to this file/pos.
|
|
1959 |
*/
|
|
1960 |
do
|
|
1961 |
{
|
|
1962 |
wait_on_queue(&block->wqueue[COND_FOR_SAVED], |
|
1963 |
&keycache->cache_lock); |
|
1964 |
/*
|
|
1965 |
If the flush phase failed, the resize could have finished
|
|
1966 |
while we waited here.
|
|
1967 |
*/
|
|
1968 |
if (!keycache->in_resize) |
|
1969 |
goto restart; |
|
1970 |
} while (block->hash_link && |
|
1971 |
(block->hash_link->file == file) && |
|
1972 |
(block->hash_link->diskpos == filepos)); |
|
1973 |
}
|
|
1974 |
DBUG_RETURN(0); |
|
1975 |
}
|
|
1976 |
#else /* THREAD */ |
|
1977 |
DBUG_ASSERT(!keycache->in_resize); |
|
1978 |
#endif
|
|
1979 |
||
1980 |
if (page_status == PAGE_READ && |
|
1981 |
(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH | |
|
1982 |
BLOCK_REASSIGNED))) |
|
1983 |
{
|
|
1984 |
/*
|
|
1985 |
This is a request for a block to be removed from cache. The block
|
|
1986 |
is assigned to this hash_link and contains valid data, but is
|
|
1987 |
marked for eviction or to be freed. Possible reasons why it has
|
|
1988 |
not yet been evicted/freed can be a flush before reassignment
|
|
1989 |
(BLOCK_IN_SWITCH), readers of the block have not finished yet
|
|
1990 |
(BLOCK_REASSIGNED), or the evicting thread did not yet awake after
|
|
1991 |
the block has been selected for it (BLOCK_IN_EVICTION).
|
|
1992 |
*/
|
|
1993 |
||
1994 |
KEYCACHE_DBUG_PRINT("find_key_block", |
|
1995 |
("request for old page in block %u " |
|
1996 |
"wrmode: %d block->status: %d", |
|
1997 |
BLOCK_NUMBER(block), wrmode, block->status)); |
|
1998 |
/*
|
|
1999 |
Only reading requests can proceed until the old dirty page is flushed,
|
|
2000 |
all others are to be suspended, then resubmitted
|
|
2001 |
*/
|
|
2002 |
if (!wrmode && !(block->status & BLOCK_REASSIGNED)) |
|
2003 |
{
|
|
2004 |
/*
|
|
2005 |
This is a read request and the block not yet reassigned. We can
|
|
2006 |
register our request and proceed. This unlinks the block from
|
|
2007 |
the LRU ring and protects it against eviction.
|
|
2008 |
*/
|
|
2009 |
reg_requests(keycache, block, 1); |
|
2010 |
}
|
|
2011 |
else
|
|
2012 |
{
|
|
2013 |
/*
|
|
2014 |
Either this is a write request for a block that is in eviction
|
|
2015 |
or in free. We must not use it any more. Instead we must evict
|
|
2016 |
another block. But we cannot do this before the eviction/free is
|
|
2017 |
done. Otherwise we would find the same hash_link + block again
|
|
2018 |
and again.
|
|
2019 |
||
2020 |
Or this is a read request for a block in eviction/free that does
|
|
2021 |
not require a flush, but waits for readers to finish with the
|
|
2022 |
block. We do not read this block to let the eviction/free happen
|
|
2023 |
as soon as possible. Again we must wait so that we don't find
|
|
2024 |
the same hash_link + block again and again.
|
|
2025 |
*/
|
|
2026 |
DBUG_ASSERT(hash_link->requests); |
|
2027 |
hash_link->requests--; |
|
2028 |
KEYCACHE_DBUG_PRINT("find_key_block", |
|
2029 |
("request waiting for old page to be saved")); |
|
2030 |
wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock); |
|
2031 |
KEYCACHE_DBUG_PRINT("find_key_block", |
|
2032 |
("request for old page resubmitted")); |
|
2033 |
/*
|
|
2034 |
The block is no longer assigned to this hash_link.
|
|
2035 |
Get another one.
|
|
2036 |
*/
|
|
2037 |
goto restart; |
|
2038 |
}
|
|
2039 |
}
|
|
2040 |
else
|
|
2041 |
{
|
|
2042 |
/*
|
|
2043 |
This is a request for a new block or for a block not to be removed.
|
|
2044 |
Either
|
|
2045 |
- block == NULL or
|
|
2046 |
- block not assigned to this hash_link or
|
|
2047 |
- block assigned but not yet read from file,
|
|
2048 |
or
|
|
2049 |
- block assigned with valid (changed or unchanged) data and
|
|
2050 |
- it will not be reassigned/freed.
|
|
2051 |
*/
|
|
2052 |
if (! block) |
|
2053 |
{
|
|
2054 |
/* No block is assigned to the hash_link yet. */
|
|
2055 |
if (keycache->blocks_unused) |
|
2056 |
{
|
|
2057 |
if (keycache->free_block_list) |
|
2058 |
{
|
|
2059 |
/* There is a block in the free list. */
|
|
2060 |
block= keycache->free_block_list; |
|
2061 |
keycache->free_block_list= block->next_used; |
|
2062 |
block->next_used= NULL; |
|
2063 |
}
|
|
2064 |
else
|
|
2065 |
{
|
|
2066 |
size_t block_mem_offset; |
|
2067 |
/* There are some never used blocks, take first of them */
|
|
2068 |
DBUG_ASSERT(keycache->blocks_used < |
|
2069 |
(ulong) keycache->disk_blocks); |
|
2070 |
block= &keycache->block_root[keycache->blocks_used]; |
|
2071 |
block_mem_offset= |
|
2072 |
((size_t) keycache->blocks_used) * keycache->key_cache_block_size; |
|
2073 |
block->buffer= ADD_TO_PTR(keycache->block_mem, |
|
2074 |
block_mem_offset, |
|
2075 |
uchar*); |
|
2076 |
keycache->blocks_used++; |
|
2077 |
DBUG_ASSERT(!block->next_used); |
|
2078 |
}
|
|
2079 |
DBUG_ASSERT(!block->prev_used); |
|
2080 |
DBUG_ASSERT(!block->next_changed); |
|
2081 |
DBUG_ASSERT(!block->prev_changed); |
|
2082 |
DBUG_ASSERT(!block->hash_link); |
|
2083 |
DBUG_ASSERT(!block->status); |
|
2084 |
DBUG_ASSERT(!block->requests); |
|
2085 |
keycache->blocks_unused--; |
|
2086 |
block->status= BLOCK_IN_USE; |
|
2087 |
block->length= 0; |
|
2088 |
block->offset= keycache->key_cache_block_size; |
|
2089 |
block->requests= 1; |
|
2090 |
block->temperature= BLOCK_COLD; |
|
2091 |
block->hits_left= init_hits_left; |
|
2092 |
block->last_hit_time= 0; |
|
2093 |
block->hash_link= hash_link; |
|
2094 |
hash_link->block= block; |
|
2095 |
link_to_file_list(keycache, block, file, 0); |
|
2096 |
page_status= PAGE_TO_BE_READ; |
|
2097 |
KEYCACHE_DBUG_PRINT("find_key_block", |
|
2098 |
("got free or never used block %u", |
|
2099 |
BLOCK_NUMBER(block))); |
|
2100 |
}
|
|
2101 |
else
|
|
2102 |
{
|
|
2103 |
/*
|
|
2104 |
There are no free blocks and no never used blocks, use a block
|
|
2105 |
from the LRU ring.
|
|
2106 |
*/
|
|
2107 |
||
2108 |
#ifdef THREAD
|
|
2109 |
if (! keycache->used_last) |
|
2110 |
{
|
|
2111 |
/*
|
|
2112 |
The LRU ring is empty. Wait until a new block is added to
|
|
2113 |
it. Several threads might wait here for the same hash_link,
|
|
2114 |
all of them must get the same block. While waiting for a
|
|
2115 |
block, after a block is selected for this hash_link, other
|
|
2116 |
threads can run first before this one awakes. During this
|
|
2117 |
time interval other threads find this hash_link pointing to
|
|
2118 |
the block, which is still assigned to another hash_link. In
|
|
2119 |
this case the block is not marked BLOCK_IN_SWITCH yet, but
|
|
2120 |
it is marked BLOCK_IN_EVICTION.
|
|
2121 |
*/
|
|
2122 |
||
2123 |
struct st_my_thread_var *thread= my_thread_var; |
|
2124 |
thread->opt_info= (void *) hash_link; |
|
2125 |
link_into_queue(&keycache->waiting_for_block, thread); |
|
2126 |
do
|
|
2127 |
{
|
|
2128 |
KEYCACHE_DBUG_PRINT("find_key_block: wait", |
|
2129 |
("suspend thread %ld", thread->id)); |
|
2130 |
keycache_pthread_cond_wait(&thread->suspend, |
|
2131 |
&keycache->cache_lock); |
|
2132 |
}
|
|
2133 |
while (thread->next); |
|
2134 |
thread->opt_info= NULL; |
|
2135 |
/* Assert that block has a request registered. */
|
|
2136 |
DBUG_ASSERT(hash_link->block->requests); |
|
2137 |
/* Assert that block is not in LRU ring. */
|
|
2138 |
DBUG_ASSERT(!hash_link->block->next_used); |
|
2139 |
DBUG_ASSERT(!hash_link->block->prev_used); |
|
2140 |
}
|
|
2141 |
#else
|
|
2142 |
KEYCACHE_DBUG_ASSERT(keycache->used_last); |
|
2143 |
#endif
|
|
2144 |
/*
|
|
2145 |
If we waited above, hash_link->block has been assigned by
|
|
2146 |
link_block(). Otherwise it is still NULL. In the latter case
|
|
2147 |
we need to grab a block from the LRU ring ourselves.
|
|
2148 |
*/
|
|
2149 |
block= hash_link->block; |
|
2150 |
if (! block) |
|
2151 |
{
|
|
2152 |
/* Select the last block from the LRU ring. */
|
|
2153 |
block= keycache->used_last->next_used; |
|
2154 |
block->hits_left= init_hits_left; |
|
2155 |
block->last_hit_time= 0; |
|
2156 |
hash_link->block= block; |
|
2157 |
/*
|
|
2158 |
Register a request on the block. This unlinks it from the
|
|
2159 |
LRU ring and protects it against eviction.
|
|
2160 |
*/
|
|
2161 |
DBUG_ASSERT(!block->requests); |
|
2162 |
reg_requests(keycache, block,1); |
|
2163 |
/*
|
|
2164 |
We do not need to set block->status|= BLOCK_IN_EVICTION here
|
|
2165 |
because we will set block->status|= BLOCK_IN_SWITCH
|
|
2166 |
immediately without releasing the lock in between. This does
|
|
2167 |
also support debugging. When looking at the block, one can
|
|
2168 |
see if the block has been selected by link_block() after the
|
|
2169 |
LRU ring was empty, or if it was grabbed directly from the
|
|
2170 |
LRU ring in this branch.
|
|
2171 |
*/
|
|
2172 |
}
|
|
2173 |
||
2174 |
/*
|
|
2175 |
If we had to wait above, there is a small chance that another
|
|
2176 |
thread grabbed this block for the same file block already. But
|
|
2177 |
in most cases the first condition is true.
|
|
2178 |
*/
|
|
2179 |
if (block->hash_link != hash_link && |
|
2180 |
! (block->status & BLOCK_IN_SWITCH) ) |
|
2181 |
{
|
|
2182 |
/* this is a primary request for a new page */
|
|
2183 |
block->status|= BLOCK_IN_SWITCH; |
|
2184 |
||
2185 |
KEYCACHE_DBUG_PRINT("find_key_block", |
|
2186 |
("got block %u for new page", BLOCK_NUMBER(block))); |
|
2187 |
||
2188 |
if (block->status & BLOCK_CHANGED) |
|
2189 |
{
|
|
2190 |
/* The block contains a dirty page - push it out of the cache */
|
|
2191 |
||
2192 |
KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty")); |
|
2193 |
if (block->status & BLOCK_IN_FLUSH) |
|
2194 |
{
|
|
2195 |
/*
|
|
2196 |
The block is marked for flush. If we do not wait here,
|
|
2197 |
it could happen that we write the block, reassign it to
|
|
2198 |
another file block, then, before the new owner can read
|
|
2199 |
the new file block, the flusher writes the cache block
|
|
2200 |
(which still has the old contents) to the new file block!
|
|
2201 |
*/
|
|
2202 |
wait_on_queue(&block->wqueue[COND_FOR_SAVED], |
|
2203 |
&keycache->cache_lock); |
|
2204 |
/*
|
|
2205 |
The block is marked BLOCK_IN_SWITCH. It should be left
|
|
2206 |
alone except for reading. No free, no write.
|
|
2207 |
*/
|
|
2208 |
DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE)); |
|
2209 |
DBUG_ASSERT(!(block->status & (BLOCK_REASSIGNED | |
|
2210 |
BLOCK_CHANGED | |
|
2211 |
BLOCK_FOR_UPDATE))); |
|
2212 |
}
|
|
2213 |
else
|
|
2214 |
{
|
|
2215 |
block->status|= BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE; |
|
2216 |
/*
|
|
2217 |
BLOCK_IN_EVICTION may be true or not. Other flags must
|
|
2218 |
have a fixed value.
|
|
2219 |
*/
|
|
2220 |
DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) == |
|
2221 |
(BLOCK_READ | BLOCK_IN_SWITCH | |
|
2222 |
BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE | |
|
2223 |
BLOCK_CHANGED | BLOCK_IN_USE)); |
|
2224 |
DBUG_ASSERT(block->hash_link); |
|
2225 |
||
2226 |
keycache_pthread_mutex_unlock(&keycache->cache_lock); |
|
2227 |
/*
|
|
2228 |
The call is thread safe because only the current
|
|
2229 |
thread might change the block->hash_link value
|
|
2230 |
*/
|
|
2231 |
error= my_pwrite(block->hash_link->file, |
|
2232 |
block->buffer + block->offset, |
|
2233 |
block->length - block->offset, |
|
2234 |
block->hash_link->diskpos + block->offset, |
|
2235 |
MYF(MY_NABP | MY_WAIT_IF_FULL)); |
|
2236 |
keycache_pthread_mutex_lock(&keycache->cache_lock); |
|
2237 |
||
2238 |
/* Block status must not have changed. */
|
|
2239 |
DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) == |
|
2240 |
(BLOCK_READ | BLOCK_IN_SWITCH | |
|
2241 |
BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE | |
|
2242 |
BLOCK_CHANGED | BLOCK_IN_USE) || fail_block(block)); |
|
2243 |
keycache->global_cache_write++; |
|
2244 |
}
|
|
2245 |
}
|
|
2246 |
||
2247 |
block->status|= BLOCK_REASSIGNED; |
|
2248 |
/*
|
|
2249 |
The block comes from the LRU ring. It must have a hash_link
|
|
2250 |
assigned.
|
|
2251 |
*/
|
|
2252 |
DBUG_ASSERT(block->hash_link); |
|
2253 |
if (block->hash_link) |
|
2254 |
{
|
|
2255 |
/*
|
|
2256 |
All pending requests for this page must be resubmitted.
|
|
2257 |
This must be done before waiting for readers. They could
|
|
2258 |
wait for the flush to complete. And we must also do it
|
|
2259 |
after the wait. Flushers might try to free the block while
|
|
2260 |
we wait. They would wait until the reassignment is
|
|
2261 |
complete. Also the block status must reflect the correct
|
|
2262 |
situation: The block is not changed nor in flush any more.
|
|
2263 |
Note that we must not change the BLOCK_CHANGED flag
|
|
2264 |
outside of link_to_file_list() so that it is always in the
|
|
2265 |
correct queue and the *blocks_changed counters are
|
|
2266 |
correct.
|
|
2267 |
*/
|
|
2268 |
block->status&= ~(BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE); |
|
2269 |
link_to_file_list(keycache, block, block->hash_link->file, 1); |
|
2270 |
release_whole_queue(&block->wqueue[COND_FOR_SAVED]); |
|
2271 |
/*
|
|
2272 |
The block is still assigned to its old hash_link.
|
|
2273 |
Wait until all pending read requests
|
|
2274 |
for this page are executed
|
|
2275 |
(we could have avoided this waiting, if we had read
|
|
2276 |
a page in the cache in a sweep, without yielding control)
|
|
2277 |
*/
|
|
2278 |
wait_for_readers(keycache, block); |
|
2279 |
DBUG_ASSERT(block->hash_link && block->hash_link->block == block && |
|
2280 |
block->prev_changed); |
|
2281 |
/* The reader must not have been a writer. */
|
|
2282 |
DBUG_ASSERT(!(block->status & BLOCK_CHANGED)); |
|
2283 |
||
2284 |
/* Wake flushers that might have found the block in between. */
|
|
2285 |
release_whole_queue(&block->wqueue[COND_FOR_SAVED]); |
|
2286 |
||
2287 |
/* Remove the hash link for the old file block from the hash. */
|
|
2288 |
unlink_hash(keycache, block->hash_link); |
|
2289 |
||
2290 |
/*
|
|
2291 |
For sanity checks link_to_file_list() asserts that block
|
|
2292 |
and hash_link refer to each other. Hence we need to assign
|
|
2293 |
the hash_link first, but then we would not know if it was
|
|
2294 |
linked before. Hence we would not know if to unlink it. So
|
|
2295 |
unlink it here and call link_to_file_list(..., FALSE).
|
|
2296 |
*/
|
|
2297 |
unlink_changed(block); |
|
2298 |
}
|
|
2299 |
block->status= error ? BLOCK_ERROR : BLOCK_IN_USE ; |
|
2300 |
block->length= 0; |
|
2301 |
block->offset= keycache->key_cache_block_size; |
|
2302 |
block->hash_link= hash_link; |
|
2303 |
link_to_file_list(keycache, block, file, 0); |
|
2304 |
page_status= PAGE_TO_BE_READ; |
|
2305 |
||
2306 |
KEYCACHE_DBUG_ASSERT(block->hash_link->block == block); |
|
2307 |
KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link); |
|
2308 |
}
|
|
2309 |
else
|
|
2310 |
{
|
|
2311 |
/*
|
|
2312 |
Either (block->hash_link == hash_link),
|
|
2313 |
or (block->status & BLOCK_IN_SWITCH).
|
|
2314 |
||
2315 |
This is for secondary requests for a new file block only.
|
|
2316 |
Either it is already assigned to the new hash_link meanwhile
|
|
2317 |
(if we had to wait due to empty LRU), or it is already in
|
|
2318 |
eviction by another thread. Since this block has been
|
|
2319 |
grabbed from the LRU ring and attached to this hash_link,
|
|
2320 |
another thread cannot grab the same block from the LRU ring
|
|
2321 |
anymore. If the block is in eviction already, it must become
|
|
2322 |
attached to the same hash_link and as such destined for the
|
|
2323 |
same file block.
|
|
2324 |
*/
|
|
2325 |
KEYCACHE_DBUG_PRINT("find_key_block", |
|
2326 |
("block->hash_link: %p hash_link: %p " |
|
2327 |
"block->status: %u", block->hash_link, |
|
2328 |
hash_link, block->status )); |
|
2329 |
page_status= (((block->hash_link == hash_link) && |
|
2330 |
(block->status & BLOCK_READ)) ? |
|
2331 |
PAGE_READ : PAGE_WAIT_TO_BE_READ); |
|
2332 |
}
|
|
2333 |
}
|
|
2334 |
}
|
|
2335 |
else
|
|
2336 |
{
|
|
2337 |
/*
|
|
2338 |
Block is not NULL. This hash_link points to a block.
|
|
2339 |
Either
|
|
2340 |
- block not assigned to this hash_link (yet) or
|
|
2341 |
- block assigned but not yet read from file,
|
|
2342 |
or
|
|
2343 |
- block assigned with valid (changed or unchanged) data and
|
|
2344 |
- it will not be reassigned/freed.
|
|
2345 |
||
2346 |
The first condition means hash_link points to a block in
|
|
2347 |
eviction. This is not necessarily marked by BLOCK_IN_SWITCH yet.
|
|
2348 |
But then it is marked BLOCK_IN_EVICTION. See the NOTE in
|
|
2349 |
link_block(). In both cases it is destined for this hash_link
|
|
2350 |
and its file block address. When this hash_link got its block
|
|
2351 |
address, the block was removed from the LRU ring and cannot be
|
|
2352 |
selected for eviction (for another hash_link) again.
|
|
2353 |
||
2354 |
Register a request on the block. This is another protection
|
|
2355 |
against eviction.
|
|
2356 |
*/
|
|
2357 |
DBUG_ASSERT(((block->hash_link != hash_link) && |
|
2358 |
(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) || |
|
2359 |
((block->hash_link == hash_link) && |
|
2360 |
!(block->status & BLOCK_READ)) || |
|
2361 |
((block->status & BLOCK_READ) && |
|
2362 |
!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)))); |
|
2363 |
reg_requests(keycache, block, 1); |
|
2364 |
KEYCACHE_DBUG_PRINT("find_key_block", |
|
2365 |
("block->hash_link: %p hash_link: %p " |
|
2366 |
"block->status: %u", block->hash_link, |
|
2367 |
hash_link, block->status )); |
|
2368 |
page_status= (((block->hash_link == hash_link) && |
|
2369 |
(block->status & BLOCK_READ)) ? |
|
2370 |
PAGE_READ : PAGE_WAIT_TO_BE_READ); |
|
2371 |
}
|
|
2372 |
}
|
|
2373 |
||
2374 |
KEYCACHE_DBUG_ASSERT(page_status != -1); |
|
2375 |
/* Same assert basically, but be very sure. */
|
|
2376 |
KEYCACHE_DBUG_ASSERT(block); |
|
2377 |
/* Assert that block has a request and is not in LRU ring. */
|
|
2378 |
DBUG_ASSERT(block->requests); |
|
2379 |
DBUG_ASSERT(!block->next_used); |
|
2380 |
DBUG_ASSERT(!block->prev_used); |
|
2381 |
/* Assert that we return the correct block. */
|
|
2382 |
DBUG_ASSERT((page_status == PAGE_WAIT_TO_BE_READ) || |
|
2383 |
((block->hash_link->file == file) && |
|
2384 |
(block->hash_link->diskpos == filepos))); |
|
2385 |
*page_st=page_status; |
|
2386 |
KEYCACHE_DBUG_PRINT("find_key_block", |
|
2387 |
("fd: %d pos: %lu block->status: %u page_status: %d", |
|
2388 |
file, (ulong) filepos, block->status, |
|
2389 |
page_status)); |
|
2390 |
||
2391 |
#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
|
|
2392 |
DBUG_EXECUTE("check_keycache2", |
|
2393 |
test_key_cache(keycache, "end of find_key_block",0);); |
|
2394 |
#endif
|
|
2395 |
KEYCACHE_THREAD_TRACE("find_key_block:end"); |
|
2396 |
DBUG_RETURN(block); |
|
2397 |
}
|
|
2398 |
||
2399 |
||
2400 |
/*
  Read into a key cache block buffer from disk.

  SYNOPSIS

    read_block()
      keycache            pointer to a key cache data structure
      block               block to which buffer the data is to be read
      read_length         size of data to be read
      min_length          at least so much data must be read
      primary             <-> the current thread will read the data

  RETURN VALUE
    None

  NOTES.
    The function either reads a page data from file to the block buffer,
    or waits until another thread reads it. What page to read is determined
    by a block parameter - reference to a hash link for this page.
    If an error occurs THE BLOCK_ERROR bit is set in the block status.
    We do not report error when the size of successfully read
    portion is less than read_length, but not less than min_length.

    Locking protocol: cache_lock must be held on entry and is held again on
    return. For a primary request the lock is temporarily released around
    the actual my_pread() so other threads can register as secondary
    readers; a secondary request simply waits on the block's
    COND_FOR_REQUESTED queue until the primary reader signals completion.
*/

static void read_block(KEY_CACHE *keycache,
                       BLOCK_LINK *block, uint read_length,
                       uint min_length, my_bool primary)
{
  size_t got_length;

  /* On entry cache_lock is locked */

  KEYCACHE_THREAD_TRACE("read_block");
  if (primary)
  {
    /*
      This code is executed only by threads that submitted primary
      requests. Until block->status contains BLOCK_READ, all other
      request for the block become secondary requests. For a primary
      request the block must be properly initialized.
    */
    DBUG_ASSERT(((block->status & ~BLOCK_FOR_UPDATE) == BLOCK_IN_USE) ||
                fail_block(block));
    DBUG_ASSERT((block->length == 0) || fail_block(block));
    DBUG_ASSERT((block->offset == keycache->key_cache_block_size) ||
                fail_block(block));
    DBUG_ASSERT((block->requests > 0) || fail_block(block));

    KEYCACHE_DBUG_PRINT("read_block",
                        ("page to be read by primary request"));

    keycache->global_cache_read++;
    /* Page is not in buffer yet, is to be read from disk */
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
    /*
      Here other threads may step in and register as secondary readers.
      They will register in block->wqueue[COND_FOR_REQUESTED].
    */
    got_length= my_pread(block->hash_link->file, block->buffer,
                         read_length, block->hash_link->diskpos, MYF(0));
    keycache_pthread_mutex_lock(&keycache->cache_lock);
    /*
      The block can now have been marked for free (in case of
      FLUSH_RELEASE). Otherwise the state must be unchanged.
    */
    DBUG_ASSERT(((block->status & ~(BLOCK_REASSIGNED |
                                    BLOCK_FOR_UPDATE)) == BLOCK_IN_USE) ||
                fail_block(block));
    DBUG_ASSERT((block->length == 0) || fail_block(block));
    DBUG_ASSERT((block->offset == keycache->key_cache_block_size) ||
                fail_block(block));
    DBUG_ASSERT((block->requests > 0) || fail_block(block));

    /*
      A short read (fewer bytes than min_length) marks the block as bad.
      NOTE(review): my_pread() with MYF(0) returns MY_FILE_ERROR
      ((size_t) -1) on a hard read error, which is NOT smaller than
      min_length and so would pass this check — verify how hard read
      errors are surfaced on this path.
    */
    if (got_length < min_length)
      block->status|= BLOCK_ERROR;
    else
    {
      block->status|= BLOCK_READ;
      block->length= got_length;
      /*
        Do not set block->offset here. If this block is marked
        BLOCK_CHANGED later, we want to flush only the modified part. So
        only a writer may set block->offset down from
        keycache->key_cache_block_size.
      */
    }
    KEYCACHE_DBUG_PRINT("read_block",
                        ("primary request: new page in cache"));
    /* Signal that all pending requests for this page now can be processed */
    release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
  }
  else
  {
    /*
      This code is executed only by threads that submitted secondary
      requests. At this point it could happen that the cache block is
      not yet assigned to the hash_link for the requested file block.
      But at awake from the wait this should be the case. Unfortunately
      we cannot assert this here because we do not know the hash_link
      for the requested file block nor the file and position. So we have
      to assert this in the caller.
    */
    KEYCACHE_DBUG_PRINT("read_block",
                        ("secondary request waiting for new page to be read"));
    wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
    KEYCACHE_DBUG_PRINT("read_block",
                        ("secondary request: new page in cache"));
  }
}
|
|
2509 |
||
2510 |
||
2511 |
/*
  Read a block of data from a cached file into a buffer;

  SYNOPSIS

    key_cache_read()
      keycache            pointer to a key cache data structure
      file                handler for the file for the block of data to be read
      filepos             position of the block of data in the file
      level               determines the weight of the data
      buff                buffer to where the data must be placed
      length              length of the buffer
      block_length        length of the block in the key cache buffer
      return_buffer       return pointer to the key cache buffer with the data

  RETURN VALUE
    Returns address from where the data is placed if sucessful, 0 - otherwise.

  NOTES.
    The function ensures that a block of data of size length from file
    positioned at filepos is in the buffers for some key cache blocks.
    Then the function either copies the data into the buffer buff, or,
    if return_buffer is TRUE, it just returns the pointer to the key cache
    buffer with the data.
    Filepos must be a multiple of 'block_length', but it doesn't
    have to be a multiple of key_cache_block_size;
*/

uchar *key_cache_read(KEY_CACHE *keycache,
                      File file, my_off_t filepos, int level,
                      uchar *buff, uint length,
                      uint block_length __attribute__((unused)),
                      int return_buffer __attribute__((unused)))
{
  my_bool locked_and_incremented= FALSE;
  int error=0;
  uchar *start= buff;            /* remember buffer start for the return value */
  DBUG_ENTER("key_cache_read");
  DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u",
                       (uint) file, (ulong) filepos, length));

  if (keycache->key_cache_inited)
  {
    /* Key cache is used */
    reg1 BLOCK_LINK *block;
    uint read_length;
    uint offset;
    int page_st;

    /*
      When the key cache is once initialized, we use the cache_lock to
      reliably distinguish the cases of normal operation, resizing, and
      disabled cache. We always increment and decrement
      'cnt_for_resize_op' so that a resizer can wait for pending I/O.
    */
    keycache_pthread_mutex_lock(&keycache->cache_lock);
    /*
      Cache resizing has two phases: Flushing and re-initializing. In
      the flush phase read requests are allowed to bypass the cache for
      blocks not in the cache. find_key_block() returns NULL in this
      case.

      After the flush phase new I/O requests must wait until the
      re-initialization is done. The re-initialization can be done only
      if no I/O request is in progress. The reason is that
      key_cache_block_size can change. With enabled cache, I/O is done
      in chunks of key_cache_block_size. Every chunk tries to use a
      cache block first. If the block size changes in the middle, a
      block could be missed and old data could be read.
    */
    while (keycache->in_resize && !keycache->resize_in_flush)
      wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
    /* Register the I/O for the next resize. */
    inc_counter_for_resize_op(keycache);
    locked_and_incremented= TRUE;
    /* Requested data may not always be aligned to cache blocks. */
    offset= (uint) (filepos % keycache->key_cache_block_size);
    /* Read data in key_cache_block_size increments */
    do
    {
      /* Cache could be disabled in a later iteration. */
      if (!keycache->can_be_used)
      {
        KEYCACHE_DBUG_PRINT("key_cache_read", ("keycache cannot be used"));
        goto no_key_cache;
      }
      /* Start reading at the beginning of the cache block. */
      filepos-= offset;
      /* Do not read beyond the end of the cache block. */
      read_length= length;
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
      KEYCACHE_DBUG_ASSERT(read_length > 0);

#ifndef THREAD
      /* Cannot hand out the raw cache buffer for partial-block reads. */
      if (block_length > keycache->key_cache_block_size || offset)
        return_buffer=0;
#endif

      /* Request the cache block that matches file/pos. */
      keycache->global_cache_r_requests++;
      block=find_key_block(keycache, file, filepos, level, 0, &page_st);
      if (!block)
      {
        /*
          This happens only for requests submitted during key cache
          resize. The block is not in the cache and shall not go in.
          Read directly from file.
        */
        keycache->global_cache_read++;
        keycache_pthread_mutex_unlock(&keycache->cache_lock);
        error= (my_pread(file, (uchar*) buff, read_length,
                         filepos + offset, MYF(MY_NABP)) != 0);
        keycache_pthread_mutex_lock(&keycache->cache_lock);
        goto next_block;
      }
      if (!(block->status & BLOCK_ERROR))
      {
        if (page_st != PAGE_READ)
        {
          /* The requested page is to be read into the block buffer */
          read_block(keycache, block,
                     keycache->key_cache_block_size, read_length+offset,
                     (my_bool)(page_st == PAGE_TO_BE_READ));
          /*
            A secondary request must now have the block assigned to the
            requested file block. It does not hurt to check it for
            primary requests too.
          */
          DBUG_ASSERT(keycache->can_be_used);
          DBUG_ASSERT(block->hash_link->file == file);
          DBUG_ASSERT(block->hash_link->diskpos == filepos);
          DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
        }
        else if (block->length < read_length + offset)
        {
          /*
            Impossible if nothing goes wrong:
            this could only happen if we are using a file with
            small key blocks and are trying to read outside the file
          */
          my_errno= -1;
          block->status|= BLOCK_ERROR;
        }
      }

      /* block status may have added BLOCK_ERROR in the above 'if'. */
      if (!(block->status & BLOCK_ERROR))
      {
#ifndef THREAD
        if (! return_buffer)
#endif
        {
          DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
#if !defined(SERIALIZED_READ_FROM_CACHE)
          /*
            Release the lock during the copy; our registered request on
            the block keeps it from being evicted meanwhile.
          */
          keycache_pthread_mutex_unlock(&keycache->cache_lock);
#endif

          /* Copy data from the cache buffer */
          if (!(read_length & 511))
            bmove512(buff, block->buffer+offset, read_length);
          else
            memcpy(buff, block->buffer+offset, (size_t) read_length);

#if !defined(SERIALIZED_READ_FROM_CACHE)
          keycache_pthread_mutex_lock(&keycache->cache_lock);
          DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
#endif
        }
      }

      remove_reader(block);

      /* Error injection for coverage testing. */
      DBUG_EXECUTE_IF("key_cache_read_block_error",
                      block->status|= BLOCK_ERROR;);

      /* Do not link erroneous blocks into the LRU ring, but free them. */
      if (!(block->status & BLOCK_ERROR))
      {
        /*
          Link the block into the LRU ring if it's the last submitted
          request for the block. This enables eviction for the block.
        */
        unreg_request(keycache, block, 1);
      }
      else
      {
        free_block(keycache, block);
        error= 1;
        break;
      }

#ifndef THREAD
      /* This is only true if we where able to read everything in one block */
      if (return_buffer)
        DBUG_RETURN(block->buffer);
#endif
    next_block:
      buff+= read_length;
      filepos+= read_length+offset;
      offset= 0;          /* only the first chunk can be unaligned */

    } while ((length-= read_length));
    goto end;
  }
  KEYCACHE_DBUG_PRINT("key_cache_read", ("keycache not initialized"));

no_key_cache:
  /* Key cache is not used */

  keycache->global_cache_r_requests++;
  keycache->global_cache_read++;

  if (locked_and_incremented)
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
  if (my_pread(file, (uchar*) buff, length, filepos, MYF(MY_NABP)))
    error= 1;
  if (locked_and_incremented)
    keycache_pthread_mutex_lock(&keycache->cache_lock);

end:
  if (locked_and_incremented)
  {
    dec_counter_for_resize_op(keycache);
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
  }
  DBUG_PRINT("exit", ("error: %d", error ));
  DBUG_RETURN(error ? (uchar*) 0 : start);
}
|
|
2740 |
||
2741 |
||
2742 |
/*
  Insert a block of file data from a buffer into key cache

  SYNOPSIS
    key_cache_insert()
    keycache            pointer to a key cache data structure
    file                handler for the file to insert data from
    filepos             position of the block of data in the file to insert
    level               determines the weight of the data
    buff                buffer to read data from
    length              length of the data in the buffer

  NOTES
    This is used by MyISAM to move all blocks from a index file to the key
    cache

  RETURN VALUE
    0 if a success, 1 - otherwise.
*/

int key_cache_insert(KEY_CACHE *keycache,
                     File file, my_off_t filepos, int level,
                     uchar *buff, uint length)
{
  int error= 0;
  DBUG_ENTER("key_cache_insert");
  DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u",
                       (uint) file,(ulong) filepos, length));

  if (keycache->key_cache_inited)
  {
    /* Key cache is used */
    reg1 BLOCK_LINK *block;
    uint read_length;
    uint offset;
    int page_st;
    my_bool locked_and_incremented= FALSE;

    /*
      When the keycache is once initialized, we use the cache_lock to
      reliably distinguish the cases of normal operation, resizing, and
      disabled cache. We always increment and decrement
      'cnt_for_resize_op' so that a resizer can wait for pending I/O.
    */
    keycache_pthread_mutex_lock(&keycache->cache_lock);
    /*
      We do not load index data into a disabled cache nor into an
      ongoing resize.
    */
    if (!keycache->can_be_used || keycache->in_resize)
      goto no_key_cache;
    /* Register the pseudo I/O for the next resize. */
    inc_counter_for_resize_op(keycache);
    locked_and_incremented= TRUE;
    /* Loaded data may not always be aligned to cache blocks. */
    offset= (uint) (filepos % keycache->key_cache_block_size);
    /* Load data in key_cache_block_size increments. */
    do
    {
      /* Cache could be disabled or resizing in a later iteration. */
      if (!keycache->can_be_used || keycache->in_resize)
        goto no_key_cache;
      /* Start loading at the beginning of the cache block. */
      filepos-= offset;
      /* Do not load beyond the end of the cache block. */
      read_length= length;
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
      KEYCACHE_DBUG_ASSERT(read_length > 0);

      /* The block has been read by the caller already. */
      keycache->global_cache_read++;
      /* Request the cache block that matches file/pos. */
      keycache->global_cache_r_requests++;
      block= find_key_block(keycache, file, filepos, level, 0, &page_st);
      if (!block)
      {
        /*
          This happens only for requests submitted during key cache
          resize. The block is not in the cache and shall not go in.
          Stop loading index data.
        */
        goto no_key_cache;
      }
      if (!(block->status & BLOCK_ERROR))
      {
        if ((page_st == PAGE_WAIT_TO_BE_READ) ||
            ((page_st == PAGE_TO_BE_READ) &&
             (offset || (read_length < keycache->key_cache_block_size))))
        {
          /*
            Either

            this is a secondary request for a block to be read into the
            cache. The block is in eviction. It is not yet assigned to
            the requested file block (It does not point to the right
            hash_link). So we cannot call remove_reader() on the block.
            And we cannot access the hash_link directly here. We need to
            wait until the assignment is complete. read_block() executes
            the correct wait when called with primary == FALSE.

            Or

            this is a primary request for a block to be read into the
            cache and the supplied data does not fill the whole block.

            This function is called on behalf of a LOAD INDEX INTO CACHE
            statement, which is a read-only task and allows other
            readers. It is possible that a parallel running reader tries
            to access this block. If it needs more data than has been
            supplied here, it would report an error. To be sure that we
            have all data in the block that is available in the file, we
            read the block ourselves.

            Though reading again what the caller did read already is an
            expensive operation, we need to do this for correctness.
          */
          read_block(keycache, block, keycache->key_cache_block_size,
                     read_length + offset, (page_st == PAGE_TO_BE_READ));
          /*
            A secondary request must now have the block assigned to the
            requested file block. It does not hurt to check it for
            primary requests too.
          */
          DBUG_ASSERT(keycache->can_be_used);
          DBUG_ASSERT(block->hash_link->file == file);
          DBUG_ASSERT(block->hash_link->diskpos == filepos);
          DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
        }
        else if (page_st == PAGE_TO_BE_READ)
        {
          /*
            This is a new block in the cache. If we come here, we have
            data for the whole block.
          */
          DBUG_ASSERT(block->hash_link->requests);
          DBUG_ASSERT(block->status & BLOCK_IN_USE);
          DBUG_ASSERT((page_st == PAGE_TO_BE_READ) ||
                      (block->status & BLOCK_READ));

#if !defined(SERIALIZED_READ_FROM_CACHE)
          keycache_pthread_mutex_unlock(&keycache->cache_lock);
          /*
            Here other threads may step in and register as secondary readers.
            They will register in block->wqueue[COND_FOR_REQUESTED].
          */
#endif

          /* Copy data from buff */
          if (!(read_length & 511))
            bmove512(block->buffer+offset, buff, read_length);
          else
            memcpy(block->buffer+offset, buff, (size_t) read_length);

#if !defined(SERIALIZED_READ_FROM_CACHE)
          keycache_pthread_mutex_lock(&keycache->cache_lock);
          DBUG_ASSERT(block->status & BLOCK_IN_USE);
          DBUG_ASSERT((page_st == PAGE_TO_BE_READ) ||
                      (block->status & BLOCK_READ));
#endif
          /*
            After the data is in the buffer, we can declare the block
            valid. Now other threads do not need to register as
            secondary readers any more. They can immediately access the
            block.
          */
          block->status|= BLOCK_READ;
          block->length= read_length+offset;
          /*
            Do not set block->offset here. If this block is marked
            BLOCK_CHANGED later, we want to flush only the modified part. So
            only a writer may set block->offset down from
            keycache->key_cache_block_size.
          */
          KEYCACHE_DBUG_PRINT("key_cache_insert",
                              ("primary request: new page in cache"));
          /* Signal all pending requests. */
          release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
        }
        else
        {
          /*
            page_st == PAGE_READ. The block is in the buffer. All data
            must already be present. Blocks are always read with all
            data available on file. Assert that the block does not have
            less contents than the preloader supplies. If the caller has
            data beyond block->length, it means that a file write has
            been done while this block was in cache and not extended
            with the new data. If the condition is met, we can simply
            ignore the block.
          */
          DBUG_ASSERT((page_st == PAGE_READ) &&
                      (read_length + offset <= block->length));
        }

        /*
          A secondary request must now have the block assigned to the
          requested file block. It does not hurt to check it for primary
          requests too.
        */
        DBUG_ASSERT(block->hash_link->file == file);
        DBUG_ASSERT(block->hash_link->diskpos == filepos);
        DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
      } /* end of if (!(block->status & BLOCK_ERROR)) */

      remove_reader(block);

      /* Error injection for coverage testing. */
      DBUG_EXECUTE_IF("key_cache_insert_block_error",
                      block->status|= BLOCK_ERROR; errno=EIO;);

      /* Do not link erroneous blocks into the LRU ring, but free them. */
      if (!(block->status & BLOCK_ERROR))
      {
        /*
          Link the block into the LRU ring if it's the last submitted
          request for the block. This enables eviction for the block.
        */
        unreg_request(keycache, block, 1);
      }
      else
      {
        free_block(keycache, block);
        error= 1;
        break;
      }

      buff+= read_length;
      filepos+= read_length+offset;
      offset= 0;          /* only the first chunk can be unaligned */

    } while ((length-= read_length));

  no_key_cache:
    if (locked_and_incremented)
      dec_counter_for_resize_op(keycache);
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
  }
  DBUG_RETURN(error);
}
|
|
2981 |
||
2982 |
||
2983 |
/*
|
|
2984 |
Write a buffer into a cached file.
|
|
2985 |
||
2986 |
SYNOPSIS
|
|
2987 |
||
2988 |
key_cache_write()
|
|
2989 |
keycache pointer to a key cache data structure
|
|
2990 |
file handler for the file to write data to
|
|
2991 |
filepos position in the file to write data to
|
|
2992 |
level determines the weight of the data
|
|
2993 |
buff buffer with the data
|
|
2994 |
length length of the buffer
|
|
2995 |
dont_write if is 0 then all dirty pages involved in writing
|
|
2996 |
should have been flushed from key cache
|
|
2997 |
||
2998 |
RETURN VALUE
|
|
2999 |
0 if a success, 1 - otherwise.
|
|
3000 |
||
3001 |
NOTES.
|
|
3002 |
The function copies the data of size length from buff into buffers
|
|
3003 |
for key cache blocks that are assigned to contain the portion of
|
|
3004 |
the file starting with position filepos.
|
|
3005 |
It ensures that this data is flushed to the file if dont_write is FALSE.
|
|
3006 |
Filepos must be a multiple of 'block_length', but it doesn't
|
|
3007 |
have to be a multiple of key_cache_block_size;
|
|
3008 |
||
3009 |
dont_write is always TRUE in the server (info->lock_type is never F_UNLCK).
|
|
3010 |
*/
|
|
3011 |
||
3012 |
int key_cache_write(KEY_CACHE *keycache, |
|
3013 |
File file, my_off_t filepos, int level, |
|
3014 |
uchar *buff, uint length, |
|
3015 |
uint block_length __attribute__((unused)), |
|
3016 |
int dont_write) |
|
3017 |
{
|
|
3018 |
my_bool locked_and_incremented= FALSE; |
|
3019 |
int error=0; |
|
3020 |
DBUG_ENTER("key_cache_write"); |
|
3021 |
DBUG_PRINT("enter", |
|
3022 |
("fd: %u pos: %lu length: %u block_length: %u" |
|
3023 |
" key_block_length: %u", |
|
3024 |
(uint) file, (ulong) filepos, length, block_length, |
|
3025 |
keycache ? keycache->key_cache_block_size : 0)); |
|
3026 |
||
3027 |
if (!dont_write) |
|
3028 |
{
|
|
3029 |
/* purecov: begin inspected */
|
|
3030 |
/* Not used in the server. */
|
|
3031 |
/* Force writing from buff into disk. */
|
|
3032 |
keycache->global_cache_w_requests++; |
|
3033 |
keycache->global_cache_write++; |
|
3034 |
if (my_pwrite(file, buff, length, filepos, MYF(MY_NABP | MY_WAIT_IF_FULL))) |
|
3035 |
DBUG_RETURN(1); |
|
3036 |
/* purecov: end */
|
|
3037 |
}
|
|
3038 |
||
3039 |
#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
|
|
3040 |
DBUG_EXECUTE("check_keycache", |
|
3041 |
test_key_cache(keycache, "start of key_cache_write", 1);); |
|
3042 |
#endif
|
|
3043 |
||
3044 |
if (keycache->key_cache_inited) |
|
3045 |
{
|
|
3046 |
/* Key cache is used */
|
|
3047 |
reg1 BLOCK_LINK *block; |
|
3048 |
uint read_length; |
|
3049 |
uint offset; |
|
3050 |
int page_st; |
|
3051 |
||
3052 |
/*
|
|
3053 |
When the key cache is once initialized, we use the cache_lock to
|
|
3054 |
reliably distinguish the cases of normal operation, resizing, and
|
|
3055 |
disabled cache. We always increment and decrement
|
|
3056 |
'cnt_for_resize_op' so that a resizer can wait for pending I/O.
|
|
3057 |
*/
|
|
3058 |
keycache_pthread_mutex_lock(&keycache->cache_lock); |
|
3059 |
/*
|
|
3060 |
Cache resizing has two phases: Flushing and re-initializing. In
|
|
3061 |
the flush phase write requests can modify dirty blocks that are
|
|
3062 |
not yet in flush. Otherwise they are allowed to bypass the cache.
|
|
3063 |
find_key_block() returns NULL in both cases (clean blocks and
|
|
3064 |
non-cached blocks).
|
|
3065 |
||
3066 |
After the flush phase new I/O requests must wait until the
|
|
3067 |
re-initialization is done. The re-initialization can be done only
|
|
3068 |
if no I/O request is in progress. The reason is that
|
|
3069 |
key_cache_block_size can change. With enabled cache I/O is done in
|
|
3070 |
chunks of key_cache_block_size. Every chunk tries to use a cache
|
|
3071 |
block first. If the block size changes in the middle, a block
|
|
3072 |
could be missed and data could be written below a cached block.
|
|
3073 |
*/
|
|
3074 |
while (keycache->in_resize && !keycache->resize_in_flush) |
|
3075 |
wait_on_queue(&keycache->resize_queue, &keycache->cache_lock); |
|
3076 |
/* Register the I/O for the next resize. */
|
|
3077 |
inc_counter_for_resize_op(keycache); |
|
3078 |
locked_and_incremented= TRUE; |
|
3079 |
/* Requested data may not always be aligned to cache blocks. */
|
|
3080 |
offset= (uint) (filepos % keycache->key_cache_block_size); |
|
3081 |
/* Write data in key_cache_block_size increments. */
|
|
3082 |
do
|
|
3083 |
{
|
|
3084 |
/* Cache could be disabled in a later iteration. */
|
|
3085 |
if (!keycache->can_be_used) |
|
3086 |
goto no_key_cache; |
|
3087 |
/* Start writing at the beginning of the cache block. */
|
|
3088 |
filepos-= offset; |
|
3089 |
/* Do not write beyond the end of the cache block. */
|
|
3090 |
read_length= length; |
|
3091 |
set_if_smaller(read_length, keycache->key_cache_block_size-offset); |
|
3092 |
KEYCACHE_DBUG_ASSERT(read_length > 0); |
|
3093 |
||
3094 |
/* Request the cache block that matches file/pos. */
|
|
3095 |
keycache->global_cache_w_requests++; |
|
3096 |
block= find_key_block(keycache, file, filepos, level, 1, &page_st); |
|
3097 |
if (!block) |
|
3098 |
{
|
|
3099 |
/*
|
|
3100 |
This happens only for requests submitted during key cache
|
|
3101 |
resize. The block is not in the cache and shall not go in.
|
|
3102 |
Write directly to file.
|
|
3103 |
*/
|
|
3104 |
if (dont_write) |
|
3105 |
{
|
|
3106 |
/* Used in the server. */
|
|
3107 |
keycache->global_cache_write++; |
|
3108 |
keycache_pthread_mutex_unlock(&keycache->cache_lock); |
|
3109 |
if (my_pwrite(file, (uchar*) buff, read_length, filepos + offset, |
|
3110 |
MYF(MY_NABP | MY_WAIT_IF_FULL))) |
|
3111 |
error=1; |
|
3112 |
keycache_pthread_mutex_lock(&keycache->cache_lock); |
|
3113 |
}
|
|
3114 |
goto next_block; |
|
3115 |
}
|
|
3116 |
/*
|
|
3117 |
Prevent block from flushing and from being selected for to be
|
|
3118 |
freed. This must be set when we release the cache_lock.
|
|
3119 |
However, we must not set the status of the block before it is
|
|
3120 |
assigned to this file/pos.
|
|
3121 |
*/
|
|
3122 |
if (page_st != PAGE_WAIT_TO_BE_READ) |
|
3123 |
block->status|= BLOCK_FOR_UPDATE; |
|
3124 |
/*
|
|
3125 |
We must read the file block first if it is not yet in the cache
|
|
3126 |
and we do not replace all of its contents.
|
|
3127 |
||
3128 |
In cases where the cache block is big enough to contain (parts
|
|
3129 |
of) index blocks of different indexes, our request can be
|
|
3130 |
secondary (PAGE_WAIT_TO_BE_READ). In this case another thread is
|
|
3131 |
reading the file block. If the read completes after us, it
|
|
3132 |
overwrites our new contents with the old contents. So we have to
|
|
3133 |
wait for the other thread to complete the read of this block.
|
|
3134 |
read_block() takes care for the wait.
|
|
3135 |
*/
|
|
3136 |
if (!(block->status & BLOCK_ERROR) && |
|
3137 |
((page_st == PAGE_TO_BE_READ && |
|
3138 |
(offset || read_length < keycache->key_cache_block_size)) || |
|
3139 |
(page_st == PAGE_WAIT_TO_BE_READ))) |
|
3140 |
{
|
|
3141 |
read_block(keycache, block, |
|
3142 |
offset + read_length >= keycache->key_cache_block_size? |
|
3143 |
offset : keycache->key_cache_block_size, |
|
3144 |
offset, (page_st == PAGE_TO_BE_READ)); |
|
3145 |
DBUG_ASSERT(keycache->can_be_used); |
|
3146 |
DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE)); |
|
3147 |
/*
|
|
3148 |
Prevent block from flushing and from being selected for to be
|
|
3149 |
freed. This must be set when we release the cache_lock.
|
|
3150 |
Here we set it in case we could not set it above.
|
|
3151 |
*/
|
|
3152 |
block->status|= BLOCK_FOR_UPDATE; |
|
3153 |
}
|
|
3154 |
/*
|
|
3155 |
The block should always be assigned to the requested file block
|
|
3156 |
here. It need not be BLOCK_READ when overwriting the whole block.
|
|
3157 |
*/
|
|
3158 |
DBUG_ASSERT(block->hash_link->file == file); |
|
3159 |
DBUG_ASSERT(block->hash_link->diskpos == filepos); |
|
3160 |
DBUG_ASSERT(block->status & BLOCK_IN_USE); |
|
3161 |
DBUG_ASSERT((page_st == PAGE_TO_BE_READ) || (block->status & BLOCK_READ)); |
|
3162 |
/*
|
|
3163 |
The block to be written must not be marked BLOCK_REASSIGNED.
|
|
3164 |
Otherwise it could be freed in dirty state or reused without
|
|
3165 |
another flush during eviction. It must also not be in flush.
|
|
3166 |
Otherwise the old contens may have been flushed already and
|
|
3167 |
the flusher could clear BLOCK_CHANGED without flushing the
|
|
3168 |
new changes again.
|
|
3169 |
*/
|
|
3170 |
DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED)); |
|
3171 |
||
3172 |
while (block->status & BLOCK_IN_FLUSHWRITE) |
|
3173 |
{
|
|
3174 |
/*
|
|
3175 |
Another thread is flushing the block. It was dirty already.
|
|
3176 |
Wait until the block is flushed to file. Otherwise we could
|
|
3177 |
modify the buffer contents just while it is written to file.
|
|
3178 |
An unpredictable file block contents would be the result.
|
|
3179 |
While we wait, several things can happen to the block,
|
|
3180 |
including another flush. But the block cannot be reassigned to
|
|
3181 |
another hash_link until we release our request on it.
|
|
3182 |
*/
|
|
3183 |
wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock); |
|
3184 |
DBUG_ASSERT(keycache->can_be_used); |
|
3185 |
DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE)); |
|
3186 |
/* Still must not be marked for free. */
|
|
3187 |
DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED)); |
|
3188 |
DBUG_ASSERT(block->hash_link && (block->hash_link->block == block)); |
|
3189 |
}
|
|
3190 |
||
3191 |
/*
|
|
3192 |
We could perhaps release the cache_lock during access of the
|
|
3193 |
data like in the other functions. Locks outside of the key cache
|
|
3194 |
assure that readers and a writer do not access the same range of
|
|
3195 |
data. Parallel accesses should happen only if the cache block
|
|
3196 |
contains multiple index block(fragment)s. So different parts of
|
|
3197 |
the buffer would be read/written. An attempt to flush during
|
|
3198 |
memcpy() is prevented with BLOCK_FOR_UPDATE.
|
|
3199 |
*/
|
|
3200 |
if (!(block->status & BLOCK_ERROR)) |
|
3201 |
{
|
|
3202 |
#if !defined(SERIALIZED_READ_FROM_CACHE)
|
|
3203 |
keycache_pthread_mutex_unlock(&keycache->cache_lock); |
|
3204 |
#endif
|
|
3205 |
if (!(read_length & 511)) |
|
3206 |
bmove512(block->buffer+offset, buff, read_length); |
|
3207 |
else
|
|
3208 |
memcpy(block->buffer+offset, buff, (size_t) read_length); |
|
3209 |
||
3210 |
#if !defined(SERIALIZED_READ_FROM_CACHE)
|
|
3211 |
keycache_pthread_mutex_lock(&keycache->cache_lock); |
|
3212 |
#endif
|
|
3213 |
}
|
|
3214 |
||
3215 |
if (!dont_write) |
|
3216 |
{
|
|
3217 |
/* Not used in the server. buff has been written to disk at start. */
|
|
3218 |
if ((block->status & BLOCK_CHANGED) && |
|
3219 |
(!offset && read_length >= keycache->key_cache_block_size)) |
|
3220 |
link_to_file_list(keycache, block, block->hash_link->file, 1); |
|
3221 |
}
|
|
3222 |
else if (! (block->status & BLOCK_CHANGED)) |
|
3223 |
link_to_changed_list(keycache, block); |
|
3224 |
block->status|=BLOCK_READ; |
|
3225 |
/*
|
|
3226 |
Allow block to be selected for to be freed. Since it is marked
|
|
3227 |
BLOCK_CHANGED too, it won't be selected for to be freed without
|
|
3228 |
a flush.
|
|
3229 |
*/
|
|
3230 |
block->status&= ~BLOCK_FOR_UPDATE; |
|
3231 |
set_if_smaller(block->offset, offset); |
|
3232 |
set_if_bigger(block->length, read_length+offset); |
|
3233 |
||
3234 |
/* Threads may be waiting for the changes to be complete. */
|
|
3235 |
release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]); |
|
3236 |
||
3237 |
/*
|
|
3238 |
If only a part of the cache block is to be replaced, and the
|
|
3239 |
rest has been read from file, then the cache lock has been
|
|
3240 |
released for I/O and it could be possible that another thread
|
|
3241 |
wants to evict or free the block and waits for it to be
|
|
3242 |
released. So we must not just decrement hash_link->requests, but
|
|
3243 |
also wake a waiting thread.
|
|
3244 |
*/
|
|
3245 |
remove_reader(block); |
|
3246 |
||
3247 |
/* Error injection for coverage testing. */
|
|
3248 |
DBUG_EXECUTE_IF("key_cache_write_block_error", |
|
3249 |
block->status|= BLOCK_ERROR;); |
|
3250 |
||
3251 |
/* Do not link erroneous blocks into the LRU ring, but free them. */
|
|
3252 |
if (!(block->status & BLOCK_ERROR)) |
|
3253 |
{
|
|
3254 |
/*
|
|
3255 |
Link the block into the LRU ring if it's the last submitted
|
|
3256 |
request for the block. This enables eviction for the block.
|
|
3257 |
*/
|
|
3258 |
unreg_request(keycache, block, 1); |
|
3259 |
}
|
|
3260 |
else
|
|
3261 |
{
|
|
3262 |
/* Pretend a "clean" block to avoid complications. */
|
|
3263 |
block->status&= ~(BLOCK_CHANGED); |
|
3264 |
free_block(keycache, block); |
|
3265 |
error= 1; |
|
3266 |
break; |
|
3267 |
}
|
|
3268 |
||
3269 |
next_block: |
|
3270 |
buff+= read_length; |
|
3271 |
filepos+= read_length+offset; |
|
3272 |
offset= 0; |
|
3273 |
||
3274 |
} while ((length-= read_length)); |
|
3275 |
goto end; |
|
3276 |
}
|
|
3277 |
||
3278 |
no_key_cache: |
|
3279 |
/* Key cache is not used */
|
|
3280 |
if (dont_write) |
|
3281 |
{
|
|
3282 |
/* Used in the server. */
|
|
3283 |
keycache->global_cache_w_requests++; |
|
3284 |
keycache->global_cache_write++; |
|
3285 |
if (locked_and_incremented) |
|
3286 |
keycache_pthread_mutex_unlock(&keycache->cache_lock); |
|
3287 |
if (my_pwrite(file, (uchar*) buff, length, filepos, |
|
3288 |
MYF(MY_NABP | MY_WAIT_IF_FULL))) |
|
3289 |
error=1; |
|
3290 |
if (locked_and_incremented) |
|
3291 |
keycache_pthread_mutex_lock(&keycache->cache_lock); |
|
3292 |
}
|
|
3293 |
||
3294 |
end: |
|
3295 |
if (locked_and_incremented) |
|
3296 |
{
|
|
3297 |
dec_counter_for_resize_op(keycache); |
|
3298 |
keycache_pthread_mutex_unlock(&keycache->cache_lock); |
|
3299 |
}
|
|
3300 |
#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
|
|
3301 |
DBUG_EXECUTE("exec", |
|
3302 |
test_key_cache(keycache, "end of key_cache_write", 1);); |
|
3303 |
#endif
|
|
3304 |
DBUG_RETURN(error); |
|
3305 |
}
|
|
3306 |
||
3307 |
||
3308 |
/*
|
|
3309 |
Free block.
|
|
3310 |
||
3311 |
SYNOPSIS
|
|
3312 |
free_block()
|
|
3313 |
keycache Pointer to a key cache data structure
|
|
3314 |
block Pointer to the block to free
|
|
3315 |
||
3316 |
DESCRIPTION
|
|
3317 |
Remove reference to block from hash table.
|
|
3318 |
Remove block from the chain of clean blocks.
|
|
3319 |
Add block to the free list.
|
|
3320 |
||
3321 |
NOTE
|
|
3322 |
Block must not be free (status == 0).
|
|
3323 |
Block must not be in free_block_list.
|
|
3324 |
Block must not be in the LRU ring.
|
|
3325 |
Block must not be in eviction (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH).
|
|
3326 |
Block must not be in free (BLOCK_REASSIGNED).
|
|
3327 |
Block must not be in flush (BLOCK_IN_FLUSH).
|
|
3328 |
Block must not be dirty (BLOCK_CHANGED).
|
|
3329 |
Block must not be in changed_blocks (dirty) hash.
|
|
3330 |
Block must be in file_blocks (clean) hash.
|
|
3331 |
Block must refer to a hash_link.
|
|
3332 |
Block must have a request registered on it.
|
|
3333 |
*/
|
|
3334 |
||
3335 |
static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block) |
|
3336 |
{
|
|
3337 |
KEYCACHE_THREAD_TRACE("free block"); |
|
3338 |
KEYCACHE_DBUG_PRINT("free_block", |
|
3339 |
("block %u to be freed, hash_link %p status: %u", |
|
3340 |
BLOCK_NUMBER(block), block->hash_link, |
|
3341 |
block->status)); |
|
3342 |
/*
|
|
3343 |
Assert that the block is not free already. And that it is in a clean
|
|
3344 |
state. Note that the block might just be assigned to a hash_link and
|
|
3345 |
not yet read (BLOCK_READ may not be set here). In this case a reader
|
|
3346 |
is registered in the hash_link and free_block() will wait for it
|
|
3347 |
below.
|
|
3348 |
*/
|
|
3349 |
DBUG_ASSERT((block->status & BLOCK_IN_USE) && |
|
3350 |
!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH | |
|
3351 |
BLOCK_REASSIGNED | BLOCK_IN_FLUSH | |
|
3352 |
BLOCK_CHANGED | BLOCK_FOR_UPDATE))); |
|
3353 |
/* Assert that the block is in a file_blocks chain. */
|
|
3354 |
DBUG_ASSERT(block->prev_changed && *block->prev_changed == block); |
|
3355 |
/* Assert that the block is not in the LRU ring. */
|
|
3356 |
DBUG_ASSERT(!block->next_used && !block->prev_used); |
|
3357 |
/*
|
|
3358 |
IMHO the below condition (if()) makes no sense. I can't see how it
|
|
3359 |
could be possible that free_block() is entered with a NULL hash_link
|
|
3360 |
pointer. The only place where it can become NULL is in free_block()
|
|
3361 |
(or before its first use ever, but for those blocks free_block() is
|
|
3362 |
not called). I don't remove the conditional as it cannot harm, but
|
|
3363 |
place an DBUG_ASSERT to confirm my hypothesis. Eventually the
|
|
3364 |
condition (if()) can be removed.
|
|
3365 |
*/
|
|
3366 |
DBUG_ASSERT(block->hash_link && block->hash_link->block == block); |
|
3367 |
if (block->hash_link) |
|
3368 |
{
|
|
3369 |
/*
|
|
3370 |
While waiting for readers to finish, new readers might request the
|
|
3371 |
block. But since we set block->status|= BLOCK_REASSIGNED, they
|
|
3372 |
will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled
|
|
3373 |
later.
|
|
3374 |
*/
|
|
3375 |
block->status|= BLOCK_REASSIGNED; |
|
3376 |
wait_for_readers(keycache, block); |
|
3377 |
/*
|
|
3378 |
The block must not have been freed by another thread. Repeat some
|
|
3379 |
checks. An additional requirement is that it must be read now
|
|
3380 |
(BLOCK_READ).
|
|
3381 |
*/
|
|
3382 |
DBUG_ASSERT(block->hash_link && block->hash_link->block == block); |
|
3383 |
DBUG_ASSERT((block->status & (BLOCK_READ | BLOCK_IN_USE | |
|
3384 |
BLOCK_REASSIGNED)) && |
|
3385 |
!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH | |
|
3386 |
BLOCK_IN_FLUSH | BLOCK_CHANGED | |
|
3387 |
BLOCK_FOR_UPDATE))); |
|
3388 |
DBUG_ASSERT(block->prev_changed && *block->prev_changed == block); |
|
3389 |
DBUG_ASSERT(!block->prev_used); |
|
3390 |
/*
|
|
3391 |
Unset BLOCK_REASSIGNED again. If we hand the block to an evicting
|
|
3392 |
thread (through unreg_request() below), other threads must not see
|
|
3393 |
this flag. They could become confused.
|
|
3394 |
*/
|
|
3395 |
block->status&= ~BLOCK_REASSIGNED; |
|
3396 |
/*
|
|
3397 |
Do not release the hash_link until the block is off all lists.
|
|
3398 |
At least not if we hand it over for eviction in unreg_request().
|
|
3399 |
*/
|
|
3400 |
}
|
|
3401 |
||
3402 |
/*
|
|
3403 |
Unregister the block request and link the block into the LRU ring.
|
|
3404 |
This enables eviction for the block. If the LRU ring was empty and
|
|
3405 |
threads are waiting for a block, then the block wil be handed over
|
|
3406 |
for eviction immediately. Otherwise we will unlink it from the LRU
|
|
3407 |
ring again, without releasing the lock in between. So decrementing
|
|
3408 |
the request counter and updating statistics are the only relevant
|
|
3409 |
operation in this case. Assert that there are no other requests
|
|
3410 |
registered.
|
|
3411 |
*/
|
|
3412 |
DBUG_ASSERT(block->requests == 1); |
|
3413 |
unreg_request(keycache, block, 0); |
|
3414 |
/*
|
|
3415 |
Note that even without releasing the cache lock it is possible that
|
|
3416 |
the block is immediately selected for eviction by link_block() and
|
|
3417 |
thus not added to the LRU ring. In this case we must not touch the
|
|
3418 |
block any more.
|
|
3419 |
*/
|
|
3420 |
if (block->status & BLOCK_IN_EVICTION) |
|
3421 |
return; |
|
3422 |
||
3423 |
/* Error blocks are not put into the LRU ring. */
|
|
3424 |
if (!(block->status & BLOCK_ERROR)) |
|
3425 |
{
|
|
3426 |
/* Here the block must be in the LRU ring. Unlink it again. */
|
|
3427 |
DBUG_ASSERT(block->next_used && block->prev_used && |
|
3428 |
*block->prev_used == block); |
|
3429 |
unlink_block(keycache, block); |
|
3430 |
}
|
|
3431 |
if (block->temperature == BLOCK_WARM) |
|
3432 |
keycache->warm_blocks--; |
|
3433 |
block->temperature= BLOCK_COLD; |
|
3434 |
||
3435 |
/* Remove from file_blocks hash. */
|
|
3436 |
unlink_changed(block); |
|
3437 |
||
3438 |
/* Remove reference to block from hash table. */
|
|
3439 |
unlink_hash(keycache, block->hash_link); |
|
3440 |
block->hash_link= NULL; |
|
3441 |
||
3442 |
block->status= 0; |
|
3443 |
block->length= 0; |
|
3444 |
block->offset= keycache->key_cache_block_size; |
|
3445 |
KEYCACHE_THREAD_TRACE("free block"); |
|
3446 |
KEYCACHE_DBUG_PRINT("free_block", ("block is freed")); |
|
3447 |
||
3448 |
/* Enforced by unlink_changed(), but just to be sure. */
|
|
3449 |
DBUG_ASSERT(!block->next_changed && !block->prev_changed); |
|
3450 |
/* Enforced by unlink_block(): not in LRU ring nor in free_block_list. */
|
|
3451 |
DBUG_ASSERT(!block->next_used && !block->prev_used); |
|
3452 |
/* Insert the free block in the free list. */
|
|
3453 |
block->next_used= keycache->free_block_list; |
|
3454 |
keycache->free_block_list= block; |
|
3455 |
/* Keep track of the number of currently unused blocks. */
|
|
3456 |
keycache->blocks_unused++; |
|
3457 |
||
3458 |
/* All pending requests for this page must be resubmitted. */
|
|
3459 |
release_whole_queue(&block->wqueue[COND_FOR_SAVED]); |
|
3460 |
}
|
|
3461 |
||
3462 |
||
3463 |
/*
  qsort comparator used by flush_cached_blocks(): orders blocks by their
  on-disk position (hash_link->diskpos) so the write burst proceeds with
  minimum seek movement.
*/
static int cmp_sec_link(BLOCK_LINK **a, BLOCK_LINK **b)
{
  return (((*a)->hash_link->diskpos < (*b)->hash_link->diskpos) ? -1 :
      ((*a)->hash_link->diskpos > (*b)->hash_link->diskpos) ? 1 : 0);
}
|
|
3468 |
||
3469 |
||
3470 |
/*
|
|
3471 |
Flush a portion of changed blocks to disk,
|
|
3472 |
free used blocks if requested
|
|
3473 |
*/
|
|
3474 |
||
3475 |
static int flush_cached_blocks(KEY_CACHE *keycache, |
|
3476 |
File file, BLOCK_LINK **cache, |
|
3477 |
BLOCK_LINK **end, |
|
3478 |
enum flush_type type) |
|
3479 |
{
|
|
3480 |
int error; |
|
3481 |
int last_errno= 0; |
|
3482 |
uint count= (uint) (end-cache); |
|
3483 |
||
3484 |
/* Don't lock the cache during the flush */
|
|
3485 |
keycache_pthread_mutex_unlock(&keycache->cache_lock); |
|
3486 |
/*
|
|
3487 |
As all blocks referred in 'cache' are marked by BLOCK_IN_FLUSH
|
|
3488 |
we are guarunteed no thread will change them
|
|
3489 |
*/
|
|
3490 |
my_qsort((uchar*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link); |
|
3491 |
||
3492 |
keycache_pthread_mutex_lock(&keycache->cache_lock); |
|
3493 |
/*
|
|
3494 |
Note: Do not break the loop. We have registered a request on every
|
|
3495 |
block in 'cache'. These must be unregistered by free_block() or
|
|
3496 |
unreg_request().
|
|
3497 |
*/
|
|
3498 |
for ( ; cache != end ; cache++) |
|
3499 |
{
|
|
3500 |
BLOCK_LINK *block= *cache; |
|
3501 |
||
3502 |
KEYCACHE_DBUG_PRINT("flush_cached_blocks", |
|
3503 |
("block %u to be flushed", BLOCK_NUMBER(block))); |
|
3504 |
/*
|
|
3505 |
If the block contents is going to be changed, we abandon the flush
|
|
3506 |
for this block. flush_key_blocks_int() will restart its search and
|
|
3507 |
handle the block properly.
|
|
3508 |
*/
|
|
3509 |
if (!(block->status & BLOCK_FOR_UPDATE)) |
|
3510 |
{
|
|
3511 |
/* Blocks coming here must have a certain status. */
|
|
3512 |
DBUG_ASSERT(block->hash_link); |
|
3513 |
DBUG_ASSERT(block->hash_link->block == block); |
|
3514 |
DBUG_ASSERT(block->hash_link->file == file); |
|
3515 |
DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) == |
|
3516 |
(BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE)); |
|
3517 |
block->status|= BLOCK_IN_FLUSHWRITE; |
|
3518 |
keycache_pthread_mutex_unlock(&keycache->cache_lock); |
|
3519 |
error= my_pwrite(file, block->buffer+block->offset, |
|
3520 |
block->length - block->offset, |
|
3521 |
block->hash_link->diskpos+ block->offset, |
|
3522 |
MYF(MY_NABP | MY_WAIT_IF_FULL)); |
|
3523 |
keycache_pthread_mutex_lock(&keycache->cache_lock); |
|
3524 |
keycache->global_cache_write++; |
|
3525 |
if (error) |
|
3526 |
{
|
|
3527 |
block->status|= BLOCK_ERROR; |
|
3528 |
if (!last_errno) |
|
3529 |
last_errno= errno ? errno : -1; |
|
3530 |
}
|
|
3531 |
block->status&= ~BLOCK_IN_FLUSHWRITE; |
|
3532 |
/* Block must not have changed status except BLOCK_FOR_UPDATE. */
|
|
3533 |
DBUG_ASSERT(block->hash_link); |
|
3534 |
DBUG_ASSERT(block->hash_link->block == block); |
|
3535 |
DBUG_ASSERT(block->hash_link->file == file); |
|
3536 |
DBUG_ASSERT((block->status & ~(BLOCK_FOR_UPDATE | BLOCK_IN_EVICTION)) == |
|
3537 |
(BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE)); |
|
3538 |
/*
|
|
3539 |
Set correct status and link in right queue for free or later use.
|
|
3540 |
free_block() must not see BLOCK_CHANGED and it may need to wait
|
|
3541 |
for readers of the block. These should not see the block in the
|
|
3542 |
wrong hash. If not freeing the block, we need to have it in the
|
|
3543 |
right queue anyway.
|
|
3544 |
*/
|
|
3545 |
link_to_file_list(keycache, block, file, 1); |
|
3546 |
}
|
|
3547 |
block->status&= ~BLOCK_IN_FLUSH; |
|
3548 |
/*
|
|
3549 |
Let to proceed for possible waiting requests to write to the block page.
|
|
3550 |
It might happen only during an operation to resize the key cache.
|
|
3551 |
*/
|
|
3552 |
release_whole_queue(&block->wqueue[COND_FOR_SAVED]); |
|
3553 |
/* type will never be FLUSH_IGNORE_CHANGED here */
|
|
3554 |
if (!(type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE) && |
|
3555 |
!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH | |
|
3556 |
BLOCK_FOR_UPDATE))) |
|
3557 |
{
|
|
3558 |
/*
|
|
3559 |
Note that a request has been registered against the block in
|
|
3560 |
flush_key_blocks_int().
|
|
3561 |
*/
|
|
3562 |
free_block(keycache, block); |
|
3563 |
}
|
|
3564 |
else
|
|
3565 |
{
|
|
3566 |
/*
|
|
3567 |
Link the block into the LRU ring if it's the last submitted
|
|
3568 |
request for the block. This enables eviction for the block.
|
|
3569 |
Note that a request has been registered against the block in
|
|
3570 |
flush_key_blocks_int().
|
|
3571 |
*/
|
|
3572 |
unreg_request(keycache, block, 1); |
|
3573 |
}
|
|
3574 |
||
3575 |
} /* end of for ( ; cache != end ; cache++) */ |
|
3576 |
return last_errno; |
|
3577 |
}
|
|
3578 |
||
3579 |
||
3580 |
/*
|
|
3581 |
Flush all key blocks for a file to disk, but don't do any mutex locks.
|
|
3582 |
||
3583 |
SYNOPSIS
|
|
3584 |
flush_key_blocks_int()
|
|
3585 |
keycache pointer to a key cache data structure
|
|
3586 |
file handler for the file to flush to
|
|
3587 |
flush_type type of the flush
|
|
3588 |
||
3589 |
NOTES
|
|
3590 |
This function doesn't do any mutex locks because it needs to be called both
|
|
3591 |
from flush_key_blocks and flush_all_key_blocks (the later one does the
|
|
3592 |
mutex lock in the resize_key_cache() function).
|
|
3593 |
||
3594 |
We do only care about changed blocks that exist when the function is
|
|
3595 |
entered. We do not guarantee that all changed blocks of the file are
|
|
3596 |
flushed if more blocks change while this function is running.
|
|
3597 |
||
3598 |
RETURN
|
|
3599 |
0 ok
|
|
3600 |
1 error
|
|
3601 |
*/
|
|
3602 |
||
3603 |
static int flush_key_blocks_int(KEY_CACHE *keycache, |
|
3604 |
File file, enum flush_type type) |
|
3605 |
{
|
|
3606 |
BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache; |
|
3607 |
int last_errno= 0; |
|
3608 |
int last_errcnt= 0; |
|
3609 |
DBUG_ENTER("flush_key_blocks_int"); |
|
3610 |
DBUG_PRINT("enter",("file: %d blocks_used: %lu blocks_changed: %lu", |
|
3611 |
file, keycache->blocks_used, keycache->blocks_changed)); |
|
3612 |
||
3613 |
#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
|
|
3614 |
DBUG_EXECUTE("check_keycache", |
|
3615 |
test_key_cache(keycache, "start of flush_key_blocks", 0);); |
|
3616 |
#endif
|
|
3617 |
||
3618 |
cache= cache_buff; |
|
3619 |
if (keycache->disk_blocks > 0 && |
|
3620 |
(!my_disable_flush_key_blocks || type != FLUSH_KEEP)) |
|
3621 |
{
|
|
3622 |
/* Key cache exists and flush is not disabled */
|
|
3623 |
int error= 0; |
|
3624 |
uint count= FLUSH_CACHE; |
|
3625 |
BLOCK_LINK **pos,**end; |
|
3626 |
BLOCK_LINK *first_in_switch= NULL; |
|
3627 |
BLOCK_LINK *last_in_flush; |
|
3628 |
BLOCK_LINK *last_for_update; |
|
3629 |
BLOCK_LINK *block, *next; |
|
3630 |
#if defined(KEYCACHE_DEBUG)
|
|
3631 |
uint cnt=0; |
|
3632 |
#endif
|
|
3633 |
||
3634 |
if (type != FLUSH_IGNORE_CHANGED) |
|
3635 |
{
|
|
3636 |
/*
|
|
3637 |
Count how many key blocks we have to cache to be able
|
|
3638 |
to flush all dirty pages with minimum seek moves
|
|
3639 |
*/
|
|
3640 |
count= 0; |
|
3641 |
for (block= keycache->changed_blocks[FILE_HASH(file)] ; |
|
3642 |
block ; |
|
3643 |
block= block->next_changed) |
|
3644 |
{
|
|
3645 |
if ((block->hash_link->file == file) && |
|
3646 |
!(block->status & BLOCK_IN_FLUSH)) |
|
3647 |
{
|
|
3648 |
count++; |
|
3649 |
KEYCACHE_DBUG_ASSERT(count<= keycache->blocks_used); |
|
3650 |
}
|
|
3651 |
}
|
|
3652 |
/*
|
|
3653 |
Allocate a new buffer only if its bigger than the one we have.
|
|
3654 |
Assure that we always have some entries for the case that new
|
|
3655 |
changed blocks appear while we need to wait for something.
|
|
3656 |
*/
|
|
3657 |
if ((count > FLUSH_CACHE) && |
|
3658 |
!(cache= (BLOCK_LINK**) my_malloc(sizeof(BLOCK_LINK*)*count, |
|
3659 |
MYF(0)))) |
|
3660 |
cache= cache_buff; |
|
3661 |
/*
|
|
3662 |
After a restart there could be more changed blocks than now.
|
|
3663 |
So we should not let count become smaller than the fixed buffer.
|
|
3664 |
*/
|
|
3665 |
if (cache == cache_buff) |
|
3666 |
count= FLUSH_CACHE; |
|
3667 |
}
|
|
3668 |
||
3669 |
/* Retrieve the blocks and write them to a buffer to be flushed */
|
|
3670 |
restart: |
|
3671 |
last_in_flush= NULL; |
|
3672 |
last_for_update= NULL; |
|
3673 |
end= (pos= cache)+count; |
|
3674 |
for (block= keycache->changed_blocks[FILE_HASH(file)] ; |
|
3675 |
block ; |
|
3676 |
block= next) |
|
3677 |
{
|
|
3678 |
#if defined(KEYCACHE_DEBUG)
|
|
3679 |
cnt++; |
|
3680 |
KEYCACHE_DBUG_ASSERT(cnt <= keycache->blocks_used); |
|
3681 |
#endif
|
|
3682 |
next= block->next_changed; |
|
3683 |
if (block->hash_link->file == file) |
|
3684 |
{
|
|
3685 |
if (!(block->status & (BLOCK_IN_FLUSH | BLOCK_FOR_UPDATE))) |
|
3686 |
{
|
|
3687 |
/*
|
|
3688 |
Note: The special handling of BLOCK_IN_SWITCH is obsolete
|
|
3689 |
since we set BLOCK_IN_FLUSH if the eviction includes a
|
|
3690 |
flush. It can be removed in a later version.
|
|
3691 |
*/
|
|
3692 |
if (!(block->status & BLOCK_IN_SWITCH)) |
|
3693 |
{
|
|
3694 |
/*
|
|
3695 |
We care only for the blocks for which flushing was not
|
|
3696 |
initiated by another thread and which are not in eviction.
|
|
3697 |
Registering a request on the block unlinks it from the LRU
|
|
3698 |
ring and protects against eviction.
|
|
3699 |
*/
|
|
3700 |
reg_requests(keycache, block, 1); |
|
3701 |
if (type != FLUSH_IGNORE_CHANGED) |
|
3702 |
{
|
|
3703 |
/* It's not a temporary file */
|
|
3704 |
if (pos == end) |
|
3705 |
{
|
|
3706 |
/*
|
|
3707 |
This should happen relatively seldom. Remove the
|
|
3708 |
request because we won't do anything with the block
|
|
3709 |
but restart and pick it again in the next iteration.
|
|
3710 |
*/
|
|
3711 |
unreg_request(keycache, block, 0); |
|
3712 |
/*
|
|
3713 |
This happens only if there is not enough
|
|
3714 |
memory for the big block
|
|
3715 |
*/
|
|
3716 |
if ((error= flush_cached_blocks(keycache, file, cache, |
|
3717 |
end,type))) |
|
3718 |
{
|
|
3719 |
/* Do not loop infinitely trying to flush in vain. */
|
|
3720 |
if ((last_errno == error) && (++last_errcnt > 5)) |
|
3721 |
goto err; |
|
3722 |
last_errno= error; |
|
3723 |
}
|
|
3724 |
/*
|
|
3725 |
Restart the scan as some other thread might have changed
|
|
3726 |
the changed blocks chain: the blocks that were in switch
|
|
3727 |
state before the flush started have to be excluded
|
|
3728 |
*/
|
|
3729 |
goto restart; |
|
3730 |
}
|
|
3731 |
/*
|
|
3732 |
Mark the block with BLOCK_IN_FLUSH in order not to let
|
|
3733 |
other threads to use it for new pages and interfere with
|
|
3734 |
our sequence of flushing dirty file pages. We must not
|
|
3735 |
set this flag before actually putting the block on the
|
|
3736 |
write burst array called 'cache'.
|
|
3737 |
*/
|
|
3738 |
block->status|= BLOCK_IN_FLUSH; |
|
3739 |
/* Add block to the array for a write burst. */
|
|
3740 |
*pos++= block; |
|
3741 |
}
|
|
3742 |
else
|
|
3743 |
{
|
|
3744 |
/* It's a temporary file */
|
|
3745 |
DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED)); |
|
3746 |
/*
|
|
3747 |
free_block() must not be called with BLOCK_CHANGED. Note
|
|
3748 |
that we must not change the BLOCK_CHANGED flag outside of
|
|
3749 |
link_to_file_list() so that it is always in the correct
|
|
3750 |
queue and the *blocks_changed counters are correct.
|
|
3751 |
*/
|
|
3752 |
link_to_file_list(keycache, block, file, 1); |
|
3753 |
if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) |
|
3754 |
{
|
|
3755 |
/* A request has been registered against the block above. */
|
|
3756 |
free_block(keycache, block); |
|
3757 |
}
|
|
3758 |
else
|
|
3759 |
{
|
|
3760 |
/*
|
|
3761 |
Link the block into the LRU ring if it's the last
|
|
3762 |
submitted request for the block. This enables eviction
|
|
3763 |
for the block. A request has been registered against
|
|
3764 |
the block above.
|
|
3765 |
*/
|
|
3766 |
unreg_request(keycache, block, 1); |
|
3767 |
}
|
|
3768 |
}
|
|
3769 |
}
|
|
3770 |
else
|
|
3771 |
{
|
|
3772 |
/*
|
|
3773 |
Link the block into a list of blocks 'in switch'.
|
|
3774 |
||
3775 |
WARNING: Here we introduce a place where a changed block
|
|
3776 |
is not in the changed_blocks hash! This is acceptable for
|
|
3777 |
a BLOCK_IN_SWITCH. Never try this for another situation.
|
|
3778 |
Other parts of the key cache code rely on changed blocks
|
|
3779 |
being in the changed_blocks hash.
|
|
3780 |
*/
|
|
3781 |
unlink_changed(block); |
|
3782 |
link_changed(block, &first_in_switch); |
|
3783 |
}
|
|
3784 |
}
|
|
3785 |
else if (type != FLUSH_KEEP) |
|
3786 |
{
|
|
3787 |
/*
|
|
3788 |
During the normal flush at end of statement (FLUSH_KEEP) we
|
|
3789 |
do not need to ensure that blocks in flush or update by
|
|
3790 |
other threads are flushed. They will be flushed by them
|
|
3791 |
later. In all other cases we must assure that we do not have
|
|
3792 |
any changed block of this file in the cache when this
|
|
3793 |
function returns.
|
|
3794 |
*/
|
|
3795 |
if (block->status & BLOCK_IN_FLUSH) |
|
3796 |
{
|
|
3797 |
/* Remember the last block found to be in flush. */
|
|
3798 |
last_in_flush= block; |
|
3799 |
}
|
|
3800 |
else
|
|
3801 |
{
|
|
3802 |
/* Remember the last block found to be selected for update. */
|
|
3803 |
last_for_update= block; |
|
3804 |
}
|
|
3805 |
}
|
|
3806 |
}
|
|
3807 |
}
|
|
3808 |
if (pos != cache) |
|
3809 |
{
|
|
3810 |
if ((error= flush_cached_blocks(keycache, file, cache, pos, type))) |
|
3811 |
{
|
|
3812 |
/* Do not loop inifnitely trying to flush in vain. */
|
|
3813 |
if ((last_errno == error) && (++last_errcnt > 5)) |
|
3814 |
goto err; |
|
3815 |
last_errno= error; |
|
3816 |
}
|
|
3817 |
/*
|
|
3818 |
Do not restart here during the normal flush at end of statement
|
|
3819 |
(FLUSH_KEEP). We have now flushed at least all blocks that were
|
|
3820 |
changed when entering this function. In all other cases we must
|
|
3821 |
assure that we do not have any changed block of this file in the
|
|
3822 |
cache when this function returns.
|
|
3823 |
*/
|
|
3824 |
if (type != FLUSH_KEEP) |
|
3825 |
goto restart; |
|
3826 |
}
|
|
3827 |
if (last_in_flush) |
|
3828 |
{
|
|
3829 |
/*
|
|
3830 |
There are no blocks to be flushed by this thread, but blocks in
|
|
3831 |
flush by other threads. Wait until one of the blocks is flushed.
|
|
3832 |
Re-check the condition for last_in_flush. We may have unlocked
|
|
3833 |
the cache_lock in flush_cached_blocks(). The state of the block
|
|
3834 |
could have changed.
|
|
3835 |
*/
|
|
3836 |
if (last_in_flush->status & BLOCK_IN_FLUSH) |
|
3837 |
wait_on_queue(&last_in_flush->wqueue[COND_FOR_SAVED], |
|
3838 |
&keycache->cache_lock); |
|
3839 |
/* Be sure not to lose a block. They may be flushed in random order. */
|
|
3840 |
goto restart; |
|
3841 |
}
|
|
3842 |
if (last_for_update) |
|
3843 |
{
|
|
3844 |
/*
|
|
3845 |
There are no blocks to be flushed by this thread, but blocks for
|
|
3846 |
update by other threads. Wait until one of the blocks is updated.
|
|
3847 |
Re-check the condition for last_for_update. We may have unlocked
|
|
3848 |
the cache_lock in flush_cached_blocks(). The state of the block
|
|
3849 |
could have changed.
|
|
3850 |
*/
|
|
3851 |
if (last_for_update->status & BLOCK_FOR_UPDATE) |
|
3852 |
wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED], |
|
3853 |
&keycache->cache_lock); |
|
3854 |
/* The block is now changed. Flush it. */
|
|
3855 |
goto restart; |
|
3856 |
}
|
|
3857 |
||
3858 |
/*
|
|
3859 |
Wait until the list of blocks in switch is empty. The threads that
|
|
3860 |
are switching these blocks will relink them to clean file chains
|
|
3861 |
while we wait and thus empty the 'first_in_switch' chain.
|
|
3862 |
*/
|
|
3863 |
while (first_in_switch) |
|
3864 |
{
|
|
3865 |
#if defined(KEYCACHE_DEBUG)
|
|
3866 |
cnt= 0; |
|
3867 |
#endif
|
|
3868 |
wait_on_queue(&first_in_switch->wqueue[COND_FOR_SAVED], |
|
3869 |
&keycache->cache_lock); |
|
3870 |
#if defined(KEYCACHE_DEBUG)
|
|
3871 |
cnt++; |
|
3872 |
KEYCACHE_DBUG_ASSERT(cnt <= keycache->blocks_used); |
|
3873 |
#endif
|
|
3874 |
/*
|
|
3875 |
Do not restart here. We have flushed all blocks that were
|
|
3876 |
changed when entering this function and were not marked for
|
|
3877 |
eviction. Other threads have now flushed all remaining blocks in
|
|
3878 |
the course of their eviction.
|
|
3879 |
*/
|
|
3880 |
}
|
|
3881 |
||
3882 |
if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) |
|
3883 |
{
|
|
3884 |
BLOCK_LINK *last_for_update= NULL; |
|
3885 |
BLOCK_LINK *last_in_switch= NULL; |
|
3886 |
uint total_found= 0; |
|
3887 |
uint found; |
|
3888 |
||
3889 |
/*
|
|
3890 |
Finally free all clean blocks for this file.
|
|
3891 |
During resize this may be run by two threads in parallel.
|
|
3892 |
*/
|
|
3893 |
do
|
|
3894 |
{
|
|
3895 |
found= 0; |
|
3896 |
for (block= keycache->file_blocks[FILE_HASH(file)] ; |
|
3897 |
block ; |
|
3898 |
block= next) |
|
3899 |
{
|
|
3900 |
/* Remember the next block. After freeing we cannot get at it. */
|
|
3901 |
next= block->next_changed; |
|
3902 |
||
3903 |
/* Changed blocks cannot appear in the file_blocks hash. */
|
|
3904 |
DBUG_ASSERT(!(block->status & BLOCK_CHANGED)); |
|
3905 |
if (block->hash_link->file == file) |
|
3906 |
{
|
|
3907 |
/* We must skip blocks that will be changed. */
|
|
3908 |
if (block->status & BLOCK_FOR_UPDATE) |
|
3909 |
{
|
|
3910 |
last_for_update= block; |
|
3911 |
continue; |
|
3912 |
}
|
|
3913 |
||
3914 |
/*
|
|
3915 |
We must not free blocks in eviction (BLOCK_IN_EVICTION |
|
|
3916 |
BLOCK_IN_SWITCH) or blocks intended to be freed
|
|
3917 |
(BLOCK_REASSIGNED).
|
|
3918 |
*/
|
|
3919 |
if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH | |
|
3920 |
BLOCK_REASSIGNED))) |
|
3921 |
{
|
|
1.3.4
by Norbert Tretkowski
Import upstream version 5.1.54 |
3922 |
struct st_hash_link *UNINIT_VAR(next_hash_link); |
3923 |
my_off_t UNINIT_VAR(next_diskpos); |
|
3924 |
File UNINIT_VAR(next_file); |
|
3925 |
uint UNINIT_VAR(next_status); |
|
3926 |
uint UNINIT_VAR(hash_requests); |
|
1
by Norbert Tretkowski
Import upstream version 5.1.45 |
3927 |
|
3928 |
total_found++; |
|
3929 |
found++; |
|
3930 |
KEYCACHE_DBUG_ASSERT(found <= keycache->blocks_used); |
|
3931 |
||
3932 |
/*
|
|
3933 |
Register a request. This unlinks the block from the LRU
|
|
3934 |
ring and protects it against eviction. This is required
|
|
3935 |
by free_block().
|
|
3936 |
*/
|
|
3937 |
reg_requests(keycache, block, 1); |
|
3938 |
||
3939 |
/*
|
|
3940 |
free_block() may need to wait for readers of the block.
|
|
3941 |
This is the moment where the other thread can move the
|
|
3942 |
'next' block from the chain. free_block() needs to wait
|
|
3943 |
if there are requests for the block pending.
|
|
3944 |
*/
|
|
3945 |
if (next && (hash_requests= block->hash_link->requests)) |
|
3946 |
{
|
|
3947 |
/* Copy values from the 'next' block and its hash_link. */
|
|
3948 |
next_status= next->status; |
|
3949 |
next_hash_link= next->hash_link; |
|
3950 |
next_diskpos= next_hash_link->diskpos; |
|
3951 |
next_file= next_hash_link->file; |
|
3952 |
DBUG_ASSERT(next == next_hash_link->block); |
|
3953 |
}
|
|
3954 |
||
3955 |
free_block(keycache, block); |
|
3956 |
/*
|
|
3957 |
If we had to wait and the state of the 'next' block
|
|
3958 |
changed, break the inner loop. 'next' may no longer be
|
|
3959 |
part of the current chain.
|
|
3960 |
||
3961 |
We do not want to break the loop after every free_block(),
|
|
3962 |
not even only after waits. The chain might be quite long
|
|
3963 |
and contain blocks for many files. Traversing it again and
|
|
3964 |
again to find more blocks for this file could become quite
|
|
3965 |
inefficient.
|
|
3966 |
*/
|
|
3967 |
if (next && hash_requests && |
|
3968 |
((next_status != next->status) || |
|
3969 |
(next_hash_link != next->hash_link) || |
|
3970 |
(next_file != next_hash_link->file) || |
|
3971 |
(next_diskpos != next_hash_link->diskpos) || |
|
3972 |
(next != next_hash_link->block))) |
|
3973 |
break; |
|
3974 |
}
|
|
3975 |
else
|
|
3976 |
{
|
|
3977 |
last_in_switch= block; |
|
3978 |
}
|
|
3979 |
}
|
|
3980 |
} /* end for block in file_blocks */ |
|
3981 |
} while (found); |
|
3982 |
||
3983 |
/*
|
|
3984 |
If any clean block has been found, we may have waited for it to
|
|
3985 |
become free. In this case it could be possible that another clean
|
|
3986 |
block became dirty. This is possible if the write request existed
|
|
3987 |
before the flush started (BLOCK_FOR_UPDATE). Re-check the hashes.
|
|
3988 |
*/
|
|
3989 |
if (total_found) |
|
3990 |
goto restart; |
|
3991 |
||
3992 |
/*
|
|
3993 |
To avoid an infinite loop, wait until one of the blocks marked
|
|
3994 |
for update is updated.
|
|
3995 |
*/
|
|
3996 |
if (last_for_update) |
|
3997 |
{
|
|
3998 |
/* We did not wait. Block must not have changed status. */
|
|
3999 |
DBUG_ASSERT(last_for_update->status & BLOCK_FOR_UPDATE); |
|
4000 |
wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED], |
|
4001 |
&keycache->cache_lock); |
|
4002 |
goto restart; |
|
4003 |
}
|
|
4004 |
||
4005 |
/*
|
|
4006 |
To avoid an infinite loop wait until one of the blocks marked
|
|
4007 |
for eviction is switched.
|
|
4008 |
*/
|
|
4009 |
if (last_in_switch) |
|
4010 |
{
|
|
4011 |
/* We did not wait. Block must not have changed status. */
|
|
4012 |
DBUG_ASSERT(last_in_switch->status & (BLOCK_IN_EVICTION | |
|
4013 |
BLOCK_IN_SWITCH | |
|
4014 |
BLOCK_REASSIGNED)); |
|
4015 |
wait_on_queue(&last_in_switch->wqueue[COND_FOR_SAVED], |
|
4016 |
&keycache->cache_lock); |
|
4017 |
goto restart; |
|
4018 |
}
|
|
4019 |
||
4020 |
} /* if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) */ |
|
4021 |
||
4022 |
} /* if (keycache->disk_blocks > 0 */ |
|
4023 |
||
4024 |
#ifndef DBUG_OFF
|
|
4025 |
DBUG_EXECUTE("check_keycache", |
|
4026 |
test_key_cache(keycache, "end of flush_key_blocks", 0);); |
|
4027 |
#endif
|
|
4028 |
err: |
|
4029 |
if (cache != cache_buff) |
|
4030 |
my_free((uchar*) cache, MYF(0)); |
|
4031 |
if (last_errno) |
|
4032 |
errno=last_errno; /* Return first error */ |
|
4033 |
DBUG_RETURN(last_errno != 0); |
|
4034 |
}
|
|
4035 |
||
4036 |
||
4037 |
/*
|
|
4038 |
Flush all blocks for a file to disk
|
|
4039 |
||
4040 |
SYNOPSIS
|
|
4041 |
||
4042 |
flush_key_blocks()
|
|
4043 |
keycache pointer to a key cache data structure
|
|
4044 |
file handler for the file to flush to
|
|
4045 |
flush_type type of the flush
|
|
4046 |
||
4047 |
RETURN
|
|
4048 |
0 ok
|
|
4049 |
1 error
|
|
4050 |
*/
|
|
4051 |
||
4052 |
int flush_key_blocks(KEY_CACHE *keycache, |
|
4053 |
File file, enum flush_type type) |
|
4054 |
{
|
|
4055 |
int res= 0; |
|
4056 |
DBUG_ENTER("flush_key_blocks"); |
|
4057 |
DBUG_PRINT("enter", ("keycache: 0x%lx", (long) keycache)); |
|
4058 |
||
4059 |
if (!keycache->key_cache_inited) |
|
4060 |
DBUG_RETURN(0); |
|
4061 |
||
4062 |
keycache_pthread_mutex_lock(&keycache->cache_lock); |
|
4063 |
/* While waiting for lock, keycache could have been ended. */
|
|
4064 |
if (keycache->disk_blocks > 0) |
|
4065 |
{
|
|
4066 |
inc_counter_for_resize_op(keycache); |
|
4067 |
res= flush_key_blocks_int(keycache, file, type); |
|
4068 |
dec_counter_for_resize_op(keycache); |
|
4069 |
}
|
|
4070 |
keycache_pthread_mutex_unlock(&keycache->cache_lock); |
|
4071 |
DBUG_RETURN(res); |
|
4072 |
}
|
|
4073 |
||
4074 |
||
4075 |
/*
|
|
4076 |
Flush all blocks in the key cache to disk.
|
|
4077 |
||
4078 |
SYNOPSIS
|
|
4079 |
flush_all_key_blocks()
|
|
4080 |
keycache pointer to key cache root structure
|
|
4081 |
||
4082 |
DESCRIPTION
|
|
4083 |
||
4084 |
Flushing of the whole key cache is done in two phases.
|
|
4085 |
||
4086 |
1. Flush all changed blocks, waiting for them if necessary. Loop
|
|
4087 |
until there is no changed block left in the cache.
|
|
4088 |
||
4089 |
2. Free all clean blocks. Normally this means free all blocks. The
|
|
4090 |
changed blocks were flushed in phase 1 and became clean. However we
|
|
4091 |
may need to wait for blocks that are read by other threads. While we
|
|
4092 |
wait, a clean block could become changed if that operation started
|
|
4093 |
before the resize operation started. To be safe we must restart at
|
|
4094 |
phase 1.
|
|
4095 |
||
4096 |
When we can run through the changed_blocks and file_blocks hashes
|
|
4097 |
without finding a block any more, then we are done.
|
|
4098 |
||
4099 |
Note that we hold keycache->cache_lock all the time unless we need
|
|
4100 |
to wait for something.
|
|
4101 |
||
4102 |
RETURN
|
|
4103 |
0 OK
|
|
4104 |
!= 0 Error
|
|
4105 |
*/
|
|
4106 |
||
4107 |
/*
  Flush every block of the key cache to disk and free it.

  Runs in two phases. Phase 1 writes out each dirty block (waiting for
  blocks that are busy) and repeats until no dirty block remains.
  Phase 2 frees all clean blocks; while waiting for readers there, a
  clean block may become dirty again (if its write request predates the
  resize), so the whole two-phase cycle restarts until a full pass
  finds nothing at all.

  keycache->cache_lock is held throughout except while waiting.

  RETURN
    0     OK
    != 0  Error
*/

static int flush_all_key_blocks(KEY_CACHE *keycache)
{
  BLOCK_LINK *block;
  uint blocks_freed;   /* clean blocks released during phase 2 */
  uint sweep_found;    /* non-empty buckets seen in one hash sweep */
  uint i;
  DBUG_ENTER("flush_all_key_blocks");

  for (;;)
  {
    safe_mutex_assert_owner(&keycache->cache_lock);
    blocks_freed= 0;

    /*
      Phase 1: Write out all dirty blocks, waiting for them where
      necessary. Sweep the changed_blocks hash until one complete pass
      finds no dirty block left.
    */
    for (;;)
    {
      sweep_found= 0;
      for (i= 0; i < CHANGED_BLOCKS_HASH; i++)
      {
        /*
          A non-empty bucket yields one file to flush per pass; all of
          that file's dirty blocks are flushed in one go, so the same
          block does not reappear here on the next pass. New writes are
          not accepted during the flush. If several files share the
          bucket, one of them is handled per pass of the outer loop.
        */
        if ((block= keycache->changed_blocks[i]))
        {
          sweep_found++;
          /*
            Write the dirty blocks but do not free them yet; they stay
            usable for reading until all other blocks are flushed too.
          */
          if (flush_key_blocks_int(keycache, block->hash_link->file,
                                   FLUSH_FORCE_WRITE))
            DBUG_RETURN(1);
        }
      }
      if (!sweep_found)
        break;
    }

    /*
      Phase 2: Free all clean blocks. After phase 1 this is normally
      every block. We may have to wait for blocks read by other
      threads; see the restart rule below.
    */
    for (;;)
    {
      sweep_found= 0;
      for (i= 0; i < CHANGED_BLOCKS_HASH; i++)
      {
        /*
          As in phase 1: each non-empty bucket selects one file per
          pass, and every block of that file is freed at once.
        */
        if ((block= keycache->file_blocks[i]))
        {
          blocks_freed++;
          sweep_found++;
          if (flush_key_blocks_int(keycache, block->hash_link->file,
                                   FLUSH_RELEASE))
            DBUG_RETURN(1);
        }
      }
      if (!sweep_found)
        break;
    }

    /*
      If phase 2 released anything, we may have waited for a block to
      become free, and a formerly clean block could have become dirty
      meanwhile (write request existed before the resize started,
      BLOCK_FOR_UPDATE). Re-run both phases until nothing is found.
    */
    if (!blocks_freed)
      break;
  }

#ifndef DBUG_OFF
  /* Both hashes must be completely empty now. */
  for (i= 0; i < CHANGED_BLOCKS_HASH; i++)
  {
    DBUG_ASSERT(!keycache->changed_blocks[i]);
    DBUG_ASSERT(!keycache->file_blocks[i]);
  }
#endif

  DBUG_RETURN(0);
}
|
|
4207 |
||
4208 |
||
4209 |
/*
|
|
4210 |
Reset the counters of a key cache.
|
|
4211 |
||
4212 |
SYNOPSIS
|
|
4213 |
reset_key_cache_counters()
|
|
4214 |
name the name of a key cache
|
|
4215 |
key_cache pointer to the key kache to be reset
|
|
4216 |
||
4217 |
DESCRIPTION
|
|
4218 |
This procedure is used by process_key_caches() to reset the counters of all
|
|
4219 |
currently used key caches, both the default one and the named ones.
|
|
4220 |
||
4221 |
RETURN
|
|
4222 |
0 on success (always because it can't fail)
|
|
4223 |
*/
|
|
4224 |
||
4225 |
int reset_key_cache_counters(const char *name __attribute__((unused)), |
|
4226 |
KEY_CACHE *key_cache) |
|
4227 |
{
|
|
4228 |
DBUG_ENTER("reset_key_cache_counters"); |
|
4229 |
if (!key_cache->key_cache_inited) |
|
4230 |
{
|
|
4231 |
DBUG_PRINT("info", ("Key cache %s not initialized.", name)); |
|
4232 |
DBUG_RETURN(0); |
|
4233 |
}
|
|
4234 |
DBUG_PRINT("info", ("Resetting counters for key cache %s.", name)); |
|
4235 |
||
4236 |
key_cache->global_blocks_changed= 0; /* Key_blocks_not_flushed */ |
|
4237 |
key_cache->global_cache_r_requests= 0; /* Key_read_requests */ |
|
4238 |
key_cache->global_cache_read= 0; /* Key_reads */ |
|
4239 |
key_cache->global_cache_w_requests= 0; /* Key_write_requests */ |
|
4240 |
key_cache->global_cache_write= 0; /* Key_writes */ |
|
4241 |
DBUG_RETURN(0); |
|
4242 |
}
|
|
4243 |
||
4244 |
||
4245 |
#ifndef DBUG_OFF
|
|
4246 |
/*
|
|
4247 |
Test if disk-cache is ok
|
|
4248 |
*/
|
|
4249 |
static void test_key_cache(KEY_CACHE *keycache __attribute__((unused)),
                           const char *where __attribute__((unused)),
                           my_bool lock __attribute__((unused)))
{
  /*
    Consistency check of the key cache, invoked from
    DBUG_EXECUTE("check_keycache", ...) sites. Not implemented; the
    signature is kept so those debug call sites stay compilable.
  */
  /* TODO */
}
|
|
4255 |
#endif
|
|
4256 |
||
4257 |
#if defined(KEYCACHE_TIMEOUT)
|
|
4258 |
||
4259 |
#define KEYCACHE_DUMP_FILE "keycache_dump.txt"
|
|
4260 |
#define MAX_QUEUE_LEN 100
|
|
4261 |
||
4262 |
||
4263 |
/*
  Dump the waiting-thread queues, all block states and the LRU chain of
  a key cache to KEYCACHE_DUMP_FILE. Debugging aid, compiled only with
  KEYCACHE_TIMEOUT; used when a keycache condition wait times out.
*/
static void keycache_dump(KEY_CACHE *keycache)
{
  FILE *keycache_dump_file= fopen(KEYCACHE_DUMP_FILE, "w");
  struct st_my_thread_var *last;
  struct st_my_thread_var *thread;
  BLOCK_LINK *block;
  HASH_LINK *hash_link;
  KEYCACHE_PAGE *page;
  uint i;

  /*
    Bug fix: fopen() can fail; writing through a NULL FILE* is
    undefined behavior.
  */
  if (!keycache_dump_file)
    return;

  /*
    Bug fix: a stray "fprintf(..., thread->id)" stood here while
    'thread' was still uninitialized (undefined behavior); removed.
  */

  i= 0;
  /*
    NOTE(review): 'waiting_for_hash_link' and 'waiting_for_block' are
    referenced as bare names; presumably they should be members of
    *keycache — confirm before enabling KEYCACHE_TIMEOUT builds.
  */
  thread= last= waiting_for_hash_link.last_thread;
  fprintf(keycache_dump_file, "queue of threads waiting for hash link\n");
  if (thread)
    do
    {
      thread= thread->next;
      page= (KEYCACHE_PAGE *) thread->opt_info;
      fprintf(keycache_dump_file,
              "thread:%u, (file,filepos)=(%u,%lu)\n",
              thread->id,(uint) page->file,(ulong) page->filepos);
      if (++i == MAX_QUEUE_LEN)
        break;
    }
    while (thread != last);

  i= 0;
  thread= last= waiting_for_block.last_thread;
  fprintf(keycache_dump_file, "queue of threads waiting for block\n");
  if (thread)
    do
    {
      thread= thread->next;
      hash_link= (HASH_LINK *) thread->opt_info;
      fprintf(keycache_dump_file,
              "thread:%u hash_link:%u (file,filepos)=(%u,%lu)\n",
              thread->id, (uint) HASH_LINK_NUMBER(hash_link),
              (uint) hash_link->file,(ulong) hash_link->diskpos);
      if (++i == MAX_QUEUE_LEN)
        break;
    }
    while (thread != last);

  /* Per-block state plus both wait queues of each block. */
  for (i=0 ; i< keycache->blocks_used ; i++)
  {
    int j;
    uint qlen;                        /* per-queue length guard */
    block= &keycache->block_root[i];
    hash_link= block->hash_link;
    fprintf(keycache_dump_file,
            "block:%u hash_link:%d status:%x #requests=%u waiting_for_readers:%d\n",
            i, (int) (hash_link ? HASH_LINK_NUMBER(hash_link) : -1),
            block->status, block->requests, block->condvar ? 1 : 0);
    for (j=0 ; j < 2; j++)
    {
      KEYCACHE_WQUEUE *wqueue=&block->wqueue[j];
      thread= last= wqueue->last_thread;
      fprintf(keycache_dump_file, "queue #%d\n", j);
      qlen= 0;
      if (thread)
      {
        do
        {
          thread=thread->next;
          fprintf(keycache_dump_file,
                  "thread:%u\n", thread->id);
          /*
            Bug fix: the original incremented the outer block index
            'i' here, corrupting the block loop above; use a counter
            local to this queue instead.
          */
          if (++qlen == MAX_QUEUE_LEN)
            break;
        }
        while (thread != last);
      }
    }
  }
  fprintf(keycache_dump_file, "LRU chain:");
  /*
    Bug fix: was "block= keycache= used_last;", which clobbered the
    keycache pointer and referenced an undeclared name. The loop below
    terminates on keycache->used_last, so that is the correct start.
  */
  block= keycache->used_last;
  if (block)
  {
    do
    {
      block= block->next_used;
      fprintf(keycache_dump_file,
              "block:%u, ", BLOCK_NUMBER(block));
    }
    while (block != keycache->used_last);
  }
  fprintf(keycache_dump_file, "\n");

  fclose(keycache_dump_file);
}
|
|
4352 |
||
4353 |
#endif /* defined(KEYCACHE_TIMEOUT) */ |
|
4354 |
||
4355 |
#if defined(KEYCACHE_TIMEOUT) && !defined(__WIN__)
|
|
4356 |
||
4357 |
||
4358 |
/*
  Wait on a condition variable with a KEYCACHE_TIMEOUT-second deadline
  instead of blocking forever, so that stuck keycache waits can be
  diagnosed. On timeout the cache state is dumped (or, in
  KEYCACHE_DEBUG builds, the process aborts). Returns the result of
  pthread_cond_timedwait().
*/
static int keycache_pthread_cond_wait(pthread_cond_t *cond,
                                      pthread_mutex_t *mutex)
{
  int rc;
  struct timeval now;             /* time when we started waiting        */
  struct timespec timeout;        /* timeout value for the wait function */
  struct timezone tz;
#if defined(KEYCACHE_DEBUG)
  int cnt=0;  /* NOTE(review): local and reset to 0 on every call, so
                 the "cnt % 100" trace below can never fire — looks
                 like it was meant to be static; confirm. */
#endif

  /* Get current time */
  gettimeofday(&now, &tz);
  /*
    Prepare timeout value: pthread_cond_timedwait() takes an absolute
    deadline, here KEYCACHE_TIMEOUT seconds from now.
  */
  timeout.tv_sec= now.tv_sec + KEYCACHE_TIMEOUT;
  /*
    timeval carries microseconds, timespec carries nanoseconds;
    1 microsecond = 1000 nanoseconds.
  */
  timeout.tv_nsec= now.tv_usec * 1000;
  KEYCACHE_THREAD_TRACE_END("started waiting");
#if defined(KEYCACHE_DEBUG)
  cnt++;
  if (cnt % 100 == 0)
    fprintf(keycache_debug_log, "waiting...\n");
    fflush(keycache_debug_log);
#endif
  rc= pthread_cond_timedwait(cond, mutex, &timeout);
  KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
  if (rc == ETIMEDOUT || rc == ETIME)
  {
#if defined(KEYCACHE_DEBUG)
    fprintf(keycache_debug_log,"aborted by keycache timeout\n");
    fclose(keycache_debug_log);
    abort();
#endif
    /*
      NOTE(review): keycache_dump() is defined taking a KEY_CACHE*,
      but no argument is passed here — this debug-only path looks
      bit-rotted; confirm before enabling KEYCACHE_TIMEOUT.
    */
    keycache_dump();
  }

  /* A timed-out wait is considered a fatal condition in debug runs. */
#if defined(KEYCACHE_DEBUG)
  KEYCACHE_DBUG_ASSERT(rc != ETIMEDOUT);
#else
  assert(rc != ETIMEDOUT);
#endif
  return rc;
}
|
|
4405 |
#else
|
|
4406 |
#if defined(KEYCACHE_DEBUG)
|
|
4407 |
/*
  Plain pthread_cond_wait() wrapper that records the wait in the
  keycache debug trace. Returns the pthread_cond_wait() result code.
*/
static int keycache_pthread_cond_wait(pthread_cond_t *cond,
                                      pthread_mutex_t *mutex)
{
  int res;
  KEYCACHE_THREAD_TRACE_END("started waiting");
  res= pthread_cond_wait(cond, mutex);
  KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
  return res;
}
|
|
4416 |
#endif
|
|
4417 |
#endif /* defined(KEYCACHE_TIMEOUT) && !defined(__WIN__) */ |
|
4418 |
||
4419 |
#if defined(KEYCACHE_DEBUG)
|
|
4420 |
||
4421 |
||
4422 |
/*
  pthread_mutex_lock() wrapper that starts a debug trace section once
  the mutex is held. Returns the pthread_mutex_lock() result code.
*/
static int keycache_pthread_mutex_lock(pthread_mutex_t *mutex)
{
  int ret;
  ret= pthread_mutex_lock(mutex);
  /* Trace only after the lock is acquired. */
  KEYCACHE_THREAD_TRACE_BEGIN("");
  return ret;
}
|
|
4429 |
||
4430 |
||
4431 |
/*
  pthread_mutex_unlock() wrapper: ends the debug trace section while
  the mutex is still held, then releases it.
*/
static void keycache_pthread_mutex_unlock(pthread_mutex_t *mutex)
{
  KEYCACHE_THREAD_TRACE_END("");
  pthread_mutex_unlock(mutex);
}
|
|
4436 |
||
4437 |
||
4438 |
/*
  pthread_cond_signal() wrapper that records the signal in the keycache
  debug trace. Returns the pthread_cond_signal() result code.
*/
static int keycache_pthread_cond_signal(pthread_cond_t *cond)
{
  KEYCACHE_THREAD_TRACE("signal");
  return pthread_cond_signal(cond);
}
|
|
4445 |
||
4446 |
||
4447 |
#if defined(KEYCACHE_DEBUG_LOG)
|
|
4448 |
||
4449 |
||
4450 |
static void keycache_debug_print(const char * fmt,...) |
|
4451 |
{
|
|
4452 |
va_list args; |
|
4453 |
va_start(args,fmt); |
|
4454 |
if (keycache_debug_log) |
|
4455 |
{
|
|
4456 |
(void) vfprintf(keycache_debug_log, fmt, args); |
|
4457 |
(void) fputc('\n',keycache_debug_log); |
|
4458 |
}
|
|
4459 |
va_end(args); |
|
4460 |
}
|
|
4461 |
#endif /* defined(KEYCACHE_DEBUG_LOG) */ |
|
4462 |
||
4463 |
#if defined(KEYCACHE_DEBUG_LOG)
|
|
4464 |
||
4465 |
||
4466 |
/*
  Close the keycache debug log if it was opened. Safe to call when
  the log was never opened (keycache_debug_log is NULL).
*/
void keycache_debug_log_close(void)
{
  if (keycache_debug_log)
    fclose(keycache_debug_log);
}
|
|
4471 |
#endif /* defined(KEYCACHE_DEBUG_LOG) */ |
|
4472 |
||
4473 |
#endif /* defined(KEYCACHE_DEBUG) */ |
|
4474 |
||
4475 |
#if !defined(DBUG_OFF)
|
|
4476 |
#define F_B_PRT(_f_, _v_) DBUG_PRINT("assert_fail", (_f_, _v_))
|
|
4477 |
||
4478 |
/*
  Dump every field of a block link through DBUG ("assert_fail" tag)
  and return 0, so that the enclosing DBUG_ASSERT(... || fail_block())
  still fails after printing the diagnostics.
*/
static int fail_block(BLOCK_LINK *block)
{
  F_B_PRT("block->next_used: %lx\n", (ulong) block->next_used);
  F_B_PRT("block->prev_used: %lx\n", (ulong) block->prev_used);
  F_B_PRT("block->next_changed: %lx\n", (ulong) block->next_changed);
  F_B_PRT("block->prev_changed: %lx\n", (ulong) block->prev_changed);
  F_B_PRT("block->hash_link: %lx\n", (ulong) block->hash_link);
  F_B_PRT("block->status: %u\n", block->status);
  F_B_PRT("block->length: %u\n", block->length);
  F_B_PRT("block->offset: %u\n", block->offset);
  F_B_PRT("block->requests: %u\n", block->requests);
  F_B_PRT("block->temperature: %u\n", block->temperature);
  return 0; /* Let the assert fail. */
}
|
|
4492 |
||
4493 |
/*
  Dump every field of a hash link through DBUG ("assert_fail" tag)
  and return 0, so that the enclosing DBUG_ASSERT(... || fail_hlink())
  still fails after printing the diagnostics.
*/
static int fail_hlink(HASH_LINK *hlink)
{
  F_B_PRT("hlink->next: %lx\n", (ulong) hlink->next);
  F_B_PRT("hlink->prev: %lx\n", (ulong) hlink->prev);
  F_B_PRT("hlink->block: %lx\n", (ulong) hlink->block);
  F_B_PRT("hlink->diskpos: %lu\n", (ulong) hlink->diskpos);
  F_B_PRT("hlink->file: %d\n", hlink->file);
  return 0; /* Let the assert fail. */
}
|
|
4502 |
||
4503 |
/*
  Debug check that the key cache holds no active blocks or hash links.

  Scans every block and every hash link of the cache and reports any
  that are still in use to stderr, letting fail_block()/fail_hlink()
  print the offending object's fields.

  RETURN
    1  cache is empty, or not initialized (disk_blocks <= 0)
    0  at least one block or hash link is still occupied
*/
static int cache_empty(KEY_CACHE *keycache)
{
  int errcnt= 0;
  int idx;
  if (keycache->disk_blocks <= 0)
    return 1;
  for (idx= 0; idx < keycache->disk_blocks; idx++)
  {
    BLOCK_LINK *block= keycache->block_root + idx;
    if (block->status || block->requests || block->hash_link)
    {
      /* Bug fix: idx is a signed int; "%u" was a format mismatch. */
      fprintf(stderr, "block index: %d\n", idx);
      fail_block(block);
      errcnt++;
    }
  }
  for (idx= 0; idx < keycache->hash_links; idx++)
  {
    HASH_LINK *hash_link= keycache->hash_link_root + idx;
    if (hash_link->requests || hash_link->block)
    {
      /* Bug fix: idx is a signed int; "%u" was a format mismatch. */
      fprintf(stderr, "hash_link index: %d\n", idx);
      fail_hlink(hash_link);
      errcnt++;
    }
  }
  if (errcnt)
  {
    /* Summary of cache occupancy for the failure report. */
    fprintf(stderr, "blocks: %d used: %lu\n",
            keycache->disk_blocks, keycache->blocks_used);
    fprintf(stderr, "hash_links: %d used: %d\n",
            keycache->hash_links, keycache->hash_links_used);
    fprintf(stderr, "\n");
  }
  return !errcnt;
}
|
|
4539 |
#endif
|
|
4540 |