1
/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
3
This program is free software; you can redistribute it and/or modify
4
it under the terms of the GNU General Public License as published by
5
the Free Software Foundation; version 2 of the License.
7
This program is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU General Public License for more details.
12
You should have received a copy of the GNU General Public License
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
16
#include "maria_def.h"
17
#ifdef HAVE_SYS_MMAN_H
20
#include "ma_blockrec.h"
22
/* Forward declaration of the file-local helper that is defined further down
   in this file and called from maria_extra() for the
   HA_EXTRA_CHANGE_KEY_TO_UNIQUE / HA_EXTRA_CHANGE_KEY_TO_DUP requests. */
static void maria_extra_keyflag(MARIA_HA *info,
23
enum ha_extra_function function);
26
@brief Set options and buffers to optimize table handling
28
@param name table's name
29
@param info open table
30
@param function operation
31
@param extra_arg Pointer to extra argument (normally pointer to
32
ulong); used when function is one of:
36
@return Operation status
41
/*
  Applies one ha_extra_function request to an open table handle.
  The visible body is a sequence of case labels keyed on `function`; each one
  adjusts flags, record caches or shared state reachable from `info` and
  `info->s`.
  NOTE(review): this copy of the file appears to have lost structural lines
  (braces, the switch header, some case labels, local declarations and the
  final return), so the comments below describe only the visible statements.
*/
int maria_extra(MARIA_HA *info, enum ha_extra_function function,
46
MARIA_SHARE *share= info->s;
47
/* Remember whether rows are stored in BLOCK_RECORD format */
my_bool block_records= share->data_file_type == BLOCK_RECORD;
48
DBUG_ENTER("maria_extra");
49
DBUG_PRINT("enter",("function: %d",(int) function));
52
case HA_EXTRA_RESET_STATE: /* Reset state (don't free buffers) */
53
info->lastinx= 0; /* Use first index as def */
54
info->last_search_keypage= info->cur_row.lastpos= HA_OFFSET_ERROR;
55
info->page_changed= 1;
56
/* Next/prev gives first/last */
57
/* If a read cache is active, re-initialise it (the third argument 0 is
   presumably the restart offset - confirm against the IO_CACHE API) */
if (info->opt_flag & READ_CACHE_USED)
59
reinit_io_cache(&info->rec_cache,READ_CACHE,0,
60
(pbool) (info->lock_type != F_UNLCK),
61
(pbool) test(info->update & HA_STATE_ROW_CHANGED)
64
info->update= ((info->update & HA_STATE_CHANGED) | HA_STATE_NEXT_FOUND |
69
break; /* Not supported */
71
/* Refuse (error= 1) when the table is unlocked and uses packed records */
if (info->lock_type == F_UNLCK &&
72
(share->options & HA_OPTION_PACK_RECORD))
74
error= 1; /* Not possible if not locked */
78
if (info->s->file_map) /* Don't use cache if mmap */
80
#if defined(HAVE_MMAP) && defined(HAVE_MADVISE)
81
/* Compressed-record tables: try to memory-map the data file while holding
   intern_lock */
if ((share->options & HA_OPTION_COMPRESS_RECORD))
83
pthread_mutex_lock(&share->intern_lock);
84
if (_ma_memmap_file(info))
86
/* We don't need MADV_SEQUENTIAL if small file */
/* Files of at most RECORD_CACHE_SIZE*16 bytes get MADV_RANDOM, larger ones
   get MADV_SEQUENTIAL */
87
madvise((char*) share->file_map, share->state.state.data_file_length,
88
share->state.state.data_file_length <= RECORD_CACHE_SIZE*16 ?
89
MADV_RANDOM : MADV_SEQUENTIAL);
90
pthread_mutex_unlock(&share->intern_lock);
93
/* NOTE(review): this second unlock is presumably on a different branch than
   the one above; the lines separating the two are not visible here */
pthread_mutex_unlock(&share->intern_lock);
96
/* Tear down an active write cache before a read cache is set up below */
if (info->opt_flag & WRITE_CACHE_USED)
98
info->opt_flag&= ~WRITE_CACHE_USED;
99
if ((error= end_io_cache(&info->rec_cache)))
102
/* Only create the read cache when no record cache and no memory map is in
   use */
if (!(info->opt_flag &
103
(READ_CACHE_USED | WRITE_CACHE_USED | MEMMAP_USED)))
105
/* Cache size comes from extra_arg when given, else from the global default */
cache_size= (extra_arg ? *(ulong*) extra_arg :
106
my_default_record_cache_size);
107
/* When init_io_cache() returns zero, mark the read cache as active */
if (!(init_io_cache(&info->rec_cache, info->dfile.file,
108
(uint) min(share->state.state.data_file_length+1,
110
READ_CACHE,0L,(pbool) (info->lock_type != F_UNLCK),
111
MYF(share->write_flag & MY_WAIT_IF_FULL))))
113
info->opt_flag|= READ_CACHE_USED;
114
info->update&= ~HA_STATE_ROW_CHANGED;
116
/* With non-transactional concurrent insert, set the cache end_of_file to the
   data length recorded in info->state */
if (share->non_transactional_concurrent_insert)
117
info->rec_cache.end_of_file= info->state->data_file_length;
120
case HA_EXTRA_REINIT_CACHE:
121
/* Restart an active read cache at info->cur_row.nextpos */
if (info->opt_flag & READ_CACHE_USED)
123
reinit_io_cache(&info->rec_cache, READ_CACHE, info->cur_row.nextpos,
124
(pbool) (info->lock_type != F_UNLCK),
125
(pbool) test(info->update & HA_STATE_ROW_CHANGED));
126
info->update&= ~HA_STATE_ROW_CHANGED;
127
if (share->non_transactional_concurrent_insert)
128
info->rec_cache.end_of_file= info->state->data_file_length;
131
case HA_EXTRA_WRITE_CACHE:
132
if (info->lock_type == F_UNLCK)
134
error= 1; /* Not possible if not locked */
138
break; /* Not supported */
140
/* Cache size comes from extra_arg when given, else from the global default */
cache_size= (extra_arg ? *(ulong*) extra_arg :
141
my_default_record_cache_size);
142
/* A write cache is only created when no record cache is active, OPT_NO_ROWS
   is not set and share->state.header.uniques is zero */
if (!(info->opt_flag &
143
(READ_CACHE_USED | WRITE_CACHE_USED | OPT_NO_ROWS)) &&
144
!share->state.header.uniques)
145
/* When init_io_cache() returns zero, mark the write cache as active and
   clear the row-state bits listed below */
if (!(init_io_cache(&info->rec_cache, info->dfile.file, cache_size,
146
WRITE_CACHE,share->state.state.data_file_length,
147
(pbool) (info->lock_type != F_UNLCK),
148
MYF(share->write_flag & MY_WAIT_IF_FULL))))
150
info->opt_flag|= WRITE_CACHE_USED;
151
info->update&= ~(HA_STATE_ROW_CHANGED |
152
HA_STATE_WRITE_AT_END |
153
HA_STATE_EXTEND_BLOCK);
156
case HA_EXTRA_PREPARE_FOR_UPDATE:
157
/* NOTE(review): the statement guarded by this test is not visible here;
   presumably tables that are not DYNAMIC_RECORD leave the switch at this
   point and only dynamic-row tables continue into HA_EXTRA_NO_CACHE */
if (info->s->data_file_type != DYNAMIC_RECORD)
159
/* Remove read/write cache if dynamic rows */
160
case HA_EXTRA_NO_CACHE:
161
/* Clear both cache flags and release the record cache; the result of
   end_io_cache() becomes the error value */
if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
163
info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
164
error= end_io_cache(&info->rec_cache);
165
/* Sergei will insert full text index caching here */
167
#if defined(HAVE_MMAP) && defined(HAVE_MADVISE)
168
/* Memory-mapped data: issue a madvise() hint (the advice argument is not
   visible in this copy) */
if (info->opt_flag & MEMMAP_USED)
169
madvise((char*) share->file_map, share->state.state.data_file_length,
173
case HA_EXTRA_FLUSH_CACHE:
174
/* Flush pending cached writes; a non-zero result is reported as
   HA_ERR_CRASHED and the table is marked crashed */
if (info->opt_flag & WRITE_CACHE_USED)
176
if ((error= flush_io_cache(&info->rec_cache)))
178
maria_print_error(info->s, HA_ERR_CRASHED);
179
maria_mark_crashed(info); /* Fatal error found */
183
case HA_EXTRA_NO_READCHECK:
184
info->opt_flag&= ~READ_CHECK_USED; /* No readcheck */
186
case HA_EXTRA_READCHECK:
187
info->opt_flag|= READ_CHECK_USED;
189
case HA_EXTRA_KEYREAD: /* Read only keys to record */
190
case HA_EXTRA_REMEMBER_POS:
191
/* Stash the current key and cursor state so that HA_EXTRA_RESTORE_POS /
   HA_EXTRA_NO_KEYREAD can bring it back: the key bytes are copied within
   the last_key.data buffer to offset max_key_length*2 */
info->opt_flag|= REMEMBER_OLD_POS;
192
bmove((uchar*) info->last_key.data + share->base.max_key_length*2,
193
(uchar*) info->last_key.data,
194
info->last_key.data_length + info->last_key.ref_length);
195
info->save_update= info->update;
196
info->save_lastinx= info->lastinx;
197
info->save_lastpos= info->cur_row.lastpos;
198
info->save_lastkey_data_length= info->last_key.data_length;
199
info->save_lastkey_ref_length= info->last_key.ref_length;
200
/* NOTE(review): the statement guarded by this test is not visible here;
   presumably HA_EXTRA_REMEMBER_POS stops at this point */
if (function == HA_EXTRA_REMEMBER_POS)
203
case HA_EXTRA_KEYREAD_CHANGE_POS:
204
/* Turn key-read mode on and route record reads through
   _ma_read_key_record */
info->opt_flag|= KEY_READ_USED;
205
info->read_record= _ma_read_key_record;
207
case HA_EXTRA_NO_KEYREAD:
208
case HA_EXTRA_RESTORE_POS:
209
/* Restore the key and cursor state stashed under REMEMBER_OLD_POS */
if (info->opt_flag & REMEMBER_OLD_POS)
211
bmove((uchar*) info->last_key.data,
212
(uchar*) info->last_key.data + share->base.max_key_length*2,
213
info->save_lastkey_data_length + info->save_lastkey_ref_length);
214
info->update= info->save_update | HA_STATE_WRITTEN;
215
info->lastinx= info->save_lastinx;
216
info->cur_row.lastpos= info->save_lastpos;
217
info->last_key.data_length= info->save_lastkey_data_length;
218
info->last_key.ref_length= info->save_lastkey_ref_length;
219
info->last_key.flag= 0;
221
/* Back to the record reader stored in the share, and clear both mode flags */
info->read_record= share->read_record;
222
info->opt_flag&= ~(KEY_READ_USED | REMEMBER_OLD_POS);
224
case HA_EXTRA_NO_USER_CHANGE: /* Database is somehow locked against changes */
225
info->lock_type= F_EXTRA_LCK; /* Simulate as locked */
227
case HA_EXTRA_WAIT_LOCK:
230
case HA_EXTRA_NO_WAIT_LOCK:
231
info->lock_wait= MY_SHORT_WAIT;
233
case HA_EXTRA_NO_KEYS:
234
/* we're going to modify pieces of the state, stall Checkpoint */
235
pthread_mutex_lock(&share->intern_lock);
236
if (info->lock_type == F_UNLCK)
238
pthread_mutex_unlock(&share->intern_lock);
239
error= 1; /* Not possible if not locked */
242
/* Deactivate every key that lacks HA_NOSAME and is not the key numbered
   base.auto_key (key numbers are 1-based in that field, hence i+1) */
if (maria_is_any_key_active(share->state.key_map))
244
MARIA_KEYDEF *key= share->keyinfo;
246
for (i =0 ; i < share->base.keys ; i++,key++)
248
if (!(key->flag & HA_NOSAME) && info->s->base.auto_key != i+1)
250
maria_clear_key_active(share->state.key_map, i);
251
info->update|= HA_STATE_CHANGED;
257
share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
258
share->changed= 1; /* Update on close */
259
/* Set global_changed once and bump state.open_count together with it */
if (!share->global_changed)
261
share->global_changed= 1;
262
share->state.open_count++;
265
/* Non-transactional table: copy the state of this handle into the share
   before the state is written out below */
if (!share->now_transactional)
266
share->state.state= *info->state;
268
That state write to disk must be done, even for transactional tables;
269
indeed the table's share is going to be lost (there was a
270
HA_EXTRA_FORCE_REOPEN before, which set share->last_version to
271
0), and so the only way it leaves information (share->state.key_map)
272
for the posterity is by writing it to disk.
274
DBUG_ASSERT(!maria_in_recovery);
275
error= _ma_state_info_write(share, 1|2);
277
pthread_mutex_unlock(&share->intern_lock);
279
case HA_EXTRA_FORCE_REOPEN:
281
MySQL uses this case after it has closed all other instances
283
We however do a flush here for additional safety.
285
/** @todo consider porting these flush-es to MyISAM */
286
DBUG_ASSERT(share->reopen == 1);
287
error= _ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
288
FLUSH_FORCE_WRITE, FLUSH_FORCE_WRITE);
289
if (!error && share->changed)
291
pthread_mutex_lock(&share->intern_lock);
292
if (!(error= _ma_state_info_write(share, 1|2)))
294
pthread_mutex_unlock(&share->intern_lock);
296
pthread_mutex_lock(&THR_LOCK_maria);
297
pthread_mutex_lock(&share->intern_lock); /* protect against Checkpoint */
298
/* this makes the share not be re-used next time the table is opened */
299
share->last_version= 0L; /* Impossible version */
300
pthread_mutex_unlock(&share->intern_lock);
301
pthread_mutex_unlock(&THR_LOCK_maria);
303
case HA_EXTRA_PREPARE_FOR_DROP:
304
case HA_EXTRA_PREPARE_FOR_RENAME:
306
my_bool do_flush= test(function != HA_EXTRA_PREPARE_FOR_DROP);
307
enum flush_type type;
308
pthread_mutex_lock(&THR_LOCK_maria);
310
This share, to have last_version=0, needs to save all its data/index
311
blocks to disk if this is not for a DROP TABLE. Otherwise they would be
312
invisible to future openers; and they could even go to disk late and
313
cancel the work of future openers.
315
if (info->lock_type != F_UNLCK && !info->was_locked)
317
info->was_locked= info->lock_type;
318
if (maria_lock_database(info, F_UNLCK))
320
info->lock_type= F_UNLCK;
322
if (share->kfile.file >= 0)
323
_ma_decrement_open_count(info);
324
pthread_mutex_lock(&share->intern_lock);
325
type= do_flush ? FLUSH_RELEASE : FLUSH_IGNORE_CHANGED;
326
if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
332
if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
334
info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
335
if (end_io_cache(&info->rec_cache))
338
if (share->kfile.file >= 0)
342
/* Save the state so that others can find it from disk. */
343
if (_ma_state_info_write(share, 1 | 2) ||
344
my_sync(share->kfile.file, MYF(0)))
351
/* be sure that state is not tried for write as file may be closed */
355
if (share->data_file_type == BLOCK_RECORD &&
356
share->bitmap.file.file >= 0)
358
if (do_flush && my_sync(share->bitmap.file.file, MYF(0)))
361
/* For protection against Checkpoint, we set under intern_lock: */
362
share->last_version= 0L; /* Impossible version */
363
pthread_mutex_unlock(&share->intern_lock);
364
pthread_mutex_unlock(&THR_LOCK_maria);
368
if (!share->temporary)
369
error= _ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
370
FLUSH_KEEP, FLUSH_KEEP);
372
_ma_decrement_open_count(info);
374
if (share->not_flushed)
376
share->not_flushed= 0;
377
if (_ma_sync_table_files(info))
382
maria_print_error(info->s, HA_ERR_CRASHED);
383
maria_mark_crashed(info); /* Fatal error found */
387
case HA_EXTRA_NORMAL: /* Theese isn't in use */
393
case HA_EXTRA_NO_ROWS:
394
/* OPT_NO_ROWS is only enabled when share->state.header.uniques is zero */
if (!share->state.header.uniques)
395
info->opt_flag|= OPT_NO_ROWS;
397
case HA_EXTRA_PRELOAD_BUFFER_SIZE:
398
/* NOTE(review): extra_arg is dereferenced here without the NULL test that
   the cache_size reads earlier in this function perform - confirm that
   callers always pass a valid pointer for this request */
info->preload_buff_size= *((ulong *) extra_arg);
400
case HA_EXTRA_CHANGE_KEY_TO_UNIQUE:
401
case HA_EXTRA_CHANGE_KEY_TO_DUP:
402
/* Both requests are delegated to the file-local helper */
maria_extra_keyflag(info, function);
407
break; /* Not supported */
408
pthread_mutex_lock(&share->intern_lock);
410
Memory map the data file if it is not already mapped. It is safe
411
to memory map a file while other threads are using file I/O on it.
412
Assigning a new address to a function pointer is an atomic
413
operation. intern_lock prevents that two or more mappings are done
416
/* Map only when no mapping exists yet; a non-zero result from
   _ma_dynmap_file() is logged and returned through error and my_errno.
   The file_read / file_write hooks of the share are pointed at the mmap
   variants (presumably on the success branch, whose opening lines are not
   visible here) */
if (!share->file_map)
418
if (_ma_dynmap_file(info, share->state.state.data_file_length))
420
DBUG_PRINT("warning",("mmap failed: errno: %d",errno));
421
error= my_errno= errno;
425
share->file_read= _ma_mmap_pread;
426
share->file_write= _ma_mmap_pwrite;
429
pthread_mutex_unlock(&share->intern_lock);
432
case HA_EXTRA_MARK_AS_LOG_TABLE:
433
/* Flag the share as a log table while holding intern_lock */
pthread_mutex_lock(&share->intern_lock);
434
share->is_log_table= TRUE;
435
pthread_mutex_unlock(&share->intern_lock);
437
case HA_EXTRA_KEY_CACHE:
438
case HA_EXTRA_NO_KEY_CACHE:
447
Start/Stop Inserting Duplicates Into a Table, WL#1648.
450
/*
  Walks every key definition of the table (info->s->keyinfo[0 .. base.keys-1])
  and, depending on `function`, either sets or clears the HA_NOSAME bit in
  the flag word of each key.
  NOTE(review): the switch header, braces and break statements are not
  visible in this copy; the two case labels are presumably alternatives of a
  switch on `function` inside the loop.
*/
static void maria_extra_keyflag(MARIA_HA *info,
451
enum ha_extra_function function)
455
for (idx= 0; idx< info->s->base.keys; idx++)
458
/* Request to treat keys as unique: set HA_NOSAME */
case HA_EXTRA_CHANGE_KEY_TO_UNIQUE:
459
info->s->keyinfo[idx].flag|= HA_NOSAME;
461
/* Request to allow duplicates: clear HA_NOSAME */
case HA_EXTRA_CHANGE_KEY_TO_DUP:
462
info->s->keyinfo[idx].flag&= ~(HA_NOSAME);
471
/*
  Resets a table handle: releases an active record cache, shrinks oversized
  row and blob buffers for tables with blobs, leaves key-read and
  remembered-position mode, and resets the cursor fields.
  NOTE(review): braces, the declaration of `error`, the madvise() advice
  argument and the return statement are not visible in this copy.
*/
int maria_reset(MARIA_HA *info)
474
MARIA_SHARE *share= info->s;
475
DBUG_ENTER("maria_reset");
477
Free buffers and reset the following flags:
478
EXTRA_CACHE, EXTRA_WRITE_CACHE, EXTRA_KEYREAD, EXTRA_QUICK
480
If the row buffer cache is large (for dynamic tables), reduce it
483
/* Release any active record cache; the end_io_cache() result is kept in
   error */
if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
485
info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
486
error= end_io_cache(&info->rec_cache);
488
/* Free memory used for keeping blobs */
489
if (share->base.blobs)
491
/* Row buffer larger than the default: set the recorded size to 1 so that
   _ma_alloc_buffer() reallocates it at the default size */
if (info->rec_buff_size > share->base.default_rec_buff_size)
493
info->rec_buff_size= 1; /* Force realloc */
494
_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
495
share->base.default_rec_buff_size);
497
/* Same treatment for a blob buffer larger than MARIA_SMALL_BLOB_BUFFER */
if (info->blob_buff_size > MARIA_SMALL_BLOB_BUFFER)
499
info->blob_buff_size= 1; /* Force realloc */
500
_ma_alloc_buffer(&info->blob_buff, &info->blob_buff_size,
501
MARIA_SMALL_BLOB_BUFFER);
504
#if defined(HAVE_MMAP) && defined(HAVE_MADVISE)
505
/* Memory-mapped data: issue a madvise() hint over the whole data file */
if (info->opt_flag & MEMMAP_USED)
506
madvise((char*) share->file_map, share->state.state.data_file_length,
509
/* Leave key-read mode and forget any remembered position */
info->opt_flag&= ~(KEY_READ_USED | REMEMBER_OLD_POS);
511
info->lastinx= 0; /* Use first index as def */
512
info->last_search_keypage= info->cur_row.lastpos= HA_OFFSET_ERROR;
513
info->page_changed= 1;
514
/* Keep only HA_STATE_CHANGED from the old update bits and mark both the
   next and the previous row as found */
info->update= ((info->update & HA_STATE_CHANGED) | HA_STATE_NEXT_FOUND |
515
HA_STATE_PREV_FOUND);
520
/*
  Calls my_sync() on the data file of this handle and then on the index file
  of its share, both with MY_WME.
  The result is the logical OR of the two calls, so it is non-zero if either
  call returns non-zero. Because of the short-circuit `||`, the index file
  is not synced when the data file call already returned non-zero.
*/
int _ma_sync_table_files(const MARIA_HA *info)
522
return (my_sync(info->dfile.file, MYF(MY_WME)) ||
523
my_sync(info->s->kfile.file, MYF(MY_WME)));
528
@brief flushes the data and/or index file of a table
530
This is useful when one wants to read a table using OS syscalls (like
531
my_copy()) and first wants to be sure that MySQL-level caches go down to
532
the OS so that OS syscalls can see all data. It can flush rec_cache,
533
bitmap, pagecache of data file, pagecache of index file.
536
@param flush_data_or_index one or two of these flags:
537
MARIA_FLUSH_DATA, MARIA_FLUSH_INDEX
538
@param flush_type_for_data
539
@param flush_type_for_index
541
@note does not sync files (@see _ma_sync_table_files()).
542
@note Progressively this function will be used in all places where we flush
543
the index but not the data file (probable bugs).
545
@return Operation status
550
/*
  Flushes the data file and/or the index file of a table, as selected by the
  MARIA_FLUSH_DATA / MARIA_FLUSH_INDEX bits in flush_data_or_index.
  NOTE(review): braces, goto/return statements and some branch lines are not
  visible in this copy; the comments below cover the visible statements only.
*/
int _ma_flush_table_files(MARIA_HA *info, uint flush_data_or_index,
551
enum flush_type flush_type_for_data,
552
enum flush_type flush_type_for_index)
555
MARIA_SHARE *share= info->s;
556
/* flush data file first because it's more critical */
557
if (flush_data_or_index & MARIA_FLUSH_DATA)
559
/* An active write cache is flushed first, unless the caller asked to
   ignore changes */
if ((info->opt_flag & WRITE_CACHE_USED) &&
560
flush_type_for_data != FLUSH_IGNORE_CHANGED &&
561
flush_io_cache(&info->rec_cache))
563
/* BLOCK_RECORD tables: flush the bitmap (not done for
   FLUSH_IGNORE_CHANGED) and then the data file pages from the page cache */
if (share->data_file_type == BLOCK_RECORD)
565
if (flush_type_for_data != FLUSH_IGNORE_CHANGED)
567
if (_ma_bitmap_flush(share))
571
/* NOTE(review): presumably this is the FLUSH_IGNORE_CHANGED branch, which
   just drops the bitmap changed flag; the else line is not visible here */
info->s->bitmap.changed= 0;
572
if (flush_pagecache_blocks(share->pagecache, &info->dfile,
573
flush_type_for_data))
577
/* Index pages are flushed from the page cache with their own flush type */
if ((flush_data_or_index & MARIA_FLUSH_INDEX) &&
578
flush_pagecache_blocks(share->pagecache, &share->kfile,
579
flush_type_for_index))
584
/* Error path visible here: report HA_ERR_CRASHED and mark the table as
   crashed */
maria_print_error(info->s, HA_ERR_CRASHED);
585
maria_mark_crashed(info);