1
/* Copyright (c) 2005 PrimeBase Technologies GmbH
3
* Derived from ha_example.h
4
* Copyright (C) 2003 MySQL AB
8
* This program is free software; you can redistribute it and/or modify
9
* it under the terms of the GNU General Public License as published by
10
* the Free Software Foundation; either version 2 of the License, or
11
* (at your option) any later version.
13
* This program is distributed in the hope that it will be useful,
14
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
* GNU General Public License for more details.
18
* You should have received a copy of the GNU General Public License
19
* along with this program; if not, write to the Free Software
20
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22
* 2005-11-10 Paul McCullagh
26
#ifdef USE_PRAGMA_IMPLEMENTATION
27
#pragma implementation // gcc: Class implementation
30
#include "xt_config.h"
42
#include <drizzled/internal/my_sys.h>
43
#include <drizzled/common.h>
44
#include <drizzled/plugin.h>
45
#include <drizzled/field.h>
46
#include <drizzled/session.h>
47
#include <drizzled/data_home.h>
48
#include <drizzled/error.h>
49
#include <drizzled/table.h>
50
#include <drizzled/field/timestamp.h>
51
#include <drizzled/session.h>
53
#define my_strdup(a,b) strdup(a)
55
using namespace drizzled;
56
using namespace drizzled::plugin;
58
#define DEFAULT_FILE_EXTENSION ".dfe"
61
#include "mysql_priv.h"
62
#include <mysql/plugin.h>
68
#include "strutil_xt.h"
69
#include "database_xt.h"
74
#include "datadic_xt.h"
76
#include "pbms_enabled.h"
78
#include "tabcache_xt.h"
79
#include "systab_xt.h"
80
#include "xaction_xt.h"
81
#include "backup_xt.h"
84
//#define XT_USE_SYS_PAR_DEBUG_SIZES
85
#define PBXT_HANDLER_TRACE
86
//#define PBXT_TRACE_RETURN
87
//#define XT_PRINT_INDEX_OPT
88
//#define XT_SHOW_DUMPS_TRACE
89
//#define XT_UNIT_TEST
90
//#define LOAD_TABLE_ON_OPEN
91
//#define CHECK_TABLE_LOADS
93
/* Enable to trace the statements executed by the engine: */
94
//#define TRACE_STATEMENTS
96
/* Enable to print the trace to the stdout, instead of
99
//#define PRINT_STATEMENTS
103
static handler *pbxt_create_handler(handlerton *hton, TABLE_SHARE *table, MEM_ROOT *mem_root);
104
static int pbxt_init(void *p);
105
static int pbxt_end(void *p);
106
static int pbxt_panic(handlerton *hton, enum ha_panic_function flag);
107
static void pbxt_drop_database(handlerton *hton, char *path);
108
static int pbxt_close_connection(handlerton *hton, THD* thd);
109
static int pbxt_commit(handlerton *hton, THD *thd, bool all);
110
static int pbxt_rollback(handlerton *hton, THD *thd, bool all);
111
static int pbxt_prepare(handlerton *hton, THD *thd, bool all);
112
static int pbxt_recover(handlerton *hton, XID *xid_list, uint len);
113
static int pbxt_commit_by_xid(handlerton *hton, XID *xid);
114
static int pbxt_rollback_by_xid(handlerton *hton, XID *xid);
115
static int pbxt_start_consistent_snapshot(handlerton *hton, THD *thd);
117
static void ha_aquire_exclusive_use(XTThreadPtr self, XTSharePtr share, ha_pbxt *mine);
118
static void ha_release_exclusive_use(XTThreadPtr self, XTSharePtr share);
119
static void ha_close_open_tables(XTThreadPtr self, XTSharePtr share, ha_pbxt *mine);
121
#ifdef TRACE_STATEMENTS
123
#ifdef PRINT_STATEMENTS
124
#define STAT_TRACE(y, x) printf("%s: %s\n", y ? y->t_name : "-unknown-", x)
126
#define STAT_TRACE(y, x) xt_ttraceq(y, x)
131
#define STAT_TRACE(y, x)
135
#ifdef PBXT_HANDLER_TRACE
136
#define PBXT_ALLOW_PRINTING
138
#define XT_TRACE_CALL() ha_trace_function(__FUNC__, NULL)
139
#define XT_TRACE_METHOD() ha_trace_function(__FUNC__, pb_share->sh_table_path->ps_path)
141
#ifdef PBXT_TRACE_RETURN
142
#define XT_RETURN(x) do { printf("%d\n", (int) (x)); return (x); } while (0)
143
#define XT_RETURN_VOID do { printf("out\n"); return; } while (0)
145
#define XT_RETURN(x) return (x)
146
#define XT_RETURN_VOID return
151
#define XT_TRACE_CALL()
152
#define XT_TRACE_METHOD()
153
#define XT_RETURN(x) return (x)
154
#define XT_RETURN_VOID return
158
#ifdef PBXT_ALLOW_PRINTING
159
#define XT_PRINT0(y, x) do { XTThreadPtr s = (y); printf("%s " x, s ? s->t_name : "-unknown-"); } while (0)
160
#define XT_PRINT1(y, x, a) do { XTThreadPtr s = (y); printf("%s " x, s ? s->t_name : "-unknown-", a); } while (0)
161
#define XT_PRINT2(y, x, a, b) do { XTThreadPtr s = (y); printf("%s " x, s ? s->t_name : "-unknown-", a, b); } while (0)
162
#define XT_PRINT3(y, x, a, b, c) do { XTThreadPtr s = (y); printf("%s " x, s ? s->t_name : "-unknown-", a, b, c); } while (0)
164
#define XT_PRINT0(y, x)
165
#define XT_PRINT1(y, x, a)
166
#define XT_PRINT2(y, x, a, b)
167
#define XT_PRINT3(y, x, a, b, c)
173
handlerton *pbxt_hton;
174
bool pbxt_inited = false; // Variable for checking the init state of hash
175
xtBool pbxt_ignore_case = true;
176
const char *pbxt_extensions[]= { ".xtr", ".xtd", ".xtl", ".xti", ".xt", "", NULL };
177
#ifdef XT_CRASH_DEBUG
178
xtBool pbxt_crash_debug = TRUE;
180
xtBool pbxt_crash_debug = FALSE;
184
/* Variables for pbxt share methods */
185
static xt_mutex_type pbxt_database_mutex; // Prevent a database from being opened while it is being dropped
186
static XTHashTabPtr pbxt_share_tables; // Hash used to track open tables
187
static char *pbxt_index_cache_size;
188
static char *pbxt_record_cache_size;
189
static char *pbxt_log_cache_size;
190
static char *pbxt_log_file_threshold;
191
static char *pbxt_transaction_buffer_size;
192
static char *pbxt_log_buffer_size;
193
static char *pbxt_checkpoint_frequency;
194
static char *pbxt_data_log_threshold;
195
static char *pbxt_data_file_grow_size;
196
static char *pbxt_row_file_grow_size;
197
static char *pbxt_record_write_threshold;
198
static my_bool pbxt_support_xa;
201
// drizzle complains it's not used
202
static XTXactEnumXARec pbxt_xa_enum;
206
#define XT_SHARE_LOCK_WAIT 5000
208
#define XT_SHARE_LOCK_WAIT 500
212
* Lock timeout in 1/1000ths of a second
214
#define XT_SHARE_LOCK_TIMEOUT 30000
217
* -----------------------------------------------------------------------
222
//#define XT_FOR_TEAMDRIVE
224
/*
 * Describes one size-valued system parameter. All fields are strings
 * (for example "32MB"); the vp_* tables that follow are built from this
 * record and are passed to ha_set_variable().
 */
typedef struct HAVarParams {
225
const char *vp_var; /* Variable name. */
226
const char *vp_def; /* Default value. */
227
const char *vp_min; /* Minimum allowed value. */
228
const char *vp_max4; /* Maximum allowed value on 32-bit processors. */
229
const char *vp_max8; /* Maximum allowed value on 64-bit processors. */
230
} HAVarParamsRec, *HAVarParamsPtr;
232
#ifdef XT_USE_SYS_PAR_DEBUG_SIZES
233
static HAVarParamsRec vp_index_cache_size = { "pbxt_index_cache_size", "32MB", "8MB", "2GB", "2000GB" };
234
static HAVarParamsRec vp_record_cache_size = { "pbxt_record_cache_size", "32MB", "8MB", "2GB", "2000GB" };
235
static HAVarParamsRec vp_log_cache_size = { "pbxt_log_cache_size", "16MB", "4MB", "2GB", "2000GB" };
236
static HAVarParamsRec vp_checkpoint_frequency = { "pbxt_checkpoint_frequency", "1GB", "2MB", "2000GB", "2000GB" };
237
static HAVarParamsRec vp_log_file_threshold = { "pbxt_log_file_threshold", "32MB", "1MB", "2GB", "256TB" };
238
static HAVarParamsRec vp_transaction_buffer_size = { "pbxt_transaction_buffer_size", "1MB", "128K", "1GB", "24GB" };
239
static HAVarParamsRec vp_log_buffer_size = { "pbxt_log_buffer_size", "256K", "128K", "1GB", "24GB" };
240
static HAVarParamsRec vp_data_log_threshold = { "pbxt_data_log_threshold", "400K", "400K", "2GB", "256TB" };
241
static HAVarParamsRec vp_data_file_grow_size = { "pbxt_data_file_grow_size", "2MB", "128K", "1GB", "2GB" };
242
static HAVarParamsRec vp_row_file_grow_size = { "pbxt_row_file_grow_size", "256K", "32K", "1GB", "2GB" };
243
static HAVarParamsRec vp_record_write_threshold = { "pbxt_record_write_threshold", "4MB", "0", "2GB", "8GB" };
244
#define XT_DL_DEFAULT_XLOG_COUNT 3
245
#define XT_DL_DEFAULT_GARBAGE_LEVEL 10
247
static HAVarParamsRec vp_index_cache_size = { "pbxt_index_cache_size", "32MB", "8MB", "2GB", "2000GB" };
248
static HAVarParamsRec vp_record_cache_size = { "pbxt_record_cache_size", "32MB", "8MB", "2GB", "2000GB" };
249
static HAVarParamsRec vp_log_cache_size = { "pbxt_log_cache_size", "16MB", "4MB", "2GB", "2000GB" };
250
static HAVarParamsRec vp_checkpoint_frequency = { "pbxt_checkpoint_frequency", "1GB", "2MB", "2000GB", "2000GB" };
251
static HAVarParamsRec vp_log_file_threshold = { "pbxt_log_file_threshold", "32MB", "1MB", "2GB", "256TB" };
252
static HAVarParamsRec vp_transaction_buffer_size = { "pbxt_transaction_buffer_size", "1MB", "128K", "1GB", "24GB" };
253
static HAVarParamsRec vp_log_buffer_size = { "pbxt_log_buffer_size", "256K", "128K", "1GB", "24GB" };
254
static HAVarParamsRec vp_data_log_threshold = { "pbxt_data_log_threshold", "64MB", "1MB", "2GB", "256TB" };
255
static HAVarParamsRec vp_data_file_grow_size = { "pbxt_data_file_grow_size", "2MB", "128K", "1GB", "2GB" };
256
static HAVarParamsRec vp_row_file_grow_size = { "pbxt_row_file_grow_size", "256K", "32K", "1GB", "2GB" };
257
static HAVarParamsRec vp_record_write_threshold = { "pbxt_record_write_threshold", "4MB", "0", "2GB", "8GB" };
258
#define XT_DL_DEFAULT_XLOG_COUNT 3
259
#define XT_DL_DEFAULT_GARBAGE_LEVEL 50
262
#define XT_AUTO_INCREMENT_DEF 0
263
#define XT_DL_DEFAULT_INDEX_DIRTY_LEVEL 80
267
/* For debugging on the Mac, we check the re-use logs: */
268
#define XT_OFFLINE_LOG_FUNCTION_DEF XT_RECYCLE_LOGS
270
#define XT_OFFLINE_LOG_FUNCTION_DEF XT_DELETE_LOGS
273
#define XT_OFFLINE_LOG_FUNCTION_DEF XT_RECYCLE_LOGS
276
/* TeamDrive, uses special auto-increment, and
277
* we keep the logs for the moment:
279
#ifdef XT_FOR_TEAMDRIVE
280
#undef XT_OFFLINE_LOG_FUNCTION_DEF
281
#define XT_OFFLINE_LOG_FUNCTION_DEF XT_KEEP_LOGS
282
//#undef XT_AUTO_INCREMENT_DEF
283
//#define XT_AUTO_INCREMENT_DEF 1
286
#ifdef PBXT_HANDLER_TRACE
287
/*
 * Trace helper behind XT_TRACE_CALL() and XT_TRACE_METHOD().
 * Copies a function name derived from 'function' into a 50 byte buffer
 * and prints it, prefixed by the name of the current PBXT thread
 * ("-unknown-" if xt_get_self() returns NULL). One of the two printf()
 * forms also prints 'table' in parentheses.
 */
static void ha_trace_function(const char *function, char *table)
289
char func_buf[50], *ptr;
290
XTThreadPtr thread = xt_get_self();
292
if ((ptr = const_cast<char *>(strchr(function, '(')))) {
294
while (ptr > function) {
295
if (!(isalnum(*ptr) || *ptr == '_'))
300
xt_strcpy(50, func_buf, ptr);
301
if ((ptr = strchr(func_buf, '(')))
305
xt_strcpy(50, func_buf, function);
307
printf("%s %s (%s)\n", thread ? thread->t_name : "-unknown-", func_buf, table);
309
printf("%s %s\n", thread ? thread->t_name : "-unknown-", func_buf);
314
* -----------------------------------------------------------------------
319
/*
 * Case-sensitive comparison callback for the share hash table.
 * 'key' is a C string path, 'data' is an XTSharePtr. Returns non-zero
 * when the key equals the path stored in the share.
 */
static xtBool ha_hash_comp(void *key, void *data)
321
XTSharePtr share = (XTSharePtr) data;
323
return strcmp((char *) key, share->sh_table_path->ps_path) == 0;
326
/*
 * Case-sensitive hash callback for the share hash table.
 * 'key_data' is hashed either directly as a C string, or as a share
 * whose stored table path is hashed.
 * NOTE(review): presumably 'is_key' selects the string form - confirm.
 */
static xtHashValue ha_hash(xtBool is_key, void *key_data)
328
XTSharePtr share = (XTSharePtr) key_data;
331
return xt_ht_hash((char *) key_data);
332
return xt_ht_hash(share->sh_table_path->ps_path);
335
/*
 * Case-insensitive variant of ha_hash_comp(): compares the key string
 * with the share's table path using strcasecmp().
 */
static xtBool ha_hash_comp_ci(void *key, void *data)
337
XTSharePtr share = (XTSharePtr) data;
339
return strcasecmp((char *) key, share->sh_table_path->ps_path) == 0;
342
/*
 * Case-insensitive variant of ha_hash(): same two input forms, hashed
 * with xt_ht_casehash().
 * NOTE(review): presumably 'is_key' selects the string form - confirm.
 */
static xtHashValue ha_hash_ci(xtBool is_key, void *key_data)
344
XTSharePtr share = (XTSharePtr) key_data;
347
return xt_ht_casehash((char *) key_data);
348
return xt_ht_casehash(share->sh_table_path->ps_path);
351
/*
 * Attach the table to a share if it is not attached yet.
 * Runs under the share's sh_ex_mutex. On first use the table is obtained
 * with xt_use_table() and the share's key count and key array are set to
 * point into the table's dictionary.
 */
static void ha_open_share(XTThreadPtr self, XTShareRec *share)
353
xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
354
pushr_(xt_unlock_mutex, share->sh_ex_mutex);
356
if (!share->sh_table) {
357
share->sh_table = xt_use_table(self, share->sh_table_path, FALSE, FALSE);
358
share->sh_dic_key_count = share->sh_table->tab_dic.dic_key_count;
359
share->sh_dic_keys = share->sh_table->tab_dic.dic_keys;
360
share->sh_recalc_selectivity = FALSE;
363
freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
366
/*
 * Detach the table from a share. If a table is attached, its current
 * auto-increment value is remembered in sh_min_auto_inc before the table
 * reference is released. The dictionary key fields are reset.
 */
static void ha_close_share(XTThreadPtr self, XTShareRec *share)
370
if ((tab = share->sh_table)) {
371
/* Save this, in case the share is re-opened. */
372
share->sh_min_auto_inc = tab->tab_auto_inc;
374
xt_heap_release(self, tab);
375
share->sh_table = NULL;
378
/* These are only references: */
379
share->sh_dic_key_count = 0;
380
share->sh_dic_keys = NULL;
383
/*
 * Destroy a share completely: close it, then free the table path, the
 * MySQL lock together with the condition variable, the mutex, and
 * finally the share record itself. Each member is tested before it is
 * released, so a partially initialised share can be passed in.
 */
static void ha_cleanup_share(XTThreadPtr self, XTSharePtr share)
385
ha_close_share(self, share);
387
if (share->sh_table_path) {
388
xt_free(self, share->sh_table_path);
389
share->sh_table_path = NULL;
392
if (share->sh_ex_cond) {
393
thr_lock_delete(&share->sh_lock);
394
xt_delete_cond(self, (xt_cond_type *) share->sh_ex_cond);
395
share->sh_ex_cond = NULL;
398
if (share->sh_ex_mutex) {
399
xt_delete_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
400
share->sh_ex_mutex = NULL;
403
xt_free(self, share);
406
/*
 * Free callback handed to xt_new_hashtable() for the share table:
 * forwards the item to ha_cleanup_share().
 */
static void ha_hash_free(XTThreadPtr self, void *data)
408
XTSharePtr share = (XTSharePtr) data;
410
ha_cleanup_share(self, share);
414
* This structure contains information that is common to all handles.
415
* (i.e. it is table specific).
417
/*
 * Look up the share for 'table_path' in pbxt_share_tables, creating and
 * registering a new one if none exists. The hash table is locked for the
 * duration of the call. A new share gets its own mutex, condition
 * variable, MySQL lock and a copy of the path; ha_cleanup_share() is
 * pushed as cleanup until the share has been set up. The share's use
 * count is incremented before the hash table is unlocked.
 */
static XTSharePtr ha_get_share(XTThreadPtr self, const char *table_path, bool open_table)
422
xt_ht_lock(self, pbxt_share_tables);
423
pushr_(xt_ht_unlock, pbxt_share_tables);
425
// Check if the table exists...
426
if (!(share = (XTSharePtr) xt_ht_get(self, pbxt_share_tables, (void *) table_path))) {
427
share = (XTSharePtr) xt_calloc(self, sizeof(XTShareRec));
428
pushr_(ha_cleanup_share, share);
430
share->sh_ex_mutex = (xt_mutex_type *) xt_new_mutex(self);
431
share->sh_ex_cond = (xt_cond_type *) xt_new_cond(self);
433
thr_lock_init(&share->sh_lock);
435
share->sh_use_count = 0;
436
share->sh_table_path = (XTPathStrPtr) xt_dup_string(self, table_path);
439
ha_open_share(self, share);
441
popr_(); // Discard ha_cleanup_share(share);
443
xt_ht_put(self, pbxt_share_tables, share);
446
share->sh_use_count++;
447
freer_(); // xt_ht_unlock(pbxt_share_tables)
453
* Free shared information.
455
/*
 * Drop one reference to a share. Under the hash table lock the use count
 * is decremented; when it reaches zero the share is deleted from
 * pbxt_share_tables.
 */
static void ha_unget_share(XTThreadPtr self, XTSharePtr share)
457
xt_ht_lock(self, pbxt_share_tables);
458
pushr_(xt_ht_unlock, pbxt_share_tables);
460
if (!--share->sh_use_count)
461
xt_ht_del(self, pbxt_share_tables, share->sh_table_path);
463
freer_(); // xt_ht_unlock(pbxt_share_tables)
466
/*
 * Same as ha_unget_share(), but with an xtBool result.
 * NOTE(review): presumably 'removed' is set and returned when the share
 * was deleted from the hash table - confirm.
 */
static xtBool ha_unget_share_removed(XTThreadPtr self, XTSharePtr share)
468
xtBool removed = FALSE;
470
xt_ht_lock(self, pbxt_share_tables);
471
pushr_(xt_ht_unlock, pbxt_share_tables);
473
if (!--share->sh_use_count) {
475
xt_ht_del(self, pbxt_share_tables, share->sh_table_path);
478
freer_(); // xt_ht_unlock(pbxt_share_tables)
482
/*
 * Prepare the per-thread transaction state of 'self' from the settings
 * of the server thread 'thd':
 *  - isolation levels up to READ COMMITTED map to committed-read mode,
 *    anything higher to repeatable-read;
 *  - foreign key checks and auto-commit follow the thd options;
 *  - if 'set_table_trans' is given, st_table_trans is updated as well.
 * The abort/statement flags are cleared and the call waits for recovery
 * to reach XT_RECOVER_SWEPT.
 */
static inline void thd_init_xact(THD *thd, XTThreadPtr self, bool set_table_trans)
484
self->st_xact_mode = thd_tx_isolation(thd) <= ISO_READ_COMMITTED ? XT_XACT_COMMITTED_READ : XT_XACT_REPEATABLE_READ;
485
self->st_ignore_fkeys = (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
486
self->st_auto_commit = (thd_test_options(thd,(OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) == 0;
487
if (set_table_trans) {
489
self->st_table_trans = FALSE;
491
self->st_table_trans = thd_sql_command(thd) == SQLCOM_LOCK_TABLES;
494
self->st_abort_trans = FALSE;
495
self->st_stat_ended = FALSE;
496
self->st_stat_trans = FALSE;
497
XT_PRINT0(self, "xt_xn_begin\n");
498
xt_xres_wait_for_recovery(self, XT_RECOVER_SWEPT);
502
* -----------------------------------------------------------------------
507
/*
 * Release exclusive use of a share and drop the caller's reference to
 * it. 'share' is an XTSharePtr passed as void *.
 */
xtPublic void xt_ha_unlock_table(XTThreadPtr self, void *share)
509
ha_release_exclusive_use(self, (XTSharePtr) share);
510
ha_unget_share(self, (XTSharePtr) share);
513
/*
 * Release the reference held in the global 'pbxt_database' and clear
 * the pointer.
 */
xtPublic void xt_ha_close_global_database(XTThreadPtr self)
516
xt_heap_release(self, pbxt_database);
517
pbxt_database = NULL;
522
* Open a PBXT database given the path of a table.
523
* This function also returns the name of the table.
525
* We use the pbxt_database_mutex to lock this
526
* operation to make sure it does not occur while
527
* some other thread is doing a "closeall".
529
/*
 * Two variants are compiled, depending on XT_USE_GLOBAL_DB:
 *  - global database: open the database in mysql_real_data_home once,
 *    keep a reference in 'pbxt_database', and let the thread use it;
 *  - per-path database: derive the database directory from the table
 *    path and open it under pbxt_database_mutex, unless the thread
 *    already has that database open.
 */
xtPublic void xt_ha_open_database_of_table(XTThreadPtr self, XTPathStrPtr XT_UNUSED(table_path))
531
#ifdef XT_USE_GLOBAL_DB
532
if (!self->st_database) {
533
if (!pbxt_database) {
534
xt_open_database(self, mysql_real_data_home, TRUE);
536
* This can be done at the same time as the recovery thread,
537
* strictly speaking I need a lock.
539
if (!pbxt_database) {
540
pbxt_database = self->st_database;
541
xt_heap_reference(self, pbxt_database);
545
xt_use_database(self, pbxt_database, XT_FOR_USER);
548
char db_path[PATH_MAX];
550
xt_strcpy(PATH_MAX, db_path, (char *) table_path);
551
xt_remove_last_name_of_path(db_path);
552
xt_remove_dir_char(db_path);
554
if (self->st_database && xt_tab_compare_paths(self->st_database->db_name, xt_last_name_of_path(db_path)) == 0)
555
/* This thread already has this database open! */
558
/* Auto commit before changing the database: */
559
if (self->st_xact_data) {
560
/* PMC - This probably indicates something strange is happening:
562
* This sequence generates this error:
566
* create temporary table t3 (id int)|
568
* create function f10() returns int
570
* drop temporary table if exists t3;
571
* create temporary table t3 (id int) engine=myisam;
572
* insert into t3 select id from t4;
573
* return (select count(*) from t3);
578
* An error is generated because the same thread is used
579
* to open table t4 (at the start of the functions), and
580
* then to drop table t3. To drop t3 we need to
581
* switch the database, so we land up here!
583
xt_throw_xterr(XT_CONTEXT, XT_ERR_CANNOT_CHANGE_DB);
585
if (!xt_xn_commit(self))
590
xt_lock_mutex(self, &pbxt_database_mutex);
591
pushr_(xt_unlock_mutex, &pbxt_database_mutex);
592
xt_open_database(self, db_path, FALSE);
593
freer_(); // xt_unlock_mutex(&pbxt_database_mutex);
597
/*
 * Return the PBXT thread attached to the server thread 'thd', creating
 * and attaching one if the engine-data slot is still empty.
 * A new thread is named from user/host information (or a fixed "user"
 * string in one build variant) plus a "_<n>" suffix taken from a static
 * counter, and starts in repeatable-read mode. xt_create_thread()
 * receives 'e' for error reporting.
 * NOTE(review): the static counter is incremented without visible
 * locking - confirm callers serialise this.
 */
xtPublic XTThreadPtr xt_ha_set_current_thread(THD *thd, XTExceptionPtr e)
600
static int ha_thread_count = 0, ha_id;
603
if (!(self = (XTThreadPtr) *thd->getEngineData(pbxt_hton))) {
605
if (!(self = (XTThreadPtr) *thd_ha_data(thd, pbxt_hton))) {
607
// const Security_context *sctx;
611
ha_id = ++ha_thread_count;
612
sprintf(ha_id_str, "_%d", ha_id);
613
xt_strcpy(120,name,"user"); // TODO: Fix this hack
615
sctx = &thd->main_security_ctx;
618
xt_strcpy(120, name, sctx->user);
619
xt_strcat(120, name, "@");
624
xt_strcat(120, name, sctx->host);
626
xt_strcat(120, name, sctx->ip);
627
else if (thd->proc_info)
628
xt_strcat(120, name, (char *) thd->proc_info);
630
xt_strcat(120, name, "system");
632
xt_strcat(120, name, ha_id_str);
633
if (!(self = xt_create_thread(name, FALSE, TRUE, e)))
636
self->st_xact_mode = XT_XACT_REPEATABLE_READ;
638
*thd->getEngineData(pbxt_hton) = (void *) self;
640
*thd_ha_data(thd, pbxt_hton) = (void *) self;
646
/*
 * Detach the PBXT thread from a closing connection.
 * If a thread is attached to 'thd', the engine-data slot is cleared and
 * the thread is freed; if none is attached nothing happens.
 * Both build variants must test for a non-NULL 'self': with a negated
 * test an attached thread is never freed, and xt_free_thread() is
 * called with NULL when no thread is attached.
 */
xtPublic void xt_ha_close_connection(THD* thd)
651
if ((self = (XTThreadPtr) *thd->getEngineData(pbxt_hton))) {
652
*thd->getEngineData(pbxt_hton) = NULL;
654
if ((self = (XTThreadPtr) *thd_ha_data(thd, pbxt_hton))) {
655
*thd_ha_data(thd, pbxt_hton) = NULL;
657
xt_free_thread(self);
661
/*
 * Return the PBXT thread stored in the engine-data slot of 'thd'
 * without creating one; the result is whatever the slot holds.
 */
xtPublic XTThreadPtr xt_ha_thd_to_self(THD *thd)
664
return (XTThreadPtr) *thd->getEngineData(pbxt_hton);
666
return (XTThreadPtr) *thd_ha_data(thd, pbxt_hton);
671
/* The first bit is 1. */
672
/*
 * Scan a column bitmap for set bits. Two implementations are present:
 * one that tests every bit through isBitSet(), and one that walks the
 * bitmap words from the last word downwards and then searches within
 * the first non-zero word using a mask.
 * NOTE(review): presumably the result is the 1-based position of the
 * highest set bit, 0 when no bit is set - confirm.
 */
static u_int ha_get_max_bit(MX_BITMAP *map)
675
uint32_t cnt = map->numOfBitsInMap();
676
uint32_t max_bit = 0;
678
for (uint32_t i = 0; i < cnt; i++)
679
if (map->isBitSet(i))
684
my_bitmap_map *data_ptr = map->bitmap;
685
my_bitmap_map *end_ptr = map->last_word_ptr;
686
u_int cnt = map->n_bits;
689
for (; end_ptr >= data_ptr; end_ptr--) {
690
if ((b = *end_ptr)) {
693
if (end_ptr == map->getLastWordPtr() && map->getLastWordMask())
694
mask = map->getLastWordMask() >> 1;
697
while (!(b & mask)) {
699
/* Should not happen, but if it does, we hang! */
701
return map->numOfBitsInMap();
706
if (end_ptr == map->getLastWordPtr())
707
cnt = ((cnt-1) / 32) * 32;
717
* -----------------------------------------------------------------------
723
* In PBXT, as in MySQL: thread == connection.
725
* So we simply attach a PBXT thread to a MySQL thread.
727
/*
 * Wrapper around xt_ha_set_current_thread() for handler entry points.
 * When no PBXT thread can be obtained, the exception is logged.
 * NOTE(review): presumably *err then receives the matching handler
 * error code and NULL is returned - confirm.
 */
static XTThreadPtr ha_set_current_thread(THD *thd, int *err)
732
if (!(self = xt_ha_set_current_thread(thd, &e))) {
733
xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
740
/*
 * Translate a PBXT error code into the handler error code that is
 * reported to the server. Several PBXT codes share one handler code
 * (for example all the foreign key definition errors map to
 * HA_ERR_CANNOT_ADD_FOREIGN). Codes without a mapping yield -1.
 */
xtPublic int xt_ha_pbxt_to_mysql_error(int xt_err)
745
case XT_ERR_DUPLICATE_KEY:
746
return HA_ERR_FOUND_DUPP_KEY;
747
case XT_ERR_DEADLOCK:
748
return HA_ERR_LOCK_DEADLOCK;
749
case XT_ERR_RECORD_CHANGED:
750
/* If we generate HA_ERR_RECORD_CHANGED instead of HA_ERR_LOCK_WAIT_TIMEOUT
751
* then sysbench does not work because it does not handle this error.
753
//return HA_ERR_LOCK_WAIT_TIMEOUT; // but HA_ERR_RECORD_CHANGED is the correct error for an optimistic lock failure.
754
return HA_ERR_RECORD_CHANGED;
755
case XT_ERR_LOCK_TIMEOUT:
756
return HA_ERR_LOCK_WAIT_TIMEOUT;
757
case XT_ERR_TABLE_IN_USE:
758
return HA_ERR_WRONG_COMMAND;
759
case XT_ERR_TABLE_NOT_FOUND:
760
return HA_ERR_NO_SUCH_TABLE;
761
case XT_ERR_TABLE_EXISTS:
762
return HA_ERR_TABLE_EXIST;
763
case XT_ERR_CANNOT_CHANGE_DB:
764
return ER_TRG_IN_WRONG_SCHEMA;
765
case XT_ERR_COLUMN_NOT_FOUND:
766
return HA_ERR_CANNOT_ADD_FOREIGN;
767
case XT_ERR_NO_REFERENCED_ROW:
768
case XT_ERR_REF_TABLE_NOT_FOUND:
769
case XT_ERR_REF_TYPE_WRONG:
770
return HA_ERR_NO_REFERENCED_ROW;
771
case XT_ERR_ROW_IS_REFERENCED:
772
return HA_ERR_ROW_IS_REFERENCED;
773
case XT_ERR_COLUMN_IS_NOT_NULL:
774
case XT_ERR_INCORRECT_NO_OF_COLS:
775
case XT_ERR_FK_ON_TEMP_TABLE:
776
case XT_ERR_FK_REF_TEMP_TABLE:
777
return HA_ERR_CANNOT_ADD_FOREIGN;
778
case XT_ERR_DUPLICATE_FKEY:
779
return HA_ERR_FOREIGN_DUPLICATE_KEY;
780
case XT_ERR_RECORD_DELETED:
781
return HA_ERR_RECORD_DELETED;
783
return(-1); // Unknown error
786
/*
 * Convert the exception recorded on 'self' into a handler error code
 * (via xt_ha_pbxt_to_mysql_error()) and decide whether the current
 * transaction has to be abandoned.
 * Duplicate key errors abort the transaction only in auto-commit mode
 * and when 'ignore_dup_key' is not set; deadlock and foreign key row
 * errors always abort. On abort, the transaction is rolled back at once
 * if the thread holds no table locks; st_abort_trans is set and the
 * server is told to roll back through
 * thd_mark_transaction_to_rollback().
 */
xtPublic int xt_ha_pbxt_thread_error_for_mysql(THD *thd, const XTThreadPtr self, int ignore_dup_key)
788
int xt_err = self->t_exception.e_xt_err;
789
xtBool dup_key = FALSE;
791
XT_PRINT2(self, "xt_ha_pbxt_thread_error_for_mysql xt_err=%d auto commit=%d\n", (int) xt_err, (int) self->st_auto_commit);
795
case XT_ERR_DUPLICATE_KEY:
796
case XT_ERR_DUPLICATE_FKEY:
797
/* Let MySQL call rollback as and when it wants to for duplicate
800
* In addition, we are not allowed to do an auto-rollback
801
* inside a sub-statement (function() or procedure())
806
* create table t3 (c1 char(1) primary key not null)|
808
* create function bug12379()
811
* insert into t3 values('X');
812
* insert into t3 values('X');
820
* Not doing an auto-rollback should solve this problem in the
821
* case of duplicate key (but not in others - like deadlock)!
822
* I don't think this situation is handled correctly by MySQL.
825
/* If we are in auto-commit mode (and we are not ignoring
826
* duplicate keys) then rollback the transaction automatically.
829
if (!ignore_dup_key && self->st_auto_commit)
830
goto abort_transaction;
832
case XT_ERR_DEADLOCK:
833
case XT_ERR_NO_REFERENCED_ROW:
834
case XT_ERR_ROW_IS_REFERENCED:
835
goto abort_transaction;
836
case XT_ERR_RECORD_CHANGED:
837
/* MySQL also handles the locked error. NOTE: There is no automatic
842
xt_log_exception(self, &self->t_exception, XT_LOG_DEFAULT);
844
/* PMC 2006-08-30: It should be that this is not necessary!
846
* It is only necessary to call ha_rollback() if the engine
847
* aborts the transaction.
849
* On the other hand, I shouldn't need to rollback the
850
* transaction because, if I return an error, MySQL
851
* should do it for me.
853
* Unfortunately, when auto-commit is off, MySQL does not
854
* rollback automatically (for example when a deadlock
857
* And when we have a multi update we cannot rely on this
858
* either (see comment above).
860
if (self->st_xact_data) {
863
* A result of the "st_abort_trans = TRUE" below is that
864
* the following code results in an empty set.
865
* The reason is "ignore_dup_key" is not set so
866
* the duplicate key leads to an error which causes
867
* the transaction to be aborted.
868
* The delayed inserts are all executed in one transaction.
871
* c1 INT(11) NOT NULL AUTO_INCREMENT,
872
* c2 INT(11) DEFAULT NULL,
876
* INSERT DELAYED INTO t1 VALUES(NULL, 11), (NULL, 12);
877
* INSERT DELAYED INTO t1 VALUES(14, 91);
878
* INSERT DELAYED INTO t1 VALUES (NULL, 92), (NULL, 93);
882
if (self->st_lock_count == 0) {
883
/* No table locks, must rollback immediately
884
* (there will be no possibility later!
886
XT_PRINT1(self, "xt_xn_rollback xt_err=%d\n", xt_err);
887
if (!xt_xn_rollback(self))
888
xt_log_exception(self, &self->t_exception, XT_LOG_DEFAULT);
891
/* Locks are held on tables.
892
* Only rollback after locks are released.
894
/* I do not think this is required, because
895
* I tell mysql to rollback below,
896
* besides it is a hack!
897
self->st_auto_commit = TRUE;
899
self->st_abort_trans = TRUE;
901
/* Only tell MySQL to rollback if we automatically rollback.
902
* Note: calling this with (thd, FALSE), causes sp.test to fail.
906
thd_mark_transaction_to_rollback(thd, TRUE);
911
return xt_ha_pbxt_to_mysql_error(xt_err);
914
/*
 * Per-thread callback for xt_do_to_all_threads(): if 'other_thr' is
 * using the database 'db', make it stop using it.
 */
static void ha_conditional_close_database(XTThreadPtr self, XTThreadPtr other_thr, void *db)
916
if (other_thr->st_database == (XTDatabaseHPtr) db)
917
xt_unuse_database(self, other_thr);
921
* This is only called from drop database, so we know that
922
* no thread is actually using the database. This means that it
923
* must be safe to close the database.
925
/*
 * Make every thread that currently uses 'db' release it. The walk over
 * all threads runs under pbxt_database_mutex, the same mutex that
 * protects opening a database.
 */
xtPublic void xt_ha_all_threads_close_database(XTThreadPtr self, XTDatabaseHPtr db)
927
xt_lock_mutex(self, &pbxt_database_mutex);
928
pushr_(xt_unlock_mutex, &pbxt_database_mutex);
929
xt_do_to_all_threads(self, ha_conditional_close_database, db);
930
freer_(); // xt_unlock_mutex(&pbxt_database_mutex);
933
/*
 * Convenience form of xt_ha_pbxt_thread_error_for_mysql() that uses the
 * current server thread and the current PBXT thread.
 */
static int ha_log_pbxt_thread_error_for_mysql(int ignore_dup_key)
935
return xt_ha_pbxt_thread_error_for_mysql(current_thd, myxt_get_self(), ignore_dup_key);
939
* -----------------------------------------------------------------------
943
/*
 * Resolve one size parameter described by 'vp'.
 * '*value' is set to the text of the value in use: it can come from the
 * environment variable named vp->vp_var, from the default vp->vp_def,
 * or from vp->vp_min. The text is parsed with xt_byte_size_to_int8().
 * The upper limit is vp_max8 when size_t is 8 bytes wide and vp_max4
 * otherwise.
 * NOTE(review): presumably the parsed value is clamped to [min, max] and
 * returned - confirm.
 */
static xtWord8 ha_set_variable(char **value, HAVarParamsPtr vp)
950
*value = getenv(vp->vp_var);
952
*value = (char *) vp->vp_def;
953
result = xt_byte_size_to_int8(*value);
954
mi = (xtWord8) xt_byte_size_to_int8(vp->vp_min);
957
*value = (char *) vp->vp_min;
959
if (sizeof(size_t) == 8)
960
mm = (char *) vp->vp_max8;
962
mm = (char *) vp->vp_max4;
963
ma = (xtWord8) xt_byte_size_to_int8(mm);
971
/*
 * Engine start-up proper. Logs the banner, resolves all size parameters
 * through ha_set_variable(), copies them into the xt_db_* globals (the
 * two buffer sizes are aligned to 512), chooses the case-sensitive or
 * case-insensitive share hash table, locks the installation directory
 * and initialises the system tables, databases and the index, record
 * and log caches.
 */
static void pbxt_call_init(XTThreadPtr self)
973
xtInt8 index_cache_size;
974
xtInt8 record_cache_size;
975
xtInt8 log_cache_size;
976
xtInt8 log_file_threshold;
977
xtInt8 transaction_buffer_size;
978
xtInt8 log_buffer_size;
979
xtInt8 checkpoint_frequency;
980
xtInt8 data_log_threshold;
981
xtInt8 data_file_grow_size;
982
xtInt8 row_file_grow_size;
983
xtInt8 record_write_threshold;
985
xt_logf(XT_NT_INFO, "PrimeBase XT (PBXT) Engine %s loaded...\n", xt_get_version());
986
xt_logf(XT_NT_INFO, "Paul McCullagh, PrimeBase Technologies GmbH, http://www.primebase.org\n");
988
index_cache_size = ha_set_variable(&pbxt_index_cache_size, &vp_index_cache_size);
989
record_cache_size = ha_set_variable(&pbxt_record_cache_size, &vp_record_cache_size);
990
log_cache_size = ha_set_variable(&pbxt_log_cache_size, &vp_log_cache_size);
991
log_file_threshold = ha_set_variable(&pbxt_log_file_threshold, &vp_log_file_threshold);
992
transaction_buffer_size = ha_set_variable(&pbxt_transaction_buffer_size, &vp_transaction_buffer_size);
993
log_buffer_size = ha_set_variable(&pbxt_log_buffer_size, &vp_log_buffer_size);
994
checkpoint_frequency = ha_set_variable(&pbxt_checkpoint_frequency, &vp_checkpoint_frequency);
995
data_log_threshold = ha_set_variable(&pbxt_data_log_threshold, &vp_data_log_threshold);
996
data_file_grow_size = ha_set_variable(&pbxt_data_file_grow_size, &vp_data_file_grow_size);
997
row_file_grow_size = ha_set_variable(&pbxt_row_file_grow_size, &vp_row_file_grow_size);
998
record_write_threshold = ha_set_variable(&pbxt_record_write_threshold, &vp_record_write_threshold);
1000
xt_db_log_file_threshold = (xtLogOffset) log_file_threshold;
1001
xt_db_log_buffer_size = (size_t) xt_align_offset(log_buffer_size, 512);
1002
xt_db_transaction_buffer_size = (size_t) xt_align_offset(transaction_buffer_size, 512);
1003
xt_db_checkpoint_frequency = (size_t) checkpoint_frequency;
1004
xt_db_data_log_threshold = (off_t) data_log_threshold;
1005
xt_db_data_file_grow_size = (size_t) data_file_grow_size;
1006
xt_db_row_file_grow_size = (size_t) row_file_grow_size;
1007
xt_db_record_write_threshold = (size_t) record_write_threshold;
1010
pbxt_ignore_case = TRUE;
1012
pbxt_ignore_case = lower_case_table_names != 0;
1014
if (pbxt_ignore_case)
1015
pbxt_share_tables = xt_new_hashtable(self, ha_hash_comp_ci, ha_hash_ci, ha_hash_free, TRUE, FALSE);
1017
pbxt_share_tables = xt_new_hashtable(self, ha_hash_comp, ha_hash, ha_hash_free, TRUE, FALSE);
1020
xt_lock_installation(self, mysql_real_data_home);
1021
XTSystemTableShare::startUp(self);
1022
xt_init_databases(self);
1023
xt_ind_init(self, (size_t) index_cache_size);
1024
xt_tc_init(self, (size_t) record_cache_size);
1025
xt_xlog_init(self, (size_t) log_cache_size);
1028
/*
 * Engine shutdown proper. Stops the database threads and the freeer,
 * shuts down the databases and system tables, unlocks the installation
 * directory and frees the share hash table if it exists. With
 * XT_USE_GLOBAL_DB the global database reference is released as well.
 */
static void pbxt_call_exit(XTThreadPtr self)
1030
xt_logf(XT_NT_INFO, "PrimeBase XT Engine shutdown...\n");
1032
#ifdef TRACE_STATEMENTS
1035
#ifdef XT_USE_GLOBAL_DB
1036
xt_ha_close_global_database(self);
1039
//xt_stop_database_threads(self, FALSE);
1040
xt_stop_database_threads(self, TRUE);
1042
xt_stop_database_threads(self, TRUE);
1044
/* This will tell the freeer to quit ASAP: */
1045
xt_quit_freeer(self);
1046
/* We conditionally stop the freeer here, because if we are
1047
* in startup, then the freeer will be hanging.
1050
* This problem has been solved by MySQL!
1052
xt_stop_freeer(self);
1053
xt_exit_databases(self);
1054
XTSystemTableShare::shutDown(self);
1058
xt_unlock_installation(self, mysql_real_data_home);
1060
if (pbxt_share_tables) {
1061
xt_free_hashtable(self, pbxt_share_tables);
1062
pbxt_share_tables = NULL;
1067
* Shutdown the PBXT sub-system.
1069
/*
 * Top-level shutdown sequence: terminate recovery, release any database
 * the calling thread still uses, run pbxt_call_exit(), shut down
 * threading, destroy pbxt_database_mutex and clear 'pbxt_inited'.
 */
static void ha_exit(XTThreadPtr self)
1071
xt_xres_terminate_recovery(self);
1073
/* Wrap things up... */
1074
xt_unuse_database(self, self); /* Just in case the main thread has a database in use (for testing)? */
1075
/* This may cause the streaming engine to cleanup connections and
1076
* tables belonging to this engine. This in turn may require some of
1077
* the stuff below (like xt_create_thread() called from pbxt_close_table()! */
1081
pbxt_call_exit(self);
1082
xt_exit_threading(self);
1085
xt_p_mutex_destroy(&pbxt_database_mutex);
1086
pbxt_inited = false;
1090
* Output the PBXT status. Return FALSE on error.
1093
/*
 * Status callback, present with two alternative signatures (a
 * PBXTStorageEngine method and a handlerton function).
 * Obtains the PBXT thread for 'thd', collects the status text into a
 * string buffer with myxt_get_status() and hands it to 'stat_print'
 * under the name "PBXT". The string buffer is emptied afterwards.
 * Optional debug sections dump traces or connection tracking.
 */
bool PBXTStorageEngine::show_status(Session *thd, stat_print_fn *stat_print, enum ha_stat_type)
1095
static bool pbxt_show_status(handlerton *XT_UNUSED(hton), THD* thd,
1096
stat_print_fn* stat_print,
1097
enum ha_stat_type XT_UNUSED(stat_type))
1102
XTStringBufferRec strbuf = { 0, 0, 0 };
1103
bool not_ok = FALSE;
1105
if (!(self = ha_set_current_thread(thd, &err)))
1108
#ifdef XT_SHOW_DUMPS_TRACE
1109
//if (pbxt_database)
1110
// xt_dump_xlogs(pbxt_database, 0);
1111
xt_trace("// %s - dump\n", xt_trace_clock_diff(NULL));
1114
#ifdef XT_TRACK_CONNECTIONS
1115
xt_dump_conn_tracking();
1119
xt_unit_test_async_task(self);
1123
myxt_get_status(self, &strbuf);
1131
if (stat_print(thd, "PBXT", 4, "", 0, strbuf.sb_cstring, (uint) strbuf.sb_len))
1134
xt_sb_set_size(self, &strbuf, 0);
1140
* Initialize the PBXT sub-system.
1142
* return 1 on error, else 0.
1145
static int pbxt_init(Context ®istry)
1147
static int pbxt_init(void *p)
1152
XT_PRINT0(NULL, "pbxt_init\n");
1154
if (sizeof(xtWordPS) != sizeof(void *)) {
1155
printf("PBXT: This won't work, I require that sizeof(xtWordPS) == sizeof(void *)!\n");
1159
/* GOTCHA: This will "detect" if are loading the plug-in
1160
* with different --with-debug option to MySQL.
1162
* In this case, you will get an error when loading the
1163
* library that some symbol was not found.
1165
void *dummy = my_malloc(100, MYF(0));
1166
my_free((byte *) dummy, MYF(0));
1169
XTThreadPtr self = NULL;
1171
xt_p_mutex_init_with_autoname(&pbxt_database_mutex, NULL);
1174
pbxt_hton= new PBXTStorageEngine(std::string("PBXT"));
1175
registry.add(pbxt_hton);
1177
pbxt_hton = (handlerton *) p;
1178
pbxt_hton->state = SHOW_OPTION_YES;
1179
pbxt_hton->db_type = DB_TYPE_PBXT; // Wow! I have my own!
1180
pbxt_hton->close_connection = pbxt_close_connection; /* close_connection, cleanup thread related data. */
1181
pbxt_hton->commit = pbxt_commit; /* commit */
1182
pbxt_hton->rollback = pbxt_rollback; /* rollback */
1183
if (pbxt_support_xa) {
1184
pbxt_hton->prepare = pbxt_prepare;
1185
pbxt_hton->recover = pbxt_recover;
1186
pbxt_hton->commit_by_xid = pbxt_commit_by_xid;
1187
pbxt_hton->rollback_by_xid = pbxt_rollback_by_xid;
1190
pbxt_hton->prepare = NULL;
1191
pbxt_hton->recover = NULL;
1192
pbxt_hton->commit_by_xid = NULL;
1193
pbxt_hton->rollback_by_xid = NULL;
1195
pbxt_hton->create = pbxt_create_handler; /* Create a new handler */
1196
pbxt_hton->drop_database = pbxt_drop_database; /* Drop a database */
1197
pbxt_hton->panic = pbxt_panic; /* Panic call */
1198
pbxt_hton->show_status = pbxt_show_status;
1199
pbxt_hton->flags = HTON_NO_FLAGS; /* HTON_CAN_RECREATE - Without this flags TRUNCATE uses delete_all_rows() */
1200
pbxt_hton->slot = (uint)-1; /* assign invald value, so we know when it's inited later */
1201
pbxt_hton->start_consistent_snapshot = pbxt_start_consistent_snapshot;
1202
#if defined(MYSQL_SUPPORTS_BACKUP) && defined(XT_ENABLE_ONLINE_BACKUP)
1203
pbxt_hton->get_backup_engine = pbxt_backup_engine;
1206
if (!xt_init_logging()) /* Initialize logging */
1210
PBMSResultRec result;
1211
if (!pbms_initialize("PBXT", false, &result)) {
1212
xt_logf(XT_NT_ERROR, "pbms_initialize() Error: %s", result.mr_message);
1217
if (!xt_init_memory()) /* Initialize memory */
1220
self = xt_init_threading(); /* Create the main self: */
1227
/* Initialize all systems */
1228
pbxt_call_init(self);
1230
/* Conditional unit test: */
1232
//xt_unit_test_create_threads(self);
1233
//xt_unit_test_read_write_locks(self);
1234
//xt_unit_test_mutex_locks(self);
1237
/* {OPEN-DB-SWEEPER-WAIT}
1238
* I have to start the freeer before I open and recover the database
1239
* because it we run out of cache while waiting for the sweeper
1242
xt_start_freeer(self);
1244
/* This function is called with LOCK_plugin locked.
1245
* This prevents the opening of .frm files, which
1246
* is required for recovery.
1247
* Our solution is to start reovery in a thread
1248
* so that it can run after LOCK_plugin is released.
1250
xt_xres_start_database_recovery(self);
1253
xt_log_exception(self, &self->t_exception, XT_LOG_DEFAULT);
1259
/* {FREEER-HANG} The free-er will be hung in:
1260
#0 0x91fc6a2e in semaphore_wait_signal_trap
1261
#1 0x91fce505 in pthread_mutex_lock
1262
#2 0x00489633 in safe_mutex_lock at thr_mutex.c:149
1263
#3 0x002dfca9 in plugin_thdvar_init at sql_plugin.cc:2398
1264
#4 0x000d6a12 in THD::init at sql_class.cc:715
1265
#5 0x000de9d3 in THD::THD at sql_class.cc:597
1266
#6 0x000debe1 in THD::THD at sql_class.cc:631
1267
#7 0x00e207a4 in myxt_create_thread at myxt_xt.cc:2666
1268
#8 0x00e3134b in tabc_fr_run_thread at tabcache_xt.cc:982
1269
#9 0x00e422ca in xt_thread_main at thread_xt.cc:1006
1270
#10 0x91ff7c55 in _pthread_start
1271
#11 0x91ff7b12 in thread_start
1273
* so it is not good trying to stop it here!
1275
* With regard to this problem, see {OPEN-DB-SWEEPER-WAIT}
1276
* Due to this problem, I will probably have to hack
1277
* the mutex so that the freeer can get started...
1279
* NOPE! problem has gone in 6.0.9. Also not a problem in
1283
/* {OPEN-DB-SWEEPER-WAIT}
1284
* I have to stop the freeer here because it was
1285
* started before opening the database.
1288
/* {FREEER-HANG-ON-INIT-ERROR}
1289
* pbxt_init is called with LOCK_plugin and if it fails and tries to exit
1290
* the freeer here it hangs because the freeer calls THD::~THD which tries
1291
* to aquire the same lock and hangs. OTOH MySQL calls pbxt_end() after
1292
* an unsuccessful call to pbxt_init, so we defer cleaup, except
1295
xt_free_thread(self);
1298
xt_free_thread(self);
1300
XT_RETURN(init_err);
1313
// Engine shutdown entry point. Clears pbxt_inited (which also marks the
// engine as shutting down) and creates a temporary PBXT thread named
// TempForEnd, which is flagged as the main thread through t_main.
// The remainder of the shutdown sequence is not visible in this chunk.
static int pbxt_end(void *)
1323
/* This flag also means "shutting down". */
1324
pbxt_inited = false;
1325
self = xt_create_thread("TempForEnd", FALSE, TRUE, &e);
1327
self->t_main = TRUE;
1335
// Destructor of the storage engine object. Its body is not visible in this
// chunk.
PBXTStorageEngine::~PBXTStorageEngine()
1341
// Panic callback (installed as the panic member of the handlerton in
// pbxt_init). Forwards to pbxt_end() and returns its result; the flag
// argument is not used in the visible code.
static int pbxt_panic(handlerton *hton, enum ha_panic_function flag)
1343
return pbxt_end(hton);
1348
* Kill the PBXT thread associated with the MySQL thread.
1351
// Release the PBXT thread attached to the server thread thd.
// Two signatures and two ways of reaching the per-thread engine slot are
// visible (getEngineData and thd_ha_data); presumably a preprocessor guard
// that is not visible in this chunk selects one variant - TODO confirm.
// If the slot holds a PBXT thread, the slot is cleared first and the thread is
// then freed with xt_free_thread().
int PBXTStorageEngine::close_connection(Session *thd)
1353
PBXTStorageEngine * const hton = this;
1355
static int pbxt_close_connection(handlerton *hton, THD* thd)
1362
if ((self = (XTThreadPtr) *thd->getEngineData(hton))) {
1363
*thd->getEngineData(pbxt_hton) = NULL;
1365
if ((self = (XTThreadPtr) *thd_ha_data(thd, hton))) {
1366
*thd_ha_data(thd, hton) = NULL;
1368
/* Required because freeing the thread could cause
1369
* free of database which could call xt_close_file_ns()!
1372
xt_free_thread(self);
1378
* Currently does nothing because it was all done
1379
* when the last PBXT table was removed from the
1383
// Drop-database callback. No statements are visible for it in this chunk, and
// both visible signatures leave their arguments unused (unnamed or wrapped in
// XT_UNUSED).
void PBXTStorageEngine::drop_database(char *)
1385
static void pbxt_drop_database(handlerton *XT_UNUSED(hton), char *XT_UNUSED(path))
1392
* NOTES ON TRANSACTIONS:
1394
* 1. If self->st_lock_count == 0 the transaction can be ended immediately.
1395
* If not, we must wait until the last lock is released on the last handler
1396
* to ensure that the tables are flushed before the transaction is
1397
* committed or aborted.
1399
* 2. all (below) indicates, within a BEGIN/END (i.e. auto_commit off) whether
1400
* the statement or the entire transaction is being terminated.
1401
* We currently ignore statement termination.
1403
* 3. If in BEGIN/END we must call ha_rollback() if we abort the transaction
1406
* NOTE ON CONSISTENT SNAPSHOTS:
1408
* PBXT itself doesn't need this function as its transaction mechanism provides
1409
* consistent snapshots for all transactions by default. This function is needed
1410
* only for multi-engine cases like this:
1412
* CREATE TABLE t1 ... ENGINE=INNODB
1413
* CREATE TABLE t2 ... ENGINE=PBXT
1414
* START TRANSACTION WITH CONSISTENT SNAPSHOT
1415
* SELECT * FROM t1 <-- at this point we need to know about the snapshot
1419
// Handlerton callback for START TRANSACTION WITH CONSISTENT SNAPSHOT.
// Binds the server thread to a PBXT thread, opens the global database for it
// if the thread has none and pbxt_database is set, initializes the transaction
// state with thd_init_xact() and begins a PBXT transaction. On success the
// transaction is registered with the server through trans_register_ha();
// otherwise the PBXT error is translated for MySQL.
static int pbxt_start_consistent_snapshot(handlerton *hton, THD *thd)
1422
XTThreadPtr self = ha_set_current_thread(thd, &err);
1424
// NOTE(review): no NULL check on self is visible before this dereference,
// while other callers of ha_set_current_thread() test the result - confirm.
if (!self->st_database && pbxt_database) {
1425
xt_ha_open_database_of_table(self, (XTPathStrPtr) NULL);
1428
thd_init_xact(thd, self, true);
1430
if (xt_xn_begin(self)) {
1431
trans_register_ha(thd, TRUE, hton);
1433
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1437
* As of MySQL 5.1.41 the return value is not checked, so the server might assume
1438
* everything is fine even it isn't. InnoDB returns 0 on success.
1445
* Commit the PBXT transaction of the given thread.
1446
* thd is the MySQL thread structure.
1447
* pbxt_thr is a pointer to the PBXT thread structure.
1451
// Commit callback.
// thd is the server thread; all is true when the whole transaction ends and
// false when only a statement ends.
// Two signatures and two ways of fetching the PBXT thread from thd are
// visible; presumably a preprocessor guard that is not visible in this chunk
// selects one variant - TODO confirm.
// A PBXT commit is issued only when the thread has transaction data and
// either all is set or the thread is in auto-commit mode. A failed commit is
// translated into a MySQL error code. st_stat_trans is cleared afterwards.
int PBXTStorageEngine::commit(Session *thd, bool all)
1453
PBXTStorageEngine * const hton = this;
1455
static int pbxt_commit(handlerton *hton, THD *thd, bool all)
1462
if ((self = (XTThreadPtr) *thd->getEngineData(hton))) {
1464
if ((self = (XTThreadPtr) *thd_ha_data(thd, hton))) {
1466
XT_PRINT2(self, "%s pbxt_commit all=%d\n", all ? "END CONN XACT" : "END STAT", all);
1468
if (self->st_xact_data) {
1469
/* There are no table locks, commit immediately in all cases
1470
* except when this is a statement commit with an explicit
1471
* transaction (!all && !self->st_auto_commit).
1473
if (all || self->st_auto_commit) {
1474
XT_PRINT0(self, "xt_xn_commit in pbxt_commit\n");
1476
if (!xt_xn_commit(self))
1477
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1481
self->st_stat_trans = FALSE;
1487
// Rollback callback; mirrors the commit callback.
// thd is the server thread; all is true when the whole transaction ends and
// false when only a statement ends.
// Two signatures and two ways of fetching the PBXT thread from thd are
// visible; presumably a preprocessor guard that is not visible in this chunk
// selects one variant - TODO confirm.
// A PBXT rollback is issued only when the thread has transaction data and
// either all is set or the thread is in auto-commit mode. A failed rollback is
// translated into a MySQL error code. st_stat_trans is cleared afterwards.
int PBXTStorageEngine::rollback(Session *thd, bool all)
1489
PBXTStorageEngine * const hton = this;
1491
static int pbxt_rollback(handlerton *hton, THD *thd, bool all)
1498
if ((self = (XTThreadPtr) *thd->getEngineData(hton))) {
1500
if ((self = (XTThreadPtr) *thd_ha_data(thd, hton))) {
1502
XT_PRINT2(self, "%s pbxt_rollback all=%d\n", all ? "CONN END XACT" : "STAT END", all);
1504
if (self->st_xact_data) {
1505
/* There are no table locks, rollback immediately in all cases
1506
* except when this is a statement commit with an explicit
1507
* transaction (!all && !self->st_auto_commit).
1509
* Note, the only reason for a rollback of a operation is
1510
* due to an error. In this case PBXT has already
1511
* undone the effects of the operation.
1513
* However, this is not the same as statement rollback
1514
* which can involve a number of operations.
1516
* TODO: Implement statement rollback.
1518
if (all || self->st_auto_commit) {
1519
XT_PRINT0(self, "xt_xn_rollback\n");
1520
if (!xt_xn_rollback(self))
1521
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1525
self->st_stat_trans = FALSE;
1531
// Handler factory. Allocates the handler object in mem_root: an ha_xtsys when
// the table path names a PBXT system table (as decided by
// XTSystemTableShare::isSystemTable), otherwise a regular ha_pbxt.
// Two signatures are visible; the pointer-based variant also tolerates a NULL
// table. Presumably a preprocessor guard that is not visible in this chunk
// selects one of them - TODO confirm.
Cursor *PBXTStorageEngine::create(TableShare& table, memory::Root *mem_root)
1533
PBXTStorageEngine * const hton = this;
1534
if (XTSystemTableShare::isSystemTable(table.path.str))
1536
static handler *pbxt_create_handler(handlerton *hton, TABLE_SHARE *table, MEM_ROOT *mem_root)
1538
if (table && XTSystemTableShare::isSystemTable(table->path.str))
1540
return new (mem_root) ha_xtsys(hton, table);
1542
return new (mem_root) ha_pbxt(hton, table);
1546
* -----------------------------------------------------------------------
1553
// XA prepare callback (installed in the handlerton only when pbxt_support_xa
// is set).
// thd is the server thread; all is true when the whole transaction is being
// prepared and false for a statement end.
// When the PBXT thread has transaction data and either all is set or the
// thread is in auto-commit mode, the XID of thd is fetched and the
// transaction is prepared with xt_xn_prepare(). A failure is translated into
// a MySQL error code.
static int pbxt_prepare(handlerton *hton, THD *thd, bool all)
1559
if ((self = (XTThreadPtr) *thd_ha_data(thd, hton))) {
1560
XT_PRINT1(self, "pbxt_prepare all=%d\n", all);
1562
if (self->st_xact_data) {
1563
/* There are no table locks, prepare immediately in all cases
1564
* except when this is a statement commit with an explicit
1565
* transaction (!all && !self->st_auto_commit).
1567
if (all || self->st_auto_commit) {
1570
XT_PRINT0(self, "xt_xn_prepare in pbxt_prepare\n");
1571
thd_get_xid(thd, (MYSQL_XID*) &xid);
1573
if (!xt_xn_prepare(xid.length(), (xtWord1 *) &xid, self))
1574
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1581
// Obtain a PBXT thread that has the global database open, for use by the XA
// callbacks below.
// The PBXT thread attached to the current server thread is looked up first; a
// new PBXT thread named thread_name is created with xt_create_thread() on a
// path whose condition is not visible in this chunk. The function then waits
// for recovery to complete and opens the database under mysql_real_data_home.
// ret_thd:     presumably receives the server thread in use - TODO confirm.
// temp_thread: bit mask describing what was created here. In the visible
//              cleanup code, value 1 causes the PBXT thread to be freed and
//              value 2 causes the server thread to be destroyed.
// err:         receives a MySQL error code on failure.
static XTThreadPtr ha_temp_open_global_database(handlerton *hton, THD **ret_thd, int *temp_thread, char *thread_name, int *err)
1584
XTThreadPtr self = NULL;
1587
if ((thd = current_thd))
1588
self = (XTThreadPtr) *thd_ha_data(thd, hton);
1590
//thd = (THD *) myxt_create_thread();
1591
//*temp_thread |= 2;
1597
if (!(self = xt_create_thread(thread_name, FALSE, TRUE, &e))) {
1598
*err = xt_ha_pbxt_to_mysql_error(e.e_xt_err);
1599
xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
1605
xt_xres_wait_for_recovery(self, XT_RECOVER_DONE);
1608
xt_open_database(self, mysql_real_data_home, TRUE);
1611
*err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1612
if ((*temp_thread & 1))
1613
xt_free_thread(self);
1614
if (*temp_thread & 2)
1615
myxt_destroy_thread(thd, FALSE);
1624
// Counterpart of ha_temp_open_global_database(): releases the database held
// by self, then frees the PBXT thread when temp_thread has value 1 set and
// destroys the server thread when it has value 2 set.
static void ha_temp_close_database(XTThreadPtr self, THD *thd, int temp_thread)
1626
xt_unuse_database(self, self);
1627
if (temp_thread & 1)
1628
xt_free_thread(self);
1629
if (temp_thread & 2)
1630
myxt_destroy_thread(thd, TRUE);
1633
/* Return all prepared transactions, found during recovery.
1634
* This function returns a count. If len is returned, the
1635
* function will be called again.
1637
// XA recover callback: fills xid_list with up to len XIDs of prepared
// transactions, enumerated from the database with xt_xn_enum_xa_data() and
// the enumerator state pbxt_xa_enum.
// A temporary PBXT thread and database are obtained through
// ha_temp_open_global_database() and released again before returning.
static int pbxt_recover(handlerton *hton, XID *xid_list, uint len)
1643
XTXactPreparePtr xap;
1647
if (!(self = ha_temp_open_global_database(hton, &thd, &temp_thread, "TempForRecover", &err)))
1650
db = self->st_database;
1652
for (count=0; count<len; count++) {
1653
xap = xt_xn_enum_xa_data(db, &pbxt_xa_enum);
1656
// NOTE(review): xp_data_len bytes are copied into a single XID slot; no
// bound check against the size of XID is visible in this chunk - confirm.
memcpy(&xid_list[count], xap->xp_xa_data, xap->xp_data_len);
1659
ha_temp_close_database(self, thd, temp_thread);
1663
// XA callback: commit the prepared transaction identified by xid.
// Looks up the XA record for xid, attaches the matching transaction to the
// temporary thread, clears its XT_XN_XAC_PREPARED flag and commits it. A
// failed commit is translated into a MySQL error code. The XA record is then
// deleted and the temporary database/thread released.
static int pbxt_commit_by_xid(handlerton *hton, XID *xid)
1669
XTXactPreparePtr xap;
1674
if (!(self = ha_temp_open_global_database(hton, &thd, &temp_thread, "TempForCommitXA", &err)))
1676
db = self->st_database;
1678
if ((xap = xt_xn_find_xa_data(db, xid->length(), (xtWord1 *) xid, TRUE, self))) {
1679
if ((self->st_xact_data = xt_xn_get_xact(db, xap->xp_xact_id, self))) {
1680
self->st_xact_data->xd_flags &= ~XT_XN_XAC_PREPARED; // Prepared transactions cannot be swept!
1681
if (!xt_xn_commit(self))
1682
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1684
xt_xn_delete_xa_data(db, xap, TRUE, self);
1687
ha_temp_close_database(self, thd, temp_thread);
1691
// XA callback: roll back the prepared transaction identified by xid.
// Same sequence as pbxt_commit_by_xid(), except that the attached transaction
// is ended with xt_xn_rollback().
static int pbxt_rollback_by_xid(handlerton *hton, XID *xid)
1697
XTXactPreparePtr xap;
1702
if (!(self = ha_temp_open_global_database(hton, &thd, &temp_thread, "TempForRollbackXA", &err)))
1704
db = self->st_database;
1706
if ((xap = xt_xn_find_xa_data(db, xid->length(), (xtWord1 *) xid, TRUE, self))) {
1707
if ((self->st_xact_data = xt_xn_get_xact(db, xap->xp_xact_id, self))) {
1708
self->st_xact_data->xd_flags &= ~XT_XN_XAC_PREPARED; // Prepared transactions cannot be swept!
1709
if (!xt_xn_rollback(self))
1710
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1712
xt_xn_delete_xa_data(db, xap, TRUE, self);
1715
ha_temp_close_database(self, thd, temp_thread);
1722
* -----------------------------------------------------------------------
1723
* HANDLER LOCKING FUNCTIONS
1725
* These functions are used to get a lock on all handlers of a particular table.
1729
// Insert handler at the front of the doubly linked handler list of share
// (linked through pb_ex_next and pb_ex_prev, headed by sh_handlers).
// The list is modified while holding sh_ex_mutex; the unlock is pushed as a
// cleanup action with pushr_ and performed by freer_.
static void ha_add_to_handler_list(XTThreadPtr self, XTSharePtr share, ha_pbxt *handler)
1731
xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
1732
pushr_(xt_unlock_mutex, share->sh_ex_mutex);
1734
handler->pb_ex_next = share->sh_handlers;
1735
handler->pb_ex_prev = NULL;
1736
if (share->sh_handlers)
1737
share->sh_handlers->pb_ex_prev = handler;
1738
share->sh_handlers = handler;
1740
freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1743
// Unlink handler from the doubly linked handler list of share while holding
// sh_ex_mutex. The list head is advanced when handler is the first element,
// and the neighbours on both sides are re-linked to each other.
// The link fields of handler itself are left unchanged by the visible code.
static void ha_remove_from_handler_list(XTThreadPtr self, XTSharePtr share, ha_pbxt *handler)
1745
xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
1746
pushr_(xt_unlock_mutex, share->sh_ex_mutex);
1748
/* Move front pointer: */
1749
if (share->sh_handlers == handler)
1750
share->sh_handlers = handler->pb_ex_next;
1752
/* Remove from list: */
1753
if (handler->pb_ex_prev)
1754
handler->pb_ex_prev->pb_ex_next = handler->pb_ex_next;
1755
if (handler->pb_ex_next)
1756
handler->pb_ex_next->pb_ex_prev = handler->pb_ex_prev;
1758
freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1762
* Acquire exclusive use of a table, by waiting for all
1763
* threads to complete use of all handlers of the table.
1764
* At the same time we hold up all threads
1765
* that want to use handlers belonging to the table.
1767
* But we do not hold up threads that close the handlers.
1769
// Take the exclusive table lock on share for the handler mine.
// Phase 1: under sh_ex_mutex, wait on sh_ex_cond until sh_table_lock is
//          clear, then set it so that other users see the exclusive lock.
// Phase 2: walk the handler list and wait until no handler other than mine
//          has pb_ex_in_use set. After each wait the walk restarts, as noted
//          in the comment near the end of the loop.
// Both phases throw XT_ERR_LOCK_TIMEOUT once end_time has passed. The mutex
// is released through freer_ before each throw.
// NOTE(review): end_time adds XT_SHARE_LOCK_TIMEOUT / 1000 to a time() value,
// so the constant is presumably expressed in milliseconds - confirm.
// NOTE(review): on a phase 2 timeout no reset of sh_table_lock is visible in
// this chunk; presumably the caller releases the lock - confirm.
static void ha_aquire_exclusive_use(XTThreadPtr self, XTSharePtr share, ha_pbxt *mine)
1772
time_t end_time = time(NULL) + XT_SHARE_LOCK_TIMEOUT / 1000;
1774
XT_PRINT1(self, "ha_aquire_exclusive_use (%s) PBXT X lock\n", share->sh_table_path->ps_path);
1775
/* GOTCHA: It is possible to hang here, if you hold
1776
* onto the sh_ex_mutex lock, before we really
1777
* have the exclusive lock (i.e. before all
1778
* handlers are no longer in use.
1779
* The reason is, because reopen() is not possible
1780
* when some other thread holds sh_ex_mutex.
1781
* So this can prevent a thread from completing its
1782
* use of a handler, when prevents exclusive use
1785
xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
1786
pushr_(xt_unlock_mutex, share->sh_ex_mutex);
1788
/* Wait until we can get an exclusive lock: */
1789
while (share->sh_table_lock) {
1790
xt_timed_wait_cond(self, (xt_cond_type *) share->sh_ex_cond, (xt_mutex_type *) share->sh_ex_mutex, XT_SHARE_LOCK_WAIT);
1791
if (time(NULL) > end_time) {
1792
freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1793
xt_throw_taberr(XT_CONTEXT, XT_ERR_LOCK_TIMEOUT, share->sh_table_path);
1797
/* This tells readers (and other exclusive lockers) that someone has an exclusive lock. */
1798
share->sh_table_lock = TRUE;
1800
/* Wait for all open handlers use count to go to 0 */
1802
handler = share->sh_handlers;
1804
if (handler == mine || !handler->pb_ex_in_use)
1805
handler = handler->pb_ex_next;
1807
/* Wait a bit, and try again: */
1808
xt_timed_wait_cond(self, (xt_cond_type *) share->sh_ex_cond, (xt_mutex_type *) share->sh_ex_mutex, XT_SHARE_LOCK_WAIT);
1809
if (time(NULL) > end_time) {
1810
freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1811
xt_throw_taberr(XT_CONTEXT, XT_ERR_LOCK_TIMEOUT, share->sh_table_path);
1813
/* Handler may have been freed, check from the beginning again: */
1818
freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1822
* If you have exclusively locked the table, you can close all handler
1825
* Call ha_close_open_tables() to get an exclusive lock.
1827
// Return the open table of every handler of share, except mine, to the table
// pool and clear the pb_open_tab pointer of each such handler.
// The handler list is walked while holding sh_ex_mutex.
static void ha_close_open_tables(XTThreadPtr self, XTSharePtr share, ha_pbxt *mine)
1831
xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
1832
pushr_(xt_unlock_mutex, share->sh_ex_mutex);
1834
/* Now that we know no handler is in use, we can close all the
1837
handler = share->sh_handlers;
1839
if (handler != mine && handler->pb_open_tab) {
1840
xt_db_return_table_to_pool_ns(handler->pb_open_tab);
1841
handler->pb_open_tab = NULL;
1843
handler = handler->pb_ex_next;
1846
freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1849
// Release the exclusive table lock on share: under sh_ex_mutex, clear
// sh_table_lock and wake all threads waiting on sh_ex_cond.
// The signature depends on PBXT_ALLOW_PRINTING: self is only referenced by
// the trace statement, so the second signature marks it as unused.
#ifdef PBXT_ALLOW_PRINTING
1850
static void ha_release_exclusive_use(XTThreadPtr self, XTSharePtr share)
1852
static void ha_release_exclusive_use(XTThreadPtr XT_UNUSED(self), XTSharePtr share)
1855
XT_PRINT1(self, "ha_release_exclusive_use (%s) PBXT X UNLOCK\n", share->sh_table_path->ps_path);
1856
xt_lock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1857
share->sh_table_lock = FALSE;
1858
xt_broadcast_cond_ns((xt_cond_type *) share->sh_ex_cond);
1859
xt_unlock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1862
// Wait until the exclusive lock on share has been released, so that the
// handler mine may use the table again.
// mine is marked as not in use before waiting, which lets an exclusive locker
// that is waiting for handlers to become idle continue. That locker is woken
// with a broadcast on every pass. Once sh_table_lock is clear, mine is marked
// as in use again, still under sh_ex_mutex.
// The mutex is released on every visible exit path; a timeout registers
// XT_ERR_LOCK_TIMEOUT. The return statements are not visible in this chunk.
static xtBool ha_wait_for_shared_use(ha_pbxt *mine, XTSharePtr share)
1864
time_t end_time = time(NULL) + XT_SHARE_LOCK_TIMEOUT / 1000;
1866
XT_PRINT1(xt_get_self(), "ha_wait_for_shared_use (%s) share lock wait...\n", share->sh_table_path->ps_path);
1867
mine->pb_ex_in_use = 0;
1868
xt_lock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1869
while (share->sh_table_lock) {
1870
/* Wake up the exclusive locker (may be waiting). He can try to continue: */
1871
xt_broadcast_cond_ns((xt_cond_type *) share->sh_ex_cond);
1873
if (!xt_timed_wait_cond(NULL, (xt_cond_type *) share->sh_ex_cond, (xt_mutex_type *) share->sh_ex_mutex, XT_SHARE_LOCK_WAIT)) {
1874
xt_unlock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1878
if (time(NULL) > end_time) {
1879
xt_unlock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1880
xt_register_taberr(XT_REG_CONTEXT, XT_ERR_LOCK_TIMEOUT, share->sh_table_path);
1884
mine->pb_ex_in_use = 1;
1885
xt_unlock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1889
// Re-open the table of this handler (pb_open_tab) from the existing share.
// Binds the current server thread to a PBXT thread, opens the database of the
// table, re-opens the share and obtains a new open-table object. If index
// statistics have never been calculated for the table (tab_ind_stat_calc_time
// is zero), row pointers are loaded and index selectivity is set. The
// auto-increment state is then initialized with init_auto_increment(0).
// Returns a MySQL error code; PBXT errors are translated with
// xt_ha_pbxt_thread_error_for_mysql().
xtPublic int ha_pbxt::reopen()
1891
THD *thd = current_thd;
1895
if (!(self = ha_set_current_thread(thd, &err)))
1896
return xt_ha_pbxt_to_mysql_error(err);
1899
xt_ha_open_database_of_table(self, pb_share->sh_table_path);
1901
ha_open_share(self, pb_share);
1903
if (!(pb_open_tab = xt_db_open_table_using_tab(pb_share->sh_table, self)))
1905
pb_open_tab->ot_thread = self;
1908
* We no longer use the information that a table
1909
* was opened in order to know when to calculate
1912
if (!pb_open_tab->ot_table->tab_ind_stat_calc_time) {
1913
#ifdef LOAD_TABLE_ON_OPEN
1914
xt_tab_load_table(self, pb_open_tab);
1916
xt_tab_load_row_pointers(self, pb_open_tab);
1918
xt_ind_set_index_selectivity(pb_open_tab, self);
1919
/* If the number of rows is less than 150 we will recalculate the
1920
* selectivity of the indices, as soon as the number of rows
1921
* exceeds 200 (see [**])
1923
/* {FREE-ROWS-BAD} */
1924
pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150;
1927
/* I am not doing this anymore because it was only required
1928
* for DELETE FROM table;, which is now implemented
1929
* by deleting each row.
1930
* TRUNCATE TABLE does not preserve the counter value.
1932
//init_auto_increment(pb_share->sh_min_auto_inc);
1933
init_auto_increment(0);
1936
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
1944
* -----------------------------------------------------------------------
1945
* INFORMATION SCHEMA FUNCTIONS
1949
// Fill callback for the PBXT_STATISTICS information schema table.
// If PBXT is not loaded (the test itself is not visible in this chunk), an
// XT_ERR_PBXT_NOT_INSTALLED exception is logged and an empty set is returned.
// Otherwise the server thread is bound to a PBXT thread, the global database
// is opened for it when it has none, and the rows are produced by
// myxt_statistics_fill_table(). PBXT errors are translated into MySQL error
// codes.
static int pbxt_statistics_fill_table(THD *thd, TABLE_LIST *tables, COND *cond)
1951
XTThreadPtr self = NULL;
1955
/* Can't do if PBXT is not loaded! */
1958
xt_exception_xterr(&e, XT_CONTEXT, XT_ERR_PBXT_NOT_INSTALLED);
1959
xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
1960
/* Just return an empty set: */
1964
if (!(self = ha_set_current_thread(thd, &err)))
1965
return xt_ha_pbxt_to_mysql_error(err);
1969
/* If the thread has no open database, and the global
1970
* database is already open, then open
1971
* the database. Otherwise the statement will be
1972
* executed without an open database, which means
1973
* that the related statistics will be missing.
1975
* This includes all background threads.
1977
if (!self->st_database && pbxt_database) {
1978
xt_ha_open_database_of_table(self, (XTPathStrPtr) NULL);
1981
err = myxt_statistics_fill_table(self, thd, tables, cond, system_charset_info);
1984
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1993
// Column descriptions of the PBXT_STATISTICS table, in ColumnInfo form:
// ID (long), Name (string, length 40) and Value (longlong).
// Presumably the variant used with the InfoSchemaTable based registration
// further below - TODO confirm.
ColumnInfo pbxt_statistics_fields_info[]=
1995
ColumnInfo("ID", 4, MYSQL_TYPE_LONG, 0, 0, "The ID of the statistic", SKIP_OPEN_TABLE),
1996
ColumnInfo("Name", 40, MYSQL_TYPE_STRING, 0, 0, "The name of the statistic", SKIP_OPEN_TABLE),
1997
ColumnInfo("Value", 8, MYSQL_TYPE_LONGLONG, 0, 0, "The accumulated value", SKIP_OPEN_TABLE),
2001
// InfoSchemaMethods implementation for the PBXT_STATISTICS table; fillTable
// forwards to pbxt_statistics_fill_table().
class PBXTStatisticsMethods : public InfoSchemaMethods
2004
int fillTable(Session *session, TableList *tables, COND *cond)
2006
return pbxt_statistics_fill_table(session, tables, cond);
2011
// Column descriptions of the PBXT_STATISTICS table, in ST_FIELD_INFO form:
// ID (long), Name (string, length 40) and Value (longlong). The last entry,
// with a null name, ends the list.
ST_FIELD_INFO pbxt_statistics_fields_info[]=
2013
{ "ID", 4, MYSQL_TYPE_LONG, 0, 0, "The ID of the statistic", SKIP_OPEN_TABLE},
2014
{ "Name", 40, MYSQL_TYPE_STRING, 0, 0, "The name of the statistic", SKIP_OPEN_TABLE},
2015
{ "Value", 8, MYSQL_TYPE_LONGLONG, 0, 0, "The accumulated value", SKIP_OPEN_TABLE},
2016
{ 0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
2022
// File-scope state for the InfoSchemaTable based registration: the table
// object (created in pbxt_init_statistics and deleted in
// pbxt_exit_statistics) and the methods object attached to it.
static InfoSchemaTable *pbxt_statistics_table;
2023
static PBXTStatisticsMethods pbxt_statistics_methods;
2024
static int pbxt_init_statistics(Registry ®istry)
2026
//pbxt_statistics_table = (InfoSchemaTable *)xt_calloc_ns(sizeof(InfoSchemaTable));
2027
//pbxt_statistics_table->table_name= "PBXT_STATISTICS";
2028
pbxt_statistics_table = new InfoSchemaTable("PBXT_STATISTICS");
2029
pbxt_statistics_table->setColumnInfo(pbxt_statistics_fields_info);
2030
pbxt_statistics_table->setInfoSchemaMethods(&pbxt_statistics_methods);
2031
registry.add(pbxt_statistics_table);
2036
static int pbxt_init_statistics(void *p)
2038
ST_SCHEMA_TABLE *pbxt_statistics_table = (ST_SCHEMA_TABLE *) p;
2039
pbxt_statistics_table->fields_info = pbxt_statistics_fields_info;
2040
pbxt_statistics_table->fill_table = pbxt_statistics_fill_table;
2042
#if defined(XT_WIN) && defined(XT_COREDUMP)
2043
void register_crash_filter();
2045
if (pbxt_crash_debug)
2046
register_crash_filter();
2054
static int pbxt_exit_statistics(Registry ®istry)
2055
registry.remove(pbxt_statistics_table);
2056
delete pbxt_statistics_table;
2061
static int pbxt_exit_statistics(void *XT_UNUSED(p))
2068
* -----------------------------------------------------------------------
2074
// Handler constructor. Two signatures are visible (table share passed by
// reference or by pointer); presumably a preprocessor guard that is not
// visible in this chunk selects one of them - TODO confirm.
// Passes hton and the table share to the base class and resets the
// per-handler flags pb_key_read, pb_ignore_dup_key, pb_lock_table and
// pb_table_locked.
ha_pbxt::ha_pbxt(handlerton *hton, TableShare& table_arg) : handler(*hton, table_arg)
2076
ha_pbxt::ha_pbxt(handlerton *hton, TABLE_SHARE *table_arg) : handler(hton, table_arg)
2081
pb_key_read = FALSE;
2082
pb_ignore_dup_key = 0;
2083
pb_lock_table = FALSE;
2084
pb_table_locked = 0;
2092
* If frm_error() is called then we will use this to find out what file extensions
2093
* exist for the storage engine. This is also used by the default rename_table and
2094
* delete_table method in handler.cc.
2097
// Return the list of file extensions used by the engine
// (pbxt_extensions). Two signatures are visible, one on PBXTStorageEngine and
// one on ha_pbxt; presumably a preprocessor guard that is not visible in this
// chunk selects one of them - TODO confirm.
const char **PBXTStorageEngine::bas_ext() const
2099
const char **ha_pbxt::bas_ext() const
2102
return pbxt_extensions;
2106
* Specify the caching type: HA_CACHE_TBL_NONTRANSACT, HA_CACHE_TBL_NOCACHE
2107
* HA_CACHE_TBL_ASKTRANSACT, HA_CACHE_TBL_TRANSACT
2109
// Report the query cache type for PBXT tables: always
// HA_CACHE_TBL_TRANSACT.
MX_UINT8_T ha_pbxt::table_cache_type()
2111
return HA_CACHE_TBL_TRANSACT; /* Use transactional query cache */
2115
// Return the capability flags of the handler as a bit mask of HA_ constants.
// Only some of the flag lines are visible in this chunk (HA_CAN_INDEX_BLOBS,
// HA_NOT_EXACT_COUNT, HA_CAN_SQL_HANDLER and, for MYSQL_VERSION_ID above
// 50119, HA_BINLOG_ROW_CAPABLE); HA_CAN_INSERT_DELAYED is commented out.
// The comments below explain the reasoning for individual flags.
MX_TABLE_TYPES_T ha_pbxt::table_flags() const
2118
/* We need this flag because records are not packed
2119
* into a table which means #ROWID != offset
2122
/* Since PBXT caches read records itself, I believe
2123
* this to be the case.
2127
* I am assuming a "key" means a unique index.
2128
* Of course a primary key does not allow nulls.
2132
* This is necessary because a MySQL blob can be
2135
HA_CAN_INDEX_BLOBS |
2137
* Due to transactional influences, this will be
2139
* Although the count is good enough for practical
2141
HA_NOT_EXACT_COUNT |
2145
* This basically means we have a file with the name of
2146
* database table (which we do).
2151
* Not sure what this does (but MyISAM and InnoDB have it)?!
2152
* Could it mean that we support the handler functions.
2154
HA_CAN_SQL_HANDLER |
2156
* This is not true, we cannot insert delayed, but a
2157
* really cannot see what's wrong with inserting normally
2158
* when asked to insert delayed!
2159
* And the functionality is required to pass the alter_table
2162
* Disabled because of MySQL bug #40505
2164
/*HA_CAN_INSERT_DELAYED |*/
2165
#if MYSQL_VERSION_ID > 50119
2166
/* We can do row logging, but not statement, because
2167
* MVCC is not serializable!
2169
HA_BINLOG_ROW_CAPABLE |
2172
* Auto-increment is allowed on a partial key.
2179
* The following query from the DBT1 test is VERY slow
2180
* if we do not set HA_READ_ORDER.
2181
* The reason is that it must scan all duplicates, then
2184
* SELECT o_id, o_carrier_id, o_entry_d, o_ol_cnt
2185
* FROM orders FORCE INDEX (o_w_id)
2189
* ORDER BY o_id DESC limit 1;
2192
// Return the capability flags of an index as a bit mask of HA_READ_ and
// HA_KEYREAD_ONLY constants. All three parameters are unused.
// With FLAGS_ARE_READ_DYNAMICALLY defined (as it is just below), the result
// omits HA_READ_ORDER while the open table is marked ot_for_update and
// includes it otherwise. The final return statement, without HA_READ_ORDER,
// presumably belongs to the branch used when the macro is not defined - TODO
// confirm.
#define FLAGS_ARE_READ_DYNAMICALLY
2194
MX_ULONG_T ha_pbxt::index_flags(uint XT_UNUSED(inx), uint XT_UNUSED(part), bool XT_UNUSED(all_parts)) const
2196
/* It would be nice if the dynamic version of this function works,
2197
* but it does not. MySQL loads this information when the table is opened,
2198
* and then it is fixed.
2200
* The problem is, I have had to remove the HA_READ_ORDER option although
2201
* it applies to PBXT. PBXT returns entries in index order during an index
2202
* scan in _almost_ all cases.
2204
* A number of cases are demonstrated here: [(11)]
2206
* If involves the following conditions:
2207
* - a SELECT FOR UPDATE, UPDATE or DELETE statement
2208
* - an ORDER BY, or join that requires the sort order
2209
* - another transaction which updates the index while it is being
2212
* In this "obscure" case, the index scan may return index
2213
* entries in the wrong order.
2215
#ifdef FLAGS_ARE_READ_DYNAMICALLY
2216
/* If we are in an update (SELECT FOR UPDATE, UPDATE or DELETE), then
2217
* it may be that we return the rows from an index in the wrong
2218
* order! This is due to the fact that update reads wait for transactions
2219
* to commit and this means that index entries may change position during
2222
if (pb_open_tab && pb_open_tab->ot_for_update)
2223
return (HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | HA_KEYREAD_ONLY);
2224
/* If I understand HA_KEYREAD_ONLY then this means I do not
2225
* need to fetch the record associated with an index
2228
return (HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | HA_READ_RANGE | HA_KEYREAD_ONLY);
2230
return (HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | HA_KEYREAD_ONLY);
2234
// Common close logic used by open() on failure and by close().
// Under sh_ex_mutex the open table of this handler is taken over (ot) and
// scheduled for return to the table pool through pushr_. The handler is then
// removed from the handler list of the share, waiters on sh_ex_cond are
// woken and the share reference is released.
// The table is flushed with xt_sync_flush_table() only when there is no
// server thread (thd is NULL) or the current SQL command is FLUSH; the long
// comment below explains why a flush on every last close was abandoned.
// Exceptions are logged and cleared rather than propagated.
void ha_pbxt::internal_close(THD *thd, struct XTThread *self)
2241
/* This lock must be held when we remove the handler's
2242
* open table because ha_close_open_tables() can run
2245
xt_lock_mutex_ns(pb_share->sh_ex_mutex);
2246
if ((ot = pb_open_tab)) {
2247
pb_open_tab->ot_thread = self;
2248
if (self->st_database != pb_open_tab->ot_table->tab_db)
2249
xt_ha_open_database_of_table(self, pb_share->sh_table_path);
2251
pushr_(xt_db_return_table_to_pool, ot);
2253
xt_unlock_mutex_ns(pb_share->sh_ex_mutex);
2255
ha_remove_from_handler_list(self, pb_share, this);
2257
/* Someone may be waiting for me to complete: */
2258
xt_broadcast_cond_ns((xt_cond_type *) pb_share->sh_ex_cond);
2260
removed = ha_unget_share_removed(self, pb_share);
2263
/* Flush the table if this was the last handler: */
2264
/* This is not necessary but has the affect that
2265
* FLUSH TABLES; does a checkpoint!
2269
* This was killing performance as the number of threads increased!
2271
* When MySQL runs out of table handlers because the table
2272
* handler cache is too small, it starts to close handlers.
2273
* (open_cache.records > table_cache_size)
2275
* Which can lead to closing all handlers for a particular table.
2277
* It does this while holding lock_OPEN!
2278
* So this code below leads to a sync operation while lock_OPEN
2279
* is held. The result is that the whole server comes to a stop.
2281
if (!thd || thd_sql_command(thd) == SQLCOM_FLUSH) // FLUSH TABLES
2282
xt_sync_flush_table(self, ot, thd ? 0 : 4);
2284
freer_(); // xt_db_return_table_to_pool(ot);
2288
xt_log_and_clear_exception(self);
2297
* Used for opening tables. The name will be the name of the file.
2298
* A table is opened when it needs to be opened. For instance
2299
* when a request comes in for a select on the table (tables are not
2300
* open and closed for each request, they are cached).
2302
* Called from handler.cc by handler::ha_open(). The server opens all tables by
2303
* calling ha_open() which then calls the handler specific open().
2305
// Open the table stored at table_path for this handler. The mode and
// test_if_locked arguments are unused.
// Sequence: set ref_length, bind the server thread to a PBXT thread, open the
// database of the table, obtain the share and add this handler to its handler
// list, wait for any exclusive lock holder, open the share and the table,
// and, if index statistics were never calculated, load row pointers and set
// the index selectivity. init_auto_increment(0) and info() are called before
// returning.
// On failure the PBXT error is translated into a MySQL error code and
// internal_close() undoes the partial open.
int ha_pbxt::open(const char *table_path, int XT_UNUSED(mode), uint XT_UNUSED(test_if_locked))
2307
THD *thd = current_thd;
2311
ref_length = XT_RECORD_OFFS_SIZE;
2313
if (!(self = ha_set_current_thread(thd, &err)))
2314
return xt_ha_pbxt_to_mysql_error(err);
2316
XT_PRINT1(self, "open (%s)\n", table_path);
2320
xt_ha_open_database_of_table(self, (XTPathStrPtr) table_path);
2322
pb_share = ha_get_share(self, table_path, false);
2323
ha_add_to_handler_list(self, pb_share, this);
2324
if (pb_share->sh_table_lock) {
2325
if (!ha_wait_for_shared_use(this, pb_share))
2329
ha_open_share(self, pb_share);
2331
thr_lock_data_init(&pb_share->sh_lock, &pb_lock, NULL);
2332
if (!(pb_open_tab = xt_db_open_table_using_tab(pb_share->sh_table, self)))
2334
pb_open_tab->ot_thread = self;
2337
if (!pb_open_tab->ot_table->tab_ind_stat_calc_time) {
2338
#ifdef LOAD_TABLE_ON_OPEN
2339
xt_tab_load_table(self, pb_open_tab);
2341
xt_tab_load_row_pointers(self, pb_open_tab);
2343
xt_ind_set_index_selectivity(pb_open_tab, self);
2344
/* {FREE-ROWS-BAD} */
2345
pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150;
2348
init_auto_increment(0);
2351
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
2352
internal_close(thd, self);
2357
info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
2370
Closes a table. We call the free_share() function to free any resources
2371
that we have allocated in the "shared" structure.
2373
Called from sql_base.cc, sql_select.cc, and table.cc.
2374
In sql_select.cc it is only used to close up temporary tables or during
2375
the process where a temporary table is converted over to being a
2377
For sql_base.cc look at close_data_tables().
2379
// Close the table of this handler.
// Uses the PBXT thread of the current server thread, or a temporary PBXT
// thread named TempForClose created with xt_create_thread(); the condition
// that selects between the two is not visible in this chunk. The actual work
// is done by internal_close(). A temporary thread is freed afterwards.
// If no thread could be obtained, a warning is logged that the table
// reference could not be released.
// NOTE(review): err and self are declared volatile, presumably because they
// are used across the exception handling macros of this file - confirm.
int ha_pbxt::close(void)
2381
THD *thd = current_thd;
2382
volatile int err = 0;
2383
volatile XTThreadPtr self;
2386
self = ha_set_current_thread(thd, (int *) &err);
2390
if (!(self = xt_create_thread("TempForClose", FALSE, TRUE, &e))) {
2391
xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
2396
XT_PRINT1(self, "close (%s)\n", pb_share && pb_share->sh_table_path->ps_path ? pb_share->sh_table_path->ps_path : "unknown");
2400
internal_close(thd, self);
2403
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
2408
xt_free_thread(self);
2411
xt_log(XT_NS_CONTEXT, XT_LOG_WARNING, "Unable to release table reference\n");
2416
// Initializes the table-wide auto-increment counter tab->tab_auto_inc.
// The work is done only when the table has an auto-increment field and the
// counter is still zero. The largest existing value is read from the
// auto-increment index: index_last() when the field starts the key,
// otherwise a scan with index_first() and index_next().
// The counter stores the last value handed out (pre-increment scheme, see the
// long comment below), so the lower bounds are applied as "bound - 1".
// min_auto_inc: caller supplied lower bound for the next value.
void ha_pbxt::init_auto_increment(xtWord8 min_auto_inc)
2422
/* Get the value of the auto-increment value by
2423
* loading the highest value from the index...
2425
tab = pb_open_tab->ot_table;
2427
/* Cannot do this if the index version is bad! */
2428
if (tab->tab_dic.dic_disable_index)
2431
// tab_ainc_lock is held while tab_auto_inc is examined and updated.
xt_spinlock_lock(&tab->tab_ainc_lock);
2432
if (table->found_next_number_field && !tab->tab_auto_inc) {
2433
// Saved so that the table state changed below can be restored afterwards.
Field *tmp_fie = table->next_number_field;
2434
THD *tmp_thd = table->in_use;
2435
xtBool xn_started = FALSE;
2436
XTThreadPtr self = pb_open_tab->ot_thread;
2440
* A table may be opened by a thread with a running
2442
* Since get_auto_increment() does not do an update,
2443
* it should be OK to use the transaction we already
2444
* have to get the next auto-increment value.
2446
// No transaction data on this thread: set the thread's transaction flags
// and begin a transaction for the index read.
if (!self->st_xact_data) {
2447
self->st_xact_mode = XT_XACT_REPEATABLE_READ;
2448
self->st_ignore_fkeys = FALSE;
2449
self->st_auto_commit = TRUE;
2450
self->st_table_trans = FALSE;
2451
self->st_abort_trans = FALSE;
2452
self->st_stat_ended = FALSE;
2453
self->st_stat_trans = FALSE;
2454
self->st_is_update = NULL;
2455
if (!xt_xn_begin(self)) {
2456
// The spinlock is released on the failure path of xt_xn_begin().
xt_spinlock_unlock(&tab->tab_ainc_lock);
2462
/* Setup the conditions for the next call! */
2463
table->in_use = current_thd;
2464
table->next_number_field = table->found_next_number_field;
2466
// Key-only read over the auto-increment index.
extra(HA_EXTRA_KEYREAD);
2467
table->mark_columns_used_by_index_no_reset(TS(table)->next_number_index, table->read_set);
2468
column_bitmaps_signal();
2469
index_init(TS(table)->next_number_index, 0);
2470
if (!TS(table)->next_number_key_offset) {
2471
// Autoincrement at key-start
2472
// Rows are read into record[1]; the field value is then fetched at offset
// rec_buff_length.
err = index_last(table->record[1]);
2473
if (!err && !table->next_number_field->is_null(TS(table)->rec_buff_length)) {
2475
nr = (xtWord8) table->next_number_field->val_int_offset(TS(table)->rec_buff_length);
2479
/* Do an index scan to find the largest value! */
2480
/* The standard method will not work because it forces
2481
* us to lock that table!
2485
err = index_first(table->record[1]);
2488
val = (xtWord8) table->next_number_field->val_int_offset(TS(table)->rec_buff_length);
2491
err = index_next(table->record[1]);
2496
extra(HA_EXTRA_NO_KEYREAD);
2499
* I have changed this from post increment to pre-increment!
2501
* When using post increment we are not able to return
2502
* the last valid value in the range.
2504
* Here the test example:
2506
* drop table if exists t1;
2507
* create table t1 (i tinyint unsigned not null auto_increment primary key) engine=pbxt;
2508
* insert into t1 set i = 254;
2509
* insert into t1 set i = null;
2511
* With post-increment, this last insert fails because on post increment
2512
* the value overflows!
2514
* Pre-increment means we store the current max, and increment
2515
* before returning the next value.
2517
* This will work in this situation.
2519
// Store the current maximum, then apply both lower bounds (as bound - 1).
tab->tab_auto_inc = nr;
2520
if (tab->tab_auto_inc < tab->tab_dic.dic_min_auto_inc)
2521
tab->tab_auto_inc = tab->tab_dic.dic_min_auto_inc-1;
2522
if (tab->tab_auto_inc < min_auto_inc)
2523
tab->tab_auto_inc = min_auto_inc-1;
2525
/* Restore the changed values: */
2526
table->next_number_field = tmp_fie;
2527
table->in_use = tmp_thd;
2530
XT_PRINT0(self, "xt_xn_commit in init_auto_increment\n");
2534
xt_spinlock_unlock(&tab->tab_ainc_lock);
2537
// Produces the next auto-increment value from the table-wide counter
// tab->tab_auto_inc while holding tab_ainc_lock.
// offset, increment: when increment is greater than 1 the candidate value is
//   moved up to the next value congruent to offset modulo increment.
// nb_desired_values: unused.
// first_value: output for the value (the assignment is not in the visible
//   lines -- confirm).
// nb_reserved_values: always set to 1.
// When the field comparison does not accept the new value, nr is set to ~0
// to signal an error to the caller.
void ha_pbxt::get_auto_increment(MX_ULONGLONG_T offset, MX_ULONGLONG_T increment,
2538
MX_ULONGLONG_T XT_UNUSED(nb_desired_values),
2539
MX_ULONGLONG_T *first_value,
2540
MX_ULONGLONG_T *nb_reserved_values)
2542
register XTTableHPtr tab;
2543
MX_ULONGLONG_T nr, nr_less_inc;
2545
ASSERT_NS(pb_ex_in_use);
2547
tab = pb_open_tab->ot_table;
2550
* Assume that nr contains the last value returned!
2551
* We will increment and then return the value.
2553
xt_spinlock_lock(&tab->tab_ainc_lock);
2554
nr = (MX_ULONGLONG_T) tab->tab_auto_inc;
2558
// Round nr up to the next value of the form offset + k * increment.
else if (increment > 1 && ((nr - offset) % increment) != 0)
2559
nr += increment - ((nr - offset) % increment);
2562
// The field's own comparison is applied to the raw bytes of both values;
// NOTE(review): presumably this detects overflow of the column type -- confirm.
if (table->next_number_field->cmp((const unsigned char *)&nr_less_inc, (const unsigned char *)&nr) < 0)
2563
tab->tab_auto_inc = (xtWord8) (nr);
2565
nr = ~0; /* indicate error to the caller */
2566
xt_spinlock_unlock(&tab->tab_ainc_lock);
2569
*nb_reserved_values = 1;
2572
/* GOTCHA: We need to use signed value here because of the test
2573
* (from auto_increment.test):
2574
* create table t1 (a int not null auto_increment primary key);
2575
* insert into t1 values (NULL);
2576
* insert into t1 values (-1);
2577
* insert into t1 values (NULL);
2579
// Raises the table-wide auto-increment counter when the value held in field
// nr is larger than the counter.
// The field comparison is made once without the lock and repeated after
// taking tab_ainc_lock, so the counter is only written under the lock.
// When xt_db_auto_increment_mode is 1 and the value exceeds the stored
// minimum, dic_min_auto_inc is moved ahead of the value and written out with
// xt_tab_write_min_auto_inc(); a write failure is logged and cleared.
// ot: open table whose table counter is updated.
// nr: field holding the value just stored in the row.
xtPublic void ha_set_auto_increment(XTOpenTablePtr ot, Field *nr)
2581
register XTTableHPtr tab;
2582
MX_ULONGLONG_T nr_int_val;
2584
nr_int_val = nr->val_int();
2587
if (nr->cmp((const unsigned char *)&tab->tab_auto_inc) > 0) {
2588
xt_spinlock_lock(&tab->tab_ainc_lock);
2590
// Re-check now that the lock is held.
if (nr->cmp((const unsigned char *)&tab->tab_auto_inc) > 0) {
2592
* We increment later, so just set the value!
2593
MX_ULONGLONG_T nr_int_val_plus_one = nr_int_val + 1;
2594
if (nr->cmp((const unsigned char *)&nr_int_val_plus_one) < 0)
2595
tab->tab_auto_inc = nr_int_val_plus_one;
2598
tab->tab_auto_inc = nr_int_val;
2600
xt_spinlock_unlock(&tab->tab_ainc_lock);
2603
if (xt_db_auto_increment_mode == 1) {
2604
if (nr_int_val > (MX_ULONGLONG_T) tab->tab_dic.dic_min_auto_inc) {
2605
/* Do this every 100 calls: */
2607
// NOTE(review): two step sizes (5 and 100) appear; the condition selecting
// between them is not visible here -- confirm.
tab->tab_dic.dic_min_auto_inc = nr_int_val + 5;
2609
tab->tab_dic.dic_min_auto_inc = nr_int_val + 100;
2611
ot->ot_thread = xt_get_self();
2612
if (!xt_tab_write_min_auto_inc(ot))
2613
xt_log_and_clear_exception(ot->ot_thread);
2619
// Debug helper: prints the first len bytes of buf twice with printf, first
// as characters (bytes above 127 are shown as '.'), then as two-digit
// lowercase hex.
static void dump_buf(unsigned char *buf, int len)
2623
for (i=0; i<len; i++) printf("%2c", buf[i] <= 127 ? buf[i] : '.');
2625
for (i=0; i<len; i++) printf("%02x", buf[i]);
2631
* write_row() inserts a row. No extra() hint is given currently if a bulk load
2632
* is happening. buf() is a byte array of data. You can use the field
2633
* information to extract the data from the native byte array type.
2634
* Example of this would be:
2635
* for (Field **field=table->field ; *field ; field++)
2640
* See ha_tina.cc for an example of extracting all of the data as strings.
2641
* ha_berkeley.cc has an example of how to store it intact by "packing" it
2642
* for ha_berkeley's own native storage type.
2644
* See the note for update_row() on auto_increments and timestamps. This
2645
* case also applied to write_row().
2647
* Called from item_sum.cc, item_sum.cc, sql_acl.cc, sql_insert.cc,
2648
* sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc, and sql_update.cc.
2650
// Inserts the row image in buf into the table.
// Visible steps, in order:
//  - pbms_write_row_blobs() is called for the row; a failure is logged.
//  - During an import statement (st_import_stat) the transaction is committed
//    and a new one begun each time pb_import_row_count reaches
//    XT_IMPORT_ROW_COUNT.
//  - The insert timestamp is set when TIMESTAMP_AUTO_SET_ON_INSERT applies.
//  - For rows in record[0] of a table with an auto-increment field,
//    update_auto_increment() is called and the table counter is updated.
//  - The record is written with xt_tab_new_record().
//  - pbms_completed() is told whether err is zero.
// NOTE(review): the final return is not visible; presumably a handler error
// code in err -- confirm.
int ha_pbxt::write_row(byte *buf)
2654
ASSERT_NS(pb_ex_in_use);
2656
XT_PRINT1(pb_open_tab->ot_thread, "write_row (%s)\n", pb_share->sh_table_path->ps_path);
2657
XT_DISABLED_TRACE(("INSERT tx=%d val=%d\n", (int) pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(&buf[1])));
2658
//statistic_increment(ha_write_count,&LOCK_status);
2660
PBMSResultRec result;
2661
err = pbms_write_row_blobs(table, buf, &result);
2663
xt_logf(XT_NT_ERROR, "pbms_write_row_blobs() Error: %s", result.mr_message);
2668
/* {START-STAT-HACK} previously position of start statement hack. */
2669
xt_xlog_check_long_writer(pb_open_tab->ot_thread);
2671
// Import statements are split into transactions of XT_IMPORT_ROW_COUNT rows.
if (pb_open_tab->ot_thread->st_import_stat) {
2672
if (pb_import_row_count >= XT_IMPORT_ROW_COUNT) {
2673
/* Commit and restart the transaction. */
2674
XTThreadPtr thread = pb_open_tab->ot_thread;
2676
XT_PRINT0(thread, "xt_xn_commit in write_row\n");
2677
if (!xt_xn_commit(thread)) {
2678
err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, thread, pb_ignore_dup_key);
2681
XT_PRINT0(thread, "xt_xn_begin in write_row\n");
2682
if (!xt_xn_begin(thread)) {
2683
err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, thread, pb_ignore_dup_key);
2686
pb_import_row_count = 0;
2689
pb_import_row_count++;
2692
if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
2693
table->timestamp_field->set_time();
2695
// Auto-increment handling applies only when the row is in record[0].
if (table->next_number_field && buf == table->record[0]) {
2696
int update_err = update_auto_increment();
2698
ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2702
ha_set_auto_increment(pb_open_tab, table->next_number_field);
2705
if (!xt_tab_new_record(pb_open_tab, (xtWord1 *) buf)) {
2706
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2709
* This is needed to allow the same row to be updated multiple times in case of bulk REPLACE.
2710
* This happens during execution of LOAD DATA...REPLACE MySQL first tries to INSERT the row
2711
* and if it gets dup-key error it tries UPDATE, so the same row can be overwriten multiple
2712
* times within the same statement
2714
// On a duplicate key, the top entry of the thread's update stack
// (st_is_update) is unlinked.
if (err == HA_ERR_FOUND_DUPP_KEY && pb_open_tab->ot_thread->st_is_update) {
2715
/* Pop the update stack: */
2716
//pb_open_tab->ot_thread->st_update_id++;
2717
XTOpenTablePtr curr = pb_open_tab->ot_thread->st_is_update;
2719
pb_open_tab->ot_thread->st_is_update = curr->ot_prev_update;
2720
curr->ot_prev_update = NULL;
2726
pbms_completed(table, (err == 0));
2732
static int equ_bin(const byte *a, const char *b)
2742
// Debug helper: writes bytes of a_in to the trace output with xt_trace(),
// as two-digit uppercase hex and as characters. Bytes outside the range
// 9..126 are shown as '.' in the character form.
// NOTE(review): how offset and len_in bound the output is not visible here.
static void dump_bin(const byte *a_in, int offset, int len_in)
2744
const byte *a = a_in;
2749
xt_trace("%02X", (int) *a);
2758
xt_trace("%c", (*a > 8 && *a < 127) ? *a : '.');
2767
* Yes, update_row() does what you expect, it updates a row. old_data will have
2768
* the previous row record in it, while new_data will have the newest data in
2769
* it. Keep in mind that the server can do updates based on ordering if an ORDER BY
2770
* clause was used. Consecutive ordering is not guaranteed.
2772
* Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc.
2774
// Replaces the row image old_data with new_data.
// Visible steps, in order:
//  - This open table is pushed onto the thread's update stack
//    (st_is_update) unless it is already on top; ot_update_id is incremented.
//  - The update timestamp is set when TIMESTAMP_AUTO_SET_ON_UPDATE applies.
//  - pbms_delete_row_blobs() is called for the old image and
//    pbms_write_row_blobs() for the new image; failures are logged.
//  - The table-wide auto-increment counter is updated from the new row.
//  - The record is updated with xt_tab_update_record() and the temporary row
//    lock is removed.
//  - pbms_completed() is told whether err is zero.
int ha_pbxt::update_row(const byte * old_data, byte * new_data)
2777
register XTThreadPtr self = pb_open_tab->ot_thread;
2779
ASSERT_NS(pb_ex_in_use);
2781
XT_PRINT1(self, "update_row (%s)\n", pb_share->sh_table_path->ps_path);
2782
XT_DISABLED_TRACE(("UPDATE tx=%d val=%d\n", (int) self->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(&new_data[1])));
2783
//statistic_increment(ha_update_count,&LOCK_status);
2784
/* {START-STAT-HACK} previously position of start statement hack. */
2785
xt_xlog_check_long_writer(self);
2787
/* {UPDATE-STACK} */
2788
if (self->st_is_update != pb_open_tab) {
2789
/* Push the update stack: */
2790
pb_open_tab->ot_prev_update = self->st_is_update;
2791
self->st_is_update = pb_open_tab;
2792
pb_open_tab->ot_update_id++;
2795
if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
2796
table->timestamp_field->set_time();
2799
PBMSResultRec result;
2801
err = pbms_delete_row_blobs(table, old_data, &result);
2803
xt_logf(XT_NT_ERROR, "update_row:pbms_delete_row_blobs() Error: %s", result.mr_message);
2806
err = pbms_write_row_blobs(table, new_data, &result);
2808
xt_logf(XT_NT_ERROR, "update_row:pbms_write_row_blobs() Error: %s", result.mr_message);
2813
/* GOTCHA: We need to check the auto-increment value on update
2814
* because of the following test (which fails for InnoDB) -
2815
* auto_increment.test:
2816
* create table t1 (a int not null auto_increment primary key, val int);
2817
* insert into t1 (val) values (1);
2818
* update t1 set a=2 where a=1;
2819
* insert into t1 (val) values (1);
2821
if (table->found_next_number_field && new_data == table->record[0]) {
2823
my_bitmap_map *old_map;
2825
// The read set is temporarily widened to all columns so the field can be
// read, then restored.
old_map = mx_tmp_use_all_columns(table, table->read_set);
2826
nr = table->found_next_number_field->val_int();
2827
ha_set_auto_increment(pb_open_tab, table->found_next_number_field);
2828
mx_tmp_restore_column_map(table, old_map);
2831
if (!xt_tab_update_record(pb_open_tab, (xtWord1 *) old_data, (xtWord1 *) new_data))
2832
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2834
pb_open_tab->ot_table->tab_locks.xt_remove_temp_lock(pb_open_tab, TRUE);
2838
pbms_completed(table, (err == 0));
2845
* This will delete a row. buf will contain a copy of the row to be deleted.
2846
* The server will call this right after the current row has been called (from
2847
* either a previous rnd_next() or index call).
2849
* Called in sql_acl.cc and sql_udf.cc to manage internal table information.
2850
* Called in sql_delete.cc, sql_insert.cc, and sql_select.cc. In sql_select it is
2851
* used for removing duplicates while in insert it is used for REPLACE calls.
2853
// Deletes the row whose image is in buf.
// Visible steps: pbms_delete_row_blobs() is called for the row (a failure is
// logged), the record is removed with xt_tab_delete_record(), the temporary
// row lock is removed, and pbms_completed() is told whether err is zero.
int ha_pbxt::delete_row(const byte * buf)
2857
ASSERT_NS(pb_ex_in_use);
2859
XT_PRINT1(pb_open_tab->ot_thread, "delete_row (%s)\n", pb_share->sh_table_path->ps_path);
2860
XT_DISABLED_TRACE(("DELETE tx=%d val=%d\n", (int) pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(&buf[1])));
2861
//statistic_increment(ha_delete_count,&LOCK_status);
2864
PBMSResultRec result;
2866
err = pbms_delete_row_blobs(table, buf, &result);
2868
xt_logf(XT_NT_ERROR, "pbms_delete_row_blobs() Error: %s", result.mr_message);
2872
/* {START-STAT-HACK} previously position of start statement hack. */
2873
xt_xlog_check_long_writer(pb_open_tab->ot_thread);
2875
if (!xt_tab_delete_record(pb_open_tab, (xtWord1 *) buf))
2876
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2878
pb_open_tab->ot_table->tab_locks.xt_remove_temp_lock(pb_open_tab, TRUE);
2881
pbms_completed(table, (err == 0));
2887
* -----------------------------------------------------------------------
2892
* This looks like a hack, but actually, it is OK.
2893
* It depends on the setup done by the super-class. It involves an extra
2894
* range check that we need to do if a "new" record is returned during
2897
* A new record is returned if a row is updated (by another transaction)
2898
* during the index scan. If an update is detected, then the scan stops
2899
* and waits for the transaction to end.
2901
* If the transaction commits, then the updated row is returned instead
2902
* of the row it would have returned when doing a consistent read
2903
* (repeatable read).
2905
* These new records can appear out of index order, and may not even
2906
* belong to the index range that we are concerned with.
2908
* Notice that there is no check for the start of the range. It appears
2909
* that this is not necessary, MySQL seems to have no problem ignoring
2912
* A number of tests have been given below which demonstrate the use
2915
* They also demonstrate the ORDER BY problem described here: [(11)].
2917
* DROP TABLE IF EXISTS test_tab, test_tab_1, test_tab_2;
2918
* CREATE TABLE test_tab (ID int primary key, Value int, Name varchar(20), index(Value, Name)) ENGINE=pbxt;
2919
* INSERT test_tab values(1, 1, 'A');
2920
* INSERT test_tab values(2, 1, 'B');
2921
* INSERT test_tab values(3, 1, 'C');
2922
* INSERT test_tab values(4, 2, 'D');
2923
* INSERT test_tab values(5, 2, 'E');
2924
* INSERT test_tab values(6, 2, 'F');
2925
* INSERT test_tab values(7, 2, 'G');
2927
* select * from test_tab where value = 1 order by value, name for update;
2932
* select * from test_tab where id = 5 for update;
2936
* select * from test_tab where value = 2 order by value, name for update;
2939
* update test_tab set value = 3 where id = 6;
2945
* select * from test_tab where id = 5 for update;
2949
* select * from test_tab where value >= 2 order by value, name for update;
2952
* update test_tab set value = 3 where id = 6;
2958
* select * from test_tab where id = 5 for update;
2962
* select * from test_tab where value = 2 order by value, name for update;
2965
* update test_tab set value = 1 where id = 6;
2969
// Decides whether the row in buf still belongs to the current index scan.
// With a search key (exact match wanted): a key is built from the row and
// compared with the search key; the result is stored in
// search_key->sk_on_key and returned.
// Without a search key: returns non-zero when compare_key(end_range) <= 0,
// i.e. the end of the range has not been passed.
// ot: unused. ind: index being scanned. buf: row image to test.
int ha_pbxt::xt_index_in_range(register XTOpenTablePtr XT_UNUSED(ot), register XTIndexPtr ind,
2970
register XTIdxSearchKeyPtr search_key, xtWord1 *buf)
2972
/* If search key is given, this means we want an exact match. */
2974
xtWord1 key_buf[XT_INDEX_MAX_KEY_SIZE];
2976
myxt_create_key_from_row(ind, key_buf, buf, NULL);
2977
search_key->sk_on_key = myxt_compare_key(ind, search_key->sk_key_value.sv_flags, search_key->sk_key_value.sv_length,
2978
search_key->sk_key_value.sv_key, key_buf) == 0;
2979
return search_key->sk_on_key;
2982
/* Otherwise, check the end of the range. */
2984
return compare_key(end_range) <= 0;
2988
// Forward index read. Starting at the current index position of ot, loops
// while ot->ot_curr_rec_id is non-zero, moving forward with xt_idx_next()
// past entries that are not returned.
// Two loops are visible: one that reads data from the index key only
// (xt_tab_visible() plus xt_idx_read()) and one that reads the record with
// xt_tab_read_record(). NOTE(review): presumably selected by key_only --
// the test is not visible here.
// search_key: when given, the loop stops as soon as sk_on_key is false.
// Returns HA_ERR_END_OF_FILE when nothing is found, or the error code from
// ha_log_pbxt_thread_error_for_mysql() on failure.
int ha_pbxt::xt_index_next_read(register XTOpenTablePtr ot, register XTIndexPtr ind, xtBool key_only,
2989
register XTIdxSearchKeyPtr search_key, byte *buf)
2991
xt_xlog_check_long_writer(ot->ot_thread);
2994
/* We only need to read the data from the key: */
2995
while (ot->ot_curr_rec_id) {
2996
if (search_key && !search_key->sk_on_key)
2999
switch (xt_tab_visible(ot)) {
3001
if (xt_idx_next(ot, ind, search_key))
3006
if (!xt_idx_read(ot, ind, (xtWord1 *) buf))
3008
// A row fetched on this path may lie outside the scan range, so it is
// checked before being accepted.
if (xt_index_in_range(ot, ind, search_key, buf)) {
3011
if (!xt_idx_next(ot, ind, search_key))
3015
/* We cannot start from the beginning again, if we have
3016
* already output rows!
3017
* And we need the orginal search key.
3019
* The case in which this occurs is:
3021
* T1: UPDATE tbl_file SET GlobalID = 'DBCD5C4514210200825501089884844_6M' WHERE ID = 39
3022
* Locks a particular row.
3024
* T2: SELECT ID,Flags FROM tbl_file WHERE SpaceID = 1 AND Path = '/zi/America/' AND
3025
* Name = 'Cuiaba' AND Flags IN ( 0,1,4,5 ) FOR UPDATE
3026
* scans the index and stops on the lock (of the before image) above.
3028
* T1 quits, the sweeper deletes the record updated by T1?!
3029
* BUG: Cleanup should wait until T2 is complete!
3031
* T2 continues, and returns XT_RETRY.
3033
* At this stage T2 has already returned some rows, so it may not retry from the
3034
* start. Instead it tries to locate the last record it tried to lock.
3035
* This record is gone (or not visible), so it finds the next one.
3037
* POTENTIAL BUG: If cleanup does not wait until T2 is complete, then
3038
* I may miss the update record, if it is moved before the index scan
3041
// Retry: search again from the original key if no row has been returned
// yet (pb_ind_row_count is zero); xt_idx_research() is the other visible
// repositioning call.
if (!pb_ind_row_count && search_key) {
3042
if (!xt_idx_search(pb_open_tab, ind, search_key))
3043
return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3046
if (!xt_idx_research(pb_open_tab, ind))
3051
if (!xt_idx_read(ot, ind, (xtWord1 *) buf))
3058
while (ot->ot_curr_rec_id) {
3059
if (search_key && !search_key->sk_on_key)
3062
switch (xt_tab_read_record(ot, (xtWord1 *) buf)) {
3064
XT_DISABLED_TRACE(("not visi tx=%d rec=%d\n", (int) ot->ot_thread->st_xact_data->xd_start_xn_id, (int) ot->ot_curr_rec_id));
3065
if (xt_idx_next(ot, ind, search_key))
3070
if (xt_index_in_range(ot, ind, search_key, buf))
3072
if (!xt_idx_next(ot, ind, search_key))
3076
if (!pb_ind_row_count && search_key) {
3077
if (!xt_idx_search(pb_open_tab, ind, search_key))
3078
return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3081
if (!xt_idx_research(pb_open_tab, ind))
3086
XT_DISABLED_TRACE(("visible tx=%d rec=%d\n", (int) ot->ot_thread->st_xact_data->xd_start_xn_id, (int) ot->ot_curr_rec_id));
3091
return HA_ERR_END_OF_FILE;
3094
return ha_log_pbxt_thread_error_for_mysql(FALSE);
3097
// Backward index read; counterpart of xt_index_next_read(). Loops while
// ot->ot_curr_rec_id is non-zero, moving backward with xt_idx_prev() past
// entries that are not returned.
// Two loops are visible: one that reads data from the index key only and
// one that reads the entire record with xt_tab_read_record().
// search_key: when given, the loop stops as soon as sk_on_key is false.
// Returns HA_ERR_END_OF_FILE when nothing is found, or the error code from
// ha_log_pbxt_thread_error_for_mysql() on failure.
int ha_pbxt::xt_index_prev_read(XTOpenTablePtr ot, XTIndexPtr ind, xtBool key_only,
3098
register XTIdxSearchKeyPtr search_key, byte *buf)
3101
/* We only need to read the data from the key: */
3102
while (ot->ot_curr_rec_id) {
3103
if (search_key && !search_key->sk_on_key)
3106
switch (xt_tab_visible(ot)) {
3108
if (xt_idx_prev(ot, ind, search_key))
3113
if (!xt_idx_read(ot, ind, (xtWord1 *) buf))
3115
if (xt_index_in_range(ot, ind, search_key, buf))
3117
// NOTE(review): this steps with xt_idx_next() inside the backward reader,
// while the step a few lines above uses xt_idx_prev() -- confirm that
// moving forward is intended here.
if (!xt_idx_next(ot, ind, search_key))
3121
// Retry: search again (backward) from the original key if no row has been
// returned yet.
if (!pb_ind_row_count && search_key) {
3122
if (!xt_idx_search_prev(pb_open_tab, ind, search_key))
3123
return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3126
if (!xt_idx_research(pb_open_tab, ind))
3131
if (!xt_idx_read(ot, ind, (xtWord1 *) buf))
3138
/* We need to read the entire record: */
3139
while (ot->ot_curr_rec_id) {
3140
if (search_key && !search_key->sk_on_key)
3143
switch (xt_tab_read_record(ot, (xtWord1 *) buf)) {
3145
if (xt_idx_prev(ot, ind, search_key))
3150
if (xt_index_in_range(ot, ind, search_key, buf))
3152
// NOTE(review): same forward step as in the key-only loop -- confirm.
if (!xt_idx_next(ot, ind, search_key))
3156
if (!pb_ind_row_count && search_key) {
3157
if (!xt_idx_search_prev(pb_open_tab, ind, search_key))
3158
return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3161
if (!xt_idx_research(pb_open_tab, ind))
3170
return HA_ERR_END_OF_FILE;
3173
return ha_log_pbxt_thread_error_for_mysql(FALSE);
3176
// Prepares the handler for reading through index idx.
// If the table's indexes are disabled, active_index is set to MAX_KEY, an
// index error is raised on the table and the resulting error code is
// returned.
// Otherwise the number of columns required (ot_cols_req) is taken from the
// read set, the read set is tested against the index's column map for index
// coverage, and the thread's st_scan_index statistic is incremented.
// idx: index number. sorted: unused.
int ha_pbxt::index_init(uint idx, bool XT_UNUSED(sorted))
3179
XTThreadPtr thread = pb_open_tab->ot_thread;
3181
/* select count(*) from smalltab_PBXT;
3182
* ignores the error below, and continues to
3187
if (pb_open_tab->ot_table->tab_dic.dic_disable_index) {
3188
active_index = MAX_KEY;
3189
xt_tab_set_index_error(pb_open_tab->ot_table);
3190
return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3193
/* The number of columns required: */
3194
if (pb_open_tab->ot_is_modify) {
3196
pb_open_tab->ot_cols_req = table->read_set->MX_BIT_SIZE();
3197
#ifdef XT_PRINT_INDEX_OPT
3198
ind = (XTIndexPtr) pb_share->sh_dic_keys[idx];
3200
printf("index_init %s index %d cols req=%d/%d read_bits=%X write_bits=%X index_bits=%X\n", pb_open_tab->ot_table->tab_name->ps_path, (int) idx, pb_open_tab->ot_cols_req, pb_open_tab->ot_cols_req, (int) *table->read_set->bitmap, (int) *table->write_set->bitmap, (int) *ind->mi_col_map.bitmap);
3202
/* {START-STAT-HACK} previously position of start statement hack,
3203
* previous comment to code below: */
3204
/* Start a statement based transaction as soon
3205
* as a read is done for a modify type statement!
3206
* Previously, this was done too late!
3210
//pb_open_tab->ot_cols_req = ha_get_max_bit(table->read_set);
3211
pb_open_tab->ot_cols_req = table->read_set->MX_BIT_SIZE();
3213
/* Check for index coverage!
3215
* Given the following table:
3217
* CREATE TABLE `customer` (
3218
* `c_id` int(11) NOT NULL DEFAULT '0',
3219
* `c_d_id` int(11) NOT NULL DEFAULT '0',
3220
* `c_w_id` int(11) NOT NULL DEFAULT '0',
3221
* `c_first` varchar(16) DEFAULT NULL,
3222
* `c_middle` char(2) DEFAULT NULL,
3223
* `c_last` varchar(16) DEFAULT NULL,
3224
* `c_street_1` varchar(20) DEFAULT NULL,
3225
* `c_street_2` varchar(20) DEFAULT NULL,
3226
* `c_city` varchar(20) DEFAULT NULL,
3227
* `c_state` char(2) DEFAULT NULL,
3228
* `c_zip` varchar(9) DEFAULT NULL,
3229
* `c_phone` varchar(16) DEFAULT NULL,
3230
* `c_since` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
3231
* `c_credit` char(2) DEFAULT NULL,
3232
* `c_credit_lim` decimal(24,12) DEFAULT NULL,
3233
* `c_discount` double DEFAULT NULL,
3234
* `c_balance` decimal(24,12) DEFAULT NULL,
3235
* `c_ytd_payment` decimal(24,12) DEFAULT NULL,
3236
* `c_payment_cnt` double DEFAULT NULL,
3237
* `c_delivery_cnt` double DEFAULT NULL,
3239
* PRIMARY KEY (`c_w_id`,`c_d_id`,`c_id`),
3240
* KEY `c_w_id` (`c_w_id`,`c_d_id`,`c_last`,`c_first`,`c_id`)
3243
* MySQL does not recognize index coverage on the followin select:
3245
* SELECT c_id FROM customer WHERE c_w_id = 3 AND c_d_id = 8 AND
3246
* c_last = 'EINGATIONANTI' ORDER BY c_first ASC LIMIT 1;
3248
* TODO: Find out why this is necessary, MyISAM does not
3249
* seem to have this problem!
3251
ind = (XTIndexPtr) pb_share->sh_dic_keys[idx];
3252
// Index coverage test: every column in the read set is part of this index.
// NOTE(review): the action taken on coverage is not visible here.
if (MX_BIT_IS_SUBSET(table->read_set, &ind->mi_col_map))
3254
#ifdef XT_PRINT_INDEX_OPT
3255
printf("index_init %s index %d cols req=%d/%d read_bits=%X write_bits=%X index_bits=%X converage=%d\n", pb_open_tab->ot_table->tab_name->ps_path, (int) idx, pb_open_tab->ot_cols_req, table->read_set->MX_BIT_SIZE(), (int) *table->read_set->bitmap, (int) *table->write_set->bitmap, (int) *ind->mi_col_map.bitmap, (int) (MX_BIT_IS_SUBSET(table->read_set, &ind->mi_col_map) != 0));
3259
xt_xlog_check_long_writer(thread);
3261
pb_open_tab->ot_thread->st_statistics.st_scan_index++;
3265
// Ends an index scan: releases the index read handle if one is held, makes
// the lock on the last scanned row permanent, and resets active_index to
// MAX_KEY.
int ha_pbxt::index_end()
3271
XTThreadPtr thread = pb_open_tab->ot_thread;
3274
* the assertion below is not always held, because the sometimes handler is unlocked
3275
* before this function is called
3277
/*ASSERT_NS(pb_ex_in_use);*/
3279
if (pb_open_tab->ot_ind_rhandle) {
3280
xt_ind_release_handle(pb_open_tab->ot_ind_rhandle, FALSE, thread);
3281
pb_open_tab->ot_ind_rhandle = NULL;
3285
* make permanent the lock for the last scanned row
3288
pb_open_tab->ot_table->tab_locks.xt_make_lock_permanent(pb_open_tab, &thread->st_lock_list);
3290
xt_xlog_check_long_writer(thread);
3292
active_index = MAX_KEY;
3296
#ifdef XT_TRACK_RETURNED_ROWS
3297
// Debug aid (XT_TRACK_RETURNED_ROWS builds): marks the start of a scan.
// Traces the table id and index number, resets the returned-row counter and
// clears every slot of ot_rows_returned.
void ha_start_scan(XTOpenTablePtr ot, u_int index)
3299
xt_ttracef(ot->ot_thread, "SCAN %d:%d\n", (int) ot->ot_table->tab_id, (int) index);
3300
ot->ot_rows_ret_curr = 0;
3301
for (u_int i=0; i<ot->ot_rows_ret_max; i++)
3302
ot->ot_rows_returned[i] = 0;
3305
// Debug aid (XT_TRACK_RETURNED_ROWS builds): records that the current row
// is being returned by the scan and detects a row returned twice.
// ot_rows_returned is indexed by row id and stores the record id returned
// for that row; it is grown and zero-filled on demand.
// A zero row id, a zero record id, or an already filled slot is reported
// with the current query text and ends in FatalAppExit().
void ha_return_row(XTOpenTablePtr ot, u_int index)
3307
xt_ttracef(ot->ot_thread, "%d:%d ROW=%d:%d\n",
3308
(int) ot->ot_table->tab_id, (int) index, (int) ot->ot_curr_row_id, (int) ot->ot_curr_rec_id);
3309
ot->ot_rows_ret_curr++;
3310
// Grow the tracking array so that index ot_curr_row_id is valid.
if (ot->ot_curr_row_id >= ot->ot_rows_ret_max) {
3311
if (!xt_realloc_ns((void **) &ot->ot_rows_returned, (ot->ot_curr_row_id+1) * sizeof(xtRecordID)))
3313
memset(&ot->ot_rows_returned[ot->ot_rows_ret_max], 0, (ot->ot_curr_row_id+1 - ot->ot_rows_ret_max) * sizeof(xtRecordID));
3314
ot->ot_rows_ret_max = ot->ot_curr_row_id+1;
3316
if (!ot->ot_curr_row_id || !ot->ot_curr_rec_id || ot->ot_rows_returned[ot->ot_curr_row_id]) {
3317
char *sql = *thd_query(current_thd);
3319
xt_ttracef(ot->ot_thread, "DUP %d:%d %s\n",
3320
(int) ot->ot_table->tab_id, (int) index, *thd_query(current_thd));
3322
printf("ERROR: row=%d rec=%d newr=%d, already returned!\n", (int) ot->ot_curr_row_id, (int) ot->ot_rows_returned[ot->ot_curr_row_id], (int) ot->ot_curr_rec_id);
3323
printf("ERROR: %s\n", sql);
3325
FatalAppExit(0, "Debug Me!");
3329
ot->ot_rows_returned[ot->ot_curr_row_id] = ot->ot_curr_rec_id;
3333
// Common implementation of the keyed index reads. Positions index idx using
// key and key_len according to find_flag and reads the first matching row
// into buf. Starts a new search on this handler (pb_ind_row_count is reset).
//  - idx == MAX_KEY yields HA_ERR_WRONG_INDEX.
//  - HA_READ_PREFIX_LAST, HA_READ_PREFIX_LAST_OR_PREV, HA_READ_BEFORE_KEY and
//    HA_READ_KEY_OR_PREV search backward (xt_idx_search_prev() followed by
//    xt_index_prev_read()); only HA_READ_PREFIX_LAST passes the search key
//    on for exact matching.
//  - HA_READ_PREFIX, HA_READ_KEY_EXACT, HA_READ_KEY_OR_NEXT and
//    HA_READ_AFTER_KEY search forward (xt_idx_search() followed by
//    xt_index_next_read()); HA_READ_KEY_EXACT and HA_READ_PREFIX pass the
//    search key on for exact matching.
//  - For HA_READ_AFTER_KEY, HA_ERR_END_OF_FILE is reported as
//    HA_ERR_KEY_NOT_FOUND.
// The thread's st_row_select statistic is incremented.
int ha_pbxt::index_read_xt(byte * buf, uint idx, const byte *key, uint key_len, enum ha_rkey_function find_flag)
3338
XTIdxSearchKeyRec search_key;
3340
if (idx == MAX_KEY) {
3341
err = HA_ERR_WRONG_INDEX;
3344
#ifdef XT_TRACK_RETURNED_ROWS
3345
ha_start_scan(pb_open_tab, idx);
3348
/* This call starts a search on this handler! */
3349
pb_ind_row_count = 0;
3351
ASSERT_NS(pb_ex_in_use);
3353
XT_PRINT1(pb_open_tab->ot_thread, "index_read_xt (%s)\n", pb_share->sh_table_path->ps_path);
3354
XT_DISABLED_TRACE(("search tx=%d val=%d update=%d\n", (int) pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(key), pb_modified));
3355
ind = (XTIndexPtr) pb_share->sh_dic_keys[idx];
3357
switch (find_flag) {
3358
case HA_READ_PREFIX_LAST:
3359
case HA_READ_PREFIX_LAST_OR_PREV:
3360
// The prefix cases set the prefix flag and then share the backward search
// code of the two cases that follow.
prefix = SEARCH_PREFIX;
3361
case HA_READ_BEFORE_KEY:
3362
case HA_READ_KEY_OR_PREV: // I assume you want to be positioned on the last entry in the key duplicate list!!
3363
xt_idx_prep_key(ind, &search_key, ((find_flag == HA_READ_BEFORE_KEY) ? 0 : XT_SEARCH_AFTER_KEY) | prefix, (xtWord1 *) key, (size_t) key_len);
3364
if (!xt_idx_search_prev(pb_open_tab, ind, &search_key))
3365
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3367
err = xt_index_prev_read(pb_open_tab, ind, pb_key_read,
3368
(find_flag == HA_READ_PREFIX_LAST) ? &search_key : NULL, buf);
3370
case HA_READ_PREFIX:
3371
prefix = SEARCH_PREFIX;
3372
case HA_READ_KEY_EXACT:
3373
case HA_READ_KEY_OR_NEXT:
3374
case HA_READ_AFTER_KEY:
3376
xt_idx_prep_key(ind, &search_key, ((find_flag == HA_READ_AFTER_KEY) ? XT_SEARCH_AFTER_KEY : 0) | prefix, (xtWord1 *) key, key_len);
3377
if (!xt_idx_search(pb_open_tab, ind, &search_key))
3378
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3380
err = xt_index_next_read(pb_open_tab, ind, pb_key_read,
3381
(find_flag == HA_READ_KEY_EXACT || find_flag == HA_READ_PREFIX) ? &search_key : NULL, buf);
3382
if (err == HA_ERR_END_OF_FILE && find_flag == HA_READ_AFTER_KEY)
3383
err = HA_ERR_KEY_NOT_FOUND;
3389
#ifdef XT_TRACK_RETURNED_ROWS
3391
ha_return_row(pb_open_tab, idx);
3393
XT_DISABLED_TRACE(("search tx=%d val=%d err=%d\n", (int) pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(key), err));
3396
table->status = STATUS_NOT_FOUND;
3398
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3405
* Positions an index cursor to the index specified in the handle. Fetches the
3406
* row if available. If the key value is null, begin at the first key of the
3409
// Keyed read on the currently active index; forwards to index_read_xt()
// with active_index and returns its result.
int ha_pbxt::index_read(byte * buf, const byte * key, uint key_len, enum ha_rkey_function find_flag)
3411
//statistic_increment(ha_read_key_count,&LOCK_status);
3412
return index_read_xt(buf, active_index, key, key_len, find_flag);
3415
// Keyed read on an explicitly given index idx; forwards to index_read_xt()
// and returns its result.
int ha_pbxt::index_read_idx(byte * buf, uint idx, const byte *key, uint key_len, enum ha_rkey_function find_flag)
3417
//statistic_increment(ha_read_key_count,&LOCK_status);
3418
return index_read_xt(buf, idx, key, key_len, find_flag);
3421
// Keyed read on the active index with the fixed find flag
// HA_READ_PREFIX_LAST; forwards to index_read_xt() and returns its result.
int ha_pbxt::index_read_last(byte * buf, const byte * key, uint key_len)
3423
//statistic_increment(ha_read_key_count,&LOCK_status);
3424
return index_read_xt(buf, active_index, key, key_len, HA_READ_PREFIX_LAST);
3428
* Used to read forward through the index.
3430
// Moves one entry forward on the active index (xt_idx_next() with no search
// key) and reads the next row into buf via xt_index_next_read().
// active_index == MAX_KEY yields HA_ERR_WRONG_INDEX.
// The thread's st_row_select statistic is incremented.
int ha_pbxt::index_next(byte * buf)
3436
//statistic_increment(ha_read_next_count,&LOCK_status);
3437
ASSERT_NS(pb_ex_in_use);
3439
if (active_index == MAX_KEY) {
3440
err = HA_ERR_WRONG_INDEX;
3443
ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3445
if (!xt_idx_next(pb_open_tab, ind, NULL))
3446
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3448
err = xt_index_next_read(pb_open_tab, ind, pb_key_read, NULL, buf);
3451
#ifdef XT_TRACK_RETURNED_ROWS
3453
ha_return_row(pb_open_tab, active_index);
3457
table->status = STATUS_NOT_FOUND;
3459
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3466
* I have implemented this because there is currently a
3467
* bug in handler::index_next_same().
3469
* drop table if exists t1;
3470
* CREATE TABLE t1 (a int, b int, primary key(a,b))
3471
* PARTITION BY KEY(b,a) PARTITIONS 2;
3472
* insert into t1 values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6);
3473
* select * from t1 where a = 4;
3476
// Reads the next row on the active index that still matches key.
// An exact-match search key is built from key and length, with sk_on_key
// initially TRUE; the index is stepped forward with that key and the row is
// read via xt_index_next_read().
// active_index == MAX_KEY yields HA_ERR_WRONG_INDEX.
// The thread's st_row_select statistic is incremented.
int ha_pbxt::index_next_same(byte * buf, const byte *key, uint length)
3480
XTIdxSearchKeyRec search_key;
3483
//statistic_increment(ha_read_next_count,&LOCK_status);
3484
ASSERT_NS(pb_ex_in_use);
3486
if (active_index == MAX_KEY) {
3487
err = HA_ERR_WRONG_INDEX;
3490
ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3492
// The search key is filled in field by field, with the key value converted
// into the key's own buffer.
search_key.sk_key_value.sv_flags = HA_READ_KEY_EXACT;
3493
search_key.sk_key_value.sv_rec_id = 0;
3494
search_key.sk_key_value.sv_row_id = 0;
3495
search_key.sk_key_value.sv_key = search_key.sk_key_buf;
3496
search_key.sk_key_value.sv_length = myxt_create_key_from_key(ind, search_key.sk_key_buf, (xtWord1 *) key, (u_int) length);
3497
search_key.sk_on_key = TRUE;
3499
if (!xt_idx_next(pb_open_tab, ind, &search_key))
3500
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3502
err = xt_index_next_read(pb_open_tab, ind, pb_key_read, &search_key, buf);
3505
#ifdef XT_TRACK_RETURNED_ROWS
3507
ha_return_row(pb_open_tab, active_index);
3511
table->status = STATUS_NOT_FOUND;
3513
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3520
* Used to read backwards through the index.
3522
// Moves one entry backward on the active index (xt_idx_prev() with no
// search key) and reads the row into buf via xt_index_prev_read().
// active_index == MAX_KEY yields HA_ERR_WRONG_INDEX.
// The thread's st_row_select statistic is incremented.
int ha_pbxt::index_prev(byte * buf)
3528
//statistic_increment(ha_read_prev_count,&LOCK_status);
3529
ASSERT_NS(pb_ex_in_use);
3531
if (active_index == MAX_KEY) {
3532
err = HA_ERR_WRONG_INDEX;
3535
ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3537
if (!xt_idx_prev(pb_open_tab, ind, NULL))
3538
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3540
err = xt_index_prev_read(pb_open_tab, ind, pb_key_read, NULL, buf);
3543
#ifdef XT_TRACK_RETURNED_ROWS
3545
ha_return_row(pb_open_tab, active_index);
3549
table->status = STATUS_NOT_FOUND;
3551
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3558
* index_first() asks for the first key in the index.
3560
// Positions the active index on its first entry (search key prepared with
// XT_SEARCH_FIRST_FLAG) and reads the first row into buf via
// xt_index_next_read(). Starts a new scan (pb_ind_row_count is reset).
// active_index == MAX_KEY yields HA_ERR_WRONG_INDEX; the existing comment
// below explains why that check is needed.
// The thread's st_row_select statistic is incremented.
int ha_pbxt::index_first(byte * buf)
3564
XTIdxSearchKeyRec search_key;
3567
//statistic_increment(ha_read_first_count,&LOCK_status);
3568
ASSERT_NS(pb_ex_in_use);
3570
/* This is required because MySQL ignores the error returned
3571
* init init_index sometimes, for example:
3573
* if (!table->file->inited)
3574
* table->file->ha_index_init(tab->index, tab->sorted);
3575
* if ((error=tab->table->file->index_first(tab->table->record[0])))
3577
if (active_index == MAX_KEY) {
3578
err = HA_ERR_WRONG_INDEX;
3582
#ifdef XT_TRACK_RETURNED_ROWS
3583
ha_start_scan(pb_open_tab, active_index);
3585
pb_ind_row_count = 0;
3587
ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3589
xt_idx_prep_key(ind, &search_key, XT_SEARCH_FIRST_FLAG, NULL, 0);
3590
if (!xt_idx_search(pb_open_tab, ind, &search_key))
3591
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3593
err = xt_index_next_read(pb_open_tab, ind, pb_key_read, NULL, buf);
3596
#ifdef XT_TRACK_RETURNED_ROWS
3598
ha_return_row(pb_open_tab, active_index);
3602
table->status = STATUS_NOT_FOUND;
3604
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3611
* index_last() asks for the last key in the index.
3613
int ha_pbxt::index_last(byte * buf)
3617
XTIdxSearchKeyRec search_key;
3620
//statistic_increment(ha_read_last_count,&LOCK_status);
3621
ASSERT_NS(pb_ex_in_use);
3623
if (active_index == MAX_KEY) {
3624
err = HA_ERR_WRONG_INDEX;
3628
#ifdef XT_TRACK_RETURNED_ROWS
3629
ha_start_scan(pb_open_tab, active_index);
3631
pb_ind_row_count = 0;
3633
ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3635
xt_idx_prep_key(ind, &search_key, XT_SEARCH_AFTER_LAST_FLAG, NULL, 0);
3636
if (!xt_idx_search_prev(pb_open_tab, ind, &search_key))
3637
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3639
err = xt_index_prev_read(pb_open_tab, ind, pb_key_read, NULL, buf);
3642
#ifdef XT_TRACK_RETURNED_ROWS
3644
ha_return_row(pb_open_tab, active_index);
3648
table->status = STATUS_NOT_FOUND;
3650
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3657
* -----------------------------------------------------------------------
3658
* RANDOM/SEQUENTIAL READ METHODS
3662
* rnd_init() is called when the system wants the storage engine to do a table
3664
* See the example in the introduction at the top of this file to see when
3665
* rnd_init() is called.
3667
* Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc, sql_table.cc,
3668
* and sql_update.cc.
3670
int ha_pbxt::rnd_init(bool scan)
3673
XTThreadPtr thread = pb_open_tab->ot_thread;
3675
XT_PRINT1(thread, "rnd_init (%s)\n", pb_share->sh_table_path->ps_path);
3676
XT_DISABLED_TRACE(("seq scan tx=%d\n", (int) thread->st_xact_data->xd_start_xn_id));
3678
/* Call xt_tab_seq_exit() to make sure the resources used by the previous
3679
* scan are freed. In particular make sure cache page ref count is decremented.
3680
* This is needed as rnd_init() can be called multiple times w/o matching calls
3681
* to rnd_end(). Our experience is that currently this is done in queries like:
3683
* SELECT t1.c1,t2.c1 FROM t1 LEFT JOIN t2 USING (c1);
3684
* UPDATE t1 LEFT JOIN t2 USING (c1) SET t1.c1 = t2.c1 WHERE t1.c1 = t2.c1;
3686
* when scanning inner tables. It is important to understand that in such case
3687
* multiple calls to rnd_init() are not semantically equal to a new query. For
3688
* example we cannot make row locks permanent as we do in rnd_end(), as
3689
* ha_pbxt::unlock_row still can be called.
3691
xt_tab_seq_exit(pb_open_tab);
3693
/* The number of columns required: */
3694
if (pb_open_tab->ot_is_modify) {
3695
pb_open_tab->ot_cols_req = table->read_set->MX_BIT_SIZE();
3696
/* {START-STAT-HACK} previously position of start statement hack,
3697
* previous comment to code below: */
3698
/* Start a statement based transaction as soon
3699
* as a read is done for a modify type statement!
3700
* Previously, this was done too late!
3704
//pb_open_tab->ot_cols_req = ha_get_max_bit(table->read_set);
3705
pb_open_tab->ot_cols_req = table->read_set->MX_BIT_SIZE();
3708
* in case of queries like SELECT COUNT(*) FROM t
3709
* table->read_set is empty. Otoh, ot_cols_req == 0 can be treated
3710
* as "all columns" by some internal code (see e.g. myxt_load_row),
3711
* which makes such queries very ineffective for the records with
3712
* extended part. Setting column count to 1 makes sure that the
3713
* extended part will not be accessed in most cases.
3716
if (pb_open_tab->ot_cols_req == 0)
3717
pb_open_tab->ot_cols_req = 1;
3720
ASSERT_NS(pb_ex_in_use);
3722
if (!xt_tab_seq_init(pb_open_tab))
3723
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3726
xt_tab_seq_reset(pb_open_tab);
3728
xt_xlog_check_long_writer(thread);
3733
int ha_pbxt::rnd_end()
3738
* make permanent the lock for the last scanned row
3740
XTThreadPtr thread = pb_open_tab->ot_thread;
3742
pb_open_tab->ot_table->tab_locks.xt_make_lock_permanent(pb_open_tab, &thread->st_lock_list);
3744
xt_xlog_check_long_writer(thread);
3746
xt_tab_seq_exit(pb_open_tab);
3751
* This is called for each row of the table scan. When you run out of records
3752
* you should return HA_ERR_END_OF_FILE. Fill buff up with the row information.
3753
* The Field structure for the table is the key to getting data into buf
3754
* in a manner that will allow the server to understand it.
3756
* Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc, sql_table.cc,
3757
* and sql_update.cc.
3759
int ha_pbxt::rnd_next(byte *buf)
3765
ASSERT_NS(pb_ex_in_use);
3766
//statistic_increment(ha_read_rnd_next_count, &LOCK_status);
3767
xt_xlog_check_long_writer(pb_open_tab->ot_thread);
3769
if (!xt_tab_seq_next(pb_open_tab, (xtWord1 *) buf, &eof))
3770
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3772
err = HA_ERR_END_OF_FILE;
3775
table->status = STATUS_NOT_FOUND;
3777
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3784
* position() is called after each call to rnd_next() if the data needs
3785
* to be ordered. You can do something like the following to store
3787
* ha_store_ptr(ref, ref_length, current_position);
3789
* The server uses ref to store data. ref_length in the above case is
3790
* the size needed to store current_position. ref is just a byte array
3791
* that the server will maintain. If you are using offsets to mark rows, then
3792
* current_position should be the offset. If it is a primary key like in
3793
* BDB, then it needs to be a primary key.
3795
* Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc.
3797
void ha_pbxt::position(const byte *XT_UNUSED(record))
3800
ASSERT_NS(pb_ex_in_use);
3802
* I changed this from using little endian to big endian.
3804
* The reason is that sometimes the pointers are sorted.
3805
* When they are sorted a binary compare is used.
3806
* A binary compare sorts big endian values correctly!
3808
* Take the following example:
3810
* create table t1 (a int, b text);
3811
* insert into t1 values (1, 'aa'), (1, 'bb'), (1, 'cc');
3812
* select group_concat(b) from t1 group by a;
3814
* With little endian pointers the result is:
3817
* With big-endian pointer the result is:
3821
(void) ASSERT_NS(XT_RECORD_OFFS_SIZE == 4);
3822
mi_int4store((xtWord1 *) ref, pb_open_tab->ot_curr_rec_id);
3827
* Given the #ROWID retrieve the record.
3829
* Called from filesort.cc records.cc sql_insert.cc sql_select.cc sql_update.cc.
3831
int ha_pbxt::rnd_pos(byte * buf, byte *pos)
3836
ASSERT_NS(pb_ex_in_use);
3837
//statistic_increment(ha_read_rnd_count, &LOCK_status);
3838
XT_PRINT1(pb_open_tab->ot_thread, "rnd_pos (%s)\n", pb_share->sh_table_path->ps_path);
3840
pb_open_tab->ot_curr_rec_id = mi_uint4korr((xtWord1 *) pos);
3841
switch (xt_tab_dirty_read_record(pb_open_tab, (xtWord1 *) buf)) {
3843
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3850
table->status = STATUS_NOT_FOUND;
3852
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3859
* -----------------------------------------------------------------------
3864
::info() is used to return information to the optimizer.
3865
Currently this table handler doesn't implement most of the fields
3866
really needed. SHOW also makes use of this data
3867
Another note, you will probably want to have the following in your
3871
The reason is that the server will optimize for cases of only a single
3872
record. If in a table scan you don't know the number of records
3873
it will probably be better to set records to two so you can return
3874
as many records as you need.
3875
Along with records a few more variables you may wish to set are:
3882
Take a look at the public variables in handler.h for more information.
3906
#if MYSQL_VERSION_ID < 50114
3907
void ha_pbxt::info(uint flag)
3909
int ha_pbxt::info(uint flag)
3917
if (!(in_use = pb_ex_in_use)) {
3919
if (pb_share && pb_share->sh_table_lock) {
3920
/* If some thread has an exclusive lock, then
3921
* we wait for the lock to be removed:
3923
#if MYSQL_VERSION_ID < 50114
3924
ha_wait_for_shared_use(this, pb_share);
3927
if (!ha_wait_for_shared_use(this, pb_share))
3928
return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3933
if ((ot = pb_open_tab)) {
3934
if (flag & HA_STATUS_VARIABLE) {
3936
* Free row count is not reliable, so ignore it.
3937
* The problem is if tab_row_fnum > tab_row_eof_id - 1 then
3938
* we have a very bad result.
3940
* If stats.records+EXTRA_RECORDS == 0 as returned by
3941
* estimate_rows_upper_bound(), then filesort will crash here:
3943
* make_sortkey(param,sort_keys[idx++],ref_pos);
3945
* #0 0x000bf69c in Field_long::sort_string at field.cc:3766
3946
* #1 0x0022e1f1 in make_sortkey at filesort.cc:769
3947
* #2 0x0022f1cf in find_all_keys at filesort.cc:619
3948
* #3 0x00230eec in filesort at filesort.cc:243
3949
* #4 0x001b9d89 in mysql_update at sql_update.cc:415
3950
* #5 0x0010db12 in mysql_execute_command at sql_parse.cc:2959
3951
* #6 0x0011480d in mysql_parse at sql_parse.cc:5787
3952
* #7 0x00115afb in dispatch_command at sql_parse.cc:1200
3953
* #8 0x00116de2 in do_command at sql_parse.cc:857
3954
* #9 0x00101ee4 in handle_one_connection at sql_connect.cc:1115
3956
* The problem is that sort_keys is allocated to handle just 1 vector.
3957
* Sorting one vector crashes. Although I could not find a check for
3958
* the actual number of vectors. But it must assume that it has at
3959
* least EXTRA_RECORDS vectors.
3961
stats.deleted = /* ot->ot_table->tab_row_fnum */ 0;
3962
stats.records = (ha_rows) (ot->ot_table->tab_row_eof_id - 1 /* - stats.deleted */);
3963
stats.data_file_length = xt_rec_id_to_rec_offset(ot->ot_table, ot->ot_table->tab_rec_eof_id);
3964
stats.index_file_length = xt_ind_node_to_offset(ot->ot_table, ot->ot_table->tab_ind_eof);
3965
stats.delete_length = ot->ot_table->tab_rec_fnum * ot->ot_rec_size;
3966
//check_time = info.check_time;
3967
stats.mean_rec_length = (ulong) ot->ot_rec_size;
3970
if (flag & HA_STATUS_CONST) {
3971
ha_rows rec_per_key;
3973
TABLE_SHARE *share= TS(table);
3975
stats.max_data_file_length = 0x00FFFFFF;
3976
stats.max_index_file_length = 0x00FFFFFF;
3977
//stats.create_time = info.create_time;
3978
ref_length = XT_RECORD_OFFS_SIZE;
3979
//share->db_options_in_use = info.options;
3980
stats.block_size = XT_INDEX_PAGE_SIZE;
3983
if (share->tmp_table == message::Table::STANDARD)
3985
if (share->tmp_table == NO_TMP_TABLE)
3988
#define WHICH_MUTEX mutex
3989
#elif MYSQL_VERSION_ID >= 50404
3990
#define WHICH_MUTEX LOCK_ha_data
3992
if (share->tmp_table == NO_TMP_TABLE)
3993
#define WHICH_MUTEX mutex
3998
#if MYSQL_VERSION_ID < 50404
3999
#if MYSQL_VERSION_ID < 50123
4000
safe_mutex_lock(&share->mutex,__FILE__,__LINE__);
4002
safe_mutex_lock(&share->mutex,0,__FILE__,__LINE__);
4005
safe_mutex_lock(&share->WHICH_MUTEX,0,__FILE__,__LINE__);
4010
#ifdef MY_PTHREAD_FASTMUTEX
4011
my_pthread_fastmutex_lock(&share->WHICH_MUTEX);
4013
pthread_mutex_lock(&share->WHICH_MUTEX);
4016
#endif // SAFE_MUTEX
4018
set_prefix(share->keys_in_use, share->keys);
4019
share->keys_for_keyread&= share->keys_in_use;
4021
share->keys_in_use.set_prefix(share->keys);
4022
//share->keys_in_use.intersect_extended(info.key_map);
4023
share->keys_for_keyread.intersect(share->keys_in_use);
4024
//share->db_record_offset = info.record_offset;
4026
for (u_int i = 0; i < share->keys; i++) {
4027
ind = pb_share->sh_dic_keys[i];
4030
if (ind->mi_seg_count == 1 && (ind->mi_flags & HA_NOSAME))
4035
for (u_int j = 0; j < table->key_info[i].key_parts; j++)
4036
table->key_info[i].rec_per_key[j] = (ulong) rec_per_key;
4039
if (share->tmp_table == message::Table::STANDARD)
4041
if (share->tmp_table == NO_TMP_TABLE)
4044
safe_mutex_unlock(&share->WHICH_MUTEX,__FILE__,__LINE__);
4046
#ifdef MY_PTHREAD_FASTMUTEX
4047
pthread_mutex_unlock(&share->WHICH_MUTEX.mutex);
4049
pthread_mutex_unlock(&share->WHICH_MUTEX);
4053
Set data_file_name and index_file_name to point at the symlink value
4054
if table is symlinked (Ie; Real name is not same as generated name)
4057
data_file_name = index_file_name = 0;
4058
fn_format(name_buff, file->filename, "", MI_NAME_DEXT, 2);
4059
if (strcmp(name_buff, info.data_file_name))
4060
data_file_name = info.data_file_name;
4061
strmov(fn_ext(name_buff), MI_NAME_IEXT);
4062
if (strcmp(name_buff, info.index_file_name))
4063
index_file_name = info.index_file_name;
4067
if (flag & HA_STATUS_ERRKEY)
4068
errkey = ot->ot_err_index_no;
4071
* We assume they want the next value to be returned!
4073
* At least, this is what works for the following code:
4075
* create table t1 (a int auto_increment primary key)
4076
* auto_increment=100
4078
* partition by list (a)
4079
* (partition p0 values in (1, 98,99, 100, 101));
4080
* create index inx on t1 (a);
4081
* insert into t1 values (null);
4084
if (flag & HA_STATUS_AUTO)
4085
stats.auto_increment_value = (ulonglong) ot->ot_table->tab_auto_inc+1;
4093
/* Someone may be waiting for me to complete: */
4094
if (pb_share->sh_table_lock)
4095
xt_broadcast_cond_ns((xt_cond_type *) pb_share->sh_ex_cond);
4098
#if MYSQL_VERSION_ID < 50114
4106
* extra() is called whenever the server wishes to send a hint to
4107
* the storage engine. The myisam engine implements the most hints.
4108
* ha_innodb.cc has the most exhaustive list of these hints.
4110
int ha_pbxt::extra(enum ha_extra_function operation)
4114
XT_PRINT2(xt_get_self(), "ha_pbxt::extra (%s) operation=%d\n", pb_share->sh_table_path->ps_path, operation);
4116
switch (operation) {
4117
case HA_EXTRA_RESET_STATE:
4118
pb_key_read = FALSE;
4119
pb_ignore_dup_key = 0;
4120
/* As far as I can tell, this function is called for
4121
* every table at the end of a statement.
4123
* So, during a LOCK TABLES ... UNLOCK TABLES, I use
4124
* this to find the end of a statement.
4125
* start_stmt() indicates the start of a statement,
4126
* and is also called once for each table in the
4129
* So the statement boundary is indicated by
4130
* self->st_stat_count == 0
4132
* GOTCHA: I cannot end the transaction here!
4133
* I must end it in start_stmt().
4134
* The reason is because there are situations
4135
* where this would end a transaction that
4136
* was begin by external_lock().
4138
* An example of this is when a function
4139
* is called when doing CREATE TABLE SELECT.
4142
/* NOTE: pb_in_stat is just used to avoid getting
4143
* self, if it is not necessary!!
4149
if (!(self = ha_set_current_thread(pb_mysql_thd, &err)))
4150
return xt_ha_pbxt_to_mysql_error(err);
4152
if (self->st_stat_count > 0) {
4153
self->st_stat_count--;
4154
if (self->st_stat_count == 0)
4155
self->st_stat_ended = TRUE;
4158
/* This is the end of a statement, I can turn any locks into permanent locks now: */
4160
pb_open_tab->ot_table->tab_locks.xt_make_lock_permanent(pb_open_tab, &self->st_lock_list);
4163
pb_open_tab->ot_for_update = 0;
4165
case HA_EXTRA_KEYREAD:
4166
/* This means we do not need to read the entire record. */
4169
case HA_EXTRA_NO_KEYREAD:
4170
pb_key_read = FALSE;
4172
case HA_EXTRA_IGNORE_DUP_KEY:
4173
/* NOTE!!! Calls to extra(HA_EXTRA_IGNORE_DUP_KEY) can be nested!
4174
* In fact, the calls are from different threads, so
4175
* strictly speaking I should protect this variable!!
4176
* Here is the sequence that produces the duplicate call:
4178
* drop table if exists t1;
4179
* CREATE TABLE t1 (x int not null, y int, primary key (x)) engine=pbxt;
4180
* insert into t1 values (1, 3), (4, 1);
4181
* replace DELAYED into t1 (x, y) VALUES (4, 2);
4182
* select * from t1 order by x;
4185
pb_ignore_dup_key++;
4187
case HA_EXTRA_NO_IGNORE_DUP_KEY:
4188
pb_ignore_dup_key--;
4190
case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
4191
/* MySQL needs all fields */
4192
pb_key_read = FALSE;
4203
* Deprecated and likely to be removed in the future. Storage engines normally
4204
* just make a call like:
4205
* ha_pbxt::extra(HA_EXTRA_RESET);
4208
int ha_pbxt::reset(void)
4211
extra(HA_EXTRA_RESET_STATE);
4215
void ha_pbxt::unlock_row()
4219
pb_open_tab->ot_table->tab_locks.xt_remove_temp_lock(pb_open_tab, FALSE);
4223
* Used to delete all rows in a table. Both for cases of truncate and
4224
* for cases where the optimizer realizes that all rows will be
4225
* removed as a result of a SQL statement.
4227
* Called from item_sum.cc by Item_func_group_concat::clear(),
4228
* Item_sum_count_distinct::clear(), and Item_func_group_concat::clear().
4229
* Called from sql_delete.cc by mysql_delete().
4230
* Called from sql_select.cc by JOIN::reinit().
4231
* Called from sql_union.cc by st_select_lex_unit::exec().
4233
int ha_pbxt::delete_all_rows()
4235
THD *thd = current_thd;
4238
XTDDTable *tab_def = NULL;
4239
char path[PATH_MAX];
4243
if (thd_sql_command(thd) != SQLCOM_TRUNCATE) {
4244
/* Just like InnoDB we only handle TRUNCATE TABLE
4245
* by recreating the table.
4246
* DELETE FROM t must be handled by deleting
4247
* each row because it may be part of a transaction,
4248
* and there may be foreign key actions.
4251
XT_RETURN (errno = HA_ERR_WRONG_COMMAND);
4253
XT_RETURN (my_errno = HA_ERR_WRONG_COMMAND);
4257
if (!(self = ha_set_current_thread(thd, &err)))
4258
return xt_ha_pbxt_to_mysql_error(err);
4261
XTDictionaryRec dic;
4263
memset(&dic, 0, sizeof(dic));
4265
dic = pb_share->sh_table->tab_dic;
4266
xt_strcpy(PATH_MAX, path, pb_share->sh_table->tab_name->ps_path);
4268
if ((tab_def = dic.dic_table))
4269
tab_def->reference();
4271
if (!(thd_test_options(thd,OPTION_NO_FOREIGN_KEY_CHECKS)))
4272
tab_def->deleteAllRows(self);
4274
/* We should have a table lock! */
4275
//ASSERT(pb_lock_table);
4276
if (!pb_table_locked) {
4277
ha_aquire_exclusive_use(self, pb_share, this);
4278
pushr_(ha_release_exclusive_use, pb_share);
4280
ha_close_open_tables(self, pb_share, NULL);
4282
/* This is required in the case of delete_all_rows, because we must
4283
* ensure that the handlers no longer reference the old
4284
* table, so that it will not be used again. The table
4285
* must be re-opened, because the ID has changed!
4287
* 0.9.86+ Must check if this is still necessary.
4289
* the ha_close_share(self, pb_share) call was moved from above
4290
* (before tab_def = dic.dic_table), because of a crash.
4293
* set storage_engine = pbxt;
4294
* create table t1 (s1 int primary key);
4295
* insert into t1 values (1);
4296
* create table t2 (s1 int, foreign key (s1) references t1 (s1));
4297
* insert into t2 values (1);
4298
* truncate table t1; -- this should fail because of FK constraint
4299
* alter table t1 engine = myisam; -- this caused crash
4302
ha_close_share(self, pb_share);
4304
/* MySQL documentation requires us to reset auto increment value to 1
4305
* on truncate even if the table was created with a different value.
4306
* This is also consistent with other engines.
4308
dic.dic_min_auto_inc = 1;
4310
xt_create_table(self, (XTPathStrPtr) path, &dic);
4311
if (!pb_table_locked)
4312
freer_(); // ha_release_exclusive_use(pb_share)
4315
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4320
tab_def->release(self);
4327
* Assuming a key (a,b,c)
4329
* rec_per_key[0] = SELECT COUNT(*)/COUNT(DISTINCT a) FROM t;
4330
* rec_per_key[1] = SELECT COUNT(*)/COUNT(DISTINCT a,b) FROM t;
4331
* rec_per_key[2] = SELECT COUNT(*)/COUNT(DISTINCT a,b,c) FROM t;
4333
* After this is implemented, the selectivity can serve as
4334
* a quick estimate of records_in_range().
4336
* After you have done this, you need to redo the index_merge*
4337
* tests. Restore the standard result to check if we
4338
* now agree with the MyISAM strategy.
4342
int ha_pbxt::analyze(THD *thd)
4344
int ha_pbxt::analyze(THD *thd, HA_CHECK_OPT *XT_UNUSED(check_opt))
4350
xtXactID clean_xn_id = 0;
4356
if ((err = reopen()))
4360
/* Wait until the sweeper is no longer busy!
4361
* If you want an accurate count(*) value, then call
4362
* ANALYZE TABLE first. This function waits until the
4363
* sweeper has completed.
4365
db = pb_open_tab->ot_table->tab_db;
4368
* Wait until everything is cleaned up before this transaction.
4369
* But this will only work if we quit our transaction!
4371
* GOTCHA: When a PBXT table is partitioned, then analyze() is
4372
* called for each component. The first calls xt_xn_commit().
4373
* All following calls have no transaction!:
4375
* CREATE TABLE t1 (a int)
4376
* PARTITION BY LIST (a)
4377
* (PARTITION x1 VALUES IN (10), PARTITION x2 VALUES IN (20));
4382
if (pb_open_tab->ot_thread && pb_open_tab->ot_thread->st_xact_data) {
4383
my_xn_id = pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id;
4384
XT_PRINT0(xt_get_self(), "xt_xn_commit\n");
4385
xt_xn_commit(pb_open_tab->ot_thread);
4388
my_xn_id = db->db_xn_to_clean_id;
4390
while ((!db->db_sw_idle || xt_xn_is_before(db->db_xn_to_clean_id, my_xn_id)) && !thd_killed(thd)) {
4394
* It is possible that the sweeper gets stuck because
4395
* it has no dictionary information!
4396
* As in the example below.
4399
* pk_col int auto_increment primary key, a1 char(64), a2 char(64), b char(16), c char(16) not null, d char(16), dummy char(64) default ' '
4402
* insert into t4 (a1, a2, b, c, d, dummy) select * from t1;
4404
* create index idx12672_0 on t4 (a1);
4405
* create index idx12672_1 on t4 (a1,a2,b,c);
4406
* create index idx12672_2 on t4 (a1,a2,b);
4409
if (db->db_sw_idle) {
4410
/* This will make sure we don't wait forever: */
4411
if (clean_xn_id != db->db_xn_to_clean_id) {
4412
clean_xn_id = db->db_xn_to_clean_id;
4420
xt_wakeup_sweeper(db);
4428
int ha_pbxt::repair(THD *XT_UNUSED(thd), HA_CHECK_OPT *XT_UNUSED(check_opt))
4430
return(HA_ADMIN_TRY_ALTER);
4434
* This is mapped to "ALTER TABLE tablename TYPE=PBXT", which rebuilds
4435
* the table in MySQL.
4437
int ha_pbxt::optimize(THD *XT_UNUSED(thd), HA_CHECK_OPT *XT_UNUSED(check_opt))
4439
return(HA_ADMIN_TRY_ALTER);
4444
extern int pbxt_mysql_trace_on;
4448
int ha_pbxt::check(THD* thd)
4450
int ha_pbxt::check(THD* thd, HA_CHECK_OPT* XT_UNUSED(check_opt))
4456
if (!(self = ha_set_current_thread(thd, &err)))
4457
return xt_ha_pbxt_to_mysql_error(err);
4458
if (self->st_lock_count)
4459
ASSERT(self->st_xact_data);
4461
if (!pb_table_locked) {
4462
ha_aquire_exclusive_use(self, pb_share, this);
4463
pushr_(ha_release_exclusive_use, pb_share);
4466
#ifdef CHECK_TABLE_LOADS
4467
xt_tab_load_table(self, pb_open_tab);
4469
xt_check_table(self, pb_open_tab);
4471
if (!pb_table_locked)
4472
freer_(); // ha_release_exclusive_use(pb_share)
4474
//pbxt_mysql_trace_on = TRUE;
4479
* This function is called:
4480
* For each table in LOCK TABLES,
4482
* For each table in a statement.
4484
* It is called with F_UNLCK:
4487
* at the end of a statement.
4490
xtPublic int ha_pbxt::external_lock(THD *thd, int lock_type)
4492
/* Some compilers complain that: variable 'err' might be clobbered by 'longjmp' or 'vfork' */
4493
volatile int err = 0;
4496
if (!(self = ha_set_current_thread(thd, (int *) &err)))
4497
return xt_ha_pbxt_to_mysql_error(err);
4499
/* F_UNLCK is set when this function is called at end
4500
* of statement or UNLOCK TABLES
4502
if (lock_type == F_UNLCK) {
4503
/* This is not TRUE if external_lock() FAILED!
4504
* Can we rely on external_unlock being called when
4505
* external_lock() fails? Currently yes, but it does
4507
ASSERT_NS(pb_ex_in_use);
4510
XT_PRINT1(self, "EXTERNAL_LOCK (%s) lock_type=UNLOCK\n", pb_share->sh_table_path->ps_path);
4512
/* Make any temporary locks on this table permanent.
4514
* This is required here because of the following example:
4515
* create table t1 (a int NOT NULL, b int, primary key (a));
4516
* create table t2 (a int NOT NULL, b int, primary key (a));
4517
* insert into t1 values (0, 10),(1, 11),(2, 12);
4518
* insert into t2 values (1, 21),(2, 22),(3, 23);
4519
* update t1 set b= (select b from t2 where t1.a = t2.a);
4520
* update t1 set b= (select b from t2 where t1.a = t2.a);
4522
* drop table t1, t2;
4526
/* GOTCHA! It's weird, but, if this function returns an error
4527
* on lock, then UNLOCK is called?!
4528
* This should not be done, because if lock fails, it should be
4529
* assumed that no UNLOCK is required.
4530
* Basically, I have to assume that some code will presume this,
4531
* although the function lock_external() calls unlock, even
4533
* The result is, that my lock count can go wrong. So I could
4534
* change the lock method, and increment the lock count, even
4535
* if it fails. However, the consequences are more serious,
4536
* if some code decides not to call UNLOCK after lock fails.
4537
* The result is that I would have a permanent too high lock,
4538
* count and nothing will work.
4539
* So instead, I handle the fact that I might get too many unlocks
4542
if (self->st_lock_count > 0)
4543
self->st_lock_count--;
4544
if (!self->st_lock_count) {
4545
/* This section handles "auto-commit"... */
4547
#ifdef XT_IMPLEMENT_NO_ACTION
4549
* This is required here because it marks the end of a statement.
4550
* If we are in a non-auto-commit mode, then we cannot
4551
* wait for st_is_update to be set by the beginning of a new transaction.
4553
if (self->st_restrict_list.bl_count) {
4554
if (!xt_tab_restrict_rows(&self->st_restrict_list, self))
4555
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4559
if (self->st_xact_data) {
4560
if (self->st_auto_commit) {
4562
* Normally I could assume that if the transaction
4563
* has not been aborted by now, then it should be committed.
4565
* Unfortunately, this is not the case!
4567
* create table t1 (id int primary key) engine = pbxt;
4568
* create table t2 (id int) engine = pbxt;
4570
* insert into t1 values ( 1 ) ;
4571
* insert into t1 values ( 2 ) ;
4572
* insert into t2 values ( 1 ) ;
4573
* insert into t2 values ( 2 ) ;
4575
* --This statement returns an error and calls ha_autocommit_or_rollback():
4576
* update t1 set t1.id=1 where t1.id=2;
4578
* --This statement returns no error and calls ha_autocommit_or_rollback():
4579
* update t1,t2 set t1.id=3, t2.id=3 where t1.id=2 and t2.id = t1.id;
4581
* --But this statement returns an error and does not call ha_autocommit_or_rollback():
4582
* update t1,t2 set t1.id=1, t2.id=1 where t1.id=3 and t2.id = t1.id;
4584
* The result is, I cannot rely on ha_autocommit_or_rollback() being called :(
4585
* So I have to abort myself here...
4588
pb_open_tab->ot_table->tab_locks.xt_make_lock_permanent(pb_open_tab, &self->st_lock_list);
4590
if (self->st_abort_trans) {
4591
XT_PRINT0(self, "xt_xn_rollback in unlock\n");
4592
if (!xt_xn_rollback(self))
4593
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4596
XT_PRINT0(self, "xt_xn_commit in unlock\n");
4597
if (!xt_xn_commit(self))
4598
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4603
/* If the previous statement was "for update", then set the visibility
4604
* so that non- for update SELECTs will see what the for update select
4605
* (or update statement) just saw.
4608
if (pb_open_tab->ot_for_update) {
4609
self->st_visible_time = self->st_database->db_xn_end_time;
4610
pb_open_tab->ot_for_update = 0;
4613
if (pb_share->sh_recalc_selectivity) {
4614
/* {FREE-ROWS-BAD} */
4615
if ((pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) >= 200) {
4617
pb_share->sh_recalc_selectivity = FALSE;
4618
xt_ind_set_index_selectivity(pb_open_tab, self);
4619
/* {FREE-ROWS-BAD} */
4620
pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150;
4625
if (self->st_stat_modify)
4626
self->st_statistics.st_stat_write++;
4628
self->st_statistics.st_stat_read++;
4629
self->st_stat_modify = FALSE;
4630
self->st_import_stat = XT_IMP_NO_IMPORT;
4632
/* Only reset this if there is no transactions running, and
4633
* no tables are open!
4635
if (!self->st_xact_data)
4636
self->st_non_temp_opened = FALSE;
4639
if (pb_table_locked) {
4641
if (!pb_table_locked)
4642
ha_release_exclusive_use(self, pb_share);
4645
/* No longer in use: */
4647
/* Someone may be waiting for me to complete: */
4648
if (pb_share->sh_table_lock)
4649
xt_broadcast_cond_ns((xt_cond_type *) pb_share->sh_ex_cond);
4652
XT_PRINT2(self, "ha_pbxt::EXTERNAL_LOCK (%s) lock_type=%d\n", pb_share->sh_table_path->ps_path, lock_type);
4654
if (pb_lock_table) {
4657
if (!pb_table_locked)
4658
ha_aquire_exclusive_use(self, pb_share, this);
4661
ha_close_open_tables(self, pb_share, this);
4663
if (!pb_share->sh_table) {
4664
xt_ha_open_database_of_table(self, pb_share->sh_table_path);
4666
ha_open_share(self, pb_share);
4670
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4678
if (pb_share->sh_table_lock && !pb_table_locked) {
4679
/* If some thread has an exclusive lock, then
4680
* we wait for the lock to be removed:
4682
if (!ha_wait_for_shared_use(this, pb_share)) {
4683
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
4689
if ((err = reopen())) {
4695
/* Set the current thread for this open table: */
4696
pb_open_tab->ot_thread = self;
4698
/* If this is a set, then it is in UPDATE/DELETE TABLE ...
4699
* or SELECT ... FOR UPDATE
4701
pb_open_tab->ot_is_modify = FALSE;
4702
if ((pb_open_tab->ot_for_update = (lock_type == F_WRLCK))) {
4703
switch ((int) thd_sql_command(thd)) {
4706
case SQLCOM_DELETE_MULTI:
4708
/* turn DELETE IGNORE into normal DELETE. The IGNORE option causes problems because
4709
* when a record is deleted we add an xlog record which we cannot "rollback" later
4710
* when we find that an FK-constraint has failed.
4712
thd->lex->ignore = false;
4715
case SQLCOM_UPDATE_MULTI:
4717
case SQLCOM_REPLACE:
4718
case SQLCOM_REPLACE_SELECT:
4720
case SQLCOM_INSERT_SELECT:
4721
pb_open_tab->ot_is_modify = TRUE;
4722
self->st_stat_modify = TRUE;
4724
case SQLCOM_ALTER_TABLE:
4725
case SQLCOM_CREATE_INDEX:
4728
case SQLCOM_OPTIMIZE:
4730
case SQLCOM_DROP_INDEX:
4731
self->st_stat_modify = TRUE;
4732
self->st_import_stat = XT_IMP_COPY_TABLE;
4733
pb_import_row_count = 0;
4734
/* Do not read FOR UPDATE!
4735
* this avoids taking locks on the rows that are read
4736
* Which leads to the assertion failure:
4737
* int XTRowLocks::xt_make_lock_permanent(XTOpenTable*, XTRowLockList*)(lock_xt.cc:646) item
4738
* after the transaction is committed in write_row.
4740
pb_open_tab->ot_for_update = FALSE;
4743
self->st_stat_modify = TRUE;
4744
self->st_import_stat = XT_IMP_LOAD_TABLE;
4745
pb_import_row_count = 0;
4746
pb_open_tab->ot_for_update = FALSE;
4748
case SQLCOM_CREATE_TABLE:
4749
case SQLCOM_TRUNCATE:
4750
case SQLCOM_DROP_TABLE:
4751
self->st_stat_modify = TRUE;
4756
if (pb_open_tab->ot_is_modify && pb_open_tab->ot_table->tab_dic.dic_disable_index) {
4757
xt_tab_set_index_error(pb_open_tab->ot_table);
4758
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
4763
/* Record the associated MySQL thread: */
4766
if (self->st_database != pb_share->sh_table->tab_db) {
4768
/* PBXT does not permit multiple databases in one statement,
4769
* or in a single transaction!
4773
* update mysqltest_1.t1, mysqltest_2.t2 set a=10,d=10;
4775
if (self->st_lock_count > 0)
4776
xt_throw_xterr(XT_CONTEXT, XT_ERR_MULTIPLE_DATABASES);
4778
xt_ha_open_database_of_table(self, pb_share->sh_table_path);
4781
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4788
/* See {IS-UPDATE-STAT} and {UPDATE-STACK} */
4789
self->st_is_update = NULL;
4791
/* Auto begin a transaction (if one is not already running): */
4792
if (!self->st_xact_data) {
4793
/* Transaction mode numbers must be identical! */
4794
(void) ASSERT_NS(ISO_READ_UNCOMMITTED == XT_XACT_UNCOMMITTED_READ);
4795
(void) ASSERT_NS(ISO_SERIALIZABLE == XT_XACT_SERIALIZABLE);
4797
thd_init_xact(thd, self, true);
4799
if (!xt_xn_begin(self)) {
4800
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4805
* {START-TRANS} GOTCHA: trans_register_ha() is not mentioned in the documentation.
4806
* It must be called to inform MySQL that we have a transaction (see start_stmt).
4808
* Here are some tests that confirm whether things are done correctly:
4810
* drop table if exists t1, t2;
4811
* create table t1 (c1 int);
4812
* insert t1 values (1);
4814
* rename table t1 to t2;
4816
* rename will generate an error if MySQL thinks a transaction is
4819
* create table t1 (a text character set utf8, b text character set latin1);
4820
* insert t1 values (0x4F736E616272C3BC636B, 0x4BF66C6E);
4822
* --exec $MYSQL_DUMP --tab=$MYSQLTEST_VARDIR/tmp/ test
4823
* --exec $MYSQL test < $MYSQLTEST_VARDIR/tmp/t1.sql
4824
* --exec $MYSQL_IMPORT test $MYSQLTEST_VARDIR/tmp/t1.txt
4827
* This test forces a begin transaction in start_stmt()
4829
* drop tables if exists t1;
4830
* create table t1 (c1 int);
4831
* lock tables t1 write;
4832
* insert t1 values (1);
4833
* insert t1 values (2);
4836
* The second select will return an empty result if
4837
* MySQL is not informed that a transaction is running (auto-commit
4838
* in external_lock comes too late)!
4842
if (!self->st_auto_commit) {
4843
trans_register_ha(thd, TRUE, pbxt_hton);
4844
XT_PRINT0(self, "CONN START XACT - ha_pbxt::external_lock --> trans_register_ha\n");
4849
/* Any open table can cause this to be FALSE: */
4850
if (!XT_IS_TEMP_TABLE(pb_open_tab->ot_table->tab_dic.dic_tab_flags))
4851
self->st_non_temp_opened = TRUE;
4853
/* Start a statement transaction: */
4854
/* {START-STAT-HACK} The problem that ha_commit_trans() is not
4855
* called by MySQL seems to be fixed (tests confirm this).
4856
* Here is the previous comment when this code was executed
4857
* here {START-STAT-HACK}
4859
* GOTCHA: I have a huge problem with the transaction statement.
4860
* It is not ALWAYS committed (I mean ha_commit_trans() is
4861
* not always called - for example in SELECT).
4863
* If I call trans_register_ha() but ha_commit_trans() is not called
4864
* then MySQL thinks a transaction is still running (while
4865
* I have committed the auto-transaction in ha_pbxt::external_lock()).
4867
* This causes all kinds of problems, like transactions
4868
* are killed when they should not be.
4870
* To prevent this, I only inform MySQL that a transaction
4871
* has been started when an update is performed. I have determined that
4872
* ha_commit_trans() is only guaranteed to be called if an update is done.
4875
* So, this is the correct place to start a statement transaction.
4877
* Note: if trans_register_ha() is not called before ha_write_row(), then
4878
* PBXT is not registered correctly as a modification transaction.
4879
* (mark_trx_read_write call in ha_write_row).
4880
* This leads to 2-phase commit not being called as it should when
4881
* binary logging is enabled.
4884
if (!pb_open_tab->ot_thread->st_stat_trans) {
4885
trans_register_ha(pb_mysql_thd, FALSE, pbxt_hton);
4886
XT_PRINT0(pb_open_tab->ot_thread, "STAT START - ha_pbxt::external_lock --> trans_register_ha\n");
4887
pb_open_tab->ot_thread->st_stat_trans = TRUE;
4890
if (lock_type == F_WRLCK || self->st_xact_mode < XT_XACT_REPEATABLE_READ)
4891
self->st_visible_time = self->st_database->db_xn_end_time;
4893
#ifdef TRACE_STATEMENTS
4894
if (self->st_lock_count == 0)
4895
STAT_TRACE(self, *thd_query(thd));
4897
self->st_lock_count++;
4905
* This function is called for each table in a statement
4906
* after LOCK TABLES has been used.
4908
* Currently I only use this function to set the
4909
* current thread of the table handle.
4911
* GOTCHA: The prototype of start_stmt() has changed
4912
* from version 4.1 to 5.1!
4914
// Statement-start hook. Visible steps: attach this handler to the PBXT thread
// of the connection, wind up the previous statement if st_stat_ended is set,
// derive ot_for_update and ot_is_modify from lock_type and the SQL command,
// begin a transaction when none is active, and register it with the server.
//   thd       - server session that issues the statement
//   lock_type - table lock level requested for this statement
int ha_pbxt::start_stmt(THD *thd, thr_lock_type lock_type)
4919
ASSERT_NS(pb_ex_in_use);
4921
// Make the PBXT thread of this connection current; on failure return the mapped error code.
if (!(self = ha_set_current_thread(thd, &err)))
4922
return xt_ha_pbxt_to_mysql_error(err);
4924
XT_PRINT2(self, "ha_pbxt::start_stmt (%s) lock_type=%d\n", pb_share->sh_table_path->ps_path, (int) lock_type);
4927
if ((err = reopen()))
4931
ASSERT_NS(pb_open_tab->ot_thread == self);
4932
ASSERT_NS(thd == pb_mysql_thd);
4933
ASSERT_NS(self->st_database == pb_open_tab->ot_table->tab_db);
4935
// The previous statement has ended: clear its flags and finish its pending work.
if (self->st_stat_ended) {
4936
self->st_stat_ended = FALSE;
4937
self->st_stat_trans = FALSE;
4939
#ifdef XT_IMPLEMENT_NO_ACTION
4940
if (self->st_restrict_list.bl_count) {
4941
if (!xt_tab_restrict_rows(&self->st_restrict_list, self)) {
4942
err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, self, pb_ignore_dup_key);
4947
/* This section handles "auto-commit"... */
4948
// Only runs when a transaction exists, auto-commit is on and a table was touched.
// Rolls back when st_abort_trans is set; presumably commits otherwise - TODO confirm.
if (self->st_xact_data && self->st_auto_commit && self->st_table_trans) {
4949
if (self->st_abort_trans) {
4950
XT_PRINT0(self, "xt_xn_rollback in start_stmt\n");
4951
if (!xt_xn_rollback(self))
4952
err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, self, pb_ignore_dup_key);
4955
XT_PRINT0(self, "xt_xn_commit in start_stmt\n");
4956
if (!xt_xn_commit(self))
4957
err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, self, pb_ignore_dup_key);
4961
// Account the finished statement in the per-thread statistics.
if (self->st_stat_modify)
4962
self->st_statistics.st_stat_write++;
4964
self->st_statistics.st_stat_read++;
4965
self->st_stat_modify = FALSE;
4966
self->st_import_stat = XT_IMP_NO_IMPORT;
4968
/* If the previous statement was "for update", then set the visibility
4969
* so that non- for update SELECTs will see what the for update select
4970
* (or update statement) just saw.
4972
if (pb_open_tab->ot_for_update)
4973
self->st_visible_time = self->st_database->db_xn_end_time;
4976
// Any lock type other than the read variants listed here counts as for-update.
pb_open_tab->ot_for_update =
4977
(lock_type != TL_READ &&
4978
lock_type != TL_READ_WITH_SHARED_LOCKS &&
4980
lock_type != TL_READ_HIGH_PRIORITY &&
4982
lock_type != TL_READ_NO_INSERT);
4983
pb_open_tab->ot_is_modify = FALSE;
4984
if (pb_open_tab->ot_for_update) {
4985
// The cases ending at SQLCOM_INSERT_SELECT mark both the open table and the
// thread as modifying; the DDL-style cases after that only set st_stat_modify.
switch ((int) thd_sql_command(thd)) {
4989
case SQLCOM_UPDATE_MULTI:
4990
case SQLCOM_DELETE_MULTI:
4992
case SQLCOM_REPLACE:
4993
case SQLCOM_REPLACE_SELECT:
4995
case SQLCOM_INSERT_SELECT:
4996
pb_open_tab->ot_is_modify = TRUE;
4997
self->st_stat_modify = TRUE;
4999
case SQLCOM_CREATE_TABLE:
5000
case SQLCOM_CREATE_INDEX:
5001
case SQLCOM_ALTER_TABLE:
5002
case SQLCOM_TRUNCATE:
5003
case SQLCOM_DROP_TABLE:
5004
case SQLCOM_DROP_INDEX:
5008
case SQLCOM_OPTIMIZE:
5009
self->st_stat_modify = TRUE;
5015
/* {IS-UPDATE-STAT} This is required at this level!
5016
* No matter how often it is called, it is still the start of a
5017
* statement. We need to make sure statements that are NOT mistaken
5018
* for different type of statement.
5020
* Here is an example:
5021
* select * from t1 where data = getcount("bar")
5023
* If the procedure getcount() addresses another table.
5024
* then open and close of the statements in getcount()
5025
* are nested within an open close of the select t1
5029
* Add to this I add the following:
5030
* A trigger in the middle of an update also causes nested
5031
* statements. If I reset st_is_update, then
5032
* when the trigger returns the system thinks we
5033
* are in a different update statement, and may
5034
* update the same row again.
5036
if (self->st_is_update == pb_open_tab) {
5037
/* Pop the update stack: */
5038
XTOpenTablePtr curr = pb_open_tab->ot_thread->st_is_update;
5040
pb_open_tab->ot_thread->st_is_update = curr->ot_prev_update;
5041
curr->ot_prev_update = NULL;
5044
/* See comment {START-TRANS} */
5045
// No transaction is active yet: initialise the session state and begin one.
if (!self->st_xact_data) {
5047
thd_init_xact(thd, self, false);
5049
if (!xt_xn_begin(self)) {
5050
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
5054
// Outside auto-commit the transaction is registered with the server at connection scope (TRUE).
if (!self->st_auto_commit) {
5055
trans_register_ha(thd, TRUE, pbxt_hton);
5056
XT_PRINT0(self, "START CONN XACT - ha_pbxt::start_stmt --> trans_register_ha\n");
5061
/* Start a statement (see {START-STAT-HACK}): */
5063
// st_stat_trans guards the statement-scope registration so it is done once until the flag is reset.
if (!pb_open_tab->ot_thread->st_stat_trans) {
5064
trans_register_ha(pb_mysql_thd, FALSE, pbxt_hton);
5065
XT_PRINT0(pb_open_tab->ot_thread, "START STAT - ha_pbxt::start_stmt --> trans_register_ha\n");
5066
pb_open_tab->ot_thread->st_stat_trans = TRUE;
5069
// For-update statements and isolation modes below repeatable read take a fresh visibility time.
if (pb_open_tab->ot_for_update || self->st_xact_mode < XT_XACT_REPEATABLE_READ)
5070
self->st_visible_time = self->st_database->db_xn_end_time;
5074
self->st_stat_count++;
5081
* The idea with handler::store_lock() is the following:
5083
* The statement decided which locks we should need for the table
5084
* for updates/deletes/inserts we get WRITE locks, for SELECT... we get
5087
* Before adding the lock into the table lock handler (see thr_lock.c)
5088
* mysqld calls store lock with the requested locks. Store lock can now
5089
* modify a write lock to a read lock (or some other lock), ignore the
5090
* lock (if we don't want to use MySQL table locks at all) or add locks
5091
* for many tables (like we do when we are using a MERGE handler).
5093
* When releasing locks, store_lock() are also called. In this case one
5094
* usually doesn't have to do anything.
5096
* In some exceptional cases MySQL may send a request for a TL_IGNORE;
5097
* This means that we are requesting the same lock as last time and this
5098
* should also be ignored. (This may happen when someone does a flush
5099
* table when we have opened a part of the tables, in which case mysqld
5100
* closes and reopens the tables and tries to get the same locks as last
5101
* time). In the future we will probably try to remove this.
5103
* Called from lock.cc by get_lock_data().
5105
// Adjusts the table lock requested by the server before it is recorded in
// pb_lock. Visible behaviour: TL_READ is promoted to TL_READ_NO_INSERT; when
// the request is not TL_IGNORE and pb_lock is currently TL_UNLOCK, the code
// sets pb_lock_table for TRUNCATE, weakens ordinary write locks to
// TL_WRITE_ALLOW_WRITE, relaxes TL_READ_NO_INSERT to TL_READ in some cases,
// and stores the result in pb_lock.type.
//   thd       - server session requesting the lock
//   to        - lock array of the caller (its use is not visible in this function text)
//   lock_type - lock level requested by the server
THR_LOCK_DATA **ha_pbxt::store_lock(THD *thd, THR_LOCK_DATA **to, enum thr_lock_type lock_type)
5108
* TL_READ means concurrent INSERTs are allowed. This is a problem as in this mode
5109
* PBXT is not compatible with MyISAM which allows INSERTs but isolates them from
5110
* current "transaction" (started by LOCK TABLES, ended by UNLOCK TABLES). PBXT
5111
* used to allow INSERTs and made them visible to the locker (on commit).
5112
* While MySQL manual doesn't state anything regarding row visibility limitations
5113
* we choose to convert local locks into normal read locks for better compatibility
5116
if (lock_type == TL_READ)
5117
lock_type = TL_READ_NO_INSERT;
5119
// Only a fresh request on a handler that holds no lock is processed here.
if (lock_type != TL_IGNORE && pb_lock.type == TL_UNLOCK) {
5120
/* Set to TRUE for operations that require a table lock: */
5121
switch (thd_sql_command(thd)) {
5122
case SQLCOM_TRUNCATE:
5124
* The problem is, if I do not do this, then
5125
* TRUNCATE TABLE deadlocks with a normal update of the table!
5128
* external_lock() is called before MySQL actually locks the
5129
* table. In external_lock(), the table is shared locked,
5130
* by indicating that the handler is in use.
5132
* Then later, in delete_all_rows(), a exclusive lock must be
5133
* obtained. If an UPDATE or INSERT has also gained a shared
5134
* lock in the meantime, then TRUNCATE TABLE hangs.
5136
* By setting pb_lock_table we indicate that an exclusive lock
5137
* should be gained in external_lock().
5139
* This is the locking behaviour:
5142
* XT SHARE LOCK (mysql_lock_tables calls external_lock)
5143
* MySQL WRITE LOCK (mysql_lock_tables)
5145
* XT EXCLUSIVE LOCK (delete_all_rows)
5148
* XT SHARED LOCK (mysql_lock_tables calls external_lock)
5149
* MySQL WRITE_ALLOW_WRITE LOCK (mysql_lock_tables)
5151
* If the locking for INSERT is done in the ... phase
5152
* above, then we have a deadlock because
5153
* WRITE_ALLOW_WRITE conflicts with WRITE.
5155
* Making TRUNCATE TABLE take a WRITE_ALLOW_WRITE LOCK, will
5156
* not solve the problem because then 2 TRUNCATE TABLES
5157
* can deadlock due to lock escalation.
5159
* What may work is if MySQL were to lock BEFORE calling
5162
* However, using this method, TRUNCATE TABLE does deadlock
5163
* with other operations such as ALTER TABLE!
5165
* This is handled with a lock timeout. Assuming
5166
* TRUNCATE TABLE will be mixed with DML this is the
5169
pb_lock_table = TRUE;
5172
// Presumably the default case of the switch - TODO confirm.
pb_lock_table = FALSE;
5176
#ifdef PBXT_HANDLER_TRACE
5177
pb_lock.type = lock_type;
5179
/* GOTCHA: Before it was OK to weaken the lock after just checking
5180
* that !thd->in_lock_tables. However, when starting a procedure, MySQL
5181
* simulates a LOCK TABLES statement.
5183
* So we need to be more specific here, and check what the actual statement
5184
* type. Before doing this I got a deadlock (undetected) on the following test.
5185
* However, now we get a failed assertion in ha_rollback_trans():
5186
* TODO: Check this with InnoDB!
5189
* my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
5191
* drop table if exists t3;
5192
* create table t3 (a smallint primary key) engine=pbxt;
5193
* insert into t3 (a) values (40);
5194
* insert into t3 (a) values (50);
5198
* drop function if exists t3_update|
5200
* create function t3_update() returns int
5202
* insert into t3 values (10);
5211
* update t3 set a = 5 where a = 50;
5216
* update t3 set a = 4 where a = 40;
5220
* update t3 set a = 4 where a = 40; // Hangs waiting CONN 2.
5224
* select t3_update(); // Hangs waiting for table lock.
5227
// Weaken write locks in the range TL_WRITE_CONCURRENT_INSERT..TL_WRITE unless this is
// an explicit LOCK TABLES, a tablespace operation, TRUNCATE, OPTIMIZE or CREATE TABLE.
if ((lock_type >= TL_WRITE_CONCURRENT_INSERT && lock_type <= TL_WRITE) &&
5229
!(thd_in_lock_tables(thd) && thd_sql_command(thd) == SQLCOM_LOCK_TABLES) &&
5231
!thd_tablespace_op(thd) &&
5232
thd_sql_command(thd) != SQLCOM_TRUNCATE &&
5234
thd_sql_command(thd) != SQLCOM_OPTIMIZE &&
5236
thd_sql_command(thd) != SQLCOM_CREATE_TABLE) {
5237
lock_type = TL_WRITE_ALLOW_WRITE;
5240
/* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
5241
* MySQL would use the lock TL_READ_NO_INSERT on t2, and that
5242
* would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
5243
* to t2. Convert the lock to a normal read lock to allow
5244
* concurrent inserts to t2.
5246
* (This one from InnoDB)
5248
* Stewart: removed SQLCOM_CALL, not sure of implications.
5250
if (lock_type == TL_READ_NO_INSERT
5252
&& (!thd_in_lock_tables(thd)
5253
|| thd_sql_command(thd) == SQLCOM_CALL
5258
lock_type = TL_READ;
5261
XT_PRINT3(xt_get_self(), "store_lock (%s) %d->%d\n", pb_share->sh_table_path->ps_path, pb_lock.type, lock_type);
5262
// Record the final lock level chosen for this handler.
pb_lock.type = lock_type;
5264
#ifdef PBXT_HANDLER_TRACE
5266
XT_PRINT3(xt_get_self(), "store_lock (%s) %d->%d (ignore/unlock)\n", pb_share->sh_table_path->ps_path, lock_type, lock_type);
5274
* Used to delete a table. By the time delete_table() has been called all
5275
* opened references to this table will have been closed (and your globally
5276
* shared references released). The variable name will just be the name of
5277
* the table. You will need to remove any files you have created at this point.
5279
* Called from handler.cc by delete_table and ha_create_table(). Only used
5280
* during create if the table_flag HA_DROP_BEFORE_CREATE was specified for
5281
* the storage engine.
5284
// Drops a PBXT table. Two entry points appear back to back: doDropTable takes
// a TableIdentifier and derives table_path from it, delete_table receives the
// path directly. NOTE(review): presumably these are alternative builds chosen
// by a preprocessor conditional - confirm.
// Visible steps: system tables are delegated to delete_system_table(); for
// other tables the share is taken for exclusive use, open handles are closed,
// xt_drop_table() is called, and finally the BLOB data and (in the variant
// that uses ident) the DEFAULT_FILE_EXTENSION file are removed.
int PBXTStorageEngine::doDropTable(Session &, TableIdentifier& ident)
5286
const std::string& path = ident.getPath();
5287
const char *table_path = path.c_str();
5289
int ha_pbxt::delete_table(const char *table_path)
5292
THD *thd = current_thd;
5294
XTThreadPtr self = NULL;
5297
STAT_TRACE(self, *thd_query(thd));
5298
XT_PRINT1(self, "delete_table (%s)\n", table_path);
5300
if (XTSystemTableShare::isSystemTable(table_path))
5301
return delete_system_table(table_path);
5303
if (!(self = ha_set_current_thread(thd, &err)))
5304
return xt_ha_pbxt_to_mysql_error(err);
5306
// Foreign key checks are skipped when the session has OPTION_NO_FOREIGN_KEY_CHECKS set.
self->st_ignore_fkeys = (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
5309
xt_ha_open_database_of_table(self, (XTPathStrPtr) table_path);
5311
ASSERT(xt_get_self() == self);
5313
/* NOTE: MySQL does not drop a table by first locking it!
5314
* We also cannot use pb_share because the handler used
5315
* to delete a table is not opened correctly.
5317
// Each pushr_ registers a release call; the matching freer_ calls below run them in reverse order.
share = ha_get_share(self, table_path, false);
5318
pushr_(ha_unget_share, share);
5319
ha_aquire_exclusive_use(self, share, NULL);
5320
pushr_(ha_release_exclusive_use, share);
5321
ha_close_open_tables(self, share, NULL);
5323
// The last argument tells xt_drop_table whether the statement is DROP DATABASE.
xt_drop_table(self, (XTPathStrPtr) table_path, thd_sql_command(thd) == SQLCOM_DROP_DB);
5325
freer_(); // ha_release_exclusive_use(share)
5326
freer_(); // ha_unget_share(share)
5329
/* In MySQL if the table does not exist, just log the error and continue. This is
5330
* needed to delete table in the case when CREATE TABLE fails and no PBXT disk
5331
* structures were created.
5332
* Drizzle unlike MySQL iterates over all handlers and tries to delete table. It
5333
* stops after when a handler returns TRUE, so in Drizzle we need to report error.
5336
if (self->t_exception.e_xt_err == XT_ERR_TABLE_NOT_FOUND)
5337
xt_log_and_clear_exception(self);
5345
* If there are no more PBXT tables in the database, we
5346
* "drop the database", which deletes all PBXT resources
5349
/* We now only drop the pbxt system data,
5350
* when the PBXT database is dropped.
5352
#ifndef XT_USE_GLOBAL_DB
5353
if (!xt_table_exists(self->st_database)) {
5354
xt_ha_all_threads_close_database(self, self->st_database);
5355
xt_drop_database(self, self->st_database);
5356
xt_unuse_database(self, self);
5357
xt_ha_close_global_database(self);
5362
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5364
if (err == HA_ERR_NO_SUCH_TABLE)
5371
/* Call pbms_delete_table_with_blobs() last because it cannot be undone. */
5373
PBMSResultRec result;
5375
// A failure to delete the BLOB data is only logged as a warning.
if (pbms_delete_table_with_blobs(table_path, &result)) {
5376
xt_logf(XT_NT_WARNING, "pbms_delete_table_with_blobs() Error: %s", result.mr_message);
5379
pbms_completed(NULL, true);
5384
// Remove the file named path + DEFAULT_FILE_EXTENSION; the result of my_delete is ignored.
std::string path2(ident.getPath());
5385
path2.append(DEFAULT_FILE_EXTENSION);
5386
(void)internal::my_delete(path2.c_str(), MYF(0));
5393
// Drops a PBXT system table (two signatures, presumably alternative builds -
// confirm). Visible steps: throw XT_ERR_PBXT_TABLE_EXISTS while the database
// still contains tables, mark the system table as deleted, and once no system
// table remains close and drop the database.
int PBXTStorageEngine::delete_system_table(const char *table_path)
5395
int ha_pbxt::delete_system_table(const char *table_path)
5398
THD *thd = current_thd;
5403
if (!(self = xt_ha_set_current_thread(thd, &e)))
5404
return xt_ha_pbxt_to_mysql_error(e.e_xt_err);
5407
xt_ha_open_database_of_table(self, (XTPathStrPtr) table_path);
5409
// System tables cannot go while the database still has PBXT tables.
if (xt_table_exists(self->st_database))
5410
xt_throw_xterr(XT_CONTEXT, XT_ERR_PBXT_TABLE_EXISTS);
5412
XTSystemTableShare::setSystemTableDeleted(table_path);
5414
// Last system table gone: release every reference to the database and drop it.
if (!XTSystemTableShare::doesSystemTableExist()) {
5415
xt_ha_all_threads_close_database(self, self->st_database);
5416
xt_drop_database(self, self->st_database);
5417
xt_unuse_database(self, self);
5418
xt_ha_close_global_database(self);
5422
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5430
* Renames a table from one name to another from alter table call.
5431
* This function can be used to move a table from one database to
5435
// Renames a PBXT table from one path to another. Two entry points appear back
// to back: doRenameTable derives both paths from TableIdentifier objects,
// rename_table receives them directly (presumably alternative builds - confirm).
// Visible steps: system tables go to rename_system_table(); BLOB data is
// renamed through pbms_rename_table_with_blobs(); both paths must resolve to
// the same database or XT_ERR_CANNOT_CHANGE_DB is thrown; the share is taken
// for exclusive use, open handles are closed and xt_rename_table() is called.
int PBXTStorageEngine::doRenameTable(Session&,
5436
TableIdentifier& from_ident,
5437
TableIdentifier& to_ident)
5439
// NOTE(review): these pointers stay valid only if getPath() returns a reference
// to a string owned by the identifier - confirm.
const char *from = from_ident.getPath().c_str();
5440
const char *to = to_ident.getPath().c_str();
5442
if (strcmp(from, to) == 0)
5446
int ha_pbxt::rename_table(const char *from, const char *to)
5449
THD *thd = current_thd;
5453
XTDatabaseHPtr to_db;
5455
if (XTSystemTableShare::isSystemTable(from))
5456
return rename_system_table(from, to);
5458
if (!(self = ha_set_current_thread(thd, &err)))
5459
return xt_ha_pbxt_to_mysql_error(err);
5461
XT_PRINT2(self, "rename_table (%s -> %s)\n", from, to);
5464
PBMSResultRec result;
5466
err = pbms_rename_table_with_blobs(from, to, &result);
5468
xt_logf(XT_NT_ERROR, "pbms_rename_table_with_blobs() Error: %s", result.mr_message);
5474
// Open the target database first and remember it, then the source database;
// a difference means the rename would move the table to another database.
xt_ha_open_database_of_table(self, (XTPathStrPtr) to);
5475
to_db = self->st_database;
5477
xt_ha_open_database_of_table(self, (XTPathStrPtr) from);
5479
if (self->st_database != to_db)
5480
xt_throw_xterr(XT_CONTEXT, XT_ERR_CANNOT_CHANGE_DB);
5483
* NOTE: MySQL does not lock before calling rename table!
5485
* We cannot use pb_share because rename_table() is
5486
* called without correctly initializing
5489
share = ha_get_share(self, from, true);
5490
pushr_(ha_unget_share, share);
5491
ha_aquire_exclusive_use(self, share, NULL);
5492
pushr_(ha_release_exclusive_use, share);
5493
ha_close_open_tables(self, share, NULL);
5495
self->st_ignore_fkeys = (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
5496
xt_rename_table(self, (XTPathStrPtr) from, (XTPathStrPtr) to);
5498
freer_(); // ha_release_exclusive_use(share)
5499
freer_(); // ha_unget_share(share)
5502
* If there are no more PBXT tables in the database, we
5503
* "drop the database", which deletes all PBXT resources
5506
// NOTE(review): this uses ifdef while the matching section of delete_table uses ifndef - confirm which is intended.
#ifdef XT_USE_GLOBAL_DB
5507
/* We now only drop the pbxt system data,
5508
* when the PBXT database is dropped.
5510
if (!xt_table_exists(self->st_database)) {
5511
xt_ha_all_threads_close_database(self, self->st_database);
5512
xt_drop_database(self, self->st_database);
5517
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5522
// The second argument reports whether the rename succeeded (err == 0).
pbms_completed(NULL, (err == 0));
5527
plugin::StorageEngine::renameDefinitionFromPath(to_ident, from_ident);
5534
// Rename hook for PBXT system tables (two signatures, presumably alternative
// builds - confirm). Both parameters are marked unused and the visible body
// only returns ER_NOT_SUPPORTED_YET.
int PBXTStorageEngine::rename_system_table(const char *XT_UNUSED(from), const char *XT_UNUSED(to))
5536
int ha_pbxt::rename_system_table(const char *XT_UNUSED(from), const char *XT_UNUSED(to))
5539
return ER_NOT_SUPPORTED_YET;
5542
// Reports the largest key length this engine accepts: XT_INDEX_MAX_KEY_SIZE.
uint ha_pbxt::max_supported_key_length() const
5544
return XT_INDEX_MAX_KEY_SIZE;
5547
// Reports the largest length of a single key part: four less than
// XT_INDEX_MAX_KEY_SIZE, leaving room for the overhead mentioned below.
uint ha_pbxt::max_supported_key_part_length() const
5549
/* There is a little overhead in order to fit! */
5550
return XT_INDEX_MAX_KEY_SIZE-4;
5554
* Called in test_quick_select to determine if indexes should be used.
5556
* As far as I can tell, time is measured in "disk reads". So the
5557
* calculation below means the system reads about 20 rows per read.
5559
* For example a sequence scan uses a read buffer which reads a
5560
* number of rows at once, or a sequential scan can make use
5561
* of the cache (so it needs to read less).
5563
// Cost estimate for a full table scan: (records + deleted) divided by 38.0,
// plus a constant 2.
// NOTE(review): the comment preceding this function speaks of about 20 rows
// per read while the divisor used here is 38.0 - confirm which is intended.
double ha_pbxt::scan_time()
5565
double result = (double) (stats.records + stats.deleted) / 38.0 + 2;
5570
* The next method will never be called if you do not implement indexes.
5572
// Cost estimate for an index read: the number of ranges plus the number of
// rows, converted to double. The index number is not used.
double ha_pbxt::read_time(uint XT_UNUSED(index), uint ranges, ha_rows rows)
5574
double result = rows2double(ranges+rows);
5579
* Given a starting key, and an ending key estimate the number of rows that
5580
* will exist between the two. end_key may be empty, in which case determine
5581
* if start_key matches any rows.
5583
* Called from opt_range.cc by check_quick_keys().
5586
// Estimates the number of rows between two keys of index inx. The estimate is
// not computed from the key values: the visible code takes the keypart_map of
// min_key or max_key, walks its low-order set bits, and returns the
// is_recs_in_range value stored for the corresponding index segment.
// NOTE(review): the conditions selecting min_key vs. max_key and the statement
// that counts segments inside the loop are not visible here - confirm.
ha_rows ha_pbxt::records_in_range(uint inx, key_range *min_key, key_range *max_key)
5589
key_part_map keypart_map;
5594
keypart_map = min_key->keypart_map;
5596
keypart_map = max_key->keypart_map;
5599
ind = (XTIndexPtr) pb_share->sh_dic_keys[inx];
5601
// Consume one low-order set bit of the key part map per iteration.
while (keypart_map & 1) {
5603
keypart_map = keypart_map >> 1;
5606
// Guard against a segment number outside 1..mi_seg_count before indexing mi_seg.
if (segement < 1 || segement > ind->mi_seg_count)
5609
result = ind->mi_seg[segement-1].is_recs_in_range;
5610
#ifdef XT_PRINT_INDEX_OPT
5611
printf("records_in_range %s index %d cols req=%d/%d read_bits=%X write_bits=%X index_bits=%X --> %d\n", pb_open_tab->ot_table->tab_name->ps_path, (int) inx, segement, ind->mi_seg_count, (int) *table->read_set->bitmap, (int) *table->write_set->bitmap, (int) *ind->mi_col_map.bitmap, (int) result);
5617
* create() is called to create a table/database. The variable name will have the name
5618
* of the table. When create() is called you do not need to worry about opening
5619
* the table. Also, the FRM file will have already been created so adjusting
5620
* create_info will not do you any good. You can overwrite the frm file at this
5621
* point if you wish to change the table definition, but there are no methods
5622
* currently provided for doing that.
5624
* Called from handler.cc by ha_create_table().
5627
// Creates a PBXT table. Two entry points appear back to back: doCreateTable
// works from a TableIdentifier and a table message (proto), create works from
// table_path, table_arg and create_info (presumably alternative builds chosen
// by a preprocessor conditional - confirm). Statements for both variants are
// interleaved below.
// Visible steps: special-case the ./pbxt system table paths, attach the PBXT
// thread, open the database of the table, reject keys longer than
// XT_INDEX_MAX_KEY_SIZE, build the table definition with xt_ri_create_table()
// and check its foreign keys, fill the dictionary record dic, and call
// xt_create_table(). The dictionary is freed at the end without freeing
// table_arg.
int PBXTStorageEngine::doCreateTable(Session&,
5629
TableIdentifier& ident,
5630
drizzled::message::Table& proto)
5632
const std::string& path = ident.getPath();
5633
const char *table_path = path.c_str();
5635
int ha_pbxt::create(const char *table_path, TABLE *table_arg, HA_CREATE_INFO *create_info)
5638
THD *thd = current_thd;
5641
XTDDTable *tab_def = NULL;
5642
XTDictionaryRec dic, source_dic;
5644
// Paths of the PBXT system tables are tested first; what happens on a match is
// not visible here - TODO confirm.
if ((strcmp(table_path, "./pbxt/location") == 0) ||
5645
(strcmp(table_path, "./pbxt/tables") == 0) ||
5646
(strcmp(table_path, "./pbxt/statistics") == 0))
5649
if ((strcmp(table_path, "./pbxt/location") == 0) || (strcmp(table_path, "./pbxt/statistics") == 0))
5652
// Start from zeroed dictionary records so unset fields are well defined.
memset(&dic, 0, sizeof(dic));
5653
memset(&source_dic, 0, sizeof(source_dic));
5655
if (!(self = ha_set_current_thread(thd, &err)))
5656
return xt_ha_pbxt_to_mysql_error(err);
5658
XT_PRINT2(self, "create (%s) %s\n", table_path, (proto.type() == message::Table::TEMPORARY) ? "temporary" : "");
5659
// Map the identifier type onto the PBXT table type stored in the dictionary.
switch(ident.getType()) {
5660
case message::Table::STANDARD:
5661
dic.dic_table_type = XT_TABLE_TYPE_STANDARD;
5664
case message::Table::TEMPORARY:
5665
dic.dic_table_type = XT_TABLE_TYPE_TEMPORARY;
5668
case message::Table::INTERNAL:
5669
dic.dic_table_type = XT_TABLE_TYPE_INTERNAL;
5672
case message::Table::FUNCTION:
5673
dic.dic_table_type = XT_TABLE_TYPE_FUNCTION;
5677
XT_PRINT2(self, "create (%s) %s\n", table_path, (create_info->options & HA_LEX_CREATE_TMP_TABLE) ? "temporary" : "");
5680
STAT_TRACE(self, *thd_query(thd));
5683
xt_ha_open_database_of_table(self, (XTPathStrPtr) table_path);
5686
// Every key must fit into XT_INDEX_MAX_KEY_SIZE (same check for both variants).
for (uint i=0; i<TS(&table_arg)->keys; i++) {
5687
if (table_arg.key_info[i].key_length > XT_INDEX_MAX_KEY_SIZE)
5688
xt_throw_sulxterr(XT_CONTEXT, XT_ERR_KEY_TOO_LARGE, table_arg.key_info[i].name, (u_long) XT_INDEX_MAX_KEY_SIZE);
5691
for (uint i=0; i<TS(table_arg)->keys; i++) {
5692
if (table_arg->key_info[i].key_length > XT_INDEX_MAX_KEY_SIZE)
5693
xt_throw_sulxterr(XT_CONTEXT, XT_ERR_KEY_TOO_LARGE, table_arg->key_info[i].name, (u_long) XT_INDEX_MAX_KEY_SIZE);
5697
/* ($) auto_increment_value will be zero if
5698
* AUTO_INCREMENT is not used. Otherwise
5699
* Query was ALTER TABLE ... AUTO_INCREMENT = x; or
5700
* CREATE TABLE ... AUTO_INCREMENT = x;
5702
#ifdef XT_USE_DEFAULT_MEMORY_TABS
5703
if (create_info->storage_media == HA_SM_DEFAULT)
5704
source_dic.dic_tab_flags |= XT_TF_MEMORY_TABLE;
5708
StorageEngine::writeDefinitionFromPath(ident, proto);
5710
// Build the table definition from the query text and check its foreign keys;
// the second argument of checkForeignKeys is the temporary-table indication.
tab_def = xt_ri_create_table(self, true, (XTPathStrPtr) table_path, const_cast<char *>(thd->getQueryString().c_str()), myxt_create_table_from_table(self, &table_arg), &source_dic);
5711
tab_def->checkForeignKeys(self, proto.type() == message::Table::TEMPORARY);
5713
// tab_def = xt_ri_create_table(self, true, (XTPathStrPtr) table_path, *thd_query(thd), myxt_create_table_from_table(self, table_arg));
5714
tab_def = xt_ri_create_table(self, true, (XTPathStrPtr) table_path, *thd_query(thd), myxt_create_table_from_table(self, table_arg), &source_dic);
5715
tab_def->checkForeignKeys(self, create_info->options & HA_LEX_CREATE_TMP_TABLE);
5716
dic.dic_table_type = XT_TABLE_TYPE_STANDARD;
5719
dic.dic_table = tab_def;
5721
// Fill the dictionary: table object, flags for temporary tables, and the
// auto-increment and average row size values taken from the table options.
dic.dic_my_table = &table_arg;
5722
dic.dic_tab_flags = source_dic.dic_tab_flags;
5723
//if (create_info.storage_media == HA_SM_MEMORY)
5724
// dic.dic_tab_flags |= XT_TF_MEMORY_TABLE;
5725
if (proto.type() == message::Table::TEMPORARY)
5726
dic.dic_tab_flags |= XT_TF_REAL_TEMP_TABLE;
5727
if (myxt_temp_table_name(table_path))
5728
dic.dic_tab_flags |= XT_TF_DDL_TEMP_TABLE;
5730
dic.dic_min_auto_inc = (xtWord8) proto.options().auto_increment_value(); /* ($) */
5731
dic.dic_def_ave_row_size = proto.options().avg_row_length();
5733
dic.dic_my_table = table_arg;
5734
dic.dic_tab_flags = source_dic.dic_tab_flags;
5736
if (create_info->storage_media == HA_SM_MEMORY)
5737
dic.dic_tab_flags |= XT_TF_MEMORY_TABLE;
5738
if (create_info->options & HA_LEX_CREATE_TMP_TABLE)
5739
dic.dic_tab_flags |= XT_TF_REAL_TEMP_TABLE;
5740
if (myxt_temp_table_name(table_path))
5741
dic.dic_tab_flags |= XT_TF_DDL_TEMP_TABLE;
5743
dic.dic_min_auto_inc = (xtWord8) create_info->auto_increment_value; /* ($) */
5744
dic.dic_def_ave_row_size = (xtWord8) table_arg->s->avg_row_length;
5746
myxt_setup_dictionary(self, &dic);
5749
* We used to ignore the value of foreign_key_checks flag and allowed creation
5750
* of tables with "hanging" references. Now we validate FKs if foreign_key_checks != 0
5752
self->st_ignore_fkeys = (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
5755
* Previously I set delete_if_exists=TRUE because
5756
* CREATE TABLE was being used to TRUNCATE.
5757
* This was due to the flag HTON_CAN_RECREATE.
5758
* Now I could set delete_if_exists=FALSE, but
5759
* leaving it TRUE should not cause any problems.
5761
xt_create_table(self, (XTPathStrPtr) table_path, &dic);
5765
// Presumably the error path: release the definition and detach it from dic
// before the error is mapped - TODO confirm.
tab_def->finalize(self);
5766
dic.dic_table = NULL;
5767
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5771
/* Free the dictionary, but not 'table_arg'! */
5772
dic.dic_my_table = NULL;
5773
myxt_free_dictionary(self, &dic);
5778
// Supplies the auto-increment value in create_info from the dictionary of the
// open table (dic_min_auto_inc), but only when a table is open and the caller
// has not set HA_CREATE_USED_AUTO in used_fields.
void ha_pbxt::update_create_info(HA_CREATE_INFO *create_info)
5782
if ((ot = pb_open_tab)) {
5783
if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) {
5784
/* Fill in the minimum auto-increment value! */
5785
create_info->auto_increment_value = ot->ot_table->tab_dic.dic_min_auto_inc;
5791
// Begins a PBXT transaction for the session. Visible steps: attach the PBXT
// thread, derive the isolation mode and the foreign-key and auto-commit flags
// from the session, reset the per-transaction state flags, wait for recovery,
// open a database if none is open, and call xt_xn_begin().
// NOTE(review): self is used right after ha_set_current_thread() with no
// visible NULL check - confirm it cannot fail here.
int PBXTStorageEngine::doStartTransaction(Session *thd, start_transaction_option_t XT_UNUSED(options))
5794
XTThreadPtr self = ha_set_current_thread(thd, &err);
5796
XT_PRINT0(self, "PBXTStorageEngine::doStartTransaction\n");
5798
/* Transaction mode numbers must be identical! */
5799
(void) ASSERT_NS(ISO_READ_UNCOMMITTED == XT_XACT_UNCOMMITTED_READ);
5800
(void) ASSERT_NS(ISO_SERIALIZABLE == XT_XACT_SERIALIZABLE);
5802
// Only two modes are used: levels up to READ COMMITTED map to committed read,
// everything above maps to repeatable read.
self->st_xact_mode = thd_tx_isolation(thd) <= ISO_READ_COMMITTED ? XT_XACT_COMMITTED_READ : XT_XACT_REPEATABLE_READ;
5803
self->st_ignore_fkeys = (thd_test_options(thd,OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
5804
// Auto-commit is on when neither OPTION_NOT_AUTOCOMMIT nor OPTION_BEGIN is set.
self->st_auto_commit = (thd_test_options(thd, (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) == 0;
5805
self->st_table_trans = FALSE;
5806
self->st_abort_trans = FALSE;
5807
self->st_stat_ended = FALSE;
5808
self->st_stat_trans = FALSE;
5809
xt_xres_wait_for_recovery(self, XT_RECOVER_SWEPT);
5811
if (!self->st_database)
5812
xt_ha_open_database_of_table(self, NULL);
5814
if (!xt_xn_begin(self)) {
5815
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, /*pb_ignore_dup_key*/false);
5822
// Savepoint hook. The savepoint argument is unnamed and unused; the visible
// body only returns the current PBXT thread error of the session mapped to a
// server error code.
int PBXTStorageEngine::doSetSavepoint(drizzled::Session* thd, drizzled::NamedSavepoint&)
5824
return xt_ha_pbxt_thread_error_for_mysql(thd, xt_ha_thd_to_self(thd), false);
5827
// Rollback-to-savepoint hook. The savepoint argument is unnamed and unused;
// the visible body only returns the current PBXT thread error of the session
// mapped to a server error code.
int PBXTStorageEngine::doRollbackToSavepoint(drizzled::Session* thd, drizzled::NamedSavepoint&)
5829
return xt_ha_pbxt_thread_error_for_mysql(thd, xt_ha_thd_to_self(thd), false);
5832
// Release-savepoint hook. The savepoint argument is unnamed and unused; the
// visible body only returns the current PBXT thread error of the session
// mapped to a server error code.
int PBXTStorageEngine::doReleaseSavepoint(drizzled::Session* thd, drizzled::NamedSavepoint&)
5834
return xt_ha_pbxt_thread_error_for_mysql(thd, xt_ha_thd_to_self(thd), false);
5837
// Commit hook. The PBXT thread is read from the engine data slot of the
// session. A commit is issued only when the session has neither
// OPTION_NOT_AUTOCOMMIT nor OPTION_BEGIN set and a PBXT thread exists;
// a failed xt_xn_commit() is mapped to a server error code.
int PBXTStorageEngine::doCommit(drizzled::Session* thd, bool)
5840
XTThreadPtr self = (XTThreadPtr) *thd->getEngineData(pbxt_hton);
5842
bool real_commit = !session_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN);
5844
XT_PRINT1(self, "PBXTStorageEngine::doCommit(real_commit = %s)\n", real_commit ? "true" : "false");
5846
if (real_commit && self) {
5847
if (!xt_xn_commit(self))
5848
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5854
// Rollback hook. The PBXT thread is read from the engine data slot of the
// session. A rollback is issued only when the session has neither
// OPTION_NOT_AUTOCOMMIT nor OPTION_BEGIN set and a PBXT thread exists;
// a failed xt_xn_rollback() is mapped to a server error code.
int PBXTStorageEngine::doRollback(drizzled::Session* thd, bool)
5857
XTThreadPtr self = (XTThreadPtr) *thd->getEngineData(pbxt_hton);
5859
bool real_commit = !session_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN);
5861
XT_PRINT1(self, "PBXTStorageEngine::doRollback(real_commit = %s)\n", real_commit ? "true" : "false");
5863
if (real_commit && self) {
5864
if (!xt_xn_rollback(self))
5865
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5872
// Collects the tables of a schema from a cached directory listing. For each
// entry the text from the first dot onwards is compared with
// DEFAULT_FILE_EXTENSION and the name is tested against TMP_FILE_PREFIX;
// a TableIdentifier built from the decoded name (extension cut off) is
// appended to set_of_identifiers.
void PBXTStorageEngine::doGetTableIdentifiers(drizzled::CachedDirectory &directory,
5873
drizzled::SchemaIdentifier &schema_identifier,
5874
drizzled::TableIdentifiers &set_of_identifiers)
5876
CachedDirectory::Entries entries= directory.getEntries();
5878
for (CachedDirectory::Entries::iterator entry_iter= entries.begin();
5879
entry_iter != entries.end(); ++entry_iter)
5881
CachedDirectory::Entry *entry= *entry_iter;
5882
const std::string *filename= &entry->filename;
5884
assert(filename->size());
5886
const char *ext= strchr(filename->c_str(), '.');
5888
// True for entries without a dot, with a different extension (presumably a
// non-zero my_strcasecmp result), or starting with TMP_FILE_PREFIX;
// presumably such entries are skipped - TODO confirm.
if (ext == NULL || my_strcasecmp(system_charset_info, ext, DEFAULT_FILE_EXTENSION) ||
5889
(filename->compare(0, strlen(TMP_FILE_PREFIX), TMP_FILE_PREFIX) == 0))
5893
char uname[NAME_LEN + 1];
5894
uint32_t file_name_len;
5896
file_name_len= filename_to_tablename(filename->c_str(), uname, sizeof(uname));
5897
// TODO: Remove need for memory copy here
5898
// sizeof includes the terminating NUL, hence the + 1 when cutting off the extension.
uname[file_name_len - sizeof(DEFAULT_FILE_EXTENSION) + 1]= '\0'; // Subtract ending, place NULL
5900
set_of_identifiers.push_back(TableIdentifier(schema_identifier, uname));
5905
// Collects table names from a cached directory listing. For each entry the
// text from the first dot onwards is compared with DEFAULT_FILE_EXTENSION and
// the name is tested against TMP_FILE_PREFIX; the decoded name with the
// extension cut off is inserted into set_of_names.
void PBXTStorageEngine::doGetTableNames(
5906
CachedDirectory &directory,
5908
std::set<std::string>& set_of_names)
5910
CachedDirectory::Entries entries= directory.getEntries();
5912
for (CachedDirectory::Entries::iterator entry_iter= entries.begin();
5913
entry_iter != entries.end(); ++entry_iter)
5915
CachedDirectory::Entry *entry= *entry_iter;
5916
const std::string *filename= &entry->filename;
5918
assert(filename->size());
5920
const char *ext= strchr(filename->c_str(), '.');
5922
// True for entries without a dot, with a different extension (presumably a
// non-zero my_strcasecmp result), or starting with TMP_FILE_PREFIX;
// presumably such entries are skipped - TODO confirm.
if (ext == NULL || my_strcasecmp(system_charset_info, ext, DEFAULT_FILE_EXTENSION) ||
5923
(filename->compare(0, strlen(TMP_FILE_PREFIX), TMP_FILE_PREFIX) == 0))
5927
char uname[NAME_LEN + 1];
5928
uint32_t file_name_len;
5930
file_name_len= filename_to_tablename(filename->c_str(), uname, sizeof(uname));
5931
// TODO: Remove need for memory copy here
5932
// sizeof includes the terminating NUL, hence the + 1 when cutting off the extension.
uname[file_name_len - sizeof(DEFAULT_FILE_EXTENSION) + 1]= '\0'; // Subtract ending, place NULL
5933
set_of_names.insert(uname);
5939
// Existence test for a table: builds the identifier path plus
// DEFAULT_FILE_EXTENSION and probes that file with access(F_OK).
// A non-zero result from access() means the file could not be found.
bool PBXTStorageEngine::doDoesTableExist(Session&, TableIdentifier &identifier)
5941
std::string proto_path(identifier.getPath());
5942
proto_path.append(DEFAULT_FILE_EXTENSION);
5944
if (access(proto_path.c_str(), F_OK))
5954
// Returns the foreign key clauses of the open table as a C string held in a
// string buffer (tab_def.sb_cstring). The buffer starts out zeroed, so the
// result is the initial zero value when nothing is loaded into it.
// Visible steps: attach the PBXT thread, reopen the table, and ask the table
// dictionary to write its foreign key text into the buffer.
char *ha_pbxt::get_foreign_key_create_info()
5956
THD *thd = current_thd;
5959
XTStringBufferRec tab_def = { 0, 0, 0 };
5961
if (!(self = ha_set_current_thread(thd, &err))) {
5962
xt_ha_pbxt_to_mysql_error(err);
5967
if ((err = reopen()))
5971
// Tested before the dictionary is dereferenced below; the action taken when
// it is missing is not visible here - TODO confirm.
if (!pb_open_tab->ot_table->tab_dic.dic_table)
5975
pb_open_tab->ot_table->tab_dic.dic_table->loadForeignKeyString(self, &tab_def);
5978
// Presumably the error path: empty the buffer and map the thread error - TODO confirm.
xt_sb_set_size(self, &tab_def, 0);
5979
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
5983
return tab_def.sb_cstring;
5986
// Counterpart of get_foreign_key_create_info(); presumably releases the string
// that function returned - TODO confirm against the body.
void ha_pbxt::free_foreign_key_create_info(char* str)
5991
// Copies the message of the current PBXT thread exception (e_err_msg) into
// buf using system_charset_info. The error argument is not used. The thread
// lookup and the e_xt_err test come first; what is returned in each case is
// not visible here - TODO confirm.
bool ha_pbxt::get_error_message(int XT_UNUSED(error), String *buf)
5993
THD *thd = current_thd;
5997
if (!(self = ha_set_current_thread(thd, &err)))
6000
if (!self->t_exception.e_xt_err)
6003
buf->copy(self->t_exception.e_err_msg, (uint32_t) strlen(self->t_exception.e_err_msg), system_charset_info);
6008
* get info about FKs of the currently open table
6010
* 1. REPLACE; is > 0 if table is referred by a FOREIGN KEY
6011
* 2. INFORMATION_SCHEMA tables: TABLE_CONSTRAINTS, REFERENTIAL_CONSTRAINTS
6012
* Return value: as of 5.1.24 it's ignored
6015
// Fills f_key_list with one FOREIGN_KEY_INFO per foreign key defined on the
// currently open table. All strings and the info records themselves are
// allocated from the THD via thd_alloc()/thd_make_lex_string().
// Errors raised with xt_throw_errno() are converted to a MySQL error code at
// the end via xt_ha_pbxt_thread_error_for_mysql().
int ha_pbxt::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
6021
if (!(self = ha_set_current_thread(thd, &err))) {
6022
return xt_ha_pbxt_to_mysql_error(err);
6026
XTDDTable *table_dic = pb_open_tab->ot_table->tab_dic.dic_table;
6028
if (table_dic == NULL)
6029
xt_throw_errno(XT_CONTEXT, XT_ERR_NO_DICTIONARY);
6031
for (int i = 0, sz = table_dic->dt_fkeys.size(); i < sz; i++) {
6032
// Placement new: construct the record in memory obtained from thd_alloc().
FOREIGN_KEY_INFO *fk_info= new // assumed that C++ exceptions are disabled
6033
(thd_alloc(thd, sizeof(FOREIGN_KEY_INFO))) FOREIGN_KEY_INFO;
6035
if (fk_info == NULL)
6036
xt_throw_errno(XT_CONTEXT, XT_ENOMEM);
6038
XTDDForeignKey *fk = table_dic->dt_fkeys.itemAt(i);
6040
// Split the referenced table's path into database and table names by
// scanning backwards from the end of the path for directory separators.
// NOTE(review): the loop bodies and pointer adjustments between the two
// scans are not visible in this excerpt; presumably ref_tbl_name ends up
// just past the last separator and ref_db_name just past the one before.
const char *path = fk->fk_ref_tab_name->ps_path;
6041
const char *ref_tbl_name = path + strlen(path);
6043
while (ref_tbl_name != path && !XT_IS_DIR_CHAR(*ref_tbl_name))
6046
const char * ref_db_name = ref_tbl_name - 1;
6048
while (ref_db_name != path && !XT_IS_DIR_CHAR(*ref_db_name))
6054
// "forein_id" is the field's spelling in FOREIGN_KEY_INFO; it receives the
// constraint name.
fk_info->forein_id = thd_make_lex_string(thd, 0,
6055
fk->co_name, (uint) strlen(fk->co_name), 1);
6057
// Database name length: the span up to ref_tbl_name minus one character
// (presumably the separator between database and table -- confirm).
fk_info->referenced_db = thd_make_lex_string(thd, 0,
6058
ref_db_name, (uint) (ref_tbl_name - ref_db_name - 1), 1);
6060
fk_info->referenced_table = thd_make_lex_string(thd, 0,
6061
ref_tbl_name, (uint) strlen(ref_tbl_name), 1);
6063
// Stays NULL unless the referenced index is found by number below.
fk_info->referenced_key_name = NULL;
6065
XTIndex *ix = fk->getReferenceIndexPtr();
6066
if (ix == NULL) /* can be NULL if another thread changes referenced table at the moment */
6069
XTDDTable *ref_table = fk->fk_ref_table;
6071
// might be a self-reference
6072
if ((ref_table == NULL)
6073
&& (xt_tab_compare_names(path, table_dic->dt_table->tab_name->ps_path) == 0)) {
6074
ref_table = table_dic;
6077
// Look up the referenced index's name by matching its index number in the
// referenced table's dictionary.
if (ref_table != NULL) {
6078
const XTList<XTDDIndex>& ix_list = ref_table->dt_indexes;
6079
for (int j = 0, sz2 = ix_list.size(); j < sz2; j++) {
6080
XTDDIndex *ddix = ix_list.itemAt(j);
6081
if (ddix->in_index == ix->mi_index_no) {
6082
// Prefer the constraint name, falling back to the index name.
const char *ix_name =
6083
ddix->co_name ? ddix->co_name : ddix->co_ind_name;
6084
fk_info->referenced_key_name = thd_make_lex_string(thd, 0,
6085
ix_name, (uint) strlen(ix_name), 1);
6091
// ON DELETE / ON UPDATE actions, rendered as strings.
action = XTDDForeignKey::actionTypeToString(fk->fk_on_delete);
6092
fk_info->delete_method = thd_make_lex_string(thd, 0,
6093
action, (uint) strlen(action), 1);
6094
action = XTDDForeignKey::actionTypeToString(fk->fk_on_update);
6095
fk_info->update_method = thd_make_lex_string(thd, 0,
6096
action, (uint) strlen(action), 1);
6098
// Column names on the referencing (this) side of the key.
const XTList<XTDDColumnRef>& cols = fk->co_cols;
6099
for (int j = 0, sz2 = cols.size(); j < sz2; j++) {
6100
XTDDColumnRef *col_ref= cols.itemAt(j);
6101
fk_info->foreign_fields.push_back(thd_make_lex_string(thd, 0,
6102
col_ref->cr_col_name, (uint) strlen(col_ref->cr_col_name), 1));
6105
// Column names on the referenced side of the key.
const XTList<XTDDColumnRef>& ref_cols = fk->fk_ref_cols;
6106
for (int j = 0, sz2 = ref_cols.size(); j < sz2; j++) {
6107
XTDDColumnRef *col_ref= ref_cols.itemAt(j);
6108
fk_info->referenced_fields.push_back(thd_make_lex_string(thd, 0,
6109
col_ref->cr_col_name, (uint) strlen(col_ref->cr_col_name), 1));
6112
f_key_list->push_back(fk_info);
6116
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
6123
// Reports whether the open table is the target of a foreign key:
// returns 1 when the table dictionary's dt_trefs is non-NULL, otherwise 0.
uint ha_pbxt::referenced_by_foreign_key()
6125
XTDDTable *table_dic = pb_open_tab->ot_table->tab_dic.dic_table;
6129
/* Check the list of referencing tables: */
6130
return table_dic->dt_trefs ? 1 : 0;
6134
// Local definition of st_mysql_sys_var containing the common plugin variable
// header.
// NOTE(review): presumably declared here so the update callback below can
// read var->flags -- confirm against the plugin headers.
struct st_mysql_sys_var
6136
MYSQL_PLUGIN_VAR_HEADER;
6139
// USE_CONST_SAVE selects the callback signature whose `save` argument is
// const-qualified. It is defined for 5.1.24 and later within the pre-6.0
// series, and for 6.0.5 and later.
#if MYSQL_VERSION_ID < 60000
6140
#if MYSQL_VERSION_ID >= 50124
6141
#define USE_CONST_SAVE
6144
#if MYSQL_VERSION_ID >= 60005
6145
#define USE_CONST_SAVE
6150
// NOTE(review): presumably in the Drizzle branch of a conditional that is
// not visible here; maps the MySQL type name onto Drizzle's equivalent.
#define st_mysql_sys_var drizzled::drizzle_sys_var
6153
// Update callback for the record_cache_size system variable.
// Stores the new string value in the variable's target, recomputes the
// numeric cache size from it, applies it to the table cache and logs it.
// Two signatures are provided; USE_CONST_SAVE picks the one whose `save`
// argument is const-qualified.
#ifdef USE_CONST_SAVE
6154
static void pbxt_record_cache_size_func(THD *XT_UNUSED(thd), struct st_mysql_sys_var *var, void *tgt, const void *save)
6156
static void pbxt_record_cache_size_func(THD *XT_UNUSED(thd), struct st_mysql_sys_var *var, void *tgt, void *save)
6159
xtInt8 record_cache_size;
6161
// Remember the previous string so it can be freed if the variable owns it.
char *old= *(char **) tgt;
6162
// NOTE(review): in the const variant this cast drops the const qualifier
// of `save`; the pointed-to value is only read here.
*(char **)tgt= *(char **) save;
6163
// When the variable owns its memory, keep a private copy of the new value
// and release the old one.
if (var->flags & PLUGIN_VAR_MEMALLOC)
6165
*(char **)tgt= my_strdup(*(char **) save, MYF(0));
6166
my_free(old, MYF(0));
6168
record_cache_size = ha_set_variable(&pbxt_record_cache_size, &vp_record_cache_size);
6169
xt_tc_set_cache_size((size_t) record_cache_size);
6173
// NOTE(review): `buffer` is declared on a line not visible here, so the
// bound of this sprintf cannot be checked from this excerpt.
sprintf(buffer, "pbxt_record_cache_size=%llu\n", (u_llong) record_cache_size);
6174
// NOTE(review): buffer is passed as the last argument; if xt_logf treats it
// as a printf-style format, "%s" with buffer as the argument would be
// safer -- confirm xt_logf's signature.
xt_logf(XT_NT_INFO, buffer);
6179
// Plugin descriptor for the storage engine, carrying the handlerton
// interface version.
struct st_mysql_storage_engine pbxt_storage_engine = {
6180
MYSQL_HANDLERTON_INTERFACE_VERSION
6182
// Plugin descriptor for the INFORMATION_SCHEMA statistics plugin.
// NOTE(review): the identifier is spelled "statitics"; renaming it would
// require updating every reference, so it is left as is.
static st_mysql_information_schema pbxt_statitics = {
6183
MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION
6187
// System variable definitions for the PBXT plugin.
// Size-like settings are string variables flagged PLUGIN_VAR_READONLY;
// numeric and boolean tuning knobs use PLUGIN_VAR_OPCMDARG.
// NOTE(review): the trailing arguments of several MYSQL_SYSVAR_STR
// invocations are on lines not visible in this excerpt.
#if MYSQL_VERSION_ID >= 50118
6188
static MYSQL_SYSVAR_STR(index_cache_size, pbxt_index_cache_size,
6189
PLUGIN_VAR_READONLY,
6190
"The amount of memory allocated to the index cache, used only to cache index data.",
6193
// NOTE(review): this variable is flagged read-only yet registers
// pbxt_record_cache_size_func as its update callback; the commented-out
// flags suggest it was once settable at runtime -- confirm which is intended.
static MYSQL_SYSVAR_STR(record_cache_size, pbxt_record_cache_size,
6194
PLUGIN_VAR_READONLY, // PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC,
6195
"The amount of memory allocated to the record cache used to cache table data.",
6196
NULL, pbxt_record_cache_size_func, NULL);
6198
static MYSQL_SYSVAR_STR(log_cache_size, pbxt_log_cache_size,
6199
PLUGIN_VAR_READONLY,
6200
"The amount of memory allocated to the transaction log cache used to cache transaction log data.",
6203
static MYSQL_SYSVAR_STR(log_file_threshold, pbxt_log_file_threshold,
6204
PLUGIN_VAR_READONLY,
6205
"The size of a transaction log before rollover, and a new log is created.",
6208
static MYSQL_SYSVAR_STR(transaction_buffer_size, pbxt_transaction_buffer_size,
6209
PLUGIN_VAR_READONLY,
6210
"The size of the global transaction log buffer (the engine allocates 2 buffers of this size).",
6213
static MYSQL_SYSVAR_STR(log_buffer_size, pbxt_log_buffer_size,
6214
PLUGIN_VAR_READONLY,
6215
"The size of the buffer used to cache data from transaction and data logs during sequential scans, or when writing a data log.",
6218
// NOTE(review): the help text below describes a per-thread transaction data
// buffer, which does not match the variable name checkpoint_frequency; it
// looks copy/pasted from another variable -- confirm and correct the text.
static MYSQL_SYSVAR_STR(checkpoint_frequency, pbxt_checkpoint_frequency,
6219
PLUGIN_VAR_READONLY,
6220
"The size of the transaction data buffer which is allocate by each thread.",
6223
static MYSQL_SYSVAR_STR(data_log_threshold, pbxt_data_log_threshold,
6224
PLUGIN_VAR_READONLY,
6225
"The maximum size of a data log file.",
6228
static MYSQL_SYSVAR_STR(data_file_grow_size, pbxt_data_file_grow_size,
6229
PLUGIN_VAR_READONLY,
6230
"The amount by which the handle data files (.xtd) grow.",
6233
static MYSQL_SYSVAR_STR(row_file_grow_size, pbxt_row_file_grow_size,
6234
PLUGIN_VAR_READONLY,
6235
"The amount by which the row pointer files (.xtr) grow.",
6238
static MYSQL_SYSVAR_STR(record_write_threshold, pbxt_record_write_threshold,
6239
PLUGIN_VAR_READONLY,
6240
"The amount data written to the record files (.xtd and .xtr) before the changes are applied to the database.",
6243
// For the MYSQL_SYSVAR_INT definitions, the trailing arguments are:
// check callback, update callback, default, minimum, maximum, block size
// (per the MySQL plugin API).
static MYSQL_SYSVAR_INT(garbage_threshold, xt_db_garbage_threshold,
6244
PLUGIN_VAR_OPCMDARG,
6245
"The percentage of garbage in a repository file before it is compacted.",
6246
NULL, NULL, XT_DL_DEFAULT_GARBAGE_LEVEL, 0, 100, 1);
6248
static MYSQL_SYSVAR_INT(log_file_count, xt_db_log_file_count,
6249
PLUGIN_VAR_OPCMDARG,
6250
"The minimum number of transaction logs used.",
6251
NULL, NULL, XT_DL_DEFAULT_XLOG_COUNT, 1, 20000, 1);
6253
static MYSQL_SYSVAR_INT(auto_increment_mode, xt_db_auto_increment_mode,
6254
PLUGIN_VAR_OPCMDARG,
6255
"The auto-increment mode, 0 = MySQL standard (default), 1 = previous ID's never reused.",
6256
NULL, NULL, XT_AUTO_INCREMENT_DEF, 0, 1, 1);
6259
static MYSQL_SYSVAR_INT(offline_log_function, xt_db_offline_log_function,
6260
PLUGIN_VAR_OPCMDARG,
6261
"Determines what happens to transaction logs when the are moved offline, 0 = recycle logs (default), 1 = delete logs (default on Mac OS X), 2 = keep logs.",
6262
NULL, NULL, XT_OFFLINE_LOG_FUNCTION_DEF, 0, 2, 1);
6265
static MYSQL_SYSVAR_INT(sweeper_priority, xt_db_sweeper_priority,
6266
PLUGIN_VAR_OPCMDARG,
6267
"Determines the priority of the background sweeper process, 0 = low (default), 1 = normal (same as user threads), 2 = high.",
6268
NULL, NULL, XT_PRIORITY_LOW, XT_PRIORITY_LOW, XT_PRIORITY_HIGH, 1);
6271
// support_xa is defined twice with different help text (default enabled vs.
// default disabled).
// NOTE(review): the two definitions presumably sit in alternate
// preprocessor branches that are not visible in this excerpt.
static MYSQL_SYSVAR_BOOL(support_xa, pbxt_support_xa,
6272
PLUGIN_VAR_OPCMDARG,
6273
"Enable PBXT support for the XA two-phase commit, default is enabled",
6276
static MYSQL_SYSVAR_BOOL(support_xa, pbxt_support_xa,
6277
PLUGIN_VAR_OPCMDARG,
6278
"Enable PBXT support for the XA two-phase commit, default is disabled (due to assertion failure in MySQL)",
6279
/* The problem is, in MySQL an assertion fails in debug mode:
6280
 * Assertion failed: (total_ha_2pc == (ulong) opt_bin_log+1), function ha_recover, file handler.cc, line 1557.
6285
static MYSQL_SYSVAR_INT(index_dirty_threshold, xt_db_index_dirty_threshold,
6286
PLUGIN_VAR_OPCMDARG,
6287
"The percentage of the index cache that must be dirty before the index cache is flushed.",
6288
NULL, NULL, XT_DL_DEFAULT_INDEX_DIRTY_LEVEL, 0, 100, 1);
6290
static MYSQL_SYSVAR_INT(flush_log_at_trx_commit, xt_db_flush_log_at_trx_commit,
6291
PLUGIN_VAR_OPCMDARG,
6292
"Determines whether the transaction log is written and/or flushed when a transaction is committed (no matter what the setting the log is written and flushed once per second), 0 = no write & no flush, 1 = write & flush (default), 2 = write & no flush.",
6293
NULL, NULL, 1, 0, 2, 1);
6295
// Table of all PBXT system variables, handed to the plugin declarations
// below as their "system variables" entry.
// NOTE(review): the array's terminating entry and closing brace are not
// visible in this excerpt.
static struct st_mysql_sys_var* pbxt_system_variables[] = {
6296
MYSQL_SYSVAR(index_cache_size),
6297
MYSQL_SYSVAR(record_cache_size),
6298
MYSQL_SYSVAR(log_cache_size),
6299
MYSQL_SYSVAR(log_file_threshold),
6300
MYSQL_SYSVAR(transaction_buffer_size),
6301
MYSQL_SYSVAR(log_buffer_size),
6302
MYSQL_SYSVAR(checkpoint_frequency),
6303
MYSQL_SYSVAR(data_log_threshold),
6304
MYSQL_SYSVAR(data_file_grow_size),
6305
MYSQL_SYSVAR(row_file_grow_size),
6306
MYSQL_SYSVAR(record_write_threshold),
6307
MYSQL_SYSVAR(garbage_threshold),
6308
MYSQL_SYSVAR(log_file_count),
6309
MYSQL_SYSVAR(auto_increment_mode),
6310
MYSQL_SYSVAR(offline_log_function),
6311
MYSQL_SYSVAR(sweeper_priority),
6312
MYSQL_SYSVAR(support_xa),
6313
MYSQL_SYSVAR(index_dirty_threshold),
6314
MYSQL_SYSVAR(flush_log_at_trx_commit),
6320
// Drizzle plugin declaration for PBXT: author, description, init function
// and the system variable table.
// NOTE(review): the remaining descriptor fields (name, version, license)
// are on lines not visible in this excerpt.
DRIZZLE_DECLARE_PLUGIN
6325
"Paul McCullagh, PrimeBase Technologies GmbH",
6326
"High performance, multi-versioning transactional engine",
6328
pbxt_init, /* Plugin Init */
6329
pbxt_system_variables, /* system variables */
6330
NULL /* config options */
6332
DRIZZLE_DECLARE_PLUGIN_END;
6334
// MySQL plugin declarations. Two plugins are registered: the PBXT storage
// engine itself and an INFORMATION_SCHEMA plugin exposing PBXT statistics.
// NOTE(review): several descriptor fields (names, license, version) are on
// lines not visible in this excerpt.
mysql_declare_plugin(pbxt)
6336
MYSQL_STORAGE_ENGINE_PLUGIN,
6337
&pbxt_storage_engine,
6339
"Paul McCullagh, PrimeBase Technologies GmbH",
6340
"High performance, multi-versioning transactional engine",
6342
pbxt_init, /* Plugin Init */
6343
pbxt_end, /* Plugin Deinit */
6345
NULL, /* status variables */
6346
// System variables are only registered on 5.1.18 and later.
#if MYSQL_VERSION_ID >= 50118
6347
pbxt_system_variables, /* system variables */
6351
NULL /* config options */
6353
MYSQL_INFORMATION_SCHEMA_PLUGIN,
6356
"Paul McCullagh, PrimeBase Technologies GmbH",
6357
// NOTE(review): "statitics" in the description below is a typo for
// "statistics"; it is a user-visible string, so fix it deliberately.
"PBXT internal system statitics",
6359
pbxt_init_statistics, /* plugin init */
6360
pbxt_exit_statistics, /* plugin deinit */
6362
NULL, /* status variables */
6363
NULL, /* system variables */
6364
NULL /* config options */
6366
mysql_declare_plugin_end;
6369
#if defined(XT_WIN) && defined(XT_COREDUMP)
6372
* WINDOWS CORE DUMP SUPPORT
6374
* MySQL supports core dumping on Windows with --core-file command line option.
6375
* However it creates dumps with the MiniDumpNormal option which saves only stack traces.
6377
* We instead (or in addition) create dumps with MiniDumpWithoutOptionalData option
6378
* which saves all available information. To enable core dumping enable XT_COREDUMP
6380
* In addition, pbxt_crash_debug must be set to TRUE which is the case if XT_CRASH_DEBUG
6382
* This switch is also controlled by creating a file called "no-debug" or "crash-debug"
6383
* in the pbxt database directory.
6386
// Local declaration of the minidump type flags passed to MiniDumpWriteDump.
// NOTE(review): presumably declared here so the dbghelp header is not
// needed at build time; values should match the Windows SDK definitions.
typedef enum _MINIDUMP_TYPE {
6387
MiniDumpNormal = 0x0000,
6388
MiniDumpWithDataSegs = 0x0001,
6389
MiniDumpWithFullMemory = 0x0002,
6390
MiniDumpWithHandleData = 0x0004,
6391
MiniDumpFilterMemory = 0x0008,
6392
MiniDumpScanMemory = 0x0010,
6393
MiniDumpWithUnloadedModules = 0x0020,
6394
MiniDumpWithIndirectlyReferencedMemory = 0x0040,
6395
MiniDumpFilterModulePaths = 0x0080,
6396
MiniDumpWithProcessThreadData = 0x0100,
6397
MiniDumpWithPrivateReadWriteMemory = 0x0200,
6400
// Exception context handed to the dump writer: the exception pointers and
// whether they refer to the client's address space. core_dump() also sets a
// ThreadId member, whose declaration is on a line not visible here.
typedef struct _MINIDUMP_EXCEPTION_INFORMATION {
6402
PEXCEPTION_POINTERS ExceptionPointers;
6403
BOOL ClientPointers;
6404
} MINIDUMP_EXCEPTION_INFORMATION, *PMINIDUMP_EXCEPTION_INFORMATION;
6406
// Function pointer type for MiniDumpWriteDump, resolved at runtime with
// GetProcAddress() in core_dump().
// NOTE(review): the leading parameters (process, process id, file) and the
// final callback parameter are on lines not visible in this excerpt.
typedef BOOL (WINAPI *MINIDUMPWRITEDUMP)(
6410
MINIDUMP_TYPE DumpType,
6411
void *ExceptionParam,
6412
void *UserStreamParam,
6416
// Buffers for the directory prefix and the full dump file name.
// NOTE(review): how base_path is populated before core_dump() runs is not
// visible in this excerpt.
char base_path[_MAX_PATH] = {0};
6417
char dump_path[_MAX_PATH] = {0};
6419
// Writes a minidump of the current process to a new "PBXTCore<n>.dmp" file
// using MiniDumpWriteDump from DBGHELP.DLL.
// pExceptionInfo may be NULL; in that case no exception record is passed
// to the dump writer.
void core_dump(struct _EXCEPTION_POINTERS *pExceptionInfo)
6421
SECURITY_ATTRIBUTES sa = { sizeof(SECURITY_ATTRIBUTES), 0, 0 };
6423
HMODULE hDll = NULL;
6425
MINIDUMPWRITEDUMP pDump;
6426
char *end_ptr = base_path;
6428
MINIDUMP_EXCEPTION_INFORMATION ExInfo, *ExInfoPtr = NULL;
6430
if (pExceptionInfo) {
6431
ExInfo.ThreadId = GetCurrentThreadId();
6432
ExInfo.ExceptionPointers = pExceptionInfo;
6433
// NOTE(review): ClientPointers is a BOOL; NULL is used here where FALSE
// is presumably meant.
ExInfo.ClientPointers = NULL;
6434
ExInfoPtr = &ExInfo;
6437
// Remember where the directory prefix ends before the DLL name is appended.
// NOTE(review): presumably used later to truncate base_path back to the
// prefix; that statement is not visible in this excerpt.
end_ptr = base_path + strlen(base_path);
6439
// First try DBGHELP.DLL in the base_path directory.
// NOTE(review): strcat is unbounded; base_path holds _MAX_PATH bytes.
strcat(base_path, "DBGHELP.DLL" );
6440
hDll = LoadLibrary(base_path);
6444
err = HRESULT_CODE(GetLastError());
6445
// Then try the default library search path.
// NOTE(review): presumably only when the first load failed; the condition
// is not visible in this excerpt.
hDll = LoadLibrary( "DBGHELP.DLL" );
6447
err = HRESULT_CODE(GetLastError());
6452
pDump = (MINIDUMPWRITEDUMP)GetProcAddress( hDll, "MiniDumpWriteDump" );
6455
err = HRESULT_CODE(GetLastError());
6459
// Find the first unused dump file name: CREATE_NEW makes CreateFile fail
// when the file already exists, so numbers are tried in sequence.
for (i = 1; i < INT_MAX; i++) {
6460
// NOTE(review): sprintf is unbounded; dump_path holds _MAX_PATH bytes.
sprintf(dump_path, "%sPBXTCore%08d.dmp", base_path, i);
6461
hFile = CreateFile( dump_path, GENERIC_WRITE, FILE_SHARE_WRITE, NULL, CREATE_NEW,
6462
FILE_ATTRIBUTE_NORMAL, NULL );
6464
if ( hFile != INVALID_HANDLE_VALUE )
6467
if (HRESULT_CODE(GetLastError()) == ERROR_FILE_EXISTS )
6474
// NOTE(review): the section comment above names MiniDumpWithoutOptionalData,
// but the dump type passed here is MiniDumpWithPrivateReadWriteMemory.
BOOL bOK = pDump( GetCurrentProcess(), GetCurrentProcessId(), hFile,
6475
MiniDumpWithPrivateReadWriteMemory, ExInfoPtr, NULL, NULL );
6480
// Unhandled-exception filter: writes a core dump for the faulting context,
// then returns EXCEPTION_EXECUTE_HANDLER.
LONG crash_filter( struct _EXCEPTION_POINTERS *pExceptionInfo )
6482
core_dump(pExceptionInfo);
6483
return EXCEPTION_EXECUTE_HANDLER;
6486
// Installs crash_filter as the process-wide unhandled exception filter.
void register_crash_filter()
6488
SetUnhandledExceptionFilter( (LPTOP_LEVEL_EXCEPTION_FILTER) crash_filter );
6491
#endif // XT_WIN && XT_COREDUMP