1
/* Copyright (c) 2005 PrimeBase Technologies GmbH
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or
8
* (at your option) any later version.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
* 2005-02-08 Paul McCullagh
24
#include "xt_config.h"
35
#include <drizzled/common.h>
36
#include <drizzled/dtcollation.h>
38
#include "mysql_priv.h"
42
#include "database_xt.h"
44
#include "strutil_xt.h"
49
#include "systab_xt.h"
52
//#define TRACE_VARIATIONS
53
//#define TRACE_VARIATIONS_IN_DUP_CHECK
54
//#define DUMP_CHECK_TABLE
55
//#define CHECK_INDEX_ON_CHECK_TABLE
56
//#define TRACE_TABLE_IDS
57
//#define TRACE_FLUSH_TABLE
58
//#define TRACE_CREATE_TABLES
61
#define CHECK_TABLE_STATS
63
#ifdef TRACE_TABLE_IDS
64
//#define PRINTF xt_ftracef
65
#define PRINTF xt_trace
68
static void tab_init_row_file(XTThreadPtr self, XTOpenFilePtr of_row, XTTableHPtr tab, XTDictionaryPtr dic);
69
static void tab_init_ind_file(XTThreadPtr self, XTOpenFilePtr of_ind, XTTableHPtr tab, XTDictionaryPtr dic);
70
static void tab_init_data_file(XTThreadPtr self, XTOpenFilePtr of_rec, XTTableHPtr tab, XTDictionaryPtr dic, size_t def_len, XTStringBufferPtr tab_def);
71
static void tab_free_ext_records(XTTableHPtr tab);
74
* -----------------------------------------------------------------------
78
#define XT_MAX_TABLE_FILE_NAME_SIZE (XT_TABLE_NAME_SIZE+6+40)
81
* -----------------------------------------------------------------------
85
/* GOTCHA! The problem:
87
* The server uses names like: "./test/my_tab",
88
* the BLOB streaming engine uses: "test/my_tab"
89
* which leads to the same table being loaded twice.
91
xtPublic int xt_tab_compare_paths(char *n1, char *n2)
93
n1 = xt_last_2_names_of_path(n1);
94
n2 = xt_last_2_names_of_path(n2);
96
return strcasecmp(n1, n2);
97
return strcmp(n1, n2);
101
* This function compares only the last 2 components of
102
* the path because table names must differ in this area.
104
xtPublic int xt_tab_compare_names(const char *n1, const char *n2)
{
	/* Only the last two path components take part in the comparison. */
	const char *lhs = xt_last_2_names_of_path(n1);
	const char *rhs = xt_last_2_names_of_path(n2);

	/* pbxt_ignore_case selects a case-insensitive comparison. */
	return pbxt_ignore_case ? strcasecmp(lhs, rhs) : strcmp(lhs, rhs);
}
114
* -----------------------------------------------------------------------
118
static xtBool tab_list_comp(void *key, void *data)
{
	/* key is a path string; data is the table handle stored in the list. */
	XTTableHPtr	entry = (XTTableHPtr) data;
	const char	*wanted = xt_last_2_names_of_path((char *) key);
	const char	*stored = xt_last_2_names_of_path(entry->tab_name->ps_path);

	/* Case-sensitive match on the last two path components. */
	return strcmp(wanted, stored) == 0;
}
125
static xtHashValue tab_list_hash(xtBool is_key, void *key_data)
127
XTTableHPtr tab = (XTTableHPtr) key_data;
130
return xt_ht_hash(xt_last_2_names_of_path((char *) key_data));
131
return xt_ht_hash(xt_last_2_names_of_path(tab->tab_name->ps_path));
134
static xtBool tab_list_comp_ci(void *key, void *data)
{
	/* key is a path string; data is the table handle stored in the list. */
	XTTableHPtr	entry = (XTTableHPtr) data;
	const char	*wanted = xt_last_2_names_of_path((char *) key);
	const char	*stored = xt_last_2_names_of_path(entry->tab_name->ps_path);

	/* Case-insensitive match on the last two path components. */
	return strcasecmp(wanted, stored) == 0;
}
141
static xtHashValue tab_list_hash_ci(xtBool is_key, void *key_data)
143
XTTableHPtr tab = (XTTableHPtr) key_data;
146
return xt_ht_casehash(xt_last_2_names_of_path((char *) key_data));
147
return xt_ht_casehash(xt_last_2_names_of_path(tab->tab_name->ps_path));
150
static void tab_list_free(XTThreadPtr self, void *data)
{
	XTTableHPtr		table = (XTTableHPtr) data;
	XTDatabaseHPtr	database = table->tab_db;
	XTTableEntryPtr	entry;

	/* When the table is removed from the name list, the entry in the
	 * ID list must stop pointing at it:
	 */
	entry = (XTTableEntryPtr) xt_sl_find(self, database->db_table_by_id, &table->tab_id);
	if (entry)
		entry->te_table = NULL;

	/* Let the dictionary table, if there is one, remove its references. */
	if (table->tab_dic.dic_table)
		table->tab_dic.dic_table->removeReferences(self);

	/* Give up the heap reference to the table. */
	xt_heap_release(self, table);
}
167
static void tab_close_files(XTThreadPtr self, XTTableHPtr tab)
{
	/* Each file is released only if it is set, and the field is then
	 * cleared so that a second call does nothing for that file.
	 */

	/* Record file. */
	if (tab->tab_rec_file) {
		xt_fs_release_file(self, tab->tab_rec_file);
		tab->tab_rec_file = NULL;
	}

	/* Row file. */
	if (tab->tab_row_file) {
		xt_fs_release_file(self, tab->tab_row_file);
		tab->tab_row_file = NULL;
	}

	/* Index file. */
	if (tab->tab_ind_file) {
		xt_fs_release_file(self, tab->tab_ind_file);
		tab->tab_ind_file = NULL;
	}
}
183
static void tab_finalize(XTThreadPtr self, void *x)
185
XTTableHPtr tab = (XTTableHPtr) x;
187
xt_exit_row_locks(&tab->tab_locks);
189
xt_xres_exit_tab(self, tab);
191
if (tab->tab_ind_free_list) {
192
XTIndFreeListPtr list, flist;
194
list = tab->tab_ind_free_list;
197
list = list->fl_next_list;
198
xt_free(self, flist);
200
tab->tab_ind_free_list = NULL;
203
tab_close_files(self, tab);
205
if (tab->tab_index_head) {
206
xt_free(self, tab->tab_index_head);
207
tab->tab_index_head = NULL;
210
tab_free_ext_records(tab);
212
#ifdef TRACE_TABLE_IDS
213
PRINTF("%s: free TABLE: db=%d tab=%d %s\n", self->t_name, (int) tab->tab_db ? tab->tab_db->db_id : 0, (int) tab->tab_id,
214
tab->tab_name ? xt_last_2_names_of_path(tab->tab_name->ps_path) : "?");
217
xt_free(self, tab->tab_name);
218
tab->tab_name = NULL;
220
myxt_free_dictionary(self, &tab->tab_dic);
221
if (tab->tab_free_locks) {
222
tab->tab_seq.xt_op_seq_exit(self);
223
xt_spinlock_free(self, &tab->tab_mem_lock);
224
xt_spinlock_free(self, &tab->tab_ainc_lock);
225
xt_free_mutex(&tab->tab_rec_flush_lock);
226
xt_free_mutex(&tab->tab_ind_flush_lock);
227
xt_free_mutex(&tab->tab_ind_stat_lock);
228
xt_free_mutex(&tab->tab_dic_field_lock);
229
xt_free_mutex(&tab->tab_row_lock);
230
xt_free_mutex(&tab->tab_ind_lock);
231
xt_free_mutex(&tab->tab_rec_lock);
232
for (u_int i=0; i<XT_ROW_RWLOCKS; i++)
233
XT_TAB_ROW_FREE_LOCK(self, &tab->tab_row_rwlock[i]);
235
#ifdef XT_SORT_REC_WRITES
236
if (tab->tab_rec_dw_writes) {
237
xt_free_sortedlist(self, tab->tab_rec_dw_writes);
238
tab->tab_rec_dw_writes = NULL;
240
if (tab->tab_rec_dw_data)
241
xt_free_ns(tab->tab_rec_dw_data);
243
if (tab->tab_rec_flush_task)
244
tab->tab_rec_flush_task->tk_exit();
245
if (tab->tab_ind_flush_task)
246
tab->tab_ind_flush_task->tk_exit();
249
static void tab_onrelease(void *x)
251
XTTableHPtr tab = (XTTableHPtr) x;
253
/* Signal threads waiting for exclusive use of the table: */
254
if (tab->tab_db->db_tables)
255
xt_ht_signal(NULL, tab->tab_db->db_tables);
259
* -----------------------------------------------------------------------
264
* This function sets the table name to "", if the file
265
* does not belong to XT.
267
xtPublic char *xt_tab_file_to_name(size_t size, char *tab_name, char *file_name)
272
file_name = xt_last_name_of_path(file_name);
273
cptr = file_name + strlen(file_name) - 1;
274
while (cptr > file_name && *cptr != '.')
276
if (cptr > file_name && *cptr == '.') {
277
if (strcmp(cptr, ".xtl") == 0 || strcmp(cptr, ".xtr") == 0) {
279
while (cptr > file_name && isdigit(*cptr))
283
const char **ext = pbxt_extensions;
286
if (strcmp(cptr, *ext) == 0)
295
len = cptr - file_name;
299
memcpy(tab_name, file_name, len);
302
/* Return a pointer to what was removed! */
303
return file_name + len;
306
static void tab_get_row_file_name(char *table_name, char *name, xtTableID tab_id)
308
sprintf(table_name, "%s-%lu.xtr", name, (u_long) tab_id);
311
static void tab_get_data_file_name(char *table_name, char *name, xtTableID XT_UNUSED(tab_id))
{
	/* Builds "<name>.xtd" in table_name; the table ID is not part of the name.
	 * NOTE(review): the write is unbounded; the caller's buffer is presumably
	 * XT_MAX_TABLE_FILE_NAME_SIZE bytes - confirm for every caller.
	 */
	sprintf(table_name, "%s.xtd", name);
}
316
static void tab_get_index_file_name(char *table_name, char *name, xtTableID XT_UNUSED(tab_id))
{
	/* Builds "<name>.xti" in table_name; the table ID is not part of the name.
	 * NOTE(review): the write is unbounded; the caller's buffer is presumably
	 * XT_MAX_TABLE_FILE_NAME_SIZE bytes - confirm for every caller.
	 */
	sprintf(table_name, "%s.xti", name);
}
321
static void tab_free_by_id(XTThreadPtr self, void *XT_UNUSED(thunk), void *item)
{
	XTTableEntryPtr entry = (XTTableEntryPtr) item;

	/* Free the table name string, if one is set. */
	if (entry->te_tab_name) {
		xt_free(self, entry->te_tab_name);
		entry->te_tab_name = NULL;
	}

	/* Reset the remaining fields of the entry. */
	entry->te_tab_id = 0;
	entry->te_heap_tab = FALSE;
	entry->te_table = NULL;
}
334
static int tab_comp_by_id(XTThreadPtr XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
336
xtTableID te_id = *((xtTableID *) a);
337
XTTableEntryPtr te_ptr = (XTTableEntryPtr) b;
339
if (te_id < te_ptr->te_tab_id)
341
if (te_id == te_ptr->te_tab_id)
346
static void tab_free_path(XTThreadPtr self, void *XT_UNUSED(thunk), void *item)
{
	/* The item holds a pointer to the path record; free the record itself. */
	XTTablePathPtr *slot = (XTTablePathPtr *) item;

	xt_free(self, *slot);
}
353
static int tab_comp_path(XTThreadPtr XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
{
	/* a is the path string being searched for; b points at a pointer
	 * to a path record.
	 */
	XTTablePathPtr entry = *((XTTablePathPtr *) b);

	return xt_tab_compare_paths((char *) a, entry->tp_path);
}
361
static xtBool tab_get_name_value(XTTableDescPtr td, char **ret_name, char **ret_value)
363
char *ptr = td->x.z.td_curr_ptr;
365
while (*ptr && isspace(*ptr)) ptr++;
367
td->x.z.td_curr_ptr = ptr;
372
while (*ptr && *ptr != '=' && *ptr != '\r' && *ptr != '\n') ptr++;
377
while (*ptr && *ptr != '\r' && *ptr != '\n') ptr++;
390
td->x.z.td_curr_ptr = ptr;
394
xtPublic void xt_describe_tables_init(XTThreadPtr self, XTDatabaseHPtr db, XTTableDescPtr td)
399
xt_strcpy(PATH_MAX, pbuf, db->db_main_path);
400
xt_add_tables_file(PATH_MAX, pbuf);
401
if (xt_fs_exists(pbuf))
402
td->td_type = XT_TD_FROM_TAB_FILE;
404
td->td_type = XT_TD_FROM_DIRECTORY;
406
switch (td->td_type) {
407
case XT_TD_FROM_DIRECTORY:
408
td->x.y.td_path_idx = 0;
409
if (td->x.y.td_path_idx < xt_sl_get_size(db->db_table_paths)) {
410
XTTablePathPtr *tp_ptr;
412
tp_ptr = (XTTablePathPtr *) xt_sl_item_at(db->db_table_paths, td->x.y.td_path_idx);
413
td->td_tab_path = *tp_ptr;
414
td->x.y.td_open_dir = xt_dir_open(self, td->td_tab_path->tp_path, "*.xtr");
417
td->x.y.td_open_dir = NULL;
419
case XT_TD_FROM_TAB_FILE:
426
of = xt_open_file(self, pbuf, XT_FT_STANDARD, XT_FS_DEFAULT, 1024);
427
pushr_(xt_close_file, of);
428
len = (int) xt_seek_eof_file(self, of);
429
buffer = (char *) xt_malloc(self, len + 1);
430
pushr_(xt_free, buffer);
431
if (!xt_pread_file(of, 0, len, len, buffer, NULL, &self->st_statistics.st_x, self))
434
popr_(); // Discard xt_free(buffer)
435
freer_(); // xt_close_file(of)
437
td->x.z.td_table_info = buffer;
438
td->x.z.td_curr_ptr = buffer;
439
while (tab_get_name_value(td, &name, &value)) {
440
if (strcmp(name, "[table]") == 0)
447
xtPublic xtBool xt_describe_tables_next(XTThreadPtr self, XTTableDescPtr td)
454
switch (td->td_type) {
455
case XT_TD_FROM_DIRECTORY:
457
if (!td->x.y.td_open_dir)
460
r = xt_dir_next(self, td->x.y.td_open_dir);
463
xt_describe_tables_exit(self, td);
468
XTTablePathPtr *tp_ptr;
470
if (td->x.y.td_path_idx+1 >= xt_sl_get_size(td->td_db->db_table_paths))
473
if (td->x.y.td_open_dir)
474
xt_dir_close(NULL, td->x.y.td_open_dir);
475
td->x.y.td_open_dir = NULL;
477
td->x.y.td_path_idx++;
478
tp_ptr = (XTTablePathPtr *) xt_sl_item_at(td->td_db->db_table_paths, td->x.y.td_path_idx);
479
td->td_tab_path = *tp_ptr;
480
td->x.y.td_open_dir = xt_dir_open(self, td->td_tab_path->tp_path, "*.xtr");
484
tab_name = xt_dir_name(self, td->x.y.td_open_dir);
485
td->td_tab_id = (xtTableID) xt_file_name_to_id(tab_name);
486
xt_tab_file_to_name(XT_TABLE_NAME_SIZE, td->td_tab_name, tab_name);
487
td->td_heap_tab = FALSE;
489
case XT_TD_FROM_TAB_FILE:
494
while (tab_get_name_value(td, &name, &value)) {
495
if (strcmp(name, "name") == 0)
496
xt_strcpy(XT_TABLE_NAME_SIZE, td->td_tab_name, value);
497
else if (strcmp(name, "id") == 0) {
500
sscanf(value, "%lu", &lvalue);
501
td->td_tab_id = (xtTableID) lvalue;
503
else if (strcmp(name, "storage") == 0) {
504
if (strcmp(value, "heap") == 0)
505
td->td_heap_tab = TRUE;
507
td->td_heap_tab = FALSE;
509
else if (strcmp(name, "location") == 0) {
511
XTTablePathPtr db_path;
516
/* Convert path to WIN path: */
523
if ((tp = (XTTablePathPtr *) xt_sl_find(self, td->td_db->db_table_paths, value)))
529
db_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
530
db_path->tp_tab_count = 0;
531
memcpy(db_path->tp_path, value, len);
532
db_path->tp_path[len] = 0;
533
xt_sl_insert(self, td->td_db->db_table_paths, db_path->tp_path, &db_path);
535
td->td_tab_path = db_path;
537
else if (strcmp(name, "type") == 0) {
540
sscanf(value, "%lu", &lvalue);
541
td->td_tab_type = (xtWord1) lvalue;
543
else if (strcmp(name, "[table]") == 0)
553
xtPublic void xt_describe_tables_exit(XTThreadPtr self, XTTableDescPtr td)
555
switch (td->td_type) {
556
case XT_TD_FROM_DIRECTORY:
557
if (td->x.y.td_open_dir)
558
xt_dir_close(NULL, td->x.y.td_open_dir);
559
td->x.y.td_open_dir = NULL;
561
case XT_TD_FROM_TAB_FILE:
562
if (td->x.z.td_table_info) {
563
xt_free(self, td->x.z.td_table_info);
564
td->x.z.td_table_info = NULL;
566
td->x.z.td_curr_ptr = NULL;
569
td->td_tab_path = NULL;
572
xtPublic void xt_tab_init_db(XTThreadPtr self, XTDatabaseHPtr db)
575
XTTableEntryRec te_tab;
576
XTTableEntryPtr te_ptr;
577
XTTablePathPtr db_path;
583
pushr_(xt_tab_exit_db, db);
584
if (pbxt_ignore_case)
585
db->db_tables = xt_new_hashtable(self, tab_list_comp_ci, tab_list_hash_ci, tab_list_free, TRUE, TRUE);
587
db->db_tables = xt_new_hashtable(self, tab_list_comp, tab_list_hash, tab_list_free, TRUE, TRUE);
588
db->db_table_by_id = xt_new_sortedlist(self, sizeof(XTTableEntryRec), 20, 20, tab_comp_by_id, db, tab_free_by_id, FALSE, FALSE);
589
db->db_table_paths = xt_new_sortedlist(self, sizeof(XTTablePathPtr), 20, 20, tab_comp_path, db, tab_free_path, FALSE, FALSE);
591
if (db->db_multi_path) {
593
char *buffer, *ptr, *path;
595
xt_strcpy(PATH_MAX, pbuf, db->db_main_path);
596
xt_add_tables_file(PATH_MAX, pbuf);
597
if (!xt_fs_exists(pbuf)) {
598
/* Load the location file, if a tables file does not
601
xt_strcpy(PATH_MAX, pbuf, db->db_main_path);
602
xt_add_location_file(PATH_MAX, pbuf);
603
if (xt_fs_exists(pbuf)) {
604
of = xt_open_file(self, pbuf, XT_FT_STANDARD, XT_FS_DEFAULT, 1024);
605
pushr_(xt_close_file, of);
606
len = (int) xt_seek_eof_file(self, of);
607
buffer = (char *) xt_malloc(self, len + 1);
608
pushr_(xt_free, buffer);
609
if (!xt_pread_file(of, 0, len, len, buffer, NULL, &self->st_statistics.st_x, self))
614
/* Ignore preceding space: */
615
while (*ptr && isspace(*ptr))
618
while (*ptr && *ptr != '\n' && *ptr != '\r') {
620
/* Undo the conversion below: */
626
if (*path != '#' && ptr > path) {
627
len = (int) (ptr - path);
628
db_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
629
db_path->tp_tab_count = 0;
630
memcpy(db_path->tp_path, path, len);
631
db_path->tp_path[len] = 0;
632
xt_sl_insert(self, db->db_table_paths, db_path->tp_path, &db_path);
636
freer_(); // xt_free(buffer)
637
freer_(); // xt_close_file(of)
642
len = (int) strlen(db->db_main_path);
643
db_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
644
db_path->tp_tab_count = 0;
645
strcpy(db_path->tp_path, db->db_main_path);
646
xt_sl_insert(self, db->db_table_paths, db_path->tp_path, &db_path);
649
xt_describe_tables_init(self, db, &desc);
650
pushr_(xt_describe_tables_exit, &desc);
651
while (xt_describe_tables_next(self, &desc)) {
652
te_tab.te_tab_id = desc.td_tab_id;
653
te_tab.te_heap_tab = desc.td_heap_tab;
655
if (te_tab.te_tab_id > db->db_curr_tab_id)
656
db->db_curr_tab_id = te_tab.te_tab_id;
658
te_tab.te_tab_name = xt_dup_string(self, desc.td_tab_name);
659
te_tab.te_tab_path = desc.td_tab_path;
660
desc.td_tab_path->tp_tab_count++;
661
te_tab.te_table = NULL;
662
te_tab.te_type = desc.td_tab_type;
663
xt_sl_insert(self, db->db_table_by_id, &desc.td_tab_id, &te_tab);
665
freer_(); // xt_describe_tables_exit(&desc)
668
* The purpose of this code is to ensure that all tables are opened and cached,
669
* which is actually only required if tables have foreign key references.
671
* In other words, a side effect of this code is that FK references between tables
672
* are registered, and checked.
674
* Unfortunately we don't know if a table is referenced by a FK, so we have to open
677
* Cannot open tables in the loop above because db->db_table_by_id which is built
678
* above is used by xt_use_table_no_lock()
681
* NOTE: The code also led to the statistics failing to work because
682
* the tables were already open when the handler was opened.
683
* Previously we only calculated statistics when a handler was opened
684
* and the underlying table was also opened.
686
xt_enum_tables_init(&edx);
687
while ((te_ptr = xt_enum_tables_next(self, db, &edx))) {
688
xt_strcpy(PATH_MAX, pbuf, te_ptr->te_tab_path->tp_path);
689
xt_add_dir_char(PATH_MAX, pbuf);
690
xt_strcat(PATH_MAX, pbuf, te_ptr->te_tab_name);
692
xt_heap_release(self, xt_use_table_no_lock(self, db, (XTPathStrPtr)pbuf, FALSE, FALSE, NULL));
695
xt_log_and_clear_warning(self);
699
popr_(); // Discard xt_tab_exit_db(db)
703
static void tab_save_tables(XTThreadPtr self, XTDatabaseHPtr db)
705
XTTableEntryPtr te_ptr;
706
XTStringBufferRec buffer;
710
memset(&buffer, 0, sizeof(buffer));
712
xt_strcpy(PATH_MAX, path, db->db_main_path);
713
xt_add_tables_file(PATH_MAX, path);
715
if (xt_sl_get_size(db->db_table_by_id)) {
716
pushr_(xt_sb_free, &buffer);
717
for (u_int i=0; i<xt_sl_get_size(db->db_table_by_id); i++) {
718
te_ptr = (XTTableEntryPtr) xt_sl_item_at(db->db_table_by_id, i);
719
xt_sb_concat(self, &buffer, "[table]\n");
720
xt_sb_concat(self, &buffer, "id=");
721
xt_sb_concat_int8(self, &buffer, (xtInt8) te_ptr->te_tab_id);
722
xt_sb_concat(self, &buffer, "\n");
723
xt_sb_concat(self, &buffer, "name=");
724
xt_sb_concat(self, &buffer, te_ptr->te_tab_name);
725
xt_sb_concat(self, &buffer, "\n");
726
xt_sb_concat(self, &buffer, "location=");
727
xt_sb_concat(self, &buffer, te_ptr->te_tab_path->tp_path);
728
xt_sb_concat(self, &buffer, "\n");
729
xt_sb_concat(self, &buffer, "storage=");
730
if (te_ptr->te_heap_tab)
731
xt_sb_concat(self, &buffer, "heap\n");
733
xt_sb_concat(self, &buffer, "disk\n");
734
xt_sb_concat(self, &buffer, "type=");
735
xt_sb_concat_int8(self, &buffer, (xtInt8) te_ptr->te_type);
736
xt_sb_concat(self, &buffer, "\n");
740
/* To make the location file cross-platform (at least
741
* as long as relative paths are used) we replace all '\'
745
ptr = buffer.sb_cstring;
752
of = xt_open_file(self, path, XT_FT_STANDARD, XT_FS_CREATE | XT_FS_MAKE_PATH, 1024);
753
pushr_(xt_close_file, of);
754
if (!xt_pwrite_file(of, 0, strlen(buffer.sb_cstring), buffer.sb_cstring, &self->st_statistics.st_x, self))
756
xt_set_eof_file(self, of, strlen(buffer.sb_cstring));
757
freer_(); // xt_close_file(of)
759
freer_(); // xt_sb_free(&buffer);
762
xt_fs_delete(NULL, path);
765
static void tab_save_table_paths(XTThreadPtr self, XTDatabaseHPtr db)
767
XTTablePathPtr *tp_ptr;
768
XTStringBufferRec buffer;
772
memset(&buffer, 0, sizeof(buffer));
774
xt_strcpy(PATH_MAX, path, db->db_main_path);
775
xt_add_location_file(PATH_MAX, path);
777
if (xt_sl_get_size(db->db_table_paths)) {
778
pushr_(xt_sb_free, &buffer);
779
for (u_int i=0; i<xt_sl_get_size(db->db_table_paths); i++) {
780
tp_ptr = (XTTablePathPtr *) xt_sl_item_at(db->db_table_paths, i);
781
xt_sb_concat(self, &buffer, (*tp_ptr)->tp_path);
782
xt_sb_concat(self, &buffer, "\n");
786
/* To make the location file cross-platform (at least
787
* as long as relative paths are used) we replace all '\'
791
ptr = buffer.sb_cstring;
799
of = xt_open_file(self, path, XT_FT_STANDARD, XT_FS_CREATE | XT_FS_MAKE_PATH, 1024);
800
pushr_(xt_close_file, of);
801
if (!xt_pwrite_file(of, 0, strlen(buffer.sb_cstring), buffer.sb_cstring, &self->st_statistics.st_x, self))
803
xt_set_eof_file(self, of, strlen(buffer.sb_cstring));
804
freer_(); // xt_close_file(of)
806
freer_(); // xt_sb_free(&buffer);
809
xt_fs_delete(NULL, path);
812
static XTTablePathPtr tab_get_table_path(XTThreadPtr self, XTDatabaseHPtr db, XTPathStrPtr tab_name, xtBool save_it)
814
XTTablePathPtr *tp, tab_path;
817
xt_strcpy(PATH_MAX, path, tab_name->ps_path);
818
xt_remove_last_name_of_path(path);
819
xt_remove_dir_char(path);
820
tp = (XTTablePathPtr *) xt_sl_find(self, db->db_table_paths, path);
824
int len = (int) strlen(path);
826
tab_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
827
tab_path->tp_tab_count = 0;
828
memcpy(tab_path->tp_path, path, len);
829
tab_path->tp_path[len] = 0;
830
xt_sl_insert(self, db->db_table_paths, tab_path->tp_path, &tab_path);
832
tab_save_table_paths(self, db);
833
if (xt_sl_get_size(db->db_table_paths) == 1) {
834
XTSystemTableShare::createSystemTables(self, db);
838
tab_path->tp_tab_count++;
842
static void tab_remove_table_path(XTThreadPtr self, XTDatabaseHPtr db, XTTablePathPtr tab_path)
{
	/* Nothing to do when no table is counted on this path. */
	if (!(tab_path->tp_tab_count > 0))
		return;

	/* One table fewer on this path. */
	tab_path->tp_tab_count--;
	if (tab_path->tp_tab_count != 0)
		return;

	/* That was the last table: remove the path from the database's
	 * path list and write the path list out again.
	 */
	xt_sl_delete(self, db->db_table_paths, tab_path->tp_path);
	tab_save_table_paths(self, db);
}
853
static void tab_free_table_path(XTThreadPtr self, XTTablePathPtr tab_path)
{
	/* Same as tab_remove_table_path(), using the database of the
	 * calling thread.
	 */
	tab_remove_table_path(self, self->st_database, tab_path);
}
860
xtPublic void xt_tab_exit_db(XTThreadPtr self, XTDatabaseHPtr db)
863
xt_free_hashtable(self, db->db_tables);
864
db->db_tables = NULL;
866
if (db->db_table_by_id) {
867
xt_free_sortedlist(self, db->db_table_by_id);
868
db->db_table_by_id = NULL;
870
if (db->db_table_paths) {
871
xt_free_sortedlist(self, db->db_table_paths);
872
db->db_table_paths = NULL;
877
xtPublic xtBool xt_table_exists(XTDatabaseHPtr db)
{
	/* True when the database's by-ID table list has at least one entry. */
	return xt_sl_get_size(db->db_table_by_id) > 0;
}
883
* Enumerate all tables in the current database.
886
xtPublic void xt_enum_tables_init(u_int *edx)
891
xtPublic XTTableEntryPtr xt_enum_tables_next(XTThreadPtr XT_UNUSED(self), XTDatabaseHPtr db, u_int *edx)
893
XTTableEntryPtr en_ptr;
895
if (*edx >= xt_sl_get_size(db->db_table_by_id))
897
en_ptr = (XTTableEntryPtr) xt_sl_item_at(db->db_table_by_id, *edx);
902
xtPublic void xt_enum_files_of_tables_init(XTPathStrPtr tab_name, xtTableID tab_id, XTFilesOfTablePtr ft)
905
ft->ft_tab_name = tab_name;
906
ft->ft_tab_id = tab_id;
909
xtPublic xtBool xt_enum_files_of_tables_next(XTFilesOfTablePtr ft)
911
char file_name[XT_MAX_TABLE_FILE_NAME_SIZE];
914
switch (ft->ft_state) {
916
tab_get_row_file_name(file_name, xt_last_name_of_path(ft->ft_tab_name->ps_path), ft->ft_tab_id);
919
tab_get_data_file_name(file_name, xt_last_name_of_path(ft->ft_tab_name->ps_path), ft->ft_tab_id);
922
tab_get_index_file_name(file_name, xt_last_name_of_path(ft->ft_tab_name->ps_path), ft->ft_tab_id);
929
xt_strcpy(PATH_MAX, ft->ft_file_path, ft->ft_tab_name->ps_path);
930
xt_remove_last_name_of_path(ft->ft_file_path);
931
xt_strcat(PATH_MAX, ft->ft_file_path, file_name);
932
if (!xt_fs_exists(ft->ft_file_path))
938
static xtBool tab_find_table(XTThreadPtr self, XTDatabaseHPtr db, XTPathStrPtr name, xtTableID *tab_id)
941
XTTableEntryPtr te_ptr;
944
xt_enum_tables_init(&edx);
945
while ((te_ptr = xt_enum_tables_next(self, db, &edx))) {
946
xt_strcpy(PATH_MAX, path, te_ptr->te_tab_path->tp_path);
947
xt_add_dir_char(PATH_MAX, path);
948
xt_strcat(PATH_MAX, path, te_ptr->te_tab_name);
949
if (xt_tab_compare_names(path, name->ps_path) == 0) {
950
*tab_id = te_ptr->te_tab_id;
957
xtPublic void xt_tab_disable_index(XTTableHPtr tab, u_int ind_error)
{
	/* Store the reason the index is disabled in the table dictionary ... */
	tab->tab_dic.dic_disable_index = ind_error;

	/* ... and flag the table as needing repair. */
	xt_tab_set_table_repair_pending(tab);
}
963
xtPublic void xt_tab_set_index_error(XTTableHPtr tab)
965
switch (tab->tab_dic.dic_disable_index) {
968
case XT_INDEX_TOO_OLD:
969
xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_OLD_VERSION, tab->tab_name);
971
case XT_INDEX_TOO_NEW:
972
xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_NEW_VERSION, tab->tab_name);
974
case XT_INDEX_BAD_BLOCK:
977
sprintf(number, "%d", (int) tab->tab_index_page_size);
978
xt_register_i2xterr(XT_REG_CONTEXT, XT_ERR_BAD_IND_BLOCK_SIZE, xt_last_name_of_path(tab->tab_name->ps_path), number);
980
case XT_INDEX_CORRUPTED:
981
xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, tab->tab_name);
983
case XT_INDEX_MISSING:
984
xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_MISSING, tab->tab_name);
986
case XT_INDEX_NOT_RECOVERED:
987
xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_NOT_RECOVERED, tab->tab_name);
992
static void tab_load_index_header(XTThreadPtr self, XTTableHPtr tab, XTOpenFilePtr file, XTPathStrPtr table_name)
997
XTIndexFormatDPtr index_fmt;
999
/* Load the pointers: */
1000
if (tab->tab_index_head)
1001
xt_free_ns(tab->tab_index_head);
1002
tab->tab_index_head = (XTIndexHeadDPtr) xt_calloc(self, XT_INDEX_HEAD_SIZE);
1005
if (!xt_pread_file(file, 0, XT_INDEX_HEAD_SIZE, 0, tab->tab_index_head, NULL, &self->st_statistics.st_ind, self))
1008
tab->tab_index_format_offset = XT_GET_DISK_4(tab->tab_index_head->tp_format_offset_4);
1009
index_fmt = (XTIndexFormatDPtr) (((xtWord1 *) tab->tab_index_head) + tab->tab_index_format_offset);
1011
/* If the table version is less than or equal to an incompatible (unsupported
1012
* version), or greater than the current version, then we cannot open this table
1014
if (XT_GET_DISK_2(index_fmt->if_tab_version_2) <= XT_TAB_INCOMPATIBLE_VERSION ||
1015
XT_GET_DISK_2(index_fmt->if_tab_version_2) > XT_TAB_CURRENT_VERSION) {
1016
switch (XT_GET_DISK_2(index_fmt->if_tab_version_2)) {
1018
xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.91 Beta");
1021
xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.85 Beta");
1024
xt_throw_taberr(XT_CONTEXT, XT_ERR_BAD_TABLE_VERSION, table_name);
1030
tab->tab_dic.dic_index_ver = XT_GET_DISK_2(index_fmt->if_ind_version_2);
1031
tab->tab_dic.dic_disable_index = XT_INDEX_OK;
1033
if (tab->tab_dic.dic_index_ver == 1) {
1034
tab->tab_index_header_size = 1024 * 16;
1035
tab->tab_index_page_size = 1024 * 16;
1038
tab->tab_index_header_size = XT_GET_DISK_4(tab->tab_index_head->tp_header_size_4);
1039
tab->tab_index_page_size = XT_GET_DISK_4(index_fmt->if_page_size_4);
1042
#ifdef XT_USE_LAZY_DELETE
1043
if (tab->tab_dic.dic_index_ver <= XT_IND_NO_LAZY_DELETE)
1044
tab->tab_dic.dic_no_lazy_delete = TRUE;
1046
tab->tab_dic.dic_no_lazy_delete = FALSE;
1048
tab->tab_dic.dic_no_lazy_delete = TRUE;
1051
/* Incorrect version of index is handled by allowing a sequential scan, but no index access.
1052
* Recovery with the wrong index type will not recover the indexes, a REPAIR TABLE
1055
if (tab->tab_dic.dic_index_ver != XT_IND_CURRENT_VERSION) {
1056
switch (tab->tab_dic.dic_index_ver) {
1057
case XT_IND_NO_LAZY_DELETE:
1058
case XT_IND_LAZY_DELETE_OK:
1059
/* I can handle this type of index. */
1062
if (tab->tab_dic.dic_index_ver < XT_IND_CURRENT_VERSION)
1063
xt_tab_disable_index(tab, XT_INDEX_TOO_OLD);
1065
xt_tab_disable_index(tab, XT_INDEX_TOO_NEW);
1069
else if (tab->tab_index_page_size != XT_INDEX_PAGE_SIZE)
1070
xt_tab_disable_index(tab, XT_INDEX_BAD_BLOCK);
1073
memset(tab->tab_index_head, 0, XT_INDEX_HEAD_SIZE);
1074
xt_tab_disable_index(tab, XT_INDEX_MISSING);
1075
tab->tab_index_header_size = XT_INDEX_HEAD_SIZE;
1076
tab->tab_index_page_size = XT_INDEX_PAGE_SIZE;
1077
tab->tab_dic.dic_index_ver = 0;
1078
tab->tab_index_format_offset = 0;
1082
if (tab->tab_dic.dic_disable_index) {
1083
xt_tab_set_index_error(tab);
1084
xt_log_and_clear_exception_ns();
1087
if (tab->tab_dic.dic_disable_index) {
1088
/* Reset, as if we have empty indexes.
1089
* Flush will wipe things out, of course.
1090
* REPAIR TABLE will be required...
1092
XT_NODE_ID(tab->tab_ind_eof) = 1;
1093
XT_NODE_ID(tab->tab_ind_free) = 0;
1095
ind = tab->tab_dic.dic_keys;
1096
for (u_int i=0; i<tab->tab_dic.dic_key_count; i++, ind++)
1097
XT_NODE_ID((*ind)->mi_root) = 0;
1100
XT_NODE_ID(tab->tab_ind_eof) = (xtIndexNodeID) XT_GET_DISK_6(tab->tab_index_head->tp_ind_eof_6);
1101
XT_NODE_ID(tab->tab_ind_free) = (xtIndexNodeID) XT_GET_DISK_6(tab->tab_index_head->tp_ind_free_6);
1103
data = tab->tab_index_head->tp_data;
1104
ind = tab->tab_dic.dic_keys;
1105
for (u_int i=0; i<tab->tab_dic.dic_key_count; i++, ind++) {
1106
(*ind)->mi_root = XT_GET_NODE_REF(tab, data);
1107
data += XT_NODE_REF_SIZE;
1112
static void tab_load_table_format(XTThreadPtr self, XTOpenFilePtr file, XTPathStrPtr table_name, size_t *ret_format_offset, size_t *ret_head_size, XTDictionaryPtr dic)
1114
XTDiskValue4 size_buf;
1116
XTTableFormatDRec tab_fmt;
1119
if (!xt_pread_file(file, 0, 4, 4, &size_buf, NULL, &self->st_statistics.st_rec, self))
1122
head_size = XT_GET_DISK_4(size_buf);
1123
*ret_format_offset = head_size;
1125
/* Load the table format information: */
1126
if (!xt_pread_file(file, head_size, offsetof(XTTableFormatDRec, tf_definition), offsetof(XTTableFormatDRec, tf_tab_version_2) + 2, &tab_fmt, NULL, &self->st_statistics.st_rec, self))
1129
/* If the table version is less than or equal to an incompatible (unsupported
1130
* version), or greater than the current version, then we cannot open this table
1132
if (XT_GET_DISK_2(tab_fmt.tf_tab_version_2) <= XT_TAB_INCOMPATIBLE_VERSION ||
1133
XT_GET_DISK_2(tab_fmt.tf_tab_version_2) > XT_TAB_CURRENT_VERSION) {
1134
switch (XT_GET_DISK_2(tab_fmt.tf_tab_version_2)) {
1136
xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.91 Beta");
1139
xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.85 Beta");
1142
xt_throw_taberr(XT_CONTEXT, XT_ERR_BAD_TABLE_VERSION, table_name);
1148
fmt_size = XT_GET_DISK_4(tab_fmt.tf_format_size_4);
1149
*ret_head_size = XT_GET_DISK_4(tab_fmt.tf_tab_head_size_4);
1150
dic->dic_rec_size = XT_GET_DISK_4(tab_fmt.tf_rec_size_4);
1151
dic->dic_rec_fixed = XT_GET_DISK_1(tab_fmt.tf_rec_fixed_1);
1152
dic->dic_min_auto_inc = XT_GET_DISK_8(tab_fmt.tf_min_auto_inc_8);
1153
if (fmt_size > offsetof(XTTableFormatDRec, tf_definition)) {
1154
size_t def_size = fmt_size - offsetof(XTTableFormatDRec, tf_definition);
1157
pushsr_(def_sql, xt_free, (char *) xt_malloc(self, def_size));
1158
if (!xt_pread_file(file, head_size+offsetof(XTTableFormatDRec, tf_definition), def_size, def_size, def_sql, NULL, &self->st_statistics.st_rec, self))
1160
dic->dic_table = xt_ri_create_table(self, false, table_name, def_sql, myxt_create_table_from_table(self, dic->dic_my_table), NULL);
1161
freer_(); // xt_free(def_sql)
1164
dic->dic_table = myxt_create_table_from_table(self, dic->dic_my_table);
1167
static void tab_load_table_header(XTThreadPtr self, XTTableHPtr tab, XTOpenFilePtr file)
1169
XTTableHeadDRec rec_head;
1171
if (!xt_pread_file(file, 0, sizeof(XTTableHeadDRec), sizeof(XTTableHeadDRec), (xtWord1 *) &rec_head, NULL, &self->st_statistics.st_rec, self))
1174
tab->tab_head_op_seq = XT_GET_DISK_4(rec_head.th_op_seq_4);
1175
tab->tab_head_row_free_id = (xtRowID) XT_GET_DISK_6(rec_head.th_row_free_6);
1176
tab->tab_head_row_eof_id = (xtRowID) XT_GET_DISK_6(rec_head.th_row_eof_6);
1177
tab->tab_head_row_fnum = (xtWord4) XT_GET_DISK_6(rec_head.th_row_fnum_6);
1178
tab->tab_head_rec_free_id = (xtRecordID) XT_GET_DISK_6(rec_head.th_rec_free_6);
1179
tab->tab_head_rec_eof_id = (xtRecordID) XT_GET_DISK_6(rec_head.th_rec_eof_6);
1180
tab->tab_head_rec_fnum = (xtWord4) XT_GET_DISK_6(rec_head.th_rec_fnum_6);
1181
tab->tab_wr_op_seq = tab->tab_head_op_seq;
1184
xtPublic void xt_tab_store_header(XTOpenTablePtr ot, XTTableHeadDPtr rec_head)
{
	XTTableHPtr table = ot->ot_table;

	/* Copy the in-memory header values of the table into the
	 * disk-format header record.
	 */

	/* Operation sequence number. */
	XT_SET_DISK_4(rec_head->th_op_seq_4, table->tab_head_op_seq);

	/* Row file: free ID, EOF ID and fnum. */
	XT_SET_DISK_6(rec_head->th_row_free_6, table->tab_head_row_free_id);
	XT_SET_DISK_6(rec_head->th_row_eof_6, table->tab_head_row_eof_id);
	XT_SET_DISK_6(rec_head->th_row_fnum_6, table->tab_head_row_fnum);

	/* Record file: free ID, EOF ID and fnum. */
	XT_SET_DISK_6(rec_head->th_rec_free_6, table->tab_head_rec_free_id);
	XT_SET_DISK_6(rec_head->th_rec_eof_6, table->tab_head_rec_eof_id);
	XT_SET_DISK_6(rec_head->th_rec_fnum_6, table->tab_head_rec_fnum);
}
1197
// Write the header fields held in `rec_head` to the table's record file and
// flush that file.
static xtBool tab_write_header(XTOpenTablePtr ot, XTTableHeadDPtr rec_head)
1199
// NOTE(review): the literal 40 presumably equals the 4-byte op sequence plus the
// six 6-byte fields filled by xt_tab_store_header() (4 + 6*6) - confirm against
// the layout of XTTableHeadDRec.
if (!xt_tab_write_rec(ot, offsetof(XTTableHeadDRec, th_op_seq_4), 40, (xtWord1 *) rec_head->th_op_seq_4))
1201
if (!XT_FLUSH_RR_FILE(ot->ot_rec_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread))
1206
// Persist the table's minimum auto-increment value: dic_min_auto_inc is encoded
// as 8 disk bytes and written at the tf_min_auto_inc_8 field of the table format
// record, after which the record file is flushed.
xtPublic xtBool xt_tab_write_min_auto_inc(XTOpenTablePtr ot)
1211
XT_SET_DISK_8(value, ot->ot_table->tab_dic.dic_min_auto_inc);
1212
// The format record starts at tab_table_format_offset within the record file.
offset = ot->ot_table->tab_table_format_offset + offsetof(XTTableFormatDRec, tf_min_auto_inc_8);
1213
if (!xt_tab_write_rec(ot, offset, 8, value))
1215
if (!XT_FLUSH_RR_FILE(ot->ot_rec_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread))
1220
/* A helper function to remove a table from the open tables hash on exception;
1221
* used in tab_new_handle() below
1223
#ifdef NO_LONGER_REQ
1224
// Compiled only when NO_LONGER_REQ is defined. Removes `tab` from the database's
// name hash (db_tables) and clears the back-reference held by its entry in the
// ID list (db_table_by_id).
static void xt_del_from_db_tables_ht(XTThreadPtr self, XTTableHPtr tab)
1226
XTTableEntryPtr te_ptr;
1227
XTDatabaseHPtr db = tab->tab_db;
1228
xtTableID tab_id = tab->tab_id;
1230
/* Oops! should use tab->tab_name, instead of tab! */
1231
xt_ht_del(self, db->db_tables, tab->tab_name);
1233
/* Remove the reference from the ID list, when a table is
1234
* removed from the table name list:
1236
if ((te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id)))
1237
te_ptr->te_table = NULL;
1241
// Select the file type used when opening a table's record (data) file.
// If the configured type XT_REC_FILE_TYPE is XT_FT_STANDARD and the
// xt_db_rewrite_flushing option is set, XT_FT_REWRITE_FLUSH is used instead.
xtPublic XTFileType xt_rec_file_type(xtBool heap_tab)
1245
if (XT_REC_FILE_TYPE == XT_FT_STANDARD && xt_db_rewrite_flushing)
1246
return XT_FT_REWRITE_FLUSH;
1247
return XT_REC_FILE_TYPE;
1250
// Select the file type used when opening a table's row file.
// If the configured type XT_ROW_FILE_TYPE is XT_FT_STANDARD and the
// xt_db_rewrite_flushing option is set, XT_FT_REWRITE_FLUSH is used instead.
// The test is made against the row file's own type, in the same way that
// xt_rec_file_type() and xt_ind_file_type() each test their own macro.
xtPublic XTFileType xt_row_file_type(xtBool heap_tab)
1254
if (XT_ROW_FILE_TYPE == XT_FT_STANDARD && xt_db_rewrite_flushing)
1255
return XT_FT_REWRITE_FLUSH;
1256
return XT_ROW_FILE_TYPE;
1259
// Select the file type used when opening a table's index file.
// If the configured type XT_IND_FILE_TYPE is XT_FT_STANDARD and the
// xt_db_rewrite_flushing option is set, XT_FT_REWRITE_FLUSH is used instead.
xtPublic XTFileType xt_ind_file_type(xtBool heap_tab)
1263
if (XT_IND_FILE_TYPE == XT_FT_STANDARD && xt_db_rewrite_flushing)
1264
return XT_FT_REWRITE_FLUSH;
1265
return XT_IND_FILE_TYPE;
1268
#ifdef XT_SORT_REC_WRITES
1269
// Comparison callback handed to xt_new_sortedlist() for tab_rec_dw_writes.
// `a` points to the search key (an xtRecordID) and `b` to a list item
// (XTDelayWriteRec); the key is compared with the item's dw_rec_id.
static int tab_cmp_dw_rec_id(struct XTThread *XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
1271
xtRecordID rec_id = *((xtRecordID *) a);
1272
XTDelayWritePtr dw_ptr = (XTDelayWritePtr) b;
1274
if (rec_id == dw_ptr->dw_rec_id)
1276
if (rec_id < dw_ptr->dw_rec_id)
1283
* Create a new table handle (i.e. open a table).
1284
* Return NULL if the table is missing, and it is OK for the table
1287
// Builds the in-memory table structure for `tab_id` at `tab_path`: sets up the
// dictionary, locks and flush tasks, resolves the row/record/index file names,
// loads the on-disk headers, then registers the table in db->db_tables and in
// its db_table_by_id entry. Status codes returned in this body include
// XT_TAB_NO_DICTIONARY and XT_TAB_NOT_FOUND.
static int tab_new_handle(XTThreadPtr self, XTTableHPtr *r_tab, XTDatabaseHPtr db, xtTableID tab_id, XTPathStrPtr tab_path, xtBool missing_ok, XTDictionaryPtr dic)
1289
char path[PATH_MAX];
1291
char file_name[XT_MAX_TABLE_FILE_NAME_SIZE];
1292
XTOpenFilePtr of_rec, of_ind;
1293
XTTableEntryPtr te_ptr;
1294
size_t tab_format_offset;
1295
size_t tab_head_size= 0;
1299
// The ID-list entry supplies the table type and the heap-table flag used below.
te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id);
1302
tab = (XTTableHPtr) xt_heap_new(self, sizeof(XTTableHRec), tab_finalize);
1303
// Released automatically on error until the popr_() further down.
pushr_(xt_heap_release, tab);
1305
tab->tab_name = (XTPathStrPtr) xt_dup_string(self, tab_path->ps_path);
1307
tab->tab_id = tab_id;
1308
tab->tab_dic.dic_table_type = te_ptr->te_type;
1309
#ifdef TRACE_TABLE_IDS
1310
PRINTF("%s: allocated TABLE: db=%d tab=%d %s\n", self->t_name, (int) db->db_id, (int) tab->tab_id, xt_last_2_names_of_path(tab->tab_name->ps_path));
1314
// Two dictionary sources appear here: the caller's `dic` is moved into the
// table, or the dictionary is loaded from disk (failure -> XT_TAB_NO_DICTIONARY).
myxt_move_dictionary(&tab->tab_dic, dic);
1315
myxt_setup_dictionary(self, &tab->tab_dic);
1318
if (!myxt_load_dictionary(self, &tab->tab_dic, db, tab_path)) {
1319
freer_(); // xt_heap_release(tab)
1320
return_(XT_TAB_NO_DICTIONARY);
1324
/* Do not use the XT_TF_DDL_TEMP_TABLE bit from the given dic
1325
* This bit depends only on the
1326
* name of the table, and must be set explicitly.
1328
if (myxt_temp_table_name(tab_path->ps_path))
1329
tab->tab_dic.dic_tab_flags |= XT_TF_DDL_TEMP_TABLE;
1331
tab->tab_dic.dic_tab_flags &= ~XT_TF_DDL_TEMP_TABLE;
1333
tab->tab_seq.xt_op_seq_init(self);
1334
xt_spinlock_init_with_autoname(self, &tab->tab_ainc_lock);
1335
xt_init_mutex_with_autoname(self, &tab->tab_rec_flush_lock);
1336
xt_init_mutex_with_autoname(self, &tab->tab_ind_flush_lock);
1337
xt_init_mutex_with_autoname(self, &tab->tab_ind_stat_lock);
1338
xt_init_mutex_with_autoname(self, &tab->tab_dic_field_lock);
1339
xt_init_mutex_with_autoname(self, &tab->tab_row_lock);
1340
xt_init_mutex_with_autoname(self, &tab->tab_ind_lock);
1341
xt_init_mutex_with_autoname(self, &tab->tab_rec_lock);
1342
xt_spinlock_init_with_autoname(self, &tab->tab_mem_lock);
1343
// One flush task per table for record/row data and one for the index.
if (!(tab->tab_rec_flush_task = new XTFlushRecRowTask()))
1344
xt_throw_errno(XT_CONTEXT, XT_ENOMEM);
1345
tab->tab_rec_flush_task->tk_init(self);
1346
tab->tab_rec_flush_task->frt_table = tab;
1347
if (!(tab->tab_ind_flush_task = new XTFlushIndexTask()))
1348
xt_throw_errno(XT_CONTEXT, XT_ENOMEM);
1349
tab->tab_ind_flush_task->tk_init(self);
1350
tab->tab_ind_flush_task->fit_table = tab;
1351
for (u_int i=0; i<XT_ROW_RWLOCKS; i++)
1352
XT_TAB_ROW_INIT_LOCK(self, &tab->tab_row_rwlock[i]);
1353
tab->tab_free_locks = TRUE;
1355
// Build the three file paths: same directory as the table path, with the
// row, data and index file names derived from the table name and ID.
xt_strcpy(PATH_MAX, path, tab_path->ps_path);
1356
xt_remove_last_name_of_path(path);
1357
tab_get_row_file_name(file_name, xt_last_name_of_path(tab_path->ps_path), tab_id);
1358
xt_strcat(PATH_MAX, path, file_name);
1359
tab->tab_row_file = xt_fs_get_file(self, path, xt_row_file_type(te_ptr->te_heap_tab));
1361
xt_remove_last_name_of_path(path);
1362
tab_get_data_file_name(file_name, xt_last_name_of_path(tab_path->ps_path), tab_id);
1363
xt_strcat(PATH_MAX, path, file_name);
1364
tab->tab_rec_file = xt_fs_get_file(self, path, xt_rec_file_type(te_ptr->te_heap_tab));
1366
xt_remove_last_name_of_path(path);
1367
tab_get_index_file_name(file_name, xt_last_name_of_path(tab_path->ps_path), tab_id);
1368
xt_strcat(PATH_MAX, path, file_name);
1369
tab->tab_ind_file = xt_fs_get_file(self, path, xt_ind_file_type(te_ptr->te_heap_tab));
1371
// Heap (memory) tables: the files are opened with XT_FS_CREATE and are
// initialised here whenever they are found empty.
if (te_ptr->te_heap_tab) {
1372
XTOpenFilePtr of_row;
1374
tab->tab_dic.dic_tab_flags |= XT_TF_MEMORY_TABLE;
1375
of_row = xt_open_file(self, tab->tab_row_file->fil_path, xt_row_file_type(TRUE), XT_FS_CREATE, xt_db_row_file_grow_size);
1376
pushr_(xt_close_file, of_row);
1377
if (xt_seek_eof_file(self, of_row) == 0)
1378
tab_init_row_file(self, of_row, tab, &tab->tab_dic);
1379
freer_(); // xt_close_file(of_row)
1381
// NOTE(review): unlike of_row above, of_ind is pushed for cleanup only after
// tab_init_ind_file(); if that call throws, the handle may stay open - confirm.
of_ind = xt_open_file(self, tab->tab_ind_file->fil_path, xt_ind_file_type(TRUE), XT_FS_CREATE, XT_INDEX_PAGE_SIZE*256);
1382
if (xt_seek_eof_file(self, of_ind) == 0)
1383
tab_init_ind_file(self, of_ind, tab, &tab->tab_dic);
1384
pushr_(xt_close_file, of_ind);
1385
tab_load_index_header(self, tab, of_ind, tab_path);
1386
freer_(); // xt_close_file(of_ind)
1388
of_rec = xt_open_file(self, tab->tab_rec_file->fil_path, xt_rec_file_type(te_ptr->te_heap_tab), XT_FS_CREATE, xt_db_data_file_grow_size);
1389
pushr_(xt_close_file, of_rec);
1390
if (xt_seek_eof_file(self, of_rec) == 0)
1391
tab_init_data_file(self, of_rec, tab, &tab->tab_dic, 0, NULL);
1394
#ifdef XT_SORT_REC_WRITES
1395
tab->tab_rec_dw_writes = xt_new_sortedlist(self, sizeof(XTDelayWriteRec), 20, 10, tab_cmp_dw_rec_id, NULL, NULL, TRUE, FALSE);
1397
// Disk tables: the index file may be missing (XT_FS_MISSING_OK); the record
// file may be missing only when the caller passed missing_ok.
of_ind = xt_open_file(self, tab->tab_ind_file->fil_path, xt_ind_file_type(FALSE), XT_FS_MISSING_OK, XT_INDEX_PAGE_SIZE*256);
1399
pushr_(xt_close_file, of_ind);
1400
tab_load_index_header(self, tab, of_ind, tab_path);
1401
freer_(); // xt_close_file(of_ind)
1404
tab_load_index_header(self, tab, of_ind, tab_path);
1406
of_rec = xt_open_file(self, tab->tab_rec_file->fil_path, xt_rec_file_type(FALSE), missing_ok ? XT_FS_MISSING_OK : XT_FS_DEFAULT, xt_db_data_file_grow_size);
1408
freer_(); // xt_heap_release(tab)
1409
return_(XT_TAB_NOT_FOUND);
1411
pushr_(xt_close_file, of_rec);
1414
// Load the format record and the header counters from the record file.
tab_load_table_format(self, of_rec, tab_path, &tab_format_offset, &tab_head_size, &tab->tab_dic);
1415
tab->tab_table_format_offset = tab_format_offset;
1416
tab->tab_table_head_size = tab_head_size;
1417
tab->tab_dic.dic_table->dt_table = tab;
1418
tab_load_table_header(self, tab, of_rec);
1419
freer_(); // xt_close_file(of_rec)
1421
// Working counters start from the values stored in the header; the operation
// sequence continues one past the last written operation.
tab->tab_seq.xt_op_seq_set(self, tab->tab_wr_op_seq+1);
1422
tab->tab_row_eof_id = tab->tab_head_row_eof_id;
1423
tab->tab_row_free_id = tab->tab_head_row_free_id;
1424
tab->tab_row_fnum = tab->tab_head_row_fnum;
1425
tab->tab_rec_eof_id = tab->tab_head_rec_eof_id;
1426
tab->tab_rec_free_id = tab->tab_head_rec_free_id;
1427
tab->tab_rec_fnum = tab->tab_head_rec_fnum;
1429
tab->tab_rows.xt_tc_setup(tab, FALSE, sizeof(XTTabRowHeadDRec), sizeof(XTTabRowRefDRec));
1430
tab->tab_recs.xt_tc_setup(tab, TRUE, tab_head_size, tab->tab_dic.dic_rec_size);
1432
xt_xres_init_tab(self, tab);
1434
if (!xt_init_row_locks(&tab->tab_locks))
1437
xt_heap_set_release_callback(tab, tab_onrelease);
1439
tab->tab_repair_pending = xt_tab_is_table_repair_pending(tab);
1441
// From here on the table is handed to db->db_tables, so the pending
// xt_heap_release(tab) is dropped from the cleanup stack without being run.
popr_(); // Discard xt_heap_release(tab)
1443
xt_ht_put(self, db->db_tables, tab);
1445
/* Add a reference to the ID list, when a table is
1446
* added to the table name list:
1448
te_ptr->te_table = tab;
1450
/* Moved from after xt_init_row_locks() above, so that calling
1451
* xt_use_table_no_lock() with no_load == FALSE from attachReferences()
1452
* will work if we have cyclic foreign key references.
1454
if (tab->tab_dic.dic_table) {
1456
tab->tab_dic.dic_table->attachReferences(self, db);
1458
/* ignore problems of referenced tables */
1459
xt_log_and_clear_warning(self);
1469
* Get a reference to a table in the current database. The table reference is valid,
1470
* as long as the thread is using the database!!!
1472
// Looks `name` up in db->db_tables. When it is absent and `no_load` is false,
// the table ID is resolved with tab_find_table() and the table is opened with
// tab_new_handle(); the status codes of that call are mapped to exceptions
// below. xt_heap_reference() adds a reference to the table.
xtPublic XTTableHPtr xt_use_table_no_lock(XTThreadPtr self, XTDatabaseHPtr db, XTPathStrPtr name, xtBool no_load, xtBool missing_ok, XTDictionaryPtr dic)
1477
xt_throw_xterr(XT_CONTEXT, XT_ERR_NO_DATABASE_IN_USE);
1479
tab = (XTTableHPtr) xt_ht_get(self, db->db_tables, name);
1480
if (!tab && !no_load) {
1481
xtTableID tab_id = 0;
1483
if (!tab_find_table(self, db, name, &tab_id)) {
1486
xt_throw_taberr(XT_CONTEXT, XT_ERR_TABLE_NOT_FOUND, name);
1489
switch (tab_new_handle(self, &tab, db, tab_id, name, missing_ok, dic)) {
1490
case XT_TAB_NO_DICTIONARY:
1491
xt_throw_taberr(XT_CONTEXT, XT_ERR_NO_DICTIONARY, name);
1492
case XT_TAB_POOL_CLOSED:
1493
xt_throw_ulxterr(XT_CONTEXT, XT_ERR_TABLE_LOCKED, (u_long) tab_id);
1494
case XT_TAB_NOT_FOUND:
1497
xt_throw_taberr(XT_CONTEXT, XT_ERR_TABLE_NOT_FOUND, name);
1504
xt_heap_reference(self, tab);
1509
// Release everything an open-table handle holds: reserved index pages, the
// record/index/row file handles, the reference to the shared table, the index
// read handle and the row read/write buffers. Each member is reset after it is
// released.
static void tab_close_table(XTOpenTablePtr ot)
1511
xt_ind_free_reserved(ot);
1513
if (ot->ot_rec_file) {
1514
XT_CLOSE_RR_FILE_NS(ot->ot_rec_file);
1515
ot->ot_rec_file = NULL;
1518
if (ot->ot_ind_file) {
1519
xt_close_file_ns(ot->ot_ind_file);
1520
ot->ot_ind_file = NULL;
1523
if (ot->ot_row_file) {
1524
XT_CLOSE_RR_FILE_NS(ot->ot_row_file);
1525
ot->ot_row_file = NULL;
1529
// No thread is passed in, so the calling thread is obtained with xt_get_self().
xt_heap_release(xt_get_self(), ot->ot_table);
1530
ot->ot_table = NULL;
1532
if (ot->ot_ind_rhandle) {
1533
xt_ind_release_handle(ot->ot_ind_rhandle, FALSE, ot->ot_thread);
1534
ot->ot_ind_rhandle = NULL;
1536
if (ot->ot_row_rbuffer) {
1537
xt_free_ns(ot->ot_row_rbuffer);
1538
ot->ot_row_rbuf_size = 0;
1539
ot->ot_row_rbuffer = NULL;
1541
if (ot->ot_row_wbuffer) {
1542
xt_free_ns(ot->ot_row_wbuffer);
1543
ot->ot_row_wbuf_size = 0;
1544
ot->ot_row_wbuffer = NULL;
1546
#ifdef XT_TRACK_RETURNED_ROWS
1547
if (ot->ot_rows_returned) {
1548
xt_free_ns(ot->ot_rows_returned);
1549
ot->ot_rows_returned = NULL;
1551
ot->ot_rows_ret_curr = 0;
1552
ot->ot_rows_ret_max = 0;
1557
// Delete every file that the enumeration reports for table `tab_name` /
// `tab_id`. A failed delete is logged and cleared, so the loop carries on with
// the remaining files.
static void tab_delete_table_files(XTThreadPtr self, XTPathStrPtr tab_name, xtTableID tab_id)
1559
XTFilesOfTableRec ft;
1561
xt_enum_files_of_tables_init(tab_name, tab_id, &ft);
1562
while (xt_enum_files_of_tables_next(&ft)) {
1563
// NOTE(review): xt_fs_delete() is given NULL rather than self, while the
// failure is then logged through self - confirm this pairing is intended.
if (!xt_fs_delete(NULL, ft.ft_file_path))
1564
xt_log_and_clear_exception(self);
1568
// Initialise a new row file: reset the in-memory row counters of `tab` (both
// the working copy and the header copy) and write a row header carrying
// XT_TAB_ROW_MAGIC at file offset 0. The dictionary parameter is unnamed and unused.
static void tab_init_row_file(XTThreadPtr self, XTOpenFilePtr of_row, XTTableHPtr tab, XTDictionaryPtr)
1570
XTTabRowHeadDRec row_head;
1572
// Row IDs start at 1; a free ID of 0 means the free list is empty.
tab->tab_row_eof_id = 1;
1573
tab->tab_row_free_id = 0;
1574
tab->tab_row_fnum = 0;
1576
tab->tab_head_row_eof_id = 1;
1577
tab->tab_head_row_free_id = 0;
1578
tab->tab_head_row_fnum = 0;
1580
XT_SET_DISK_4(row_head.rh_magic_4, XT_TAB_ROW_MAGIC);
1581
if (!xt_pwrite_file(of_row, 0, sizeof(row_head), &row_head, &self->st_statistics.st_rec, self))
1585
// Initialise a new record (data) file. Three regions are written in order:
// the table header (XTTableHeadDRec) at offset 0, the fixed part of the table
// format record directly after it, and the `def_len` bytes of table definition
// text after that. The matching in-memory fields of `tab` are set as well.
static void tab_init_data_file(XTThreadPtr self, XTOpenFilePtr of_rec, XTTableHPtr tab, XTDictionaryPtr dic, size_t def_len, XTStringBufferPtr tab_def)
1588
XTTableHeadDRec rec_head;
1589
XTTableFormatDRec table_fmt;
1591
/* Calculate the offset of the first record in the data handle file. */
1592
eof = sizeof(XTTableHeadDRec) + offsetof(XTTableFormatDRec, tf_definition) + def_len + XT_FORMAT_DEF_SPACE;
1593
eof = (eof + 1024 - 1) / 1024 * 1024; // Round to a value divisible by 1024
1595
tab->tab_table_format_offset = sizeof(XTTableHeadDRec);
1596
tab->tab_table_head_size = (size_t) eof;
1598
tab->tab_rec_eof_id = 1; // This is the first record ID!
1599
tab->tab_rec_free_id = 0;
1600
tab->tab_rec_fnum = 0;
1602
tab->tab_head_rec_eof_id = 1; // The first record ID
1603
tab->tab_head_rec_free_id = 0;
1604
tab->tab_head_rec_fnum = 0;
1606
// Copy the record layout settings from the supplied dictionary.
tab->tab_dic.dic_rec_size = dic->dic_rec_size;
1607
tab->tab_dic.dic_rec_fixed = dic->dic_rec_fixed;
1608
tab->tab_dic.dic_tab_flags = dic->dic_tab_flags;
1609
tab->tab_dic.dic_min_auto_inc = dic->dic_min_auto_inc;
1610
tab->tab_dic.dic_def_ave_row_size = dic->dic_def_ave_row_size;
1611
tab->tab_dic.dic_table_type = dic->dic_table_type;
1613
XT_SET_DISK_4(rec_head.th_head_size_4, sizeof(XTTableHeadDRec));
1614
XT_SET_DISK_4(rec_head.th_op_seq_4, tab->tab_head_op_seq);
1615
XT_SET_DISK_6(rec_head.th_row_free_6, tab->tab_head_row_free_id);
1616
XT_SET_DISK_6(rec_head.th_row_eof_6, tab->tab_head_row_eof_id);
1617
XT_SET_DISK_6(rec_head.th_row_fnum_6, tab->tab_head_row_fnum);
1618
XT_SET_DISK_6(rec_head.th_rec_free_6, tab->tab_head_rec_free_id);
1619
XT_SET_DISK_6(rec_head.th_rec_eof_6, tab->tab_head_rec_eof_id);
1620
XT_SET_DISK_6(rec_head.th_rec_fnum_6, tab->tab_head_rec_fnum);
1622
if (!xt_pwrite_file(of_rec, 0, sizeof(XTTableHeadDRec), &rec_head, &self->st_statistics.st_rec, self))
1625
/* Store the table format: */
1626
memset(&table_fmt, 0, offsetof(XTTableFormatDRec, tf_definition));
1627
XT_SET_DISK_4(table_fmt.tf_format_size_4, offsetof(XTTableFormatDRec, tf_definition) + def_len);
1628
XT_SET_DISK_4(table_fmt.tf_tab_head_size_4, eof);
1629
XT_SET_DISK_2(table_fmt.tf_tab_version_2, XT_TAB_CURRENT_VERSION);
1630
XT_SET_DISK_4(table_fmt.tf_rec_size_4, tab->tab_dic.dic_rec_size);
1631
XT_SET_DISK_1(table_fmt.tf_rec_fixed_1, tab->tab_dic.dic_rec_fixed);
1632
XT_SET_DISK_2(table_fmt.tf_tab_unused_2, 0);
1633
XT_SET_DISK_8(table_fmt.tf_min_auto_inc_8, tab->tab_dic.dic_min_auto_inc);
1635
if (!xt_pwrite_file(of_rec, sizeof(XTTableHeadDRec), offsetof(XTTableFormatDRec, tf_definition), &table_fmt, &self->st_statistics.st_rec, self))
1638
// NOTE(review): tab_def is dereferenced here, and tab_new_handle() calls this
// function with def_len == 0 and tab_def == NULL - confirm this write is
// guarded for that case.
if (!xt_pwrite_file(of_rec, sizeof(XTTableHeadDRec) + offsetof(XTTableFormatDRec, tf_definition), def_len, tab_def->sb_cstring, &self->st_statistics.st_rec, self))
1643
// Initialise a new index file: allocate a zeroed in-memory index header of
// XT_INDEX_HEAD_SIZE bytes, fill in the header and the index format record
// embedded in it, and write the whole header at offset 0 of `of_ind`.
static void tab_init_ind_file(XTThreadPtr self, XTOpenFilePtr of_ind, XTTableHPtr tab, XTDictionaryPtr dic)
1645
XTIndexFormatDPtr index_fmt;
1647
/* This is the size of the index header: */
1648
// One node reference of XT_NODE_REF_SIZE bytes is reserved per key, after tp_data.
tab->tab_index_format_offset = offsetof(XTIndexHeadDRec, tp_data) + dic->dic_key_count * XT_NODE_REF_SIZE;
1649
if (!(tab->tab_index_head = (XTIndexHeadDPtr) xt_calloc_ns(XT_INDEX_HEAD_SIZE)))
1652
XT_NODE_ID(tab->tab_ind_eof) = 1;
1653
XT_NODE_ID(tab->tab_ind_free) = 0;
1655
XT_SET_DISK_4(tab->tab_index_head->tp_header_size_4, XT_INDEX_HEAD_SIZE);
1656
XT_SET_DISK_4(tab->tab_index_head->tp_format_offset_4, tab->tab_index_format_offset);
1657
XT_SET_DISK_6(tab->tab_index_head->tp_ind_eof_6, XT_NODE_ID(tab->tab_ind_eof));
1658
XT_SET_DISK_6(tab->tab_index_head->tp_ind_free_6, XT_NODE_ID(tab->tab_ind_free));
1660
/* Store the index format: */
1661
// The format record lives inside the header buffer, at tab_index_format_offset.
index_fmt = (XTIndexFormatDPtr) (((xtWord1 *) tab->tab_index_head) + tab->tab_index_format_offset);
1662
XT_SET_DISK_4(index_fmt->if_format_size_4, sizeof(XTIndexFormatDRec));
1663
XT_SET_DISK_2(index_fmt->if_tab_version_2, XT_TAB_CURRENT_VERSION);
1664
XT_SET_DISK_2(index_fmt->if_ind_version_2, XT_IND_CURRENT_VERSION);
1665
XT_SET_DISK_1(index_fmt->if_node_ref_size_1, XT_NODE_REF_SIZE);
1666
XT_SET_DISK_1(index_fmt->if_rec_ref_size_1, XT_RECORD_REF_SIZE);
1667
XT_SET_DISK_4(index_fmt->if_page_size_4, XT_INDEX_PAGE_SIZE);
1669
/* Save the header: */
1670
if (!xt_pwrite_file(of_ind, 0, XT_INDEX_HEAD_SIZE, tab->tab_index_head, &self->st_statistics.st_ind, self))
1674
// Create the files of table `name` from dictionary `dic` in the thread's
// current database. Under the table-pool lock and the db_tables lock it
// removes an existing table of the same name, registers a new ID-list entry,
// creates and initialises the row, data and index files, logs the new table
// ID, and finally opens the table once so that foreign key problems surface at
// create time. On failure the new files and the ID-list entry are removed again.
xtPublic void xt_create_table(XTThreadPtr self, XTPathStrPtr name, XTDictionaryPtr dic)
1676
char table_name[XT_MAX_TABLE_FILE_NAME_SIZE];
1677
char path[PATH_MAX];
1678
XTDatabaseHPtr db = self->st_database;
1679
XTOpenTablePoolPtr table_pool;
1681
XTTableHPtr old_tab = NULL;
1682
xtTableID old_tab_id = 0;
1683
xtTableID tab_id = 0;
1684
XTStringBufferRec tab_def = { 0, 0, 0 };
1685
XTTableEntryRec te_tab;
1686
XTSortedListInfoRec li_undo;
1688
#ifdef TRACE_CREATE_TABLES
1689
printf("CREATE %s\n", name->ps_path);
1692
if (strlen(xt_last_name_of_path(name->ps_path)) > XT_TABLE_NAME_SIZE-1)
1693
xt_throw_taberr(XT_CONTEXT, XT_ERR_NAME_TOO_LONG, name);
1695
xt_throw_xterr(XT_CONTEXT, XT_ERR_NO_DATABASE_IN_USE);
1697
/* Lock to prevent table list change during creation. */
1698
table_pool = xt_db_lock_table_pool_by_name(self, self->st_database, name, FALSE, TRUE, TRUE, &old_tab);
1699
pushr_(xt_db_unlock_table_pool, table_pool);
1700
xt_ht_lock(self, db->db_tables);
1701
pushr_(xt_ht_unlock, db->db_tables);
1702
pushr_(xt_heap_release, old_tab);
1704
/* This must be done before we remove the old table
1705
* from the directory, or we will not be able
1706
* to find the table, which could be required
1709
if (xt_sl_get_size(db->db_table_by_id) >= XT_MAX_TABLES)
1710
xt_throw_ulxterr(XT_CONTEXT, XT_ERR_TOO_MANY_TABLES, (u_long) XT_MAX_TABLES);
1712
// The new ID is only committed to db_curr_tab_id once the files exist (see below).
tab_id = db->db_curr_tab_id + 1;
1715
old_tab_id = old_tab->tab_id;
1716
xt_dl_delete_ext_data(self, old_tab, FALSE, TRUE);
1717
freer_(); // xt_heap_release(self, old_tab)
1719
/* For the Windows version this must be done before we
1720
* start to delete the underlying files!
1722
tab_close_files(self, old_tab);
1724
tab_delete_table_files(self, name, old_tab_id);
1726
/* Remove the PBMS table: */
1727
ASSERT(xt_get_self() == self);
1729
/* Remove the table from the directory. It will get a new
1730
* ID so the handle in the directory will no longer be valid.
1732
xt_ht_del(self, db->db_tables, name);
1735
freer_(); // xt_heap_release(self, old_tab)
1738
/* Add the table to the directory, we'll remove on error! */
1739
li_undo.li_sl = db->db_table_by_id;
1740
li_undo.li_key = &tab_id;
1741
te_tab.te_tab_id = tab_id;
1742
te_tab.te_heap_tab = dic->dic_tab_flags & XT_TF_MEMORY_TABLE;
1743
te_tab.te_tab_name = xt_dup_string(self, xt_last_name_of_path(name->ps_path));
1744
te_tab.te_tab_path = tab_get_table_path(self, db, name, TRUE);
1745
te_tab.te_table = NULL;
1746
te_tab.te_type = dic->dic_table_type;
1747
xt_sl_insert(self, db->db_table_by_id, &tab_id, &te_tab);
1751
XTOpenFilePtr of_row, of_rec, of_ind;
1754
tab_save_tables(self, db);
1756
// A temporary table structure, used only to initialise the three files.
tab = (XTTableHPtr) xt_heap_new(self, sizeof(XTTableHRec), tab_finalize);
1757
pushr_(xt_heap_release, tab);
1759
/* The length of the foreign key definition: */
1760
if (dic->dic_table) {
1761
dic->dic_table->loadString(self, &tab_def);
1762
// One extra byte beyond sb_len; presumably the string terminator.
def_len = tab_def.sb_len + 1;
1765
tab->tab_head_op_seq = 0;
1766
tab->tab_wr_op_seq = 0;
1768
/* This tests operation number overflow. */
1769
//tab->tab_head_op_seq = 0xFFFFFFFF - 12;
1770
//tab->tab_wr_op_seq = 0xFFFFFFFF - 12;
1773
/* ------- ROW FILE: */
1774
// Each file is opened with XT_FS_CREATE | XT_FS_EXCLUSIVE.
xt_strcpy(PATH_MAX, path, name->ps_path);
1775
xt_remove_last_name_of_path(path);
1776
tab_get_row_file_name(table_name, xt_last_name_of_path(name->ps_path), tab_id);
1777
xt_strcat(PATH_MAX, path, table_name);
1778
of_row = xt_open_file(self, path, xt_row_file_type(dic->dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_CREATE | XT_FS_EXCLUSIVE, xt_db_row_file_grow_size);
1779
pushr_(xt_close_file, of_row);
1780
tab_init_row_file(self, of_row, tab, dic);
1781
freer_(); // xt_close_file(of_row)
1783
(void) ASSERT(sizeof(XTTabRowHeadDRec) == sizeof(XTTabRowRefDRec));
1784
(void) ASSERT(sizeof(XTTabRowRefDRec) == 1 << XT_TAB_ROW_SHIFTS);
1786
/* ------------ DATA FILE: */
1787
xt_remove_last_name_of_path(path);
1788
tab_get_data_file_name(table_name, xt_last_name_of_path(name->ps_path), tab_id);
1789
xt_strcat(PATH_MAX, path, table_name);
1790
of_rec = xt_open_file(self, path, xt_rec_file_type(dic->dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_CREATE | XT_FS_EXCLUSIVE, xt_db_data_file_grow_size);
1791
pushr_(xt_close_file, of_rec);
1792
tab_init_data_file(self, of_rec, tab, dic, def_len, &tab_def);
1793
freer_(); // xt_close_file(of_rec)
1795
/* ----------- INDEX FILE: */
1796
xt_remove_last_name_of_path(path);
1797
tab_get_index_file_name(table_name, xt_last_name_of_path(name->ps_path), tab_id);
1798
xt_strcat(PATH_MAX, path, table_name);
1799
of_ind = xt_open_file(self, path, xt_ind_file_type(dic->dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_CREATE | XT_FS_EXCLUSIVE, XT_INDEX_PAGE_SIZE*256);
1800
pushr_(xt_close_file, of_ind);
1801
tab_init_ind_file(self, of_ind, tab, dic);
1802
freer_(); // xt_close_file(of_ind)
1805
/* Log the new table ID! */
1806
// The in-memory counter is advanced first and rolled back if logging fails.
db->db_curr_tab_id = tab_id;
1807
if (!xt_xn_log_tab_id(self, tab_id)) {
1808
db->db_curr_tab_id = tab_id - 1;
1812
freer_(); // xt_heap_release(tab)
1815
* 2008-12-10: Note, there is another problem, example:
1816
* set storage_engine = pbxt;
1818
* CREATE TABLE t1 (s1 INT PRIMARY KEY, s2 INT);
1819
* CREATE TABLE t2 (s1 INT PRIMARY KEY, FOREIGN KEY (s1) REFERENCES t1 (s1) ON UPDATE CASCADE);
1820
* CREATE TABLE t3 (s1 INT PRIMARY KEY, FOREIGN KEY (s1) REFERENCES t2 (s1) ON UPDATE CASCADE);
1822
* DROP TABLE IF EXISTS t2,t1;
1823
* CREATE TABLE t1 (s1 ENUM('a','b') PRIMARY KEY);
1824
* CREATE TABLE t2 (s1 ENUM('A','B'), FOREIGN KEY (s1) REFERENCES t1 (s1));
1826
* DROP TABLE IF EXISTS t2,t1;
1828
* In the example above. The second create t2 does not fail, although t3 references it,
1829
* and the data types do not match.
1831
* The main problem is that this error comes on DROP TABLE IF EXISTS t2! Which prevents
1832
* the table from being dropped - not good.
1834
* So my idea here is to open the table, and if it fails, then the create table fails
1839
* We pass table type separately and provide NULL for the dic parameter, this is because
1840
* we want to force loading table (which is triggered by dic == NULL) but we still need table type
1844
tab = xt_use_table_no_lock(self, db, name, FALSE, FALSE, NULL);
1845
xt_heap_release(self, tab);
1850
/* Creation failed, delete the table files: */
1853
// The cleanup runs inside an exception-handler scope (enter/exit below).
xt_enter_exception_handler(self, &e);
1855
tab_delete_table_files(self, name, tab_id);
1856
tab_remove_table_path(self, db, te_tab.te_tab_path);
1857
xt_sl_delete(NULL, db->db_table_by_id, &tab_id);
1858
tab_save_tables(self, db);
1859
xt_sb_set_size(self, &tab_def, 0);
1860
xt_exit_exception_handler(self, &e);
1865
xt_sb_set_size(self, &tab_def, 0);
1869
XTTableEntryPtr te_ptr;
1871
// Drop the ID-list entry of the replaced table, if one is still registered.
if ((te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &old_tab_id))) {
1872
tab_remove_table_path(self, db, te_ptr->te_tab_path);
1873
xt_sl_delete(self, db->db_table_by_id, &old_tab_id);
1874
tab_save_tables(self, db);
1877
/* Same purpose as above {LOAD-FOR-FKS} (although this should work,
1878
* because this is a TRUNCATE TABLE.
1880
tab = xt_use_table_no_lock(self, db, name, FALSE, FALSE, NULL);
1881
xt_heap_release(self, tab);
1884
/* Log this error, but do not return it, because
1885
* it just involves the cleanup of the old table,
1886
* the new table has been successfully created.
1888
xt_log_and_clear_exception(self);
1893
freer_(); // xt_ht_unlock(db->db_tables)
1894
freer_(); // xt_db_unlock_table_pool(table_pool)
1896
/* I open the table here, because I cannot rely on MySQL to do
1897
* it after a create. This is normally OK, but with foreign keys
1898
* tables can be referenced and then they are not opened
1899
* before use. In this example, the INSERT opens t2, but t1 is
1900
* not opened of the create. As a result the foreign key
1901
* reference is not resolved.
1903
* drop table t1, t2;
1906
* id INT PRIMARY KEY
1912
* CONSTRAINT c1 FOREIGN KEY (v) REFERENCES t1(id)
1916
* INSERT INTO t2 VALUES(2);
1918
/* this code is not needed anymore as we open tables referred by FKs as necessary during checks
1919
xt_ht_lock(self, db->db_tables);
1920
pushr_(xt_ht_unlock, db->db_tables);
1921
tab = xt_use_table_no_lock(self, db, name, FALSE, FALSE, NULL);
1922
freer_(); // xt_ht_unlock(db->db_tables)
1923
xt_heap_release(self, tab);
1924
* CHANGED see {LOAD-FOR-FKS} above.
1930
// Drop table `tab_name` from the thread's current database. Under the
// table-pool lock and the db_tables lock it checks foreign key dependencies
// (unless st_ignore_fkeys is set), then removes the extended data, the table
// files, the ID-list entry and the name-hash entry. A table that is still
// referenced raises XT_ERR_ROW_IS_REFERENCED instead.
xtPublic void xt_drop_table(XTThreadPtr self, XTPathStrPtr tab_name, xtBool drop_db)
1932
XTDatabaseHPtr db = self->st_database;
1933
XTOpenTablePoolPtr table_pool;
1934
XTTableHPtr tab = NULL;
1935
xtTableID tab_id = 0;
1936
xtBool can_drop = TRUE;
1940
#ifdef TRACE_CREATE_TABLES
1941
printf("DROP %s\n", tab_name->ps_path);
1944
table_pool = xt_db_lock_table_pool_by_name(self, self->st_database, tab_name, FALSE, FALSE, TRUE, &tab);
1945
pushr_(xt_db_unlock_table_pool, table_pool);
1946
xt_ht_lock(self, db->db_tables);
1947
pushr_(xt_ht_unlock, db->db_tables);
1948
pushr_(xt_heap_release, tab);
1951
tab_id = tab->tab_id; /* tab is not null if returned table_pool is not null */
1952
/* check if other tables refer this */
1953
if (!self->st_ignore_fkeys)
1954
can_drop = tab->tab_dic.dic_table->checkCanDrop(drop_db);
1957
/* See the comment in ha_pbxt::delete_table regarding different implementation of DROP TABLE
1958
* in MySQL and Drizzle
1961
xt_throw_xterr(XT_CONTEXT, XT_ERR_TABLE_NOT_FOUND);
1967
XTTableEntryPtr te_ptr;
1969
xt_dl_delete_ext_data(self, tab, FALSE, TRUE);
1970
freer_(); // xt_heap_release(self, tab)
1972
/* For the Windows version this must be done before we
1973
* start to delete the underlying files!
1975
tab_close_files(self, tab);
1977
tab_delete_table_files(self, tab_name, tab_id);
1979
ASSERT(xt_get_self() == self);
1980
// Remove the table's path and its entry from the ID list, then persist the list.
if ((te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id))) {
1981
tab_remove_table_path(self, db, te_ptr->te_tab_path);
1982
xt_sl_delete(self, db->db_table_by_id, &tab_id);
1983
tab_save_tables(self, db);
1987
freer_(); // xt_heap_release(self, tab)
1990
xt_ht_del(self, db->db_tables, tab_name);
1992
else { /* cannot drop table because of FK dependencies */
1993
xt_throw_xterr(XT_CONTEXT, XT_ERR_ROW_IS_REFERENCED);
1996
freer_(); // xt_ht_unlock(db->db_tables)
1997
freer_(); // xt_db_unlock_table_pool(table_pool)
2002
* Record buffer size:
2003
* -------------------
2004
* The size of the record buffer used to hold the row
2005
* in memory. This buffer size does not include the BLOB data.
2006
* About 8 bytes (a pointer and a size) is reserved for each BLOB
2009
* The buffer size includes a number of "NULL" bytes followed by
2010
* the data area. The NULL bytes contain 1 bit for every column,
2011
* to indicate if the column is NULL or not.
2013
* The size of the buffer is 4/8-byte aligned, so it may be padded
2016
* Fixed length rec. len.:
2017
* -----------------------
2018
* If the record does not include any BLOBs then this is the size of the
2019
* fixed length record. The size of the data in the data handle record
2020
* need never be bigger than this length, if the record does not
2021
* contain BLOBs. So this should be the maximum size set for
2022
* AVG_ROW_LENGTH in this case.
2024
* Handle data record size:
2025
* ------------------------
2026
* This is the size of the handle data record. It is the data size
2027
* plus the "max header size".
2029
* Min/max header size:
2030
* The min and max header size of the header in the data handle file.
2031
* The larger header is used if a record has an extended data (data log
2034
* Min/avg/max record size:
2035
* ------------------------
2036
* These are variable length records sizes. That is, the size of records
2037
* when stored in the variable length format. Variable length records
2038
* do not have fixed fields sizes, instead the fields are packed one
2039
* after the other, prefixed by a number of size indicator bytes.
2041
* The average is an estimate of the average record size. This estimate
2042
* is used if no AVG_ROW_LENGTH is specifically given.
2044
* If the average estimate is within 20% of the maximum size of the record,
2045
* then the record will be handled as a fixed length record.
2047
* Avg row len set for tab:
2048
* ------------------------
2049
* This is the value set using AVG_ROW_LENGTH when the table is declared.
2051
* Rows fixed length:
2052
* ------------------
2053
* YES if the records of this table are handled as fixed length records.
2054
* In this case the table records will never have an extended record
2057
* The size of the data area in the handle data record is set to the
2058
* size of the MySQL data record ("Fixed length rec. len.").
2060
* It also means that the record format used is identical to the MySQL
2063
* If the records are not fixed, then the variable length record format
2064
* is used. Record sizes are then in the range specified by
2065
* "Min/avg/max record size".
2067
* Maximum fixed size:
2068
* -------------------
2069
* This is the maximum size of a data log record.
2071
* Minimum variable size:
2072
* ------------------------
2073
* Records below this size are handled as a fixed length record size, unless
2074
* the AVG_ROW_LENGTH is specifically set.
2076
xtPublic void xt_check_table(XTThreadPtr self, XTOpenTablePtr ot)
2078
XTTableHPtr tab = ot->ot_table;
2080
XTTabRecExtDPtr rec_buf = (XTTabRecExtDPtr) ot->ot_row_rbuffer;
2081
XTactExtRecEntryDRec ext_rec;
2084
xtLogOffset log_offset;
2086
xtRecordID prev_rec_id;
2089
u_llong free_rec_count = 0, free_count2 = 0;
2090
u_llong delete_rec_count = 0;
2091
u_llong alloc_rec_count = 0;
2092
u_llong alloc_rec_bytes = 0;
2093
u_llong min_comp_rec_len = 0;
2094
u_llong max_comp_rec_len = 0;
2097
u_llong ext_data_len = 0;
2099
#if defined(DUMP_CHECK_TABLE) || defined(CHECK_TABLE_STATS)
2100
printf("\nCHECK TABLE: %s\n", tab->tab_name->ps_path);
2103
xt_lock_mutex(self, &tab->tab_db->db_co_ext_lock);
2104
pushr_(xt_unlock_mutex, &tab->tab_db->db_co_ext_lock);
2106
xt_lock_mutex(self, &tab->tab_rec_lock);
2107
pushr_(xt_unlock_mutex, &tab->tab_rec_lock);
2109
#ifdef CHECK_TABLE_STATS
2110
printf("Record buffer size = %lu\n", (u_long) tab->tab_dic.dic_mysql_buf_size);
2111
printf("Fixed length rec. len. = %lu\n", (u_long) tab->tab_dic.dic_mysql_rec_size);
2112
printf("Handle data record size = %lu\n", (u_long) tab->tab_dic.dic_rec_size);
2113
printf("Min/max header size = %d/%d\n", (int) offsetof(XTTabRecFix, rf_data), tab->tab_dic.dic_rec_fixed ? (int) offsetof(XTTabRecFix, rf_data) : (int) offsetof(XTTabRecExtDRec, re_data));
2114
printf("Min/avg/max record size = %llu/%llu/%llu\n", (u_llong) tab->tab_dic.dic_min_row_size, (u_llong) tab->tab_dic.dic_ave_row_size, (u_llong) tab->tab_dic.dic_max_row_size);
2115
if (tab->tab_dic.dic_def_ave_row_size)
2116
printf("Avg row len set for tab = %lu\n", (u_long) tab->tab_dic.dic_def_ave_row_size);
2118
printf("Avg row len set for tab = not specified\n");
2119
printf("Rows fixed length = %s\n", tab->tab_dic.dic_rec_fixed ? "YES" : "NO");
2120
if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
2121
printf("Table type = MEMORY\n");
2122
else if (tab->tab_dic.dic_tab_flags & XT_TF_REAL_TEMP_TABLE)
2123
printf("Table type = TEMPORARY\n");
2124
else if (tab->tab_dic.dic_tab_flags & XT_TF_DDL_TEMP_TABLE)
2125
printf("Table type = DDL-TEMPORARY\n");
2126
if (tab->tab_dic.dic_def_ave_row_size)
2127
printf("Maximum fixed size = %lu\n", (u_long) XT_TAB_MAX_FIX_REC_LENGTH_SPEC);
2129
printf("Maximum fixed size = %lu\n", (u_long) XT_TAB_MAX_FIX_REC_LENGTH);
2130
printf("Minimum variable size = %lu\n", (u_long) XT_TAB_MIN_VAR_REC_LENGTH);
2131
printf("Minimum auto-increment = %llu\n", (u_llong) tab->tab_dic.dic_min_auto_inc);
2132
printf("Number of columns = %lu\n", (u_long) tab->tab_dic.dic_no_of_cols);
2133
printf("Number of fixed columns = %lu\n", (u_long) tab->tab_dic.dic_fix_col_count);
2134
printf("Columns req. for index = %lu\n", (u_long) tab->tab_dic.dic_ind_cols_req);
2135
if (tab->tab_dic.dic_ind_rec_len)
2136
printf("Rec len req. for index = %llu\n", (u_llong) tab->tab_dic.dic_ind_rec_len);
2137
printf("Columns req. for blobs = %lu\n", (u_long) tab->tab_dic.dic_blob_cols_req);
2138
printf("Number of blob columns = %lu\n", (u_long) tab->tab_dic.dic_blob_count);
2139
printf("Number of indices = %lu\n", (u_long) tab->tab_dic.dic_key_count);
2142
#ifdef DUMP_CHECK_TABLE
2143
printf("Records:-\n");
2144
printf("Free list: %llu (%llu)\n", (u_llong) tab->tab_rec_free_id, (u_llong) tab->tab_rec_fnum);
2145
printf("EOF: %llu\n", (u_llong) tab->tab_rec_eof_id);
2148
rec_size = XT_REC_EXT_HEADER_SIZE;
2149
if (rec_size > tab->tab_recs.tci_rec_size)
2150
rec_size = tab->tab_recs.tci_rec_size;
2152
while (rec_id < tab->tab_rec_eof_id) {
2153
if (!xt_tab_get_rec_data(ot, rec_id, tab->tab_dic.dic_rec_size, ot->ot_row_rbuffer))
2156
#ifdef DUMP_CHECK_TABLE
2157
printf("%-4llu ", (u_llong) rec_id);
2159
switch (rec_buf->tr_rec_type_1 & XT_TAB_STATUS_MASK) {
2160
case XT_TAB_STATUS_FREED:
2161
#ifdef DUMP_CHECK_TABLE
2162
printf("======== ");
2166
case XT_TAB_STATUS_DELETE:
2167
#ifdef DUMP_CHECK_TABLE
2172
case XT_TAB_STATUS_FIXED:
2173
#ifdef DUMP_CHECK_TABLE
2174
printf("record-F ");
2177
row_size = myxt_store_row_length(ot, (char *) ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE);
2178
alloc_rec_bytes += row_size;
2179
if (!min_comp_rec_len || row_size < min_comp_rec_len)
2180
min_comp_rec_len = row_size;
2181
if (row_size > max_comp_rec_len)
2182
max_comp_rec_len = row_size;
2184
case XT_TAB_STATUS_VARIABLE:
2185
#ifdef DUMP_CHECK_TABLE
2186
printf("record-V ");
2189
row_size = myxt_load_row_length(ot, tab->tab_dic.dic_rec_size, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, NULL);
2190
alloc_rec_bytes += row_size;
2191
if (!min_comp_rec_len || row_size < min_comp_rec_len)
2192
min_comp_rec_len = row_size;
2193
if (row_size > max_comp_rec_len)
2194
max_comp_rec_len = row_size;
2196
case XT_TAB_STATUS_EXT_DLOG:
2197
#ifdef DUMP_CHECK_TABLE
2198
printf("record-X ");
2201
ext_data_len += XT_GET_DISK_4(rec_buf->re_log_dat_siz_4);
2202
row_size = XT_GET_DISK_4(rec_buf->re_log_dat_siz_4) + ot->ot_rec_size - XT_REC_EXT_HEADER_SIZE;
2203
alloc_rec_bytes += row_size;
2204
if (!min_comp_rec_len || row_size < min_comp_rec_len)
2205
min_comp_rec_len = row_size;
2206
if (row_size > max_comp_rec_len)
2207
max_comp_rec_len = row_size;
2210
#ifdef DUMP_CHECK_TABLE
2211
if (rec_buf->tr_rec_type_1 & XT_TAB_STATUS_CLEANED_BIT)
2216
prev_rec_id = XT_GET_DISK_4(rec_buf->tr_prev_rec_id_4);
2217
xn_id = XT_GET_DISK_4(rec_buf->tr_xact_id_4);
2218
row_id = XT_GET_DISK_4(rec_buf->tr_row_id_4);
2219
switch (rec_buf->tr_rec_type_1 & XT_TAB_STATUS_MASK) {
2220
case XT_TAB_STATUS_FREED:
2221
#ifdef DUMP_CHECK_TABLE
2222
printf(" prev=%-3llu (xact=%-3llu row=%lu)\n", (u_llong) prev_rec_id, (u_llong) xn_id, (u_long) row_id);
2225
case XT_TAB_STATUS_EXT_DLOG:
2228
#ifdef DUMP_CHECK_TABLE
2229
printf(" prev=%-3llu xact=%-3llu row=%lu Xlog=%lu Xoff=%llu Xsiz=%lu\n", (u_llong) prev_rec_id, (u_llong) xn_id, (u_long) row_id, (u_long) XT_GET_DISK_2(rec_buf->re_log_id_2), (u_llong) XT_GET_DISK_6(rec_buf->re_log_offs_6), (u_long) XT_GET_DISK_4(rec_buf->re_log_dat_siz_4));
2232
log_size = XT_GET_DISK_4(rec_buf->re_log_dat_siz_4);
2233
XT_GET_LOG_REF(log_id, log_offset, rec_buf);
2234
if (ot->ot_table->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
2235
xt_tab_read_ext_record(tab, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data), (xtWord1 *) &ext_rec);
2239
if (!(ok = self->st_dlog_buf.dlb_read_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data), (xtWord1 *) &ext_rec, self)))
2240
xt_log_and_clear_exception(self);
2244
xtTableID curr_tab_id;
2245
xtRecordID curr_rec_id;
2247
log_size2 = XT_GET_DISK_4(ext_rec.er_data_size_4);
2248
curr_tab_id = XT_GET_DISK_4(ext_rec.er_tab_id_4);
2249
curr_rec_id = XT_GET_DISK_4(ext_rec.er_rec_id_4);
2250
if (log_size2 != log_size || curr_tab_id != tab->tab_id || curr_rec_id != rec_id) {
2251
xt_logf(XT_INFO, "Table %s: record %llu, extended record %lu:%llu not valid\n", tab->tab_name, (u_llong) rec_id, (u_long) log_id, (u_llong) log_offset);
2256
#ifdef DUMP_CHECK_TABLE
2257
printf(" prev=%-3llu xact=%-3llu row=%lu\n", (u_llong) prev_rec_id, (u_llong) xn_id, (u_long) row_id);
2264
#ifdef CHECK_TABLE_STATS
2265
u_long rec, row, ind;
2268
rec = xt_seek_eof_file(self, ot->ot_rec_file);
2269
row = xt_seek_eof_file(self, ot->ot_row_file);
2270
ind = xt_seek_eof_file(self, ot->ot_ind_file);
2271
if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
2272
if (!tab->tab_dic.dic_rec_fixed) {
2273
xt_int8_to_byte_size((xtInt8) tab->tab_mem_total, value);
2274
printf("Ext. record memory used = %s\n", value);
2276
xt_int8_to_byte_size((xtInt8) ind, value);
2277
printf("Index data memory used = %s\n", value);
2278
xt_int8_to_byte_size((xtInt8) rec + row, value);
2279
printf("Table data memory used = %s\n", value);
2280
xt_int8_to_byte_size((xtInt8) tab->tab_mem_total + rec + row + ind, value);
2281
printf("Total memory used = %s\n", value);
2284
if (!tab->tab_dic.dic_rec_fixed) {
2285
xt_int8_to_byte_size((xtInt8) ext_data_len, value);
2286
printf("Ext. record disk used = %s\n", value);
2288
xt_int8_to_byte_size((xtInt8) ind, value);
2289
printf("Index disk space used = %s\n", value);
2290
xt_int8_to_byte_size((xtInt8) rec + row, value);
2291
printf("Table disk space used = %s\n", value);
2292
xt_int8_to_byte_size((xtInt8) ext_data_len + rec + row + ind, value);
2293
printf("Total disk space used = %s\n", value);
2296
if (alloc_rec_count) {
2297
printf("Minumum comp. rec. len. = %llu\n", (u_llong) min_comp_rec_len);
2298
printf("Average comp. rec. len. = %llu\n", (u_llong) ((double) alloc_rec_bytes / (double) alloc_rec_count + (double) 0.5));
2299
printf("Maximum comp. rec. len. = %llu\n", (u_llong) max_comp_rec_len);
2301
printf("Free record count = %llu\n", (u_llong) free_rec_count);
2302
printf("Deleted record count = %llu\n", (u_llong) delete_rec_count);
2303
printf("Allocated record count = %llu\n", (u_llong) alloc_rec_count);
2306
if (tab->tab_rec_fnum != free_rec_count)
2307
xt_logf(XT_INFO, "Table %s: incorrect number of free blocks, %llu, should be: %llu\n", tab->tab_name, (u_llong) free_rec_count, (u_llong) tab->tab_rec_fnum);
2309
/* Checking the free list: */
2311
rec_id = tab->tab_rec_free_id;
2313
if (rec_id >= tab->tab_rec_eof_id) {
2314
xt_logf(XT_INFO, "Table %s: invalid reference on free list: %llu, ", tab->tab_name, (u_llong) rec_id);
2316
xt_logf(XT_INFO, "reference by: %llu\n", (u_llong) prec_id);
2318
xt_logf(XT_INFO, "reference by list head pointer\n");
2321
if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_FIX_HEADER_SIZE, (xtWord1 *) rec_buf)) {
2322
xt_log_and_clear_exception(self);
2325
if ((rec_buf->tr_rec_type_1 & XT_TAB_STATUS_MASK) != XT_TAB_STATUS_FREED)
2326
xt_logf(XT_INFO, "Table %s: record, %llu, on free list is not free\n", tab->tab_name, (u_llong) rec_id);
2329
rec_id = XT_GET_DISK_4(rec_buf->tr_prev_rec_id_4);
2331
if (free_count2 < free_rec_count)
2332
xt_logf(XT_INFO, "Table %s: not all free blocks (%llu) on free list: %llu\n", tab->tab_name, (u_llong) free_rec_count, (u_llong) free_count2);
2334
freer_(); // xt_unlock_mutex_ns(&tab->tab_rec_lock);
2338
xt_lock_mutex(self, &tab->tab_row_lock);
2339
pushr_(xt_unlock_mutex, &tab->tab_row_lock);
2341
#ifdef DUMP_CHECK_TABLE
2343
printf("Free list: %llu (%llu)\n", (u_llong) tab->tab_row_free_id, (u_llong) tab->tab_row_fnum);
2344
printf("EOF: %llu\n", (u_llong) tab->tab_row_eof_id);
2348
while (rec_id < tab->tab_row_eof_id) {
2349
if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, rec_id, &ref_id, self))
2351
#ifdef DUMP_CHECK_TABLE
2352
printf("%-3llu ", (u_llong) rec_id);
2354
#ifdef DUMP_CHECK_TABLE
2356
printf("====== 0\n");
2358
printf("in use %llu\n", (u_llong) ref_id);
2363
freer_(); // xt_unlock_mutex(&tab->tab_row_lock);
2365
#ifdef CHECK_INDEX_ON_CHECK_TABLE
2366
xt_check_indices(ot);
2368
freer_(); // xt_unlock_mutex(&tab->tab_db->db_co_ext_lock);
2371
// Rename a table from old_name to new_name.
//
// Steps visible in this function:
//  - throw XT_ERR_NAME_TOO_LONG if the last path component of new_name is
//    longer than XT_TABLE_NAME_SIZE-1 characters;
//  - lock the table pool by old_name and lock the db_tables hash table;
//  - move the table dictionary into the local copy dic and close the
//    table files;
//  - rename every file reported by the table file enumerator to
//    new_name plus the postfix of the file;
//  - switch the name and path stored in the table entry, drop the old
//    path and persist the table list with tab_save_tables();
//  - re-open the table under new_name with the saved dictionary and
//    mark it as repaired.
//
// Cleanup handlers are registered with pushr_() and are run with freer_()
// or discarded with popr_(), as the trailing comments on those calls note.
xtPublic void xt_rename_table(XTThreadPtr self, XTPathStrPtr old_name, XTPathStrPtr new_name)
2373
XTDatabaseHPtr db = self->st_database;
2374
XTOpenTablePoolPtr table_pool;
2375
XTTableHPtr tab = NULL;
2376
char table_name[XT_MAX_TABLE_FILE_NAME_SIZE];
2378
XTFilesOfTableRec ft;
2379
XTDictionaryRec dic;
2381
XTTableEntryPtr te_ptr;
2383
XTTablePathPtr te_new_path;
2384
XTTablePathPtr te_old_path;
2385
char to_path[PATH_MAX];
2387
memset(&dic, 0, sizeof(dic));
2389
#ifdef TRACE_CREATE_TABLES
2390
printf("RENAME %s --> %s\n", old_name->ps_path, new_name->ps_path);
2392
// Only the last path component of the new name is length-checked.
if (strlen(xt_last_name_of_path(new_name->ps_path)) > XT_TABLE_NAME_SIZE-1)
2393
xt_throw_taberr(XT_CONTEXT, XT_ERR_NAME_TOO_LONG, new_name);
2395
/* MySQL renames the table while it is in use. Here is
2401
* COPY tab1 -> tmp_tab
2403
* RENAME tab1 -> tmp2_tab
2404
* RENAME tmp_tab -> tab1
2405
* CLOSE tab1 (tmp2_tab)
2409
* Since the table is open when it is renamed, I cannot
2410
* get exclusive use of the table for this operation.
2412
* So instead we just make sure that the sweeper is not
2415
table_pool = xt_db_lock_table_pool_by_name(self, self->st_database, old_name, FALSE, TRUE, FALSE, &tab);
2416
pushr_(xt_db_unlock_table_pool, table_pool);
2417
xt_ht_lock(self, db->db_tables);
2418
pushr_(xt_ht_unlock, db->db_tables);
2419
tab_id = tab->tab_id;
2420
// The dictionary is moved out of the table handle so that it can be
// handed to xt_use_table_no_lock() when the table is re-opened below.
myxt_move_dictionary(&dic, &tab->tab_dic);
2421
pushr_(myxt_free_dictionary, &dic);
2422
pushr_(xt_heap_release, tab);
2424
/* Unmap the memory mapped table files:
2425
* For windows this must be done before we
2426
* can rename the files.
2428
tab_close_files(self, tab);
2430
freer_(); // xt_heap_release(self, old_tab)
2432
/* Create the new name and path: */
2433
te_new_name = xt_dup_string(self, xt_last_name_of_path(new_name->ps_path));
2434
pushr_(xt_free, te_new_name);
2435
te_new_path = tab_get_table_path(self, db, new_name, FALSE);
2436
pushr_(tab_free_table_path, te_new_path);
2438
te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id);
2440
/* Remove the table from the Database directory: */
2441
xt_ht_del(self, db->db_tables, old_name);
2443
xt_enum_files_of_tables_init(old_name, tab_id, &ft);
2444
while (xt_enum_files_of_tables_next(&ft)) {
2445
postfix = xt_tab_file_to_name(XT_MAX_TABLE_FILE_NAME_SIZE, table_name, ft.ft_file_path);
2447
// Destination path is the new table path followed by the file postfix.
xt_strcpy(PATH_MAX, to_path, new_name->ps_path);
2448
xt_strcat(PATH_MAX, to_path, postfix);
2450
// A failed rename of a single file is logged and the exception cleared.
if (!xt_fs_rename(NULL, ft.ft_file_path, to_path))
2451
xt_log_and_clear_exception(self);
2454
/* Switch the table name and path: */
2455
xt_free(self, te_ptr->te_tab_name);
2456
te_ptr->te_tab_name = te_new_name;
2457
te_old_path = te_ptr->te_tab_path;
2458
te_ptr->te_tab_path = te_new_path;
2459
tab_remove_table_path(self, db, te_old_path);
2460
tab_save_tables(self, db);
2462
// te_new_path and te_new_name are now stored in te_ptr, so their
// cleanup handlers are discarded rather than run.
popr_(); // Discard tab_free_table_path(te_new_path);
2463
popr_(); // Discard xt_free(te_new_name);
2465
tab = xt_use_table_no_lock(self, db, new_name, FALSE, FALSE, &dic);
2466
/* All renamed tables are considered repaired! */
2467
xt_tab_table_repaired(tab);
2468
xt_heap_release(self, tab);
2470
freer_(); // myxt_free_dictionary(&dic)
2471
freer_(); // xt_ht_unlock(db->db_tables)
2472
freer_(); // xt_db_unlock_table_pool(table_pool)
2475
// Get the table called name in the current database of the thread
// (self->st_database).
//
// The db_tables hash table is locked for the duration of the lookup, which
// is delegated to xt_use_table_no_lock(). no_load and missing_ok are passed
// through unchanged and no dictionary is supplied (NULL).
xtPublic XTTableHPtr xt_use_table(XTThreadPtr self, XTPathStrPtr name, xtBool no_load, xtBool missing_ok)
2478
XTDatabaseHPtr db = self->st_database;
2480
xt_ht_lock(self, db->db_tables);
2481
pushr_(xt_ht_unlock, db->db_tables);
2482
tab = xt_use_table_no_lock(self, db, name, no_load, missing_ok, NULL);
2487
// Flush a table after giving the background threads a chance to catch up.
//
// Sequence visible here: wake the sweeper and wait for it to become idle,
// wait until the writer has written out the operations on this table, then
// call xt_flush_table().
//
// timeout is compared against differences of time(NULL) values, so it is in
// seconds. A value of 0 disables the timeout check. When the timeout
// expires a warning naming the table is logged.
xtPublic void xt_sync_flush_table(XTThreadPtr self, XTOpenTablePtr ot, int timeout)
2489
XTTableHPtr tab = ot->ot_table;
2490
XTDatabaseHPtr db = tab->tab_db;
2492
/* Wakeup the sweeper:
2493
* We want the sweeper to check if there is anything to do,
2494
* so we must wake it up.
2495
* Once it has done all it can, it will go back to sleep.
2496
* This should be good enough.
2498
* NOTE: In all cases, we do not wait if the sweeper is in
2501
if (db->db_sw_idle) {
2502
u_int check_count = db->db_sw_check_count;
2505
xt_wakeup_sweeper(db);
2506
if (!db->db_sw_thread || db->db_sw_idle != XT_THREAD_IDLE || check_count != db->db_sw_check_count)
2508
xt_sleep_milli_second(10);
2512
/* Wait for the sweeper to become idle: */
2513
xt_lock_mutex(self, &db->db_sw_lock);
2514
pushr_(xt_unlock_mutex, &db->db_sw_lock);
2515
while (db->db_sw_thread && !db->db_sw_idle) {
2516
xt_timed_wait_cond(self, &db->db_sw_cond, &db->db_sw_lock, 10);
2518
freer_(); // xt_unlock_mutex(&db->db_sw_lock)
2520
/* Wait for the writer to write out all operations on the table:
2521
* We also do not wait for the writer if it is in
2524
time_t start_time = time(NULL);
2525
while (db->db_wr_thread &&
2526
db->db_wr_idle != XT_THREAD_INERR &&
2527
XTTableSeq::xt_op_is_before(tab->tab_head_op_seq+1, tab->tab_seq.ts_next_seq)) {
2528
if (timeout && time(NULL) > start_time + timeout) {
2529
char name_buf[XT_TABLE_NAME_BUF_SIZE];
2531
xt_tab_make_table_name(tab->tab_name, name_buf, XT_TABLE_NAME_BUF_SIZE);
2532
xt_logf(XT_WARNING, "Timeout waiting for writer while flushing %s\n", name_buf);
2536
/* Flush the log, in case this is holding up the
2539
if (!db->db_xlog.xlog_flush(self))
2542
xt_lock_mutex(self, &db->db_wr_lock);
2543
pushr_(xt_unlock_mutex, &db->db_wr_lock);
2544
db->db_wr_thread_waiting++;
2546
* Wake the writer if it is sleeping. In order to
2547
* flush a table we must wait for the writer to complete
2548
* committing all the changes in the table to the database.
2550
if (db->db_wr_idle) {
2551
if (!xt_broadcast_cond_ns(&db->db_wr_cond))
2552
xt_log_and_clear_exception_ns();
2555
freer_(); // xt_unlock_mutex(&db->db_wr_lock)
2556
xt_sleep_milli_second(10);
2558
xt_lock_mutex(self, &db->db_wr_lock);
2559
pushr_(xt_unlock_mutex, &db->db_wr_lock);
2560
db->db_wr_thread_waiting--;
2561
freer_(); // xt_unlock_mutex(&db->db_wr_lock)
2564
xt_flush_table(self, ot);
2567
// Task body: flush the record and row data of the table frt_table.
//
// The open table is obtained from the pool by table ID, not from the
// frt_table pointer (see the TASK-TABLE-GONE note below). Two situations
// are reported with a warning and end with the checkpoint flush state of
// the table set to XT_CPT_STATE_DONE_ALL instead of a flush:
//  - the table was not found (it may have been dropped);
//  - the pooled table is not the same object as frt_table (renamed).
// Otherwise xt_flush_record_row() is called and the open table is handed
// back to the pool on both the failure and the success path.
// NOTE(review): the return statements are not visible in this excerpt.
xtBool XTFlushRecRowTask::tk_task(XTThreadPtr thread)
2571
/* {TASK-TABLE-GONE}
2572
* If this task was scheduled before the table was deleted
2573
* or renamed, then we may be caught holding an invalid
2574
* table (frt_table) object.
2576
* As a result we just use the ID, to get the open table
2579
* If the tables are not identical, then there is no point
2582
if (!(xt_db_open_pool_table_ns(&ot, frt_table->tab_db, frt_table->tab_id)))
2586
/* Can happen if the table has been dropped: */
2587
if (thread->t_exception.e_xt_err)
2588
xt_log_and_clear_exception(thread);
2589
xt_logf(XT_NT_WARNING, "Checkpoint skipping table (ID) %lu: table was not found\n", (u_long) frt_table->tab_id);
2590
xt_checkpoint_set_flush_state(frt_table->tab_db, frt_table->tab_id, XT_CPT_STATE_DONE_ALL);
2594
if (ot->ot_table != frt_table) {
2595
/* Can happen if the table has been renamed: */
2596
if (thread->t_exception.e_xt_err)
2597
xt_log_and_clear_exception(thread);
2598
xt_logf(XT_NT_WARNING, "Checkpoint skipping table (ID) %lu: table has been renamed\n", (u_long) frt_table->tab_id);
2599
xt_checkpoint_set_flush_state(frt_table->tab_db, frt_table->tab_id, XT_CPT_STATE_DONE_ALL);
2603
if (!xt_flush_record_row(ot, NULL, FALSE)) {
2604
xt_db_return_table_to_pool_ns(ot);
2609
xt_db_return_table_to_pool_ns(ot);
2613
// Add a heap reference to frt_table via xt_heap_reference_ns().
// Counterpart of tk_release().
void XTFlushRecRowTask::tk_reference()
2615
xt_heap_reference_ns(frt_table);
2618
// Drop a heap reference to frt_table via xt_heap_release_ns().
// Counterpart of tk_reference().
void XTFlushRecRowTask::tk_release()
2620
xt_heap_release_ns(frt_table);
2624
* Start a flush of this file in background.
2626
// Schedule the record/row flush task of the table (tab->tab_rec_flush_task)
// through xt_run_async_task() and return its result.
//
// The running state of the task is checked first.
// NOTE(review): the statement guarded by tk_is_running() is not visible in
// this excerpt - presumably an early return so that a running task is not
// scheduled again; confirm against the full source.
xtPublic xtBool xt_async_flush_record_row(XTTableHPtr tab, xtBool notify_complete, XTThreadPtr thread)
2628
if (tab->tab_rec_flush_task->tk_is_running())
2632
return xt_run_async_task(tab->tab_rec_flush_task, notify_complete, FALSE, thread, tab->tab_db);
2635
// Flush the record and row files of an open table.
//
// The work is bracketed by xt_begin_checkpoint() / xt_end_checkpoint() and
// done while holding tab->tab_rec_flush_lock. The checkpoint flush state of
// the table moves from XT_CPT_STATE_START_REC_ROW to
// XT_CPT_STATE_DONE_REC_ROW, or to XT_CPT_STATE_STOP_REC_ROW next to the
// FAILED trace output.
//
// Ordering that matters: the table header is captured with
// xt_tab_store_header() before any flushing, and written with
// tab_write_header() only after the record and row files were flushed.
//
// bytes_flushed: incremented by the number of bytes that were pending.
// NOTE(review): callers in this file pass NULL; the guard around the
// increment is not visible in this excerpt - confirm it exists.
// have_table_lock: passed through to xt_begin_checkpoint().
xtPublic xtBool xt_flush_record_row(XTOpenTablePtr ot, off_t *bytes_flushed, xtBool have_table_lock)
2637
XTTableHeadDRec rec_head;
2638
XTTableHPtr tab = ot->ot_table;
2640
#ifdef TRACE_FLUSH_TABLE
2644
if (!xt_begin_checkpoint(tab->tab_db, have_table_lock, ot->ot_thread))
2647
xt_lock_mutex_ns(&tab->tab_rec_flush_lock);
2648
#ifdef XT_SORT_REC_WRITES
2649
if (!xt_xres_delay_flush(ot, TRUE))
2652
xt_checkpoint_set_flush_state(tab->tab_db, tab->tab_id, XT_CPT_STATE_START_REC_ROW);
2654
ASSERT_NS(ot->ot_thread == xt_get_self());
2655
/* Make sure that the table recovery point, in
2656
* particular the operation ID is recorded
2657
* before all other flush activity!
2659
* This is because only operations after the
2660
* recovery point in the header are applied
2661
* to the table on recovery.
2663
* So the operation ID is recorded before the
2664
* flush activity, and written after all is done.
2666
xt_tab_store_header(ot, &rec_head);
2668
/* Write the table header: */
2669
if (tab->tab_flush_pending) {
2670
tab->tab_flush_pending = FALSE;
2672
#ifdef TRACE_FLUSH_TABLE
2674
printf("FLUSH TABLE bytes=%lu %s\n", (u_long) tab->tab_bytes_to_flush, tab->tab_name->ps_path);
2677
// Want to see how much was to be flushed in the debugger:
2678
to_flush = tab->tab_bytes_to_flush;
2679
tab->tab_bytes_to_flush = 0;
2681
*bytes_flushed += to_flush;
2683
#ifdef XT_REC_FLUSH_THRESHOLD
2686
/* Reset the writer's byte level: */
2687
if ((writer = ot->ot_table->tab_db->db_wr_thread))
2688
tab->tab_rec_wr_last_flush = writer->st_statistics.st_rec.ts_write;
2691
/* Flush the table data: */
2692
// Temporary tables are not flushed to disk.
if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags)) {
2693
if (!XT_FLUSH_RR_FILE(ot->ot_rec_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread) ||
2694
!XT_FLUSH_RR_FILE(ot->ot_row_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread)) {
2695
// Re-arm the pending flag so that a later flush tries again.
tab->tab_flush_pending = TRUE;
2700
/* The header includes the operation number which
2701
* must be written AFTER all other data,
2702
* because operations will not be applied again.
2704
if (!tab_write_header(ot, &rec_head)) {
2705
tab->tab_flush_pending = TRUE;
2710
/* Flush the auto-increment: */
2711
if (xt_db_auto_increment_mode == 1) {
2712
if (tab->tab_auto_inc != tab->tab_dic.dic_min_auto_inc) {
2713
tab->tab_dic.dic_min_auto_inc = tab->tab_auto_inc;
2714
if (!xt_tab_write_min_auto_inc(ot))
2719
/* Mark this table as record/row flushed: */
2720
xt_checkpoint_set_flush_state(tab->tab_db, tab->tab_id, XT_CPT_STATE_DONE_REC_ROW);
2722
#ifdef TRACE_FLUSH_TABLE
2724
printf("flush table (%d) %s DONE\n", (int) (time(NULL) - tnow), tab->tab_name->ps_path);
2729
xt_unlock_mutex_ns(&tab->tab_rec_flush_lock);
2731
if (!xt_end_checkpoint(tab->tab_db, ot->ot_thread, NULL))
2736
xt_checkpoint_set_flush_state(tab->tab_db, tab->tab_id, XT_CPT_STATE_STOP_REC_ROW);
2738
#ifdef TRACE_FLUSH_TABLE
2740
printf("flush table (%d) %s FAILED\n", (int) (time(NULL) - tnow), tab->tab_name->ps_path);
2745
xt_unlock_mutex_ns(&tab->tab_rec_flush_lock);
2749
// Flush a whole table: the record and row data with xt_flush_record_row(),
// followed by the indices with xt_flush_indices().
//
// Both calls are made with have_table_lock FALSE and without a byte counter.
// NOTE(review): the statements executed when either call fails are not
// visible in this excerpt - presumably the error is thrown to self; confirm.
xtPublic void xt_flush_table(XTThreadPtr self, XTOpenTablePtr ot)
2751
/* GOTCHA {FLUSH-BUG}: This bug was difficult to find.
2752
* It occured on Windows in the multi_update
2755
* What happens is the checkpointer starts to
2756
* flush the table, and gets to the
2757
* XT_FLUSH_RR_FILE part.
2759
* Then a rename occurs, and the user thread
2760
* flushes the table, and goes through and
2761
* writes the table header, with the most
2762
* recent table operation (the last operation
2765
* The checkpointer the completes and
2766
* also writes the header, but with old
2767
* values (as read in xt_tab_store_header()).
2769
* The then user thread continues, and
2770
* reopens the table after rename.
2771
* On reopen, it reads the old value from the header,
2772
* and sets the current operation number.
2774
* Now there is a problem in the table cache,
2775
* because some cache pages have operation numbers
2776
* that are greater than current operation
2779
* This later lead to the free-er hanging while
2780
* it waited for an operation to be
2781
* written to the disk that never would be.
2782
* This is because a page can only be freed when
2783
* the head operation number has passed the
2784
* page operation number.
2786
* Which indicates that the page has been written
2790
* As a result I now use mutex so that only one
2791
* thread can flush at a time.
2794
if (!xt_flush_record_row(ot, NULL, FALSE))
2797
/* This was before the table data flush,
2798
* (after xt_tab_store_header() above,
2799
* but I don't think it makes any difference.
2800
* Because in the checkpointer it was at this
2803
if (!xt_flush_indices(ot, NULL, FALSE, NULL))
2808
// Allocate and set up an open-table structure (XTOpenTableRec) for tab.
//
// Visible steps: allocate the structure and zero it up to (not including)
// the ot_ind_wbuf member, take a heap reference on tab, open the row,
// record and index files of the table, allocate the row read and write
// buffers with dic_rec_size bytes each, and cache dic_rec_fixed and
// dic_rec_size in the open table.
//
// The index file is opened with XT_FS_MISSING_OK, and only the table, row
// file and record file are checked afterwards. tab_close_table(ot) is the
// last visible call.
// NOTE(review): it presumably belongs to the failure path, but the control
// flow and return statements are not visible in this excerpt.
static XTOpenTablePtr tab_open_table(XTTableHPtr tab)
2810
volatile XTOpenTablePtr ot;
2813
if (!(ot = (XTOpenTablePtr) xt_malloc_ns(sizeof(XTOpenTableRec))))
2815
// Members from ot_ind_wbuf onwards are deliberately left unzeroed here.
memset(ot, 0, offsetof(XTOpenTableRec, ot_ind_wbuf));
2817
ot->ot_seq_page = NULL;
2818
ot->ot_seq_data = NULL;
2820
self = xt_get_self();
2822
xt_heap_reference(self, tab);
2824
ot->ot_row_file = xt_open_file(self, ot->ot_table->tab_row_file->fil_path, xt_row_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_DEFAULT, xt_db_row_file_grow_size);
2825
ot->ot_rec_file = xt_open_file(self, ot->ot_table->tab_rec_file->fil_path, xt_rec_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_DEFAULT, xt_db_data_file_grow_size);
2826
#ifdef XT_USE_DIRECT_IO_ON_INDEX
2827
ot->ot_ind_file = xt_open_file(self, ot->ot_table->tab_ind_file->fil_path, xt_ind_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_MISSING_OK | XT_FS_DIRECT_IO, XT_INDEX_PAGE_SIZE*256);
2829
ot->ot_ind_file = xt_open_file(self, ot->ot_table->tab_ind_file->fil_path, xt_ind_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_MISSING_OK, XT_INDEX_PAGE_SIZE*256);
2837
if (!ot->ot_table || !ot->ot_row_file || !ot->ot_rec_file)
2840
if (!(ot->ot_row_rbuffer = (xtWord1 *) xt_malloc_ns(ot->ot_table->tab_dic.dic_rec_size)))
2842
ot->ot_row_rbuf_size = ot->ot_table->tab_dic.dic_rec_size;
2843
if (!(ot->ot_row_wbuffer = (xtWord1 *) xt_malloc_ns(ot->ot_table->tab_dic.dic_rec_size)))
2845
ot->ot_row_wbuf_size = ot->ot_table->tab_dic.dic_rec_size;
2847
/* Cache this stuff to speed access a bit: */
2848
ot->ot_rec_fixed = ot->ot_table->tab_dic.dic_rec_fixed;
2849
ot->ot_rec_size = ot->ot_table->tab_dic.dic_rec_size;
2854
tab_close_table(ot);
2858
// Public entry point for opening a table: forwards to tab_open_table()
// and returns its result unchanged.
xtPublic XTOpenTablePtr xt_open_table(XTTableHPtr tab)
2860
return tab_open_table(tab);
2863
// Close an open table, flushing its record/row data and its indices first.
//
// A failing flush is logged and the exception cleared, so the close itself
// still goes ahead with tab_close_table(ot). have_table_lock is passed
// through to both flush calls.
// NOTE(review): the test of the flush parameter is not visible in this
// excerpt - presumably it guards the two flush calls; confirm.
xtPublic void xt_close_table(XTOpenTablePtr ot, xtBool flush, xtBool have_table_lock)
2866
if (!xt_flush_record_row(ot, NULL, have_table_lock))
2867
xt_log_and_clear_exception_ns();
2869
if (!xt_flush_indices(ot, NULL, have_table_lock, NULL))
2870
xt_log_and_clear_exception_ns();
2872
tab_close_table(ot);
2875
// Look up a table by ID in db and hand back a handle through r_tab.
//
// The lookup runs under the db_tables hash table lock and searches
// db->db_table_by_id. If the entry has no table handle yet, the path is
// built as table path + directory separator + table name and a new handle
// is created with tab_new_handle(); an existing handle gets an additional
// heap reference instead.
//
// The result code r is taken from tab_new_handle() or set to
// XT_TAB_NOT_FOUND.
// NOTE(review): the conditions selecting those branches, the guard in front
// of the XT_ERR_NO_DATABASE_IN_USE throw and the final return are not
// visible in this excerpt.
static int tab_use_table_by_id(XTThreadPtr self, XTTableHPtr *r_tab, XTDatabaseHPtr db, xtTableID tab_id)
2877
XTTableEntryPtr te_ptr;
2878
XTTableHPtr tab = NULL;
2880
char path[PATH_MAX];
2883
xt_throw_xterr(XT_CONTEXT, XT_ERR_NO_DATABASE_IN_USE);
2884
xt_ht_lock(self, db->db_tables);
2885
pushr_(xt_ht_unlock, db->db_tables);
2887
te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id);
2889
if (!(tab = te_ptr->te_table)) {
2890
/* Open the table: */
2891
xt_strcpy(PATH_MAX, path, te_ptr->te_tab_path->tp_path);
2892
xt_add_dir_char(PATH_MAX, path);
2893
xt_strcat(PATH_MAX, path, te_ptr->te_tab_name);
2894
r = tab_new_handle(self, &tab, db, tab_id, (XTPathStrPtr) path, TRUE, NULL);
2898
r = XT_TAB_NOT_FOUND;
2901
xt_heap_reference(self, tab);
2904
freer_(); // xt_ht_unlock(db->db_tables)
2908
// Get a table handle by table ID, built on tab_use_table_by_id().
//
// When the helper reports anything other than XT_TAB_OK, the visible cases
// throw XT_ERR_NO_DICTIONARY for XT_TAB_NO_DICTIONARY and
// XT_ERR_TABLE_LOCKED for XT_TAB_POOL_CLOSED, both carrying the table ID.
// NOTE(review): the use of the result out-parameter, the handling of
// XT_TAB_NOT_FOUND and the return statement are not visible in this excerpt.
xtPublic XTTableHPtr xt_use_table_by_id(XTThreadPtr self, XTDatabaseHPtr db, xtTableID tab_id, int *result)
2913
r = tab_use_table_by_id(self, &tab, db, tab_id);
2915
if (r != XT_TAB_OK) {
2922
case XT_TAB_NOT_FOUND:
2924
case XT_TAB_NO_DICTIONARY:
2925
xt_throw_ulxterr(XT_CONTEXT, XT_ERR_NO_DICTIONARY, (u_long) tab_id);
2926
case XT_TAB_POOL_CLOSED:
2927
xt_throw_ulxterr(XT_CONTEXT, XT_ERR_TABLE_LOCKED, (u_long) tab_id);
2936
// Variant of xt_use_table_by_id() for callers that have no thread pointer
// at hand: the current thread is obtained with xt_get_self() and no result
// out-parameter is supplied (NULL).
// NOTE(review): the exception handling around the call and the return
// statement are not visible in this excerpt.
xtPublic XTTableHPtr xt_use_table_by_id_ns(XTDatabaseHPtr db, xtTableID tab_id)
2939
XTThreadPtr self = xt_get_self();
2942
tab = xt_use_table_by_id(self, db, tab_id, NULL);
2952
/* The fixed part of the record is already in the row buffer.
2953
* This function loads the extended part, expanding the row
2954
* buffer if necessary.
2956
// Load the extended part of record load_rec_id and unpack the row into
// buffer with myxt_load_row(), requesting cols_req columns.
//
// How the read is laid out:
//  - the log reference and the extended data size come from the record
//    header already present in ot->ot_row_rbuffer;
//  - ot_row_rbuffer is grown when ot_rec_size + log_size does not fit;
//  - the extended record is read so that its header lands on the last
//    offsetof(XTactExtRecEntryDRec, er_data) bytes of the fixed part, which
//    is why those bytes are copied to save_buffer first and copied back
//    after the header was checked;
//  - memory tables read through xt_tab_read_ext_record(), other tables
//    through the data log buffer of the thread.
//
// The extended record is accepted only if its size, table ID and record ID
// match; otherwise XT_ERR_BAD_EXT_RECORD is registered.
// NOTE(review): the retry logic that re-reads the record header under
// db_co_ext_lock is only partly visible in this excerpt.
xtPublic xtBool xt_tab_load_ext_data(XTOpenTablePtr ot, xtRecordID load_rec_id, xtWord1 *buffer, u_int cols_req)
2960
xtLogOffset log_offset;
2961
xtWord1 save_buffer[offsetof(XTactExtRecEntryDRec, er_data)];
2962
xtBool retried = FALSE;
2963
XTactExtRecEntryDPtr ext_data_ptr;
2965
xtTableID curr_tab_id;
2966
xtRecordID curr_rec_id;
2968
log_size = XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->re_log_dat_siz_4);
2969
XT_GET_LOG_REF(log_id, log_offset, (XTTabRecExtDPtr) ot->ot_row_rbuffer);
2971
if (ot->ot_rec_size + log_size > ot->ot_row_rbuf_size) {
2972
if (!xt_realloc_ns((void **) &ot->ot_row_rbuffer, ot->ot_rec_size + log_size))
2974
ot->ot_row_rbuf_size = ot->ot_rec_size + log_size;
2977
/* Read the extended part first: */
2978
ext_data_ptr = (XTactExtRecEntryDPtr) (ot->ot_row_rbuffer + ot->ot_rec_size - offsetof(XTactExtRecEntryDRec, er_data));
2980
/* Save the data which the header will overwrite: */
2981
memcpy(save_buffer, ext_data_ptr, offsetof(XTactExtRecEntryDRec, er_data));
2984
if (ot->ot_table->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
2985
xt_tab_read_ext_record(ot->ot_table, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + log_size, (xtWord1 *) ext_data_ptr);
2987
if (!ot->ot_thread->st_dlog_buf.dlb_read_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + log_size, (xtWord1 *) ext_data_ptr, ot->ot_thread))
2991
log_size2 = XT_GET_DISK_4(ext_data_ptr->er_data_size_4);
2992
curr_tab_id = XT_GET_DISK_4(ext_data_ptr->er_tab_id_4);
2993
curr_rec_id = XT_GET_DISK_4(ext_data_ptr->er_rec_id_4);
2995
if (log_size2 != log_size || curr_tab_id != ot->ot_table->tab_id || curr_rec_id != load_rec_id) {
2996
/* [(3)] This can happen in the following circumstances:
2997
* - A new record is created, but the data log is not
2999
* - The server quits.
3000
* - On restart the transaction is rolled back, but the data record
3001
* was not written, so later a new record could be written at this
3003
* - Later the sweeper tries to cleanup this record, and finds
3004
* that a different record has been written at this position.
3006
* NOTE: Index entries can only be written to disk for records
3007
* that have been committed to the disk, because uncommitted
3008
* records may not exist in order to remove the index entry
3011
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_BAD_EXT_RECORD);
3015
/* Restore the saved area: */
3016
memcpy(ext_data_ptr, save_buffer, offsetof(XTactExtRecEntryDRec, er_data));
3019
xt_unlock_mutex_ns(&ot->ot_table->tab_db->db_co_ext_lock);
3020
return myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, buffer, cols_req);
3024
/* (1) It may be that reading the log fails because the garbage collector
3025
* has moved the record since we determined the location.
3026
* We handle this here, by re-reading the data the garbage collector
3027
* would have updated.
3029
* (2) It may also happen that a new record is just being updated or
3030
* inserted. It is possible that the handle part of the record
3031
* has been written, but not yet the overflow.
3032
* This means that repeating the read attempt could work.
3034
* (3) The extended data has been written by another handler and not yet
3035
* flushed. This should not happen because on committed extended
3036
* records are read, and all data should be flushed before
3039
* NOTE: (2) above is not a problem when versioning is working
3040
* correctly. In this case, we should never try to read the extended
3041
* part of an uncommitted record (belonging to some other thread/
3044
XTTabRecExtDRec rec_buf;
3046
xt_lock_mutex_ns(&ot->ot_table->tab_db->db_co_ext_lock);
3049
if (!xt_tab_get_rec_data(ot, load_rec_id, XT_REC_EXT_HEADER_SIZE, (xtWord1 *) &rec_buf))
3052
XT_GET_LOG_REF(log_id, log_offset, &rec_buf);
3058
xt_unlock_mutex_ns(&ot->ot_table->tab_db->db_co_ext_lock);
3062
// Write size bytes from buffer to record rec_id, starting at offset 0 of
// the record, through tab->tab_recs.xt_tc_write() on the record file.
// op_seq is passed on to xt_tc_write(); the result of that call is returned.
// No transaction log entry is written by this function.
xtPublic xtBool xt_tab_put_rec_data(XTOpenTablePtr ot, xtRecordID rec_id, size_t size, xtWord1 *buffer, xtOpSeqNo *op_seq)
3064
register XTTableHPtr tab = ot->ot_table;
3068
return tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, size, buffer, op_seq, TRUE, ot->ot_thread);
3071
// Write record data and log the change with xt_xlog_modify_table().
//
// status selects the kind of write:
//  - XT_LOG_ENT_REC_MOVED: only the part of the record starting at
//    offsetof(XTTabRecExtDRec, re_log_id_2) is written;
//  - XT_LOG_ENT_REC_CLEANED_1: asserts, this status is no longer used;
//  - otherwise the data is written from offset 0 of the record.
//
// The operation sequence number is a local variable filled in by the
// write and then passed to the log call. Compare
// xt_tab_put_log_rec_data(), where the caller supplies op_seq.
xtPublic xtBool xt_tab_put_log_op_rec_data(XTOpenTablePtr ot, u_int status, xtRecordID free_rec_id, xtRecordID rec_id, size_t size, xtWord1 *buffer)
3073
register XTTableHPtr tab = ot->ot_table;
3078
if (status == XT_LOG_ENT_REC_MOVED) {
3079
if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, offsetof(XTTabRecExtDRec, re_log_id_2), size, buffer, &op_seq, TRUE, ot->ot_thread))
3083
else if (status == XT_LOG_ENT_REC_CLEANED_1) {
3084
ASSERT_NS(0); // shouldn't be used anymore
3088
if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, size, buffer, &op_seq, TRUE, ot->ot_thread))
3092
return xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, free_rec_id, rec_id, size, buffer, ot->ot_thread);
3095
// Write record data and log the change with xt_xlog_modify_table(), using
// the operation sequence number storage supplied by the caller in op_seq.
//
// For XT_LOG_ENT_REC_MOVED only the part of the record starting at
// offsetof(XTTabRecExtDRec, re_log_id_2) is written; for the other visible
// case the data is written from offset 0 of the record. The value stored in
// *op_seq by the write is passed to the log call.
xtPublic xtBool xt_tab_put_log_rec_data(XTOpenTablePtr ot, u_int status, xtRecordID free_rec_id, xtRecordID rec_id, size_t size, xtWord1 *buffer, xtOpSeqNo *op_seq)
3097
register XTTableHPtr tab = ot->ot_table;
3101
if (status == XT_LOG_ENT_REC_MOVED) {
3102
if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, offsetof(XTTabRecExtDRec, re_log_id_2), size, buffer, op_seq, TRUE, ot->ot_thread))
3106
if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, size, buffer, op_seq, TRUE, ot->ot_thread))
3110
return xt_xlog_modify_table(tab->tab_id, status, *op_seq, 0, free_rec_id, rec_id, size, buffer, ot->ot_thread);
3113
// Read size bytes of record rec_id into buffer through
// tab->tab_recs.xt_tc_read() on the record file of the open table, and
// return the result of that call.
xtPublic xtBool xt_tab_get_rec_data(XTOpenTablePtr ot, xtRecordID rec_id, size_t size, xtWord1 *buffer)
3115
register XTTableHPtr tab = ot->ot_table;
3119
return tab->tab_recs.xt_tc_read(ot->ot_rec_file, rec_id, (size_t) size, buffer, ot->ot_thread);
3123
* Note: this function grants locks even to transactions that
3124
* are not specifically waiting for this transaction.
3125
* This is required, because all threads waiting for
3126
* a lock should be considered "equal". In other words,
3127
* they should not have to wait for the "right" transaction
3128
* before they get the lock, or it will turn into a
3129
* race to wait for the correct transaction.
3131
* A transaction T1 can end up waiting for the wrong transaction
3132
* T2, because T2 has released the lock, and given it to T3.
3133
* Of course, T1 will wake up soon and realize this, but
3134
* it is a matter of timing.
3136
* The main point is that T2 has released the lock because
3137
* it has ended (see {RELEASING-LOCKS} for more details)
3138
* and therefore, there is no danger of it claiming the
3139
* lock again, which can lead to a deadlock if T1 is
3140
* given the lock instead of T3 in the example above.
3141
* Then, if T2 tries to regain the lock before T1
3142
* realizes that it has the lock.
3144
//static xtBool tab_get_lock_after_wait(XTThreadPtr thread, XTLockWaitPtr lw)
3146
// register XTTableHPtr tab = lw->lw_ot->ot_table;
3149
* I don't believe this lock is required. If it is, please explain why!!
3150
* XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[gl->lw_row_id % XT_ROW_RWLOCKS], thread);
3152
* With the old row lock implementation a XT_TAB_ROW_WRITE_LOCK was required because
3153
* the row locking did not have its own locks.
3154
* The new list locking has its own locks. I was using XT_TAB_ROW_READ_LOCK,
3155
* but i don't think this is required.
3157
// return tab->tab_locks.xt_set_temp_lock(lw->lw_ot, lw, &lw->lw_thread->st_lock_list);
3161
* NOTE: Previously this function did not gain the row lock.
3162
* If this change is a problem, please document why!
3163
* The previous implementation did wait until no lock was on the
3166
* I am thinking that it is simply a good idea to grab the lock,
3167
* instead of waiting for no lock, before the retry. But it could
3168
* result in locking more than required!
3170
// Wait for the transaction xn_id that updated row row_id.
//
// A temporary lock on the row is requested first with xt_set_temp_lock().
// If the row is currently locked (lw_curr_lock is not XT_NO_LOCK) the
// thread waits for the lock and the transaction together, otherwise it
// waits for the transaction alone. The outcome of xt_xn_wait_for_xact() is
// kept in ok.
// NOTE(review): the return statements are not visible in this excerpt.
static xtBool tab_wait_for_update(register XTOpenTablePtr ot, xtRowID row_id, xtXactID xn_id, XTThreadPtr thread)
3176
xw.xw_xn_id = xn_id;
3178
lw.lw_thread = thread;
3180
lw.lw_row_id = row_id;
3181
lw.lw_row_updated = FALSE;
3183
/* First try to get the lock: */
3184
if (!ot->ot_table->tab_locks.xt_set_temp_lock(ot, &lw, &thread->st_lock_list))
3186
if (lw.lw_curr_lock != XT_NO_LOCK)
3187
/* Wait for the lock, then the transaction: */
3188
ok = xt_xn_wait_for_xact(thread, &xw, &lw);
3190
/* Just wait for the transaction: */
3191
ok = xt_xn_wait_for_xact(thread, &xw, NULL);
3193
#ifdef DEBUG_LOCK_QUEUE
3194
ot->ot_table->tab_locks.rl_check(&lw);
3200
* XT_OLD - The record is old. No longer visible because there is
3201
* newer committed record before it in the record list.
3202
* This is a special case of FALSE (the record is not visible).
3203
* (see {WAIT-FOR} for details).
3204
* It is significant because if we find too many of these when
3205
* searching for records, then we have reason to believe the
3206
* sweeper is far behind. This can happen in a test like this:
3207
* runTest(INCREMENT_TEST, 2, INCREMENT_TEST_UPDATE_COUNT);
3208
* What happens is T1 detects an updated row by T2,
3209
* but T2 has not committed yet.
3210
* It waits for T2. T2 commits and updates again before T1
3213
* Of course if we got a lock on the row when T2 quits, then
3214
* this would not happen!
3218
* Is a record visible?
3219
* Returns TRUE, FALSE, XT_ERR.
3221
* TRUE - The record is visible.
3222
* FALSE - The record is not visible.
3223
* XT_ERR - An exception (error) occurred.
3224
* XT_NEW - The most recent variation of this row has been returned
3225
* and is to be used instead of the input!
3226
* XT_REREAD - Re-read the record, and try again.
3228
* Basically, a record is visible if it was committed on or before
3229
* the transactions "visible time" (st_visible_time), and there
3230
* are no other visible records before this record in the
3231
* variation chain for the record.
3233
* This holds in general, but you don't always get to see the
3234
* visible record (as defined in this sense).
3236
* On any kind of update (SELECT FOR UPDATE, UPDATE or DELETE), you
3237
* get to see the most recent variation of the row!
3239
* So on update, this function will wait if necessary for a recent
3240
* update to be committed.
3242
* So an update is a kind of "committed read" with a wait for
3243
* uncommitted records.
3246
* - INSERTS may not be seen by the update read, depending on when
3248
* - Records may be returned in non-index order.
3249
* - New records returned must be checked again by an index scan
3250
* to make sure they conform to the condition!
3252
* CREATE TABLE test_tab (ID int primary key, Value int, Name varchar(20),
3253
* index(Value, Name)) ENGINE=pbxt;
3254
* INSERT test_tab values(4, 2, 'D');
3255
* INSERT test_tab values(5, 2, 'E');
3256
* INSERT test_tab values(6, 2, 'F');
3257
* INSERT test_tab values(7, 2, 'G');
3261
* select * from test_tab where id = 6 for update;
3264
* select * from test_tab where value = 2 order by value, name for update;
3266
* update test_tab set Name = 'A' where id = 7;
3269
* Result order D, E, F, A.
3271
* But Jim does it like this, so it should be OK.
3273
// Decide whether the record variation whose header is rec_head (the record
// at ot->ot_curr_rec_id) is visible to the transaction of ot->ot_thread.
// Records that are not clean (not yet swept) are classified with
// xt_xn_status(). Afterwards the variation chain of the row is walked from
// the row head until the current record is reached.
// For update reads (ot->ot_for_update) a variation found earlier in the
// chain may be substituted: its ID is stored in *new_rec_id. Uncommitted
// changes by other transactions are waited for via tab_wait_for_update()
// and the check is retried.
// NOTE(review): many lines (declarations, labels, returns, braces, #endif)
// are missing from this listing, so the exact control flow cannot be
// confirmed from here.
// NOTE(review): invalid_rec is declared without an initializer below and is
// compared during the chain walk; confirm it is assigned first in the full
// file.
static int tab_visible(register XTOpenTablePtr ot, XTTabRecHeadDPtr rec_head, xtRecordID *new_rec_id)
3275
XTThreadPtr thread = ot->ot_thread;
3277
XTTabRecHeadDRec var_head;
3279
xtRecordID var_rec_id;
3280
register XTTableHPtr tab;
3281
xtBool wait = FALSE;
3282
xtXactID wait_xn_id = 0;
3283
#ifdef TRACE_VARIATIONS
3289
xtRecordID invalid_rec;
3292
/* It can be that between the time that I read the index,
3293
* and the time that I try to access the
3294
* record, that the record is removed by
3297
if (XT_REC_NOT_VALID(rec_head->tr_rec_type_1))
3300
row_id = XT_GET_DISK_4(rec_head->tr_row_id_4);
3302
/* This can happen if the row has been removed, and
3305
if (ot->ot_curr_row_id && row_id != ot->ot_curr_row_id)
3308
#ifdef TRACE_VARIATIONS
3309
len = sprintf(t_buf, "row=%d rec=%d ", (int) row_id, (int) ot->ot_curr_rec_id);
3311
if (!(rec_clean = XT_REC_IS_CLEAN(rec_head->tr_rec_type_1))) {
3312
/* The record is not clean, which means it has not been swept.
3313
* So we have to check if it is visible.
3315
xn_id = XT_GET_DISK_4(rec_head->tr_xact_id_4);
3316
switch (xt_xn_status(ot, xn_id, ot->ot_curr_rec_id)) {
3319
case XT_XN_NOT_VISIBLE:
3320
if (ot->ot_for_update) {
3321
/* It is visible, only if it is an insert,
3322
* which means it has no previous variation.
3323
* Note, if an insert is updated, the record
3324
* should be overwritten (TODO - check this).
3326
var_rec_id = XT_GET_DISK_4(rec_head->tr_prev_rec_id_4);
3329
#ifdef TRACE_VARIATIONS
3331
len += sprintf(t_buf+len, "OTHER COMMIT (OVERWRITTEN) T%d\n", (int) xn_id);
3332
xt_ttracef(thread, "%s", t_buf);
3335
#ifdef TRACE_VARIATIONS
3338
len += sprintf(t_buf+len, "OTHER COMMIT T%d\n", (int) xn_id);
3339
xt_ttracef(thread, "%s", t_buf);
3343
* The record is not visible, although it has been committed.
3344
* Clean the transaction ASAP.
3346
ot->ot_table->tab_db->db_sw_faster |= XT_SW_DIRTY_RECORD_FOUND;
3350
* Reading an aborted record, this transaction
3351
* must be cleaned up ASAP!
3353
ot->ot_table->tab_db->db_sw_faster |= XT_SW_DIRTY_RECORD_FOUND;
3354
#ifdef TRACE_VARIATIONS
3356
len += sprintf(t_buf+len, "ABORTED T%d\n", (int) xn_id);
3357
xt_ttracef(thread, "%s", t_buf);
3360
case XT_XN_MY_UPDATE:
3361
/* This is a record written by this transaction. */
3362
if (thread->st_is_update) {
3363
/* Check that it was not written by the current update statement: */
3364
if (XT_STAT_ID_MASK(ot->ot_update_id) == rec_head->tr_stat_id_1) {
3365
#ifdef TRACE_VARIATIONS
3367
len += sprintf(t_buf+len, "MY UPDATE IN THIS STATEMENT T%d\n", (int) xn_id);
3368
xt_ttracef(thread, "%s", t_buf);
3373
ot->ot_curr_row_id = row_id;
3374
ot->ot_curr_updated = TRUE;
3375
if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
3377
/* It is visible if it is at the front of the list.
3378
* An update can end up not being at the front of the list
3379
* if it is deleted afterwards!
3381
#ifdef TRACE_VARIATIONS
3383
if (var_rec_id == ot->ot_curr_rec_id)
3384
len += sprintf(t_buf+len, "MY UPDATE T%d\n", (int) xn_id);
3386
len += sprintf(t_buf+len, "MY UPDATE (OVERWRITTEN) T%d\n", (int) xn_id);
3388
xt_ttracef(thread, "%s", t_buf);
3390
return var_rec_id == ot->ot_curr_rec_id;
3391
case XT_XN_OTHER_UPDATE:
3392
if (ot->ot_for_update) {
3393
/* If this is an insert, we are interested!
3394
* Updated values are handled below. This is because
3395
* the changed (new) records returned below are always
3396
* followed (in the version chain) by the record
3397
* we would have returned (if nothing had changed).
3399
* As a result, we only return records here which have
3402
var_rec_id = XT_GET_DISK_4(rec_head->tr_prev_rec_id_4);
3404
#ifdef TRACE_VARIATIONS
3406
len += sprintf(t_buf+len, "OTHER INSERT (WAIT FOR) T%d\n", (int) xn_id);
3407
xt_ttracef(thread, "%s", t_buf);
3409
if (!tab_wait_for_update(ot, row_id, xn_id, thread))
3411
if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &var_head))
3413
rec_head = &var_head;
3417
#ifdef TRACE_VARIATIONS
3419
len += sprintf(t_buf+len, "OTHER UPDATE T%d\n", (int) xn_id);
3420
xt_ttracef(thread, "%s", t_buf);
3424
#ifdef TRACE_VARIATIONS
3426
len += sprintf(t_buf+len, "REREAD?! T%d\n", (int) xn_id);
3427
xt_ttracef(thread, "%s", t_buf);
3433
/* Follow the variation chain until we come to this record.
3434
* If it is not the first visible variation then
3435
* it is not visible at all. If it is not found on the
3436
* variation chain, it is also not visible.
3442
#ifdef XT_USE_LIST_BASED_ROW_LOCKS
3443
/* The list based row locks use their own locks, so
3444
* it is not necessary to get a write lock here.
3446
XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3448
if (ot->ot_for_update)
3449
XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3451
XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3456
if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
3458
#ifdef TRACE_VARIATIONS
3459
len += sprintf(t_buf+len, "ROW=%d", (int) row_id);
3461
// Walk the chain from the row head until the current record is reached;
// var_rec_id is advanced through tr_prev_rec_id_4 at the end of each pass.
while (var_rec_id != ot->ot_curr_rec_id) {
3463
#ifdef TRACE_VARIATIONS
3464
xt_ttracef(thread, "row=%d rec=%d NOT VISI not found in list\n", (int) row_id, (int) ot->ot_curr_rec_id);
3468
if (!xt_tab_get_rec_data(ot, var_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &var_head))
3470
#ifdef TRACE_VARIATIONS
3472
len += sprintf(t_buf+len, " -> %d(%d)", (int) var_rec_id, (int) var_head.tr_rec_type_1);
3474
/* All clean records are visible, by all transactions: */
3475
if (XT_REC_IS_CLEAN(var_head.tr_rec_type_1)) {
3476
#ifdef TRACE_VARIATIONS
3477
xt_ttracef(thread, "row=%d rec=%d NOT VISI clean rec found\n", (int) row_id, (int) ot->ot_curr_rec_id);
3481
if (XT_REC_IS_FREE(var_head.tr_rec_type_1)) {
3482
#ifdef TRACE_VARIATIONS
3483
xt_ttracef(thread, "row=%d rec=%d NOT VISI free rec found?!\n", (int) row_id, (int) ot->ot_curr_rec_id);
3486
* After an analysis we came to conclusion that this situation is
3487
* possible and valid. It can happen if index scan and row deletion
3490
* Client Thread Sweeper
3491
* ------------- -------
3492
* 1. start index scan, lock the index file.
3493
* 2. start row deletion, wait for index lock
3494
* 3. unlock the index file, start search for
3495
* the valid version of the record
3496
* 4. delete the row, mark record as freed,
3497
* but not yet cleaned by sweeper
3498
* 5. observe the record being freed
3500
* after these steps we can get here, if the record was marked as free after
3501
* the tab_visible was entered by the scanning thread.
3504
if (invalid_rec != var_rec_id) {
3505
/* This was "var_rec_id = invalid_rec", caused an infinite loop (bug #310184!) */
3506
invalid_rec = var_rec_id;
3509
/* Assume end of list. */
3513
/* This can happen if the row has been removed, and
3516
if (row_id != XT_GET_DISK_4(var_head.tr_row_id_4))
3519
xn_id = XT_GET_DISK_4(var_head.tr_xact_id_4);
3520
/* This variation is visible if committed before this
3521
* transaction started, or updated by this transaction.
3523
* We now know that this is the valid variation for
3524
* this record (for this table) for this transaction!
3525
* This will not change, unless the transaction
3526
* updates the record (again).
3528
* So we can store this information as a hint, if
3529
* we see other variations belonging to this record,
3530
* then we can ignore them immediately!
3532
switch (xt_xn_status(ot, xn_id, var_rec_id)) {
3535
* We have encountered a record that has been overwritten, if the
3536
* record has not been cleaned, then the sweeper is too far
3540
ot->ot_table->tab_db->db_sw_faster |= XT_SW_DIRTY_RECORD_FOUND;
3541
#ifdef TRACE_VARIATIONS
3542
xt_ttracef(thread, "row=%d rec=%d NOT VISI committed rec found\n", (int) row_id, (int) ot->ot_curr_rec_id);
3545
case XT_XN_NOT_VISIBLE:
3546
if (ot->ot_for_update) {
3547
/* Substitute this record for the one we
3550
if (result == TRUE) {
3551
if (XT_REC_IS_DELETE(var_head.tr_rec_type_1))
3554
*new_rec_id = var_rec_id;
3561
/* Ignore the record, it will be removed. */
3563
case XT_XN_MY_UPDATE:
3564
#ifdef TRACE_VARIATIONS
3565
xt_ttracef(thread, "row=%d rec=%d NOT VISI my update found\n", (int) row_id, (int) ot->ot_curr_rec_id);
3568
case XT_XN_OTHER_UPDATE:
3569
/* Wait for this update to commit or abort: */
3574
#ifdef TRACE_VARIATIONS
3576
len += sprintf(t_buf+len, "-T%d", (int) wait_xn_id);
3580
if (invalid_rec != var_rec_id) {
3581
invalid_rec = var_rec_id;
3584
/* Assume end of list. */
3585
#ifdef XT_CRASH_DEBUG
3586
/* Should not happen! */
3591
var_rec_id = XT_GET_DISK_4(var_head.tr_prev_rec_id_4);
3593
#ifdef TRACE_VARIATIONS
3595
sprintf(t_buf+len, " -> %d(%d)\n", (int) var_rec_id, (int) rec_head->tr_rec_type_1);
3597
sprintf(t_buf+len, " ...\n");
3598
//xt_ttracef(thread, "%s", t_buf);
3601
if (ot->ot_for_update) {
3606
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3607
#ifdef TRACE_VARIATIONS
3608
xt_ttracef(thread, "T%d WAIT FOR T%d (will retry)\n", (int) thread->st_xact_data->xd_start_xn_id, (int) wait_xn_id);
3610
if (!tab_wait_for_update(ot, row_id, wait_xn_id, thread))
3615
* Retry in order to try to avoid missing
3616
* any records that we should see in FOR UPDATE
3619
* We also want to take another look at the record
3620
* we just tried to read.
3622
* If it has been updated, then a new record has
3623
* been created. This will be detected when we
3624
* try to read it again, and XT_NEW will be returned.
3626
thread->st_statistics.st_retry_index_scan++;
3630
/* {ROW-LIST-LOCK} */
3631
lw.lw_thread = thread;
3633
lw.lw_row_id = row_id;
3634
lw.lw_row_updated = FALSE;
3635
ok = tab->tab_locks.xt_set_temp_lock(ot, &lw, &thread->st_lock_list);
3636
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3638
#ifdef DEBUG_LOCK_QUEUE
3639
ot->ot_table->tab_locks.rl_check(&lw);
3643
if (lw.lw_curr_lock != XT_NO_LOCK) {
3644
#ifdef TRACE_VARIATIONS
3645
xt_ttracef(thread, "T%d WAIT FOR LOCK(%s) T%d\n", (int) thread->st_xact_data->xd_start_xn_id, (int) lw.lw_curr_lock == XT_TEMP_LOCK ? "temp" : "perm", (int) xn_id);
3647
if (!xt_xn_wait_for_xact(thread, NULL, &lw)) {
3648
#ifdef DEBUG_LOCK_QUEUE
3649
ot->ot_table->tab_locks.rl_check(&lw);
3653
#ifdef DEBUG_LOCK_QUEUE
3654
ot->ot_table->tab_locks.rl_check(&lw);
3656
#ifdef TRACE_VARIATIONS
3657
len = sprintf(t_buf, "(retry): row=%d rec=%d ", (int) row_id, (int) ot->ot_curr_rec_id);
3660
* Reset the result before we go down the list again, to make sure we
3661
* get the latest record!!
3664
thread->st_statistics.st_reread_record_list++;
3667
#ifdef DEBUG_LOCK_QUEUE
3668
ot->ot_table->tab_locks.rl_check(&lw);
3672
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3675
#ifdef TRACE_VARIATIONS
3676
if (result == XT_NEW)
3677
xt_ttracef(thread, "row=%d rec=%d RETURN NEW %d\n", (int) row_id, (int) ot->ot_curr_rec_id, (int) *new_rec_id);
3679
xt_ttracef(thread, "row=%d rec=%d VISIBLE\n", (int) row_id, (int) ot->ot_curr_rec_id);
3681
xt_ttracef(thread, "row=%d rec=%d RETURN NOT VISIBLE (NEW)\n", (int) row_id, (int) ot->ot_curr_rec_id);
3684
ot->ot_curr_row_id = row_id;
3685
ot->ot_curr_updated = FALSE;
3689
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3693
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3698
* Return TRUE if the record has been read, and is visible.
3699
* Return FALSE if the record is not visible.
3700
* Return XT_ERR if an error occurs.
3702
// Public visibility check for the record at ot->ot_curr_rec_id.
// Fast track: when ot->ot_curr_row_id is set, the row slot is read with
// xt_tab_get_row() and compared with ot->ot_curr_rec_id. For update reads
// on that path a temporary row lock is requested, and if the row is already
// locked the lock is waited for via xt_xn_wait_for_xact().
// Otherwise the record header is read and handed to tab_visible(); a
// substituted record ID returned in new_rec_id is stored in
// ot->ot_curr_rec_id.
// NOTE(review): declarations of row_id, lw and r, all case labels, returns
// and braces are missing from this listing; read_again presumably guards
// against looping more than once (see the infinite loop comment) - confirm.
xtPublic int xt_tab_visible(XTOpenTablePtr ot)
3705
XTTabRecHeadDRec rec_head;
3706
xtRecordID new_rec_id;
3707
xtBool read_again = FALSE;
3710
if ((row_id = ot->ot_curr_row_id)) {
3711
/* Fast track, do a quick check.
3712
* Row ID is only set if this record has been committed,
3714
* Check if it is the first on the list!
3716
xtRecordID var_rec_id;
3719
if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
3721
if (ot->ot_curr_rec_id == var_rec_id) {
3723
if (ot->ot_for_update) {
3724
XTThreadPtr thread = ot->ot_thread;
3725
XTTableHPtr tab = ot->ot_table;
3728
/* {ROW-LIST-LOCK} */
3729
lw.lw_thread = thread;
3731
lw.lw_row_id = row_id;
3732
lw.lw_row_updated = FALSE;
3733
if (!tab->tab_locks.xt_set_temp_lock(ot, &lw, &thread->st_lock_list)) {
3734
#ifdef DEBUG_LOCK_QUEUE
3735
ot->ot_table->tab_locks.rl_check(&lw);
3739
if (lw.lw_curr_lock != XT_NO_LOCK) {
3740
if (!xt_xn_wait_for_xact(thread, NULL, &lw)) {
3741
#ifdef DEBUG_LOCK_QUEUE
3742
ot->ot_table->tab_locks.rl_check(&lw);
3746
#ifdef DEBUG_LOCK_QUEUE
3747
ot->ot_table->tab_locks.rl_check(&lw);
3751
#ifdef DEBUG_LOCK_QUEUE
3752
ot->ot_table->tab_locks.rl_check(&lw);
3760
if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &rec_head))
3763
switch ((r = tab_visible(ot, &rec_head, &new_rec_id))) {
3765
ot->ot_curr_rec_id = new_rec_id;
3768
/* Avoid infinite loop: */
3770
/* Should not happen! */
3771
#ifdef XT_CRASH_DEBUG
3772
/* Generate a core dump! */
3786
* Read a record, and return one of the following:
3787
* TRUE - the record has been read, and is visible.
3788
* FALSE - the record is not visible.
3789
* XT_ERR - an error occurs.
3790
* XT_NEW - Means the expected record has been changed.
3791
* When doing an index scan, the conditions must be checked again!
3793
// Read the record at ot->ot_curr_rec_id into ot->ot_row_rbuffer, check it
// with tab_visible() and decode it into buffer.
// XT_ERR_NO_TRANSACTION is registered when the thread has no st_xact_data.
// When tab_visible() substitutes a record, the record at new_rec_id is read
// instead and becomes ot->ot_curr_rec_id.
// Decoding depends on the record format:
//   - fixed length (ot->ot_rec_fixed): the bytes after the fixed header are
//     copied with memcpy();
//   - XT_TAB_STATUS_VARIABLE / XT_TAB_STATUS_VAR_CLEAN: myxt_load_row() on
//     the data after the fixed header;
//   - otherwise: myxt_load_row() on the data after the extended header when
//     the requested column count is within dic_fix_col_count, else
//     xt_tab_load_ext_data().
// NOTE(review): case labels, returns and braces are missing from this
// listing.
xtPublic int xt_tab_read_record(register XTOpenTablePtr ot, xtWord1 *buffer)
3795
register XTTableHPtr tab = ot->ot_table;
3796
size_t rec_size = tab->tab_dic.dic_rec_size;
3797
xtRecordID new_rec_id;
3799
xtBool read_again = FALSE;
3801
if (!(ot->ot_thread->st_xact_data)) {
3802
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_TRANSACTION);
3807
if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, rec_size, ot->ot_row_rbuffer))
3810
switch (tab_visible(ot, (XTTabRecHeadDPtr) ot->ot_row_rbuffer, &new_rec_id)) {
3816
if (!xt_tab_get_rec_data(ot, new_rec_id, rec_size, ot->ot_row_rbuffer))
3818
ot->ot_curr_rec_id = new_rec_id;
3824
/* Avoid infinite loop: */
3826
/* Should not happen! */
3827
#ifdef XT_CRASH_DEBUG
3828
/* Generate a core dump! */
3840
if (ot->ot_rec_fixed)
3841
memcpy(buffer, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_size - XT_REC_FIX_HEADER_SIZE);
3842
else if (ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VARIABLE || ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VAR_CLEAN) {
3843
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, buffer, ot->ot_cols_req))
3847
u_int cols_req = ot->ot_cols_req;
3849
ASSERT_NS(cols_req);
3850
if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
3851
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, buffer, cols_req))
3855
if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, buffer, cols_req))
3866
* TRUE/OK - record was read.
3867
* FALSE/FAILED - An error occurred.
3869
// Read the record at ot->ot_curr_rec_id into ot->ot_row_rbuffer and decode
// it into buffer. No call to tab_visible() appears in the visible lines.
// XT_ERR_RECORD_DELETED is registered when the record type is not valid.
// ot->ot_curr_row_id is set from the record header, and ot->ot_curr_updated
// is set to whether the record transaction ID equals the xd_start_xn_id of
// the calling thread.
// The decoding branches are the same three as in xt_tab_read_record():
// fixed length, variable length, and extended.
xtPublic int xt_tab_dirty_read_record(register XTOpenTablePtr ot, xtWord1 *buffer)
3871
register XTTableHPtr tab = ot->ot_table;
3872
size_t rec_size = tab->tab_dic.dic_rec_size;
3874
if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, rec_size, ot->ot_row_rbuffer))
3877
if (XT_REC_NOT_VALID(ot->ot_row_rbuffer[0])) {
3878
/* Should not happen! */
3879
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_RECORD_DELETED);
3883
ot->ot_curr_row_id = XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_row_id_4);
3884
ot->ot_curr_updated =
3885
(XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_xact_id_4) == ot->ot_thread->st_xact_data->xd_start_xn_id);
3887
if (ot->ot_rec_fixed)
3888
memcpy(buffer, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_size - XT_REC_FIX_HEADER_SIZE);
3889
else if (ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VARIABLE || ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VAR_CLEAN) {
3890
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, buffer, ot->ot_cols_req))
3894
u_int cols_req = ot->ot_cols_req;
3896
ASSERT_NS(cols_req);
3897
if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
3898
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, buffer, cols_req))
3902
if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, buffer, cols_req))
3910
// TAB_ROW_LOAD_CACHE is the third argument passed to xt_tc_get_page() by
// xt_tab_load_row_pointers(): FALSE when XT_USE_ROW_REC_MMAP_FILES is
// defined, TRUE otherwise.
// NOTE(review): the #else and #endif lines are missing from this listing.
#ifdef XT_USE_ROW_REC_MMAP_FILES
3911
/* Loading into cache is not required,
3912
* Instead we copy the memory map to load the
3915
#define TAB_ROW_LOAD_CACHE FALSE
3917
#define TAB_ROW_LOAD_CACHE TRUE
3921
* Pull the entire row pointer file into memory.
3923
// Touch every page of the row pointer file of the table opened by ot.
// The work is only done when the cache has room: the larger of
// xt_tc_get_usage() and xt_tc_get_high(), plus the size of all row slots
// (tab_row_eof_id * tci_rec_size), must be below xt_tc_get_size().
// Each iteration fetches and releases one cache page, then advances by
// tci_rows_per_page rows. The XT_LOCK_MEMORY_PTR / memcpy() lines copy up
// to one page of the file into a scratch buffer that is freed at the end.
// NOTE(review): those memory-pointer lines are presumably compiled only for
// XT_USE_ROW_REC_MMAP_FILES, and the xt_malloc() is presumably guarded so
// it runs once; the guarding lines are missing from this listing - confirm.
xtPublic void xt_tab_load_row_pointers(XTThreadPtr self, XTOpenTablePtr ot)
3925
XTTableHPtr tab = ot->ot_table;
3926
xtRecordID eof_rec_id = tab->tab_row_eof_id;
3928
xtWord1 *buffer = NULL;
3930
/* Check if there is enough cache: */
3931
usage = xt_tc_get_usage();
3932
if (xt_tc_get_high() > usage)
3933
usage = xt_tc_get_high();
3934
if (usage + ((xtInt8) eof_rec_id * (xtInt8) tab->tab_rows.tci_rec_size) < xt_tc_get_size()) {
3936
size_t poffset, tfer;
3937
off_t offset, end_offset;
3938
XTTabCachePagePtr page;
3940
end_offset = xt_row_id_to_row_offset(tab, eof_rec_id);
3942
while (rec_id < eof_rec_id) {
3943
if (!tab->tab_rows.xt_tc_get_page(ot->ot_row_file, rec_id, TAB_ROW_LOAD_CACHE, &page, &poffset, self))
3946
tab->tab_rows.xt_tc_release_page(ot->ot_row_file, page, self);
3951
buffer = (xtWord1 *) xt_malloc(self, tab->tab_rows.tci_page_size);
3952
offset = xt_row_id_to_row_offset(tab, rec_id);
3953
tfer = tab->tab_rows.tci_page_size;
3954
// Clamp the last transfer so it does not run past the end of the rows.
if (offset + (off_t) tfer > end_offset)
3955
tfer = (size_t) (end_offset - offset);
3956
XT_LOCK_MEMORY_PTR(buff_ptr, ot->ot_row_file, offset, tfer, &self->st_statistics.st_rec, self);
3958
memcpy(buffer, buff_ptr, tfer);
3959
XT_UNLOCK_MEMORY_PTR(ot->ot_row_file, buff_ptr, self);
3962
rec_id += tab->tab_rows.tci_rows_per_page;
3965
xt_free(self, buffer);
3969
// Load the table opened by ot by calling xt_load_pages() followed by
// xt_load_indices(), both on behalf of thread self.
xtPublic void xt_tab_load_table(XTThreadPtr self, XTOpenTablePtr ot)
3971
xt_load_pages(self, ot);
3972
xt_load_indices(self, ot);
3975
// Read record rec_id into ot->ot_row_rbuffer and decode it into rec_buf,
// which is (re)allocated with xt_ib_alloc().
// An invalid record type is logged as a warning ("Recently updated record
// invalid").
// ot->ot_curr_row_id and ot->ot_curr_updated are set from the record header
// in the same way as in xt_tab_dirty_read_record().
// Fixed length records are copied with memcpy(); other formats are
// allocated with dic_mysql_buf_size bytes and decoded with myxt_load_row()
// or xt_tab_load_ext_data().
// NOTE(review): the record is read using the rec_id parameter, but the
// extended data is loaded using ot->ot_curr_rec_id. If a caller can pass a
// rec_id that differs from ot->ot_curr_rec_id, the extended columns would
// come from a different record - verify against the callers.
xtPublic xtBool xt_tab_load_record(register XTOpenTablePtr ot, xtRecordID rec_id, XTInfoBufferPtr rec_buf)
3977
register XTTableHPtr tab = ot->ot_table;
3978
size_t rec_size = tab->tab_dic.dic_rec_size;
3980
if (!xt_tab_get_rec_data(ot, rec_id, rec_size, ot->ot_row_rbuffer))
3983
if (XT_REC_NOT_VALID(ot->ot_row_rbuffer[0])) {
3984
/* Should not happen! */
3985
XTThreadPtr self = ot->ot_thread;
3987
xt_log(XT_WARNING, "Recently updated record invalid\n");
3991
ot->ot_curr_row_id = XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_row_id_4);
3992
ot->ot_curr_updated =
3993
(XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_xact_id_4) == ot->ot_thread->st_xact_data->xd_start_xn_id);
3995
if (ot->ot_rec_fixed) {
3996
size_t size = rec_size - XT_REC_FIX_HEADER_SIZE;
3997
if (!xt_ib_alloc(NULL, rec_buf, size))
3999
memcpy(rec_buf->ib_db.db_data, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, size);
4002
if (!xt_ib_alloc(NULL, rec_buf, tab->tab_dic.dic_mysql_buf_size))
4004
if (ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VARIABLE || ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VAR_CLEAN) {
4005
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_buf->ib_db.db_data, ot->ot_cols_req))
4009
u_int cols_req = ot->ot_cols_req;
4011
ASSERT_NS(cols_req);
4012
if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
4013
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, rec_buf->ib_db.db_data, cols_req))
4017
if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, rec_buf->ib_db.db_data, cols_req))
4026
// Put row_id on the free row list of tab.
// Under tab_row_lock the row slot is overwritten with the previous list
// head (tab_row_free_id), then row_id becomes the new head and tab_row_fnum
// is incremented. The mutex is released on the write failure path as well.
// After unlocking, the change is logged as XT_LOG_ENT_ROW_FREED via
// xt_xlog_modify_table().
// Row 0 must not be passed (asserted: it is the header).
xtPublic xtBool xt_tab_free_row(XTOpenTablePtr ot, XTTableHPtr tab, xtRowID row_id)
4028
XTTabRowRefDRec free_row;
4032
ASSERT_NS(row_id); // Cannot free the header!
4034
xt_lock_mutex_ns(&tab->tab_row_lock);
4035
prev_row = tab->tab_row_free_id;
4036
XT_SET_DISK_4(free_row.rr_ref_id_4, prev_row);
4037
if (!tab->tab_rows.xt_tc_write(ot->ot_row_file, row_id, 0, sizeof(XTTabRowRefDRec), (xtWord1 *) &free_row, &op_seq, TRUE, ot->ot_thread)) {
4038
xt_unlock_mutex_ns(&tab->tab_row_lock);
4041
tab->tab_row_free_id = row_id;
4042
tab->tab_row_fnum++;
4043
xt_unlock_mutex_ns(&tab->tab_row_lock);
4045
if (!xt_xlog_modify_table(tab->tab_id, XT_LOG_ENT_ROW_FREED, op_seq, 0, 0, row_id, sizeof(XTTabRowRefDRec), (xtWord1 *) &free_row, ot->ot_thread))
4051
// Release the extended data referenced by ext_rec, which belongs to record
// rec_id. The log reference and size are taken from the extended record
// header.
// For memory tables (XT_TF_MEMORY_TABLE) the slot is freed with
// xt_tab_free_ext_slot(); otherwise the data log entry is deleted through
// the data log buffer of the thread (dlb_delete_log).
// A failed delete ends in xt_log_and_clear_exception_ns().
// NOTE(review): presumably that logging happens only when log_err is set;
// the line using log_err is missing from this listing - confirm.
static void tab_free_ext_record_on_fail(XTOpenTablePtr ot, xtRecordID rec_id, XTTabRecExtDPtr ext_rec, xtBool log_err)
4053
xtWord4 log_over_size = XT_GET_DISK_4(ext_rec->re_log_dat_siz_4);
4055
xtLogOffset log_offset;
4057
XT_GET_LOG_REF(log_id, log_offset, ext_rec);
4059
if (ot->ot_table->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
4060
xt_tab_free_ext_slot(ot->ot_table, log_id, log_offset, log_over_size);
4062
if (!ot->ot_thread->st_dlog_buf.dlb_delete_log(log_id, log_offset, log_over_size, ot->ot_table->tab_id, rec_id, ot->ot_thread)) {
4064
xt_log_and_clear_exception_ns();
4069
// Copy the pending exception of the current thread (xt_get_self()) into *e.
static void tab_save_exception(XTExceptionPtr e)
4071
XTThreadPtr self = xt_get_self();
4073
*e = self->t_exception;
4076
// Copy *e back into the exception slot of the current thread; this is the
// counterpart of tab_save_exception().
static void tab_restore_exception(XTExceptionPtr e)
4078
XTThreadPtr self = xt_get_self();
4080
self->t_exception = *e;
4084
* This function assumes that a record may be partially written.
4085
* It removes all associated data and references to the record.
4087
* This function return XT_ERR if an error occurs.
4088
* TRUE if the record has been removed, and may be freed.
4089
* FALSE if the record has already been freed.
4092
// Remove record rec_id, which must belong to row row_id, together with its
// index entries, and put the record on the free list.
// Steps visible in this listing:
//   1. Read the record into ot->ot_row_rbuffer and check whether it is
//      already free and whether it belongs to row_id.
//   2. Store the previous variation ID in *prev_var_id.
//   3. If the table has keys, decode the indexed columns into rec_data
//      (the decoding depends on the record type) and call xt_idx_delete()
//      for every key unless dic_disable_index is set. Failures there are
//      logged and cleared, not returned.
//   4. For XT_TAB_STATUS_EXT_DLOG records, read the extended header again
//      under db_co_ext_lock.
//   5. Under tab_rec_lock, mark the record XT_TAB_STATUS_FREED (plus the
//      cleaned bit when clean_delete is set), link it into the free list
//      and increment tab_rec_fnum.
//   6. Restore the old record type in the buffer and log the removal with
//      xt_xlog_modify_table().
// NOTE(review): returns, braces, break statements and #else/#endif lines
// are missing from this listing, so which statements belong to which branch
// cannot be confirmed from here.
xtPublic int xt_tab_remove_record(XTOpenTablePtr ot, xtRecordID rec_id, xtWord1 *rec_data, xtRecordID *prev_var_id, xtBool clean_delete, xtRowID row_id, xtXactID XT_UNUSED(xn_id))
4094
register XTTableHPtr tab = ot->ot_table;
4096
xtWord1 old_rec_type;
4098
u_int cols_in_buffer;
4106
* NOTE: This function uses the read buffer. This should be OK because
4107
* the function is only called by the sweeper. The read buffer
4108
* is REQUIRED because of the call to xt_tab_load_ext_data()!!!
4110
rec_size = tab->tab_dic.dic_rec_size;
4111
if (!xt_tab_get_rec_data(ot, rec_id, rec_size, ot->ot_row_rbuffer))
4113
old_rec_type = ot->ot_row_rbuffer[0];
4115
/* Check if the record has not already been freed: */
4116
if (XT_REC_IS_FREE(old_rec_type))
4119
/* This record must belong to the given row: */
4120
if (XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->tr_row_id_4) != row_id)
4123
/* The transaction ID of the record must be BEFORE or equal to the given
4126
* No, this does not always hold. Because we wait for updates now,
4127
* a "younger" transaction can update before an older
4129
* Commit order determined the actual order in which the transactions
4130
* should be replicated. This is determined by the log number of
4131
* the commit record!
4132
if (db->db_xn_curr_id(xn_id, XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->tr_xact_id_4)))
4136
*prev_var_id = XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->tr_prev_rec_id_4);
4138
if (tab->tab_dic.dic_key_count) {
4141
switch (old_rec_type) {
4142
case XT_TAB_STATUS_DELETE:
4143
case XT_TAB_STATUS_DEL_CLEAN:
4144
rec_size = sizeof(XTTabRecHeadDRec);
4146
case XT_TAB_STATUS_FIXED:
4147
case XT_TAB_STATUS_FIX_CLEAN:
4148
/* We know that for a fixed length record,
4149
* dic_ind_rec_len <= dic_rec_size! */
4150
rec_size = (size_t) tab->tab_dic.dic_ind_rec_len + XT_REC_FIX_HEADER_SIZE;
4151
rec_data = ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE;
4153
case XT_TAB_STATUS_VARIABLE:
4154
case XT_TAB_STATUS_VAR_CLEAN:
4155
cols_req = tab->tab_dic.dic_ind_cols_req;
4157
cols_in_buffer = cols_req;
4158
rec_size = myxt_load_row_length(ot, rec_size - XT_REC_FIX_HEADER_SIZE, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, &cols_in_buffer);
4159
if (cols_in_buffer < cols_req)
4160
rec_size = tab->tab_dic.dic_rec_size;
4162
rec_size += XT_REC_FIX_HEADER_SIZE;
4163
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_data, cols_req)) {
4164
xt_log_and_clear_exception_ns();
4168
case XT_TAB_STATUS_EXT_DLOG:
4169
case XT_TAB_STATUS_EXT_CLEAN:
4170
cols_req = tab->tab_dic.dic_ind_cols_req;
4172
ASSERT_NS(cols_req);
4173
cols_in_buffer = cols_req;
4174
rec_size = myxt_load_row_length(ot, rec_size - XT_REC_EXT_HEADER_SIZE, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, &cols_in_buffer);
4175
if (cols_in_buffer < cols_req) {
4176
rec_size = tab->tab_dic.dic_rec_size;
4177
if (!xt_tab_load_ext_data(ot, rec_id, rec_data, cols_req)) {
4178
/* This is actually quite possible after recovery, see [(3)] */
4179
if (ot->ot_thread->t_exception.e_xt_err != XT_ERR_BAD_EXT_RECORD &&
4180
ot->ot_thread->t_exception.e_xt_err != XT_ERR_DATA_LOG_NOT_FOUND)
4181
xt_log_and_clear_exception_ns();
4186
/* All the records we require are in the buffer... */
4187
rec_size += XT_REC_EXT_HEADER_SIZE;
4188
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, rec_data, cols_req)) {
4189
xt_log_and_clear_exception_ns();
4198
/* Could this be the case?: This change may only be flushed after the
4199
* operation below has been flushed to the log.
4201
* No, remove records are never "undone". The sweeper will delete
4202
* the record again if it does not land in the log.
4204
* The fact that the index entries have already been removed is not
4207
if (!tab->tab_dic.dic_disable_index) {
4208
ind = tab->tab_dic.dic_keys;
4209
for (u_int i=0; i<tab->tab_dic.dic_key_count; i++, ind++) {
4210
if (!xt_idx_delete(ot, *ind, rec_id, rec_data))
4211
xt_log_and_clear_exception_ns();
4217
switch (old_rec_type) {
4218
case XT_TAB_STATUS_DELETE:
4219
case XT_TAB_STATUS_DEL_CLEAN:
4220
rec_size = XT_REC_FIX_HEADER_SIZE;
4222
case XT_TAB_STATUS_FIXED:
4223
case XT_TAB_STATUS_FIX_CLEAN:
4224
case XT_TAB_STATUS_VARIABLE:
4225
case XT_TAB_STATUS_VAR_CLEAN:
4226
rec_size = XT_REC_FIX_HEADER_SIZE;
4228
case XT_TAB_STATUS_EXT_DLOG:
4229
case XT_TAB_STATUS_EXT_CLEAN:
4230
rec_size = XT_REC_EXT_HEADER_SIZE;
4236
if (XT_REC_IS_EXT_DLOG(old_rec_type)) {
4237
/* {LOCK-EXT-REC} Lock, and read again to make sure that the
4238
* compactor does not change this record, while
4239
* we are removing it! */
4240
xt_lock_mutex_ns(&tab->tab_db->db_co_ext_lock);
4241
if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_EXT_HEADER_SIZE, ot->ot_row_rbuffer)) {
4242
xt_unlock_mutex_ns(&tab->tab_db->db_co_ext_lock);
4245
xt_unlock_mutex_ns(&tab->tab_db->db_co_ext_lock);
4250
/* A record is "clean" deleted if the record was
4251
* XT_TAB_STATUS_DELETE which was committed.
4252
* This makes sure that the record will still invalidate
4253
* following records in a row.
4257
* 1. INSERT A ROW, then DELETE it, assume the sweeper is delayed.
4259
* We now have the sequence row X --> del rec A --> valid rec B.
4261
* 2. A SELECT can still find B. Assume it now goes to check
4262
* if the record is valid, it reads row X, and gets A.
4264
* 3. Now the sweeper gets control and removes X, A and B.
4265
* It frees A with the clean bit.
4267
* 4. Now the SELECT gets control and reads A. Normally a freed record
4268
* would be ignored, and it would go onto B, which would then
4269
* be considered valid (note, even after the free, the next
4270
* pointer is not affected).
4272
* However, because the clean bit has been set, it will stop at A
4273
* and consider B invalid (which is the desired result).
4275
* NOTE: We assume it is not possible for A to be allocated and refer
4276
* to B, because B is freed before A. This means that B may refer to
4277
* A after the next allocation.
4281
XTTabRecFreeDPtr free_rec = (XTTabRecFreeDPtr) ot->ot_row_rbuffer;
4282
xtRecordID prev_rec_id;
4283
xtWord1 new_rec_type = XT_TAB_STATUS_FREED | (clean_delete ? XT_TAB_STATUS_CLEANED_BIT : 0);
4284
u_int status = XT_LOG_ENT_REC_REMOVED_BI;
4286
xt_lock_mutex_ns(&tab->tab_rec_lock);
4287
free_rec->rf_rec_type_1 = new_rec_type;
4288
#ifdef XT_CLUSTER_FREE_RECORDS
4289
XTTabCachePagePtr page;
4292
if (!tab->tab_recs.xt_tc_lock_page(ot->ot_rec_file, &page, rec_id, &offset, ot->ot_thread)) {
4293
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4297
if (page->tcp_free_rec == 0xFFFF) {
4298
/* There is no free record on this page. */
4299
prev_rec_id = tab->tab_rec_free_id;
4300
XT_SET_DISK_4(free_rec->rf_next_rec_id_4, prev_rec_id);
4301
memcpy(page->tcp_data + offset, ot->ot_row_rbuffer, sizeof(XTTabRecFreeDRec));
4302
tab->tab_rec_free_id = rec_id;
4305
XTTabRecFreeDPtr prev_free_rec = (XTTabRecFreeDPtr) (page->tcp_data + page->tcp_free_rec);
4307
status = XT_LOG_ENT_REC_REMOVED_BI_L;
4308
XT_COPY_DISK_4(free_rec->rf_next_rec_id_4, prev_free_rec->rf_next_rec_id_4);
4309
memcpy(page->tcp_data + offset, ot->ot_row_rbuffer, sizeof(XTTabRecFreeDRec));
4311
/* The previous now references the next: */
4312
XT_SET_DISK_4(prev_free_rec->rf_next_rec_id_4, rec_id);
4314
/* This is the record ID of the previous record: */
4315
ASSERT_NS((page->tcp_free_rec % tab->tab_recs.tci_rec_size) == 0);
4316
prev_rec_id = (page->tcp_page_idx * tab->tab_recs.tci_rows_per_page) + (page->tcp_free_rec / tab->tab_recs.tci_rec_size) + 1;
4317
ASSERT_NS(prev_rec_id != rec_id);
4320
/* Link after this page in future! */
4321
ASSERT_NS((offset % tab->tab_recs.tci_rec_size) == 0);
4322
page->tcp_free_rec = offset;
4323
tab->tab_recs.xt_tc_unlock_page(ot->ot_rec_file, page, &op_seq, ot->ot_thread);
4325
prev_rec_id = tab->tab_rec_free_id;
4326
XT_SET_DISK_4(free_rec->rf_next_rec_id_4, prev_rec_id);
4327
if (!xt_tab_put_rec_data(ot, rec_id, sizeof(XTTabRecFreeDRec), ot->ot_row_rbuffer, &op_seq)) {
4328
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4331
tab->tab_rec_free_id = rec_id;
4332
ASSERT_NS(tab->tab_rec_free_id < tab->tab_rec_eof_id);
4334
tab->tab_rec_fnum++;
4335
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4337
// Put the original type back into the buffer before it is handed to the
// log; the new type is passed to xt_xlog_modify_table() separately.
free_rec->rf_rec_type_1 = old_rec_type;
4338
return xt_xlog_modify_table(tab->tab_id, status, op_seq, new_rec_type, prev_rec_id, rec_id, rec_size, ot->ot_row_rbuffer, ot->ot_thread);
4341
/*
 * Allocate a row ID for a new row in 'tab'.
 *
 * While holding tab_row_lock, the head of the row free list
 * (tab_row_free_id) is used when it is non-zero: the next free row ID is
 * read from that row slot, becomes the new list head, and tab_row_fnum is
 * decremented (status XT_LOG_ENT_ROW_NEW_FL). The code after that sets
 * status XT_LOG_ENT_ROW_NEW and takes the ID from tab_row_eof_id, which is
 * then incremented; an EOF value of 0xFFFFFFFF registers
 * XT_ERR_MAX_ROW_COUNT. The allocation is logged with
 * xt_xlog_modify_table() after the lock has been released.
 *
 * Callers test the result with '!', i.e. a zero row ID signals failure
 * (see xt_tab_new_record()).
 */
static xtRowID tab_new_row(XTOpenTablePtr ot, XTTableHPtr tab)
4345
xtRowID next_row_id = 0;
4348
xt_lock_mutex_ns(&tab->tab_row_lock);
4349
/* A non-zero free list head means a freed row slot can be reused: */
if ((row_id = tab->tab_row_free_id)) {
4350
status = XT_LOG_ENT_ROW_NEW_FL;
4352
/* The freed slot holds the ID of the next free row: */
if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, &next_row_id, ot->ot_thread)) {
4353
xt_unlock_mutex_ns(&tab->tab_row_lock);
4356
tab->tab_row_free_id = next_row_id;
4357
tab->tab_row_fnum--;
4360
status = XT_LOG_ENT_ROW_NEW;
4361
row_id = tab->tab_row_eof_id;
4362
/* 0xFFFFFFFF is rejected as a row ID: */
if (row_id == 0xFFFFFFFF) {
4363
xt_unlock_mutex_ns(&tab->tab_row_lock);
4364
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_MAX_ROW_COUNT);
4367
if (((row_id - 1) % tab->tab_rows.tci_rows_per_page) == 0) {
4368
/* By fetching the page now, we avoid reading it later... */
4369
XTTabCachePagePtr page;
4370
XTTabCacheSegPtr seg;
4373
if (!tab->tab_rows.tc_fetch(ot->ot_row_file, row_id, &seg, &page, &poffset, FALSE, ot->ot_thread)) {
4374
xt_unlock_mutex_ns(&tab->tab_row_lock);
4377
TAB_CAC_UNLOCK(&seg->tcs_lock, ot->ot_thread->t_id);
4379
tab->tab_row_eof_id++;
4381
/* The operation sequence number is taken before tab_row_lock is released: */
op_seq = tab->tab_seq.ts_get_op_seq();
4382
xt_unlock_mutex_ns(&tab->tab_row_lock);
4384
if (!xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, next_row_id, row_id, 0, NULL, ot->ot_thread))
4387
XT_DISABLED_TRACE(("new row tx=%d row=%d\n", (int) ot->ot_thread->st_xact_data->xd_start_xn_id, (int) row_id));
4392
/*
 * Read the 4-byte row reference stored for 'row_id' in the row file of the
 * open table and place it in '*var_rec_id'.
 *
 * Callers test the result with '!', i.e. a zero result signals that the
 * read failed.
 */
xtPublic xtBool xt_tab_get_row(register XTOpenTablePtr ot, xtRowID row_id, xtRecordID *var_rec_id)
4394
register XTTableHPtr tab = ot->ot_table;
4396
/* xt_tc_read_4() below reads exactly 4 bytes, so the row record must be that size: */
(void) ASSERT_NS(sizeof(XTTabRowRefDRec) == 4);
4398
if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, var_rec_id, ot->ot_thread))
4403
/*
 * Store 'var_rec_id' as the record reference of row 'row_id'.
 *
 * The reference is packed into an XTTabRowRefDRec, written to the row file
 * with xt_tc_write(), and the change is then logged with
 * xt_xlog_modify_table() using the log entry type given in 'status'. When
 * the write succeeds, the result of xt_xlog_modify_table() is returned.
 */
xtPublic xtBool xt_tab_set_row(XTOpenTablePtr ot, u_int status, xtRowID row_id, xtRecordID var_rec_id)
4405
register XTTableHPtr tab = ot->ot_table;
4406
XTTabRowRefDRec row_buf;
4409
/* The referenced record must lie below the record EOF: */
ASSERT_NS(var_rec_id < tab->tab_rec_eof_id);
4410
XT_SET_DISK_4(row_buf.rr_ref_id_4, var_rec_id);
4412
if (!tab->tab_rows.xt_tc_write(ot->ot_row_file, row_id, 0, sizeof(XTTabRowRefDRec), (xtWord1 *) &row_buf, &op_seq, TRUE, ot->ot_thread))
4415
return xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, 0, row_id, sizeof(XTTabRowRefDRec), (xtWord1 *) &row_buf, ot->ot_thread);
4418
/*
 * Free 'row_id' again after a failed operation (called from
 * xt_tab_new_record()).
 *
 * The call to xt_tab_free_row() is bracketed by tab_save_exception() and
 * tab_restore_exception() -- presumably so that the error that caused the
 * failure is still the one reported afterwards (confirm against those
 * helpers).
 */
static void tab_free_row_on_fail(XTOpenTablePtr ot, XTTableHPtr tab, xtRowID row_id)
4422
tab_save_exception(&e);
4423
xt_tab_free_row(ot, tab, row_id);
4424
tab_restore_exception(&e);
4427
/*
 * Debug counters, only compiled with CHECK_CLUSTER_EFFICIENCY. They are
 * printed by tab_add_record() as "Next on: ... off: ..." each time
 * next_on_page is a multiple of 1000.
 */
#ifdef CHECK_CLUSTER_EFFICIENCY
4428
u_int next_on_page = 0;
4429
u_int next_off_page = 0;
4432
/*
 * Write the record described by 'rec_info' into the record file of the open
 * table and log the operation with the log entry type 'status'.
 *
 * - If rec_info->ri_ext_rec is set, space for the overflow data is reserved
 *   first: via xt_tab_get_ext_slot() when the table has XT_TF_MEMORY_TABLE
 *   set, and via the thread's data log buffer as well; the resulting log
 *   reference is stored in the record with XT_SET_LOG_REF().
 * - Under tab_rec_lock the record ID is taken from the head of the record
 *   free list (tab_rec_free_id) when that is non-zero; the next free ID read
 *   from the freed slot becomes the new list head and tab_rec_fnum is
 *   decremented. The code that follows takes the ID from tab_rec_eof_id and
 *   increments it.
 * - After the lock is released the write is logged with
 *   xt_xlog_modify_table(), the overflow data (if any) is written, and the
 *   chosen ID is returned in rec_info->ri_rec_id.
 *
 * Callers test the result with '!', i.e. a zero result signals failure.
 */
static xtBool tab_add_record(XTOpenTablePtr ot, XTTabRecInfoPtr rec_info, u_int status)
4434
register XTTableHPtr tab = ot->ot_table;
4435
XTThreadPtr thread = ot->ot_thread;
4438
xtLogOffset log_offset;
4440
xtRecordID next_rec_id = 0;
4442
if (rec_info->ri_ext_rec) {
4443
/* Determine where the overflow will go... */
4444
if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
4445
if (!xt_tab_get_ext_slot(tab, &log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data)))
4449
if (!thread->st_dlog_buf.dlb_get_log_offset(&log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data), ot->ot_thread))
4452
XT_SET_LOG_REF(rec_info->ri_ext_rec, log_id, log_offset);
4455
/* Write the record to disk: */
4456
xt_lock_mutex_ns(&tab->tab_rec_lock);
4457
/* A non-zero free list head means a freed record slot can be reused: */
if ((rec_id = tab->tab_rec_free_id)) {
4458
ASSERT_NS(rec_id < tab->tab_rec_eof_id);
4459
#ifdef XT_CLUSTER_FREE_RECORDS
4460
XTTabCachePagePtr page;
4462
XTTabRecFreeDPtr free_block;
4464
if (!tab->tab_recs.xt_tc_lock_page(ot->ot_rec_file, &page, rec_id, &offset, ot->ot_thread)) {
4465
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4469
/* Read the data from the old record: */
4470
free_block = (XTTabRecFreeDPtr) (page->tcp_data + offset);
4471
next_rec_id = XT_GET_DISK_4(free_block->rf_next_rec_id_4);
4473
#ifdef CHECK_CLUSTER_EFFICIENCY
4474
xtRecordID dbg_rec_id;
4476
dbg_rec_id = next_rec_id-1;
4477
if (page->tcp_page_idx == dbg_rec_id / tab->tab_recs.tci_rows_per_page)
4481
if ((next_on_page % 1000) == 0)
4482
printf("Next on: %d off: %d\n", next_on_page, next_off_page);
4485
/* The slot being reused is the one this page records as its free record: */
if (page->tcp_free_rec == offset) {
4486
/* Adjust the free record: */
4487
xtRecordID tmp_rec_id;
4489
/* Check if the next record is on the same page: */
4490
tmp_rec_id = next_rec_id-1;
4491
if (page->tcp_page_idx == tmp_rec_id / tab->tab_recs.tci_rows_per_page)
4492
/* This is the next free record on this page: */
4493
page->tcp_free_rec = (xtWord2) ((tmp_rec_id % tab->tab_recs.tci_rows_per_page) * tab->tab_recs.tci_rec_size);
4495
/* Not on the same page, so there are no more free records on this page: */
4496
page->tcp_free_rec = 0xFFFF;
4499
/* Write the new record: */
4500
memcpy(free_block, rec_info->ri_fix_rec_buf, rec_info->ri_rec_buf_size);
4501
tab->tab_recs.xt_tc_unlock_page(ot->ot_rec_file, page, &op_seq, ot->ot_thread);
4503
/* Here the free-list link is read with xt_tab_get_rec_data() and the slot
 * is then overwritten with xt_tab_put_rec_data():
 */
XTTabRecFreeDRec free_block;
4505
if (!xt_tab_get_rec_data(ot, rec_id, sizeof(XTTabRecFreeDRec), (xtWord1 *) &free_block)) {
4506
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4509
next_rec_id = XT_GET_DISK_4(free_block.rf_next_rec_id_4);
4510
if (!xt_tab_put_rec_data(ot, rec_id, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf, &op_seq)) {
4511
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4515
/* Unlink the reused slot from the free list: */
tab->tab_rec_free_id = next_rec_id;
4516
tab->tab_rec_fnum--;
4518
/* XT_LOG_ENT_UPDATE --> XT_LOG_ENT_UPDATE_FL */
4519
/* XT_LOG_ENT_INSERT --> XT_LOG_ENT_INSERT_FL */
4520
/* XT_LOG_ENT_DELETE --> XT_LOG_ENT_DELETE_FL */
4526
rec_id = tab->tab_rec_eof_id;
4527
tab->tab_rec_eof_id++;
4529
/* If we are writing to a new page (at the EOF)
4530
* then we do not need to read the page from the
4531
* file because it is new.
4533
* Note that this only works because we are holding
4534
* a lock on the record file.
4536
read = ((rec_id - 1) % tab->tab_recs.tci_rows_per_page) != 0;
4538
if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf, &op_seq, read, ot->ot_thread)) {
4539
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4543
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4545
if (!xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, next_rec_id, rec_id, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf, ot->ot_thread))
4548
if (rec_info->ri_ext_rec) {
4549
/* Write the log buffer overflow: */
4550
rec_info->ri_log_buf->er_status_1 = XT_LOG_ENT_EXT_REC_OK;
4551
XT_SET_DISK_4(rec_info->ri_log_buf->er_data_size_4, rec_info->ri_log_data_size);
4552
XT_SET_DISK_4(rec_info->ri_log_buf->er_tab_id_4, tab->tab_id);
4553
XT_SET_DISK_4(rec_info->ri_log_buf->er_rec_id_4, rec_id);
4554
if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
4555
if (!xt_tab_save_ext_record(tab, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info->ri_log_data_size, (xtWord1 *) rec_info->ri_log_buf))
4559
if (!thread->st_dlog_buf.dlb_append_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info->ri_log_data_size, (xtWord1 *) rec_info->ri_log_buf, ot->ot_thread)) {
4560
/* Failed to write the overflow, free the record allocated above: */
4566
XT_DISABLED_TRACE(("new rec tx=%d val=%d\n", (int) thread->st_xact_data->xd_start_xn_id, (int) rec_id));
4567
/* Report the ID of the record that was written back to the caller: */
rec_info->ri_rec_id = rec_id;
4571
/*
 * Undo helper used by xt_tab_new_record() after a record has already been
 * linked to its row.
 *
 * With the current exception saved, it removes the index entries of
 * 'rec_id' from the first 'key_count' indexes, frees the extended record
 * when the record type is XT_TAB_STATUS_EXT_DLOG or
 * XT_TAB_STATUS_EXT_CLEAN, then adds a XT_TAB_STATUS_DELETE record whose
 * previous-record link is 'rec_id' and points the row at that delete
 * record. The saved exception is restored with tab_restore_exception().
 *
 * 'row_ptr' is the header of the record being undone and 'rec_data' the
 * row image passed to xt_idx_delete().
 */
static void tab_delete_record_on_fail(XTOpenTablePtr ot, xtRowID row_id, xtRecordID rec_id, XTTabRecHeadDPtr row_ptr, xtWord1 *rec_data, u_int key_count)
4574
xtBool log_err = TRUE;
4575
XTTabRecInfoRec rec_info;
4577
tab_save_exception(&e);
4579
if (e.e_xt_err == XT_ERR_DUPLICATE_KEY ||
4580
e.e_xt_err == XT_ERR_DUPLICATE_FKEY) {
4581
/* If the error does not cause rollback, then we will ignore the
4582
* error if an error occurs in the UNDO!
4585
tab_restore_exception(&e);
4590
// Remove the index entries of rec_id from the first key_count indexes.
ind = ot->ot_table->tab_dic.dic_keys;
4591
for (u_int i=0; i<key_count; i++, ind++) {
4592
if (!xt_idx_delete(ot, *ind, rec_id, rec_data)) {
4594
xt_log_and_clear_exception_ns();
4599
// Records of these two types carry an extended record that is freed here.
if (row_ptr->tr_rec_type_1 == XT_TAB_STATUS_EXT_DLOG || row_ptr->tr_rec_type_1 == XT_TAB_STATUS_EXT_CLEAN)
4600
tab_free_ext_record_on_fail(ot, rec_id, (XTTabRecExtDPtr) row_ptr, log_err);
4602
// Build a header-only delete record in the write buffer of the open table.
rec_info.ri_fix_rec_buf = (XTTabRecFixDPtr) ot->ot_row_wbuffer;
4603
rec_info.ri_rec_buf_size = offsetof(XTTabRecFixDRec, rf_data);
4604
rec_info.ri_ext_rec = NULL;
4605
rec_info.ri_fix_rec_buf->tr_rec_type_1 = XT_TAB_STATUS_DELETE;
4606
rec_info.ri_fix_rec_buf->tr_stat_id_1 = 0;
4607
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
4608
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, rec_id);
4609
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, ot->ot_thread->st_xact_data->xd_start_xn_id);
4611
if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_DELETE))
4614
// Make the delete record the row's current record.
if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
4618
tab_restore_exception(&e);
4623
xt_log_and_clear_exception_ns();
4625
tab_restore_exception(&e);
4629
* Wait until all the variations between the start of the chain, and
4630
* the given record have been rolled-back.
4631
* If any is committed, register a locked error, and return FAILED.
4633
/*
 * Walks the variation chain of 'row_id', starting at the record the row
 * currently references and following tr_prev_rec_id_4 until
 * 'commit_rec_id' is reached. Each record header is read and the status of
 * its transaction is checked with xt_xn_status().
 *
 * For XT_XN_OTHER_UPDATE the row lock is released while waiting for the
 * other transaction (xt_xn_wait_for_xact()) and re-acquired as a write lock
 * afterwards. XT_ERR_RECORD_CHANGED is registered on the failure path.
 *
 * 'invalid_rec' remembers the last record found to be invalid so that the
 * same bad record is not retried forever.
 */
static xtBool tab_wait_for_rollback(XTOpenTablePtr ot, xtRowID row_id, xtRecordID commit_rec_id)
4635
register XTTableHPtr tab = ot->ot_table;
4636
xtRecordID var_rec_id;
4637
XTTabRecHeadDRec var_head;
4639
xtRecordID invalid_rec = 0;
4643
/* Start at the record the row currently references: */
if (!xt_tab_get_row(ot, row_id, &var_rec_id))
4646
while (var_rec_id != commit_rec_id) {
4649
if (!xt_tab_get_rec_data(ot, var_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &var_head))
4651
if (XT_REC_IS_CLEAN(var_head.tr_rec_type_1))
4653
if (XT_REC_IS_FREE(var_head.tr_rec_type_1))
4654
/* Should not happen: */
4655
goto record_invalid;
4656
xn_id = XT_GET_DISK_4(var_head.tr_xact_id_4);
4657
switch (xt_xn_status(ot, xn_id, var_rec_id)) {
4659
case XT_XN_NOT_VISIBLE:
4662
/* Ignore the record, it will be removed. */
4664
case XT_XN_MY_UPDATE:
4665
/* Should not happen: */
4667
case XT_XN_OTHER_UPDATE:
4668
/* Wait for the transaction to commit or rollback: */
4669
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
4670
xw.xw_xn_id = xn_id;
4671
if (!xt_xn_wait_for_xact(ot->ot_thread, &xw, NULL)) {
4672
XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
4675
XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
4678
goto record_invalid;
4680
/* Step to the previous variation in the chain: */
var_rec_id = XT_GET_DISK_4(var_head.tr_prev_rec_id_4);
4685
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_RECORD_CHANGED);
4689
/* Prevent an infinite loop due to a bad record: */
4690
if (invalid_rec != var_rec_id) {
4691
var_rec_id = invalid_rec;
4694
/* The record is invalid, it will be "overwritten"... */
4695
#ifdef XT_CRASH_DEBUG
4696
/* Should not happen! */
4702
/* Check if a record may be visible:
4703
* Return TRUE if the record may be visible now.
4704
* Return XT_MAYBE if the record may be visible in the future (set out_xn_id).
4705
* Return FALSE if the record is not valid (freed or is a delete record).
4706
* Return XT_ERR if an error occurred.
*
* Output parameters, as assigned in the body:
* - out_xn_id receives the ID of the transaction to wait for (wait_xn_id).
* - out_rowid receives the row ID taken from the record header.
* - out_updated receives whether the record's transaction ID equals the
*   start transaction ID of the calling thread.
*
* The variation chain of the row is walked while holding a read lock on the
* row's entry in tab_row_rwlock; the lock is released on every exit path
* shown.
4708
xtPublic int xt_tab_maybe_committed(XTOpenTablePtr ot, xtRecordID rec_id, xtXactID *out_xn_id, xtRowID *out_rowid, xtBool *out_updated)
4710
XTTabRecHeadDRec rec_head;
4711
xtXactID rec_xn_id = 0;
4712
xtBool wait = FALSE;
4713
xtXactID wait_xn_id = 0;
4715
xtRecordID var_rec_id;
4717
register XTTableHPtr tab;
4718
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4723
xtRecordID invalid_rec = 0;
4726
if (!xt_tab_get_rec_data(ot, rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &rec_head))
4729
if (XT_REC_NOT_VALID(rec_head.tr_rec_type_1))
4732
if (!XT_REC_IS_CLEAN(rec_head.tr_rec_type_1)) {
4733
rec_xn_id = XT_GET_DISK_4(rec_head.tr_xact_id_4);
4734
switch (xt_xn_status(ot, rec_xn_id, rec_id)) {
4736
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4740
case XT_XN_NOT_VISIBLE:
4741
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4747
case XT_XN_MY_UPDATE:
4748
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4752
case XT_XN_OTHER_UPDATE:
4753
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4757
wait_xn_id = rec_xn_id;
4760
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4763
/* Avoid infinite loop: */
4764
if (invalid_rec == rec_id) {
4765
/* Should not happen! */
4766
#ifdef XT_CRASH_DEBUG
4767
/* Generate a core dump! */
4772
invalid_rec = rec_id;
4777
/* Follow the variation chain until we come to this record.
4778
* If it is not the first visible variation then
4779
* it is not visible at all. If it is not found on the
4780
* variation chain, it is also not visible.
4782
row_id = XT_GET_DISK_4(rec_head.tr_row_id_4);
4785
XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
4789
if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
4791
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4792
len = sprintf(t_buf, "dup row=%d", (int) row_id);
4794
while (var_rec_id != rec_id) {
4797
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4799
len += sprintf(t_buf+len, " -> %d", (int) var_rec_id);
4801
if (!xt_tab_get_rec_data(ot, var_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &rec_head))
4803
/* All clean records are visible, by all transactions: */
4804
if (XT_REC_IS_CLEAN(rec_head.tr_rec_type_1))
4807
if (XT_REC_IS_FREE(rec_head.tr_rec_type_1)) {
4808
/* Should not happen: */
4809
if (invalid_rec != var_rec_id) {
4810
var_rec_id = invalid_rec;
4813
/* Assume end of list. */
4814
#ifdef XT_CRASH_DEBUG
4815
/* Should not happen! */
4821
xn_id = XT_GET_DISK_4(rec_head.tr_xact_id_4);
4822
switch (xt_xn_status(ot, xn_id, var_rec_id)) {
4824
case XT_XN_NOT_VISIBLE:
4827
/* Ignore the record, it will be removed. */
4828
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4830
len += sprintf(t_buf+len, "(T%d-A)", (int) xn_id);
4833
case XT_XN_MY_UPDATE:
4835
case XT_XN_OTHER_UPDATE:
4836
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4838
len += sprintf(t_buf+len, "(T%d-wait)", (int) xn_id);
4840
/* Wait for this update to commit or abort: */
4847
if (invalid_rec != var_rec_id) {
4848
var_rec_id = invalid_rec;
4851
/* Assume end of list. */
4852
#ifdef XT_CRASH_DEBUG
4853
/* Should not happen! */
4858
/* Step to the previous variation in the chain: */
var_rec_id = XT_GET_DISK_4(rec_head.tr_prev_rec_id_4);
4860
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4862
sprintf(t_buf+len, " -> %d(T%d-%s)\n", (int) var_rec_id, (int) rec_xn_id, t_type);
4864
sprintf(t_buf+len, " ...(T%d-%s)\n", (int) rec_xn_id, t_type);
4867
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
4869
*out_xn_id = wait_xn_id;
4872
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4873
xt_ttracef(thread, "%s", t_buf);
4876
*out_rowid = row_id;
4877
/* TRUE when the record was written by the calling thread's own transaction: */
*out_updated = (rec_xn_id == ot->ot_thread->st_xact_data->xd_start_xn_id);
4882
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
4886
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
4890
/*
 * Insert the row image 'rec_buf' as a new row of the open table.
 *
 * Steps, in order: convert the row with myxt_store_row(), allocate a row ID
 * with tab_new_row(), fill in the record header (update ID, row ID, no
 * previous record, current transaction ID), write the record with
 * tab_add_record(XT_LOG_ENT_INSERT), point the row at the record with
 * xt_tab_set_row(), insert an entry into every index, call insertRow() on
 * the dictionary table when it has foreign keys, and count the insert in
 * st_row_insert.
 *
 * When an index insert fails, the index number is stored in
 * ot->ot_err_index_no. The undo helpers tab_delete_record_on_fail() and
 * tab_free_row_on_fail() are called in the failure handling at the end.
 */
xtPublic xtBool xt_tab_new_record(XTOpenTablePtr ot, xtWord1 *rec_buf)
4892
register XTTableHPtr tab = ot->ot_table;
4893
register XTThreadPtr self = ot->ot_thread;
4894
XTTabRecInfoRec rec_info;
4899
if (!myxt_store_row(ot, &rec_info, (char *) rec_buf))
4902
/* Get a new row ID: */
4903
if (!(row_id = tab_new_row(ot, tab)))
4906
rec_info.ri_fix_rec_buf->tr_stat_id_1 = ot->ot_update_id;
4907
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
4908
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, 0);
4909
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, self->st_xact_data->xd_start_xn_id);
4911
/* Note, it is important that this record is written BEFORE the row
4912
* due to the problem described here [(5)]
4914
if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_INSERT))
4917
#ifdef TRACE_VARIATIONS
4918
xt_ttracef(self, "insert: row=%d rec=%d T%d\n", (int) row_id, (int) rec_info.ri_rec_id, (int) self->st_xact_data->xd_start_xn_id);
4920
if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
4922
XT_DISABLED_TRACE(("set new tx=%d row=%d rec=%d\n", (int) self->st_xact_data->xd_start_xn_id, (int) row_id, (int) rec_info.ri_rec_id));
4924
/* Add the index references: */
4925
for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
4926
if (!xt_idx_insert(ot, *ind, 0, rec_info.ri_rec_id, rec_buf, NULL, FALSE)) {
4927
ot->ot_err_index_no = (*ind)->mi_index_no;
4932
/* Do the foreign key stuff: */
4933
if (ot->ot_table->tab_dic.dic_table->dt_fkeys.size() > 0) {
4934
if (!ot->ot_table->tab_dic.dic_table->insertRow(ot, rec_buf))
4938
self->st_statistics.st_row_insert++;
4942
/* Once the row has been inserted, it is too late to remove it!
4943
* Now all we can do is delete it!
4945
tab_delete_record_on_fail(ot, row_id, rec_info.ri_rec_id, (XTTabRecHeadDPtr) rec_info.ri_fix_rec_buf, rec_buf, idx_cnt);
4949
tab_free_row_on_fail(ot, tab, row_id);
4955
/*
 * Undo helper called by xt_tab_update_record() and xt_tab_delete_record()
 * after the new record (rec_info->ri_rec_id) has been written.
 *
 * Steps, in order: free the new extended record (if any), delete the index
 * entries of the new record from the first 'idx_cnt' indexes using
 * 'after_buf', save the new record's header, rebuild the record from
 * 'before_buf' with myxt_store_row(), copy the saved header back, reserve
 * overflow space if the rebuilt record needs it, rewrite the record with
 * XT_LOG_ENT_REC_MODIFIED, write the overflow data, and re-insert the index
 * entries for the restored row image.
 *
 * XT_ERR_NO_BEFORE_IMAGE is registered when no error is already pending on
 * the thread; the guarding condition is not visible in this excerpt
 * (presumably a missing before image -- confirm).
 */
/* We cannot remove a change we have made to a row while a transaction
4956
* is running, so we have to undo what we have done by
4957
* overwriting the record we just created with
4960
static xtBool tab_overwrite_record_on_fail(XTOpenTablePtr ot, XTTabRecInfoPtr rec_info, xtWord1 *before_buf, xtWord1 *after_buf, u_int idx_cnt)
4962
register XTTableHPtr tab = ot->ot_table;
4963
XTTabRecHeadDRec prev_rec_head;
4966
XTThreadPtr thread = ot->ot_thread;
4968
xtLogOffset log_offset;
4969
xtRecordID rec_id = rec_info->ri_rec_id;
4971
/* Remove the new extended record: */
4972
if (rec_info->ri_ext_rec)
4973
tab_free_ext_record_on_fail(ot, rec_id, (XTTabRecExtDPtr) rec_info->ri_fix_rec_buf, TRUE);
4975
/* Undo index entries of the new record: */
4977
for (i=0, ind=tab->tab_dic.dic_keys; i<idx_cnt; i++, ind++) {
4978
if (!xt_idx_delete(ot, *ind, rec_id, after_buf))
4983
/* Keep the header of the new record; myxt_store_row() below refills the buffer: */
memcpy(&prev_rec_head, rec_info->ri_fix_rec_buf, sizeof(XTTabRecHeadDRec));
4986
/* Can happen if the delete was called from some cascaded action.
4987
* And this is better than a crash...
4989
* TODO: to make sure the change will not be applied in case the
4990
* transaction will be committed, we'd need to add a log entry to
4991
* restore the record like it's done for top-level operation. In
4992
* order to do this we'd need to read the before-image of the
4993
* record before modifying it.
4995
if (!ot->ot_thread->t_exception.e_xt_err)
4996
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_BEFORE_IMAGE);
5000
/* Restore the previous record! */
5001
if (!myxt_store_row(ot, rec_info, (char *) before_buf))
5004
memcpy(rec_info->ri_fix_rec_buf, &prev_rec_head, sizeof(XTTabRecHeadDRec));
5006
if (rec_info->ri_ext_rec) {
5007
/* Determine where the overflow will go... */
5008
if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
5009
if (!xt_tab_get_ext_slot(tab, &log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data)))
5013
if (!thread->st_dlog_buf.dlb_get_log_offset(&log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data), ot->ot_thread))
5016
XT_SET_LOG_REF(rec_info->ri_ext_rec, log_id, log_offset);
5019
if (!xt_tab_put_log_op_rec_data(ot, XT_LOG_ENT_REC_MODIFIED, 0, rec_id, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf))
5022
if (rec_info->ri_ext_rec) {
5023
/* Write the log buffer overflow: */
5024
rec_info->ri_log_buf->er_status_1 = XT_LOG_ENT_EXT_REC_OK;
5025
XT_SET_DISK_4(rec_info->ri_log_buf->er_data_size_4, rec_info->ri_log_data_size);
5026
XT_SET_DISK_4(rec_info->ri_log_buf->er_tab_id_4, tab->tab_id);
5027
XT_SET_DISK_4(rec_info->ri_log_buf->er_rec_id_4, rec_id);
5029
if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
5030
if (!xt_tab_save_ext_record(tab, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info->ri_log_data_size, (xtWord1 *) rec_info->ri_log_buf))
5034
if (!thread->st_dlog_buf.dlb_append_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info->ri_log_data_size, (xtWord1 *) rec_info->ri_log_buf, ot->ot_thread))
5039
/* Put the index entries back: */
5040
for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5041
if (!xt_idx_insert(ot, *ind, 0, rec_id, before_buf, after_buf, TRUE))
5042
/* Incomplete restore, there will be a rollback... */
5051
* If a transaction updates the same record over again, we should update
5052
* in place. This prevents producing unnecessary variations!
5054
/*
 * Update the current record (ot->ot_curr_rec_id) in place with the row
 * image 'after_buf'; 'before_buf' is the row image being replaced.
 *
 * Steps, in order: build the new record from 'after_buf', read the header
 * of the existing record (needed for its previous-record link and its
 * extended-record reference), reserve overflow space if the new record
 * needs it, fill in the new header (the previous-record link is copied from
 * the existing header), delete the index entries using 'before_buf',
 * overwrite the record with XT_LOG_ENT_REC_MODIFIED, write the overflow
 * data, insert the index entries using 'after_buf', call updateRow() when
 * the table has dt_trefs or foreign keys, and free the previous overflow
 * area.
 *
 * The failure handling frees the new extended record, removes the index
 * entries already added, rebuilds the record from 'before_buf', restores
 * the saved header, rewrites the record and puts the index entries back.
 */
static xtBool tab_overwrite_record(XTOpenTablePtr ot, xtWord1 *before_buf, xtWord1 *after_buf)
5056
register XTTableHPtr tab = ot->ot_table;
5057
xtRowID row_id = ot->ot_curr_row_id;
5058
register XTThreadPtr self = ot->ot_thread;
5059
xtRecordID rec_id = ot->ot_curr_rec_id;
5060
XTTabRecExtDRec prev_rec_head;
5061
XTTabRecInfoRec rec_info;
5062
u_int idx_cnt = 0, i;
5065
xtLogOffset log_offset;
5066
xtBool prev_ext_rec;
5068
if (!myxt_store_row(ot, &rec_info, (char *) after_buf))
5071
/* Read before we overwrite! */
5072
if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_EXT_HEADER_SIZE, (xtWord1 *) &prev_rec_head))
5075
/* Non-zero when the existing record type has the XT_TAB_STATUS_EXT_DLOG bit set: */
prev_ext_rec = prev_rec_head.tr_rec_type_1 & XT_TAB_STATUS_EXT_DLOG;
5077
if (rec_info.ri_ext_rec) {
5078
/* Determine where the overflow will go... */
5079
if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
5080
if (!xt_tab_get_ext_slot(tab, &log_id, &log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info.ri_log_data_size))
5084
if (!self->st_dlog_buf.dlb_get_log_offset(&log_id, &log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info.ri_log_data_size, ot->ot_thread))
5087
XT_SET_LOG_REF(rec_info.ri_ext_rec, log_id, log_offset);
5090
rec_info.ri_fix_rec_buf->tr_stat_id_1 = ot->ot_update_id;
5091
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
5092
/* The record stays at the same place in the variation chain: */
XT_COPY_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, prev_rec_head.tr_prev_rec_id_4);
5093
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, self->st_xact_data->xd_start_xn_id);
5095
/* Remove the index references, that have changed: */
5096
for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5097
if (!xt_idx_delete(ot, *ind, rec_id, before_buf)) {
5102
#ifdef TRACE_VARIATIONS
5103
xt_ttracef(self, "overwrite: row=%d rec=%d T%d\n", (int) row_id, (int) rec_id, (int) self->st_xact_data->xd_start_xn_id);
5105
/* Overwrite the record: */
5106
if (!xt_tab_put_log_op_rec_data(ot, XT_LOG_ENT_REC_MODIFIED, 0, rec_id, rec_info.ri_rec_buf_size, (xtWord1 *) rec_info.ri_fix_rec_buf))
5109
if (rec_info.ri_ext_rec) {
5110
/* Write the log buffer overflow: */
5111
rec_info.ri_log_buf->er_status_1 = XT_LOG_ENT_EXT_REC_OK;
5112
XT_SET_DISK_4(rec_info.ri_log_buf->er_data_size_4, rec_info.ri_log_data_size);
5113
XT_SET_DISK_4(rec_info.ri_log_buf->er_tab_id_4, tab->tab_id);
5114
XT_SET_DISK_4(rec_info.ri_log_buf->er_rec_id_4, rec_id);
5115
if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
5116
if (!xt_tab_save_ext_record(tab, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info.ri_log_data_size, (xtWord1 *) rec_info.ri_log_buf))
5120
if (!self->st_dlog_buf.dlb_append_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info.ri_log_data_size, (xtWord1 *) rec_info.ri_log_buf, ot->ot_thread))
5125
/* Add the index references that have changed: */
5126
for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5127
if (!xt_idx_insert(ot, *ind, 0, rec_id, after_buf, before_buf, FALSE)) {
5128
ot->ot_err_index_no = (*ind)->mi_index_no;
5133
/* Do the foreign key stuff: */
5134
if (ot->ot_table->tab_dic.dic_table->dt_trefs || ot->ot_table->tab_dic.dic_table->dt_fkeys.size() > 0) {
5135
if (!ot->ot_table->tab_dic.dic_table->updateRow(ot, before_buf, after_buf))
5139
/* Delete the previous overflow area: */
5141
tab_free_ext_record_on_fail(ot, rec_id, &prev_rec_head, TRUE);
5146
/* Remove the new extended record: */
5147
if (rec_info.ri_ext_rec)
5148
tab_free_ext_record_on_fail(ot, rec_id, (XTTabRecExtDPtr) rec_info.ri_fix_rec_buf, TRUE);
5150
/* Restore the previous record! */
5151
/* Undo index entries: */
5152
for (i=0, ind=tab->tab_dic.dic_keys; i<idx_cnt; i++, ind++) {
5153
if (!xt_idx_delete(ot, *ind, rec_id, after_buf))
5157
/* Restore the record: */
5158
if (!myxt_store_row(ot, &rec_info, (char *) before_buf))
5161
/* Copy back the saved header; the extended header size is used when the
 * rebuilt record has an extended part:
 */
if (rec_info.ri_ext_rec)
5162
memcpy(rec_info.ri_fix_rec_buf, &prev_rec_head, XT_REC_EXT_HEADER_SIZE);
5164
memcpy(rec_info.ri_fix_rec_buf, &prev_rec_head, sizeof(XTTabRecHeadDRec));
5166
if (!xt_tab_put_log_op_rec_data(ot, XT_LOG_ENT_REC_MODIFIED, 0, rec_id, rec_info.ri_rec_buf_size, (xtWord1 *) rec_info.ri_fix_rec_buf))
5169
/* Put the index entries back: */
5170
for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5171
if (!xt_idx_insert(ot, *ind, 0, rec_id, before_buf, after_buf, TRUE))
5172
/* Incomplete restore, there will be a rollback... */
5176
/* The previous record has now been restored. */
5180
/* The old record is overwritten, I must free the previous extended record: */
5182
tab_free_ext_record_on_fail(ot, rec_id, &prev_rec_head, TRUE);
5188
/*
 * Update the current row of the open table from 'before_buf' to
 * 'after_buf'.
 *
 * When ot->ot_curr_updated is set and 'before_buf' is non-NULL the update
 * is done in place by tab_overwrite_record(). Otherwise a new record
 * variation is written with tab_add_record(XT_LOG_ENT_UPDATE), its
 * previous-record link set to ot->ot_curr_rec_id. Under the row's write
 * lock the row's current record is read; if it is no longer
 * ot->ot_curr_rec_id, tab_wait_for_rollback() is called. The row is then
 * pointed at the new record, the lock is released, index entries are
 * inserted, updateRow() is called when the table has dt_trefs or foreign
 * keys, and the update is counted in st_row_update.
 *
 * When an index insert fails, the index number is stored in
 * ot->ot_err_index_no. tab_overwrite_record_on_fail() is called in the
 * failure handling, and the row lock is released on the path that fails
 * while it is held.
 */
xtPublic xtBool xt_tab_update_record(XTOpenTablePtr ot, xtWord1 *before_buf, xtWord1 *after_buf)
5190
register XTTableHPtr tab;
5192
register XTThreadPtr self;
5193
xtRecordID curr_var_rec_id;
5194
XTTabRecInfoRec rec_info;
5199
* Originally only the flag ot->ot_curr_updated was checked, and if it was on, then
5200
* tab_overwrite_record() was called, but this caused crashes in some cases like:
5202
* set @@autocommit = 0;
5203
* create table t1 (s1 int primary key);
5204
* create table t2 (s1 int primary key, foreign key (s1) references t1 (s1) on update cascade);
5205
* insert into t1 values (1);
5206
* insert into t2 values (1);
5207
* update t1 set s1 = 1;
5209
* the last update lead to a crash on t2 cascade update because before_buf argument is NULL
5210
* in the call below. It is NULL only during cascade update of child table. In that case we
5211
* cannot pass before_buf value from XTDDTableRef::modifyRow as the before_buf is the original
5212
* row for the parent (t1) table and it would be used to update any existing indexes
5213
* in the child table which would be wrong of course.
5215
* Alternative solution would be to copy the after_info in the XTDDTableRef::modifyRow():
5218
* if (!xt_tab_load_record(ot, ot->ot_curr_rec_id, &after_info))
5222
* here the xt_tab_load_record() loads the original row, so we can copy it from there, but in
5223
* that case we'd need to allocate a new (possibly up to 65536 bytes long) buffer, which makes
5224
* the optimization questionable
5227
if (ot->ot_curr_updated && before_buf)
5228
/* This record has already been updated by this transaction.
5229
* Do the update in place!
5231
return tab_overwrite_record(ot, before_buf, after_buf);
5234
row_id = ot->ot_curr_row_id;
5235
self = ot->ot_thread;
5237
if (!myxt_store_row(ot, &rec_info, (char *) after_buf))
5240
rec_info.ri_fix_rec_buf->tr_stat_id_1 = ot->ot_update_id;
5241
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
5242
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, ot->ot_curr_rec_id);
5243
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, self->st_xact_data->xd_start_xn_id);
5245
/* Create the new record: */
5246
if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_UPDATE))
5249
/* Link the new variation into the list: */
5250
XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
5252
if (!xt_tab_get_row(ot, row_id, &curr_var_rec_id))
5255
if (curr_var_rec_id != ot->ot_curr_rec_id) {
5256
/* If the transaction does not rollback, I will get an
5259
if (!tab_wait_for_rollback(ot, row_id, ot->ot_curr_rec_id))
5261
/* [(4)] This is the situation when we overwrite the
5262
* reference to curr_var_rec_id!
5263
* When curr_var_rec_id is cleaned up by the sweeper, the
5264
* sweeper will notice that the record is no longer in
5269
#ifdef TRACE_VARIATIONS
5270
xt_ttracef(self, "update: row=%d rec=%d T%d\n", (int) row_id, (int) rec_info.ri_rec_id, (int) self->st_xact_data->xd_start_xn_id);
5272
if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
5274
XT_DISABLED_TRACE(("set upd tx=%d row=%d rec=%d\n", (int) self->st_xact_data->xd_start_xn_id, (int) row_id, (int) rec_info.ri_rec_id));
5276
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
5278
/* Add the index references: */
5279
for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5280
if (!xt_idx_insert(ot, *ind, 0, rec_info.ri_rec_id, after_buf, before_buf, FALSE)) {
5281
ot->ot_err_index_no = (*ind)->mi_index_no;
5286
if (ot->ot_table->tab_dic.dic_table->dt_trefs || ot->ot_table->tab_dic.dic_table->dt_fkeys.size() > 0) {
5287
if (!ot->ot_table->tab_dic.dic_table->updateRow(ot, before_buf, after_buf))
5291
ot->ot_thread->st_statistics.st_row_update++;
5295
tab_overwrite_record_on_fail(ot, &rec_info, before_buf, after_buf, idx_cnt);
5299
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
5305
/*
 * Delete the current row of the open table; 'rec_buf' is the row image
 * passed to deleteRow() and to the undo helper.
 *
 * A header-only record of type XT_TAB_STATUS_DELETE is built in
 * ot->ot_row_wbuffer, with its previous-record link set to
 * ot->ot_curr_rec_id, and written with tab_add_record(XT_LOG_ENT_DELETE).
 * Under the row's write lock the row's current record is read; if it is no
 * longer ot->ot_curr_rec_id, tab_wait_for_rollback() is called. The row is
 * then pointed at the delete record, the lock is released, deleteRow() is
 * called when the table has dt_trefs, and the delete is counted in
 * st_row_delete.
 *
 * tab_overwrite_record_on_fail() is called in the failure handling, and the
 * row lock is released on the path that fails while it is held.
 */
xtPublic xtBool xt_tab_delete_record(XTOpenTablePtr ot, xtWord1 *rec_buf)
5307
register XTTableHPtr tab = ot->ot_table;
5308
xtRowID row_id = ot->ot_curr_row_id;
5309
xtRecordID curr_var_rec_id;
5310
XTTabRecInfoRec rec_info;
5312
/* Setup a delete record: */
5313
rec_info.ri_fix_rec_buf = (XTTabRecFixDPtr) ot->ot_row_wbuffer;
5314
rec_info.ri_rec_buf_size = offsetof(XTTabRecFixDRec, rf_data);
5315
rec_info.ri_ext_rec = NULL;
5316
rec_info.ri_fix_rec_buf->tr_rec_type_1 = XT_TAB_STATUS_DELETE;
5317
rec_info.ri_fix_rec_buf->tr_stat_id_1 = 0;
5318
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
5319
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, ot->ot_curr_rec_id);
5320
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, ot->ot_thread->st_xact_data->xd_start_xn_id);
5322
if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_DELETE))
5325
/* Link the delete record into the variation list of the row: */
XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
5327
if (!xt_tab_get_row(ot, row_id, &curr_var_rec_id))
5330
if (curr_var_rec_id != ot->ot_curr_rec_id) {
5331
if (!tab_wait_for_rollback(ot, row_id, ot->ot_curr_rec_id))
5335
#ifdef TRACE_VARIATIONS
5336
xt_ttracef(ot->ot_thread, "update: row=%d rec=%d T%d\n", (int) row_id, (int) rec_info.ri_rec_id, (int) ot->ot_thread->st_xact_data->xd_start_xn_id);
5338
if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
5340
XT_DISABLED_TRACE(("del row tx=%d row=%d rec=%d\n", (int) ot->ot_thread->st_xact_data->xd_start_xn_id, (int) row_id, (int) rec_info.ri_rec_id));
5342
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
5344
if (ot->ot_table->tab_dic.dic_table->dt_trefs) {
5345
if (!ot->ot_table->tab_dic.dic_table->deleteRow(ot, rec_buf))
5349
ot->ot_thread->st_statistics.st_row_delete++;
5353
tab_overwrite_record_on_fail(ot, &rec_info, rec_buf, NULL, 0);
5357
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
5361
// Run checkNoAction() for every item of a restrict list, then free the list.
//
// For each XTRestrictItem in list, a pooled open table for item->ri_tab_id
// is obtained from the database of the thread, and checkNoAction() is called
// on the dictionary object of that table with item->ri_rec_id.
//
// list   - list of XTRestrictItem entries; freed with xt_bl_free() here.
// thread - supplies the database (thread->st_database).
//
// Returns an xtBool status.
xtPublic xtBool xt_tab_restrict_rows(XTBasicListPtr list, XTThreadPtr thread)
5364
XTRestrictItemPtr item;
5365
XTOpenTablePtr pot = NULL;
5366
XTDatabaseHPtr db = thread->st_database;
5369
for (i=0; i<list->bl_count; i++) {
5370
item = (XTRestrictItemPtr) xt_bl_item_at(list, i);
5373
// The table that is still open is compared with the table of this item;
// presumably it is reused when they match and returned to the pool
// otherwise (TODO confirm).
if (pot->ot_table->tab_id == item->ri_tab_id)
5375
xt_db_return_table_to_pool_ns(pot);
5379
if (!xt_db_open_pool_table_ns(&pot, db, item->ri_tab_id)) {
5380
/* Should not happen, but just in case, we just don't
5381
* remove the lock. We will probably end up with a deadlock
5384
xt_log_and_clear_exception_ns();
5385
goto skip_check_action;
5388
/* Can happen if the table has been dropped: */
5389
goto skip_check_action;
5392
if (!pot->ot_table->tab_dic.dic_table->checkNoAction(pot, item->ri_rec_id)) {
5400
xt_db_return_table_to_pool_ns(pot);
5401
xt_bl_free(NULL, list);
5406
// Prepare an open table for a sequential scan.
//
// Clears the scan page and data pointers, the on-page flag and the scan
// offset, resets the current record and row IDs to 0, and copies the table
// record EOF (tab->tab_rec_eof_id) into ot->ot_seq_eof_id. The scan position
// ot->ot_seq_rec_id is set to 1, or to the EOF value when the thread has no
// transaction data, in which case XT_ERR_NO_TRANSACTION is registered.
// The per-thread st_scan_table counter is incremented.
//
// ot - open table handle whose scan state is initialised; must not hold a
//      scan page on entry (asserted).
//
// Returns an xtBool status.
xtPublic xtBool xt_tab_seq_init(XTOpenTablePtr ot)
5408
register XTTableHPtr tab = ot->ot_table;
5410
ASSERT_NS(!ot->ot_seq_page);
5411
ot->ot_seq_page = NULL;
5412
ot->ot_seq_data = NULL;
5413
ot->ot_on_page = FALSE;
5414
ot->ot_seq_offset = 0;
5416
ot->ot_curr_rec_id = 0; // 0 is an invalid position!
5417
ot->ot_curr_row_id = 0; // 0 is an invalid row ID!
5418
ot->ot_curr_updated = FALSE;
5420
/* We note the current EOF before we start a sequential scan.
5421
* It is basically possible to update the same record more than
5422
* once because an updated record creates a new record which
5423
* has a new position which may be in the area that is
5424
* still to be scanned.
5426
* By noting the EOF before we start a sequential scan we
5427
* reduce the possibility of this.
5429
* However, the possibility still remains, but it should
5430
* not be a problem because a record is not modified
5431
* if there is nothing to change, which is the case
5432
* if the record has already been changed!
5434
* NOTE (2008-01-29) There is no longer a problem with updating a
5435
* record twice because records are marked by an update.
5437
* [(10)] I have changed this (see below). I now check the
5438
* current EOF of the table.
5440
* The reason is that committed read must be able to see the
5441
* changes that occur during table table scan. *
5443
ot->ot_seq_eof_id = tab->tab_rec_eof_id;
5445
if (!ot->ot_thread->st_xact_data) {
5446
/* MySQL ignores this error, so we
5447
* setup the sequential scan so that it will
5450
ot->ot_seq_rec_id = ot->ot_seq_eof_id;
5451
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_TRANSACTION);
5455
// Record ID 1 is the first scan position (0 is an invalid position).
ot->ot_seq_rec_id = 1;
5456
ot->ot_thread->st_statistics.st_scan_table++;
5460
// Reset the sequential scan state of an open table.
//
// Sets the scan position and the scan EOF to 0 and clears the page pointer,
// the data pointer, the on-page flag and the scan offset. The handle must
// not hold a scan page when this is called (asserted); nothing is released
// here.
//
// ot - open table handle to reset.
xtPublic void xt_tab_seq_reset(XTOpenTablePtr ot)
5462
ot->ot_seq_rec_id = 0;
5463
ot->ot_seq_eof_id = 0;
5464
ASSERT_NS(!ot->ot_seq_page);
5465
ot->ot_seq_page = NULL;
5466
ot->ot_seq_data = NULL;
5467
ot->ot_on_page = FALSE;
5468
ot->ot_seq_offset = 0;
5471
// Release whatever a sequential scan is still holding.
//
// A held cache page is released through tab_recs.xt_tc_release_page(), a
// held memory pointer through XT_UNLOCK_MEMORY_PTR(); both pointers are then
// set to NULL and the on-page flag is cleared.
//
// ot - open table handle whose scan is being ended.
xtPublic void xt_tab_seq_exit(XTOpenTablePtr ot)
5473
register XTTableHPtr tab = ot->ot_table;
5475
if (ot->ot_seq_page) {
5476
tab->tab_recs.xt_tc_release_page(ot->ot_rec_file, ot->ot_seq_page, ot->ot_thread);
5477
ot->ot_seq_page = NULL;
5479
if (ot->ot_seq_data) {
5480
XT_UNLOCK_MEMORY_PTR(ot->ot_rec_file, ot->ot_seq_data, ot->ot_thread);
5481
ot->ot_seq_data = NULL;
5483
ot->ot_on_page = FALSE;
5486
// TAB_SEQ_LOAD_CACHE is the third argument that xt_tab_seq_next() passes to
// tab_recs.xt_tc_get_page(). Its value is chosen at compile time from
// XT_USE_ROW_REC_MMAP_FILES and XT_SEQ_SCAN_LOADS_CACHE; judging by the
// names it decides whether a sequential scan loads pages into the record
// cache (TODO confirm against xt_tc_get_page).
#ifdef XT_USE_ROW_REC_MMAP_FILES
5487
#define TAB_SEQ_LOAD_CACHE FALSE
5489
#ifdef XT_SEQ_SCAN_LOADS_CACHE
5490
#define TAB_SEQ_LOAD_CACHE TRUE
5492
#define TAB_SEQ_LOAD_CACHE FALSE
5496
// Step the sequential scan back by one record.
//
// Decrements the scan record ID and moves the scan offset back by one
// record size (tab_dic.dic_rec_size); presumably so that the next call to
// xt_tab_seq_next() visits the same record again (TODO confirm).
//
// ot - open table handle with an active sequential scan.
xtPublic void xt_tab_seq_repeat(XTOpenTablePtr ot)
5498
ot->ot_seq_rec_id--;
5499
ot->ot_seq_offset -= ot->ot_table->tab_dic.dic_rec_size;
5502
// Fetch the next record of a sequential scan into buffer.
//
// The scan position is ot->ot_seq_rec_id. The record bytes are taken either
// from a cache page (ot->ot_seq_page) or from a locked memory pointer
// (ot->ot_seq_data) at ot->ot_seq_offset. Each record is passed to
// tab_visible() and then, depending on its first byte, copied or loaded
// into buffer.
//
// ot     - open table handle holding the scan state (position, EOF, page and
//          data pointers, offset).
// buffer - destination for the row.
// eof    - output flag; presumably set when ot_seq_rec_id reaches
//          ot_seq_eof_id (TODO confirm).
//
// Returns an xtBool status.
xtPublic xtBool xt_tab_seq_next(XTOpenTablePtr ot, xtWord1 *buffer, xtBool *eof)
5504
register XTTableHPtr tab = ot->ot_table;
5505
register size_t rec_size = tab->tab_dic.dic_rec_size;
5507
xtRecordID new_rec_id;
5508
// ID of the last record for which the page was reread (see below).
xtRecordID invalid_rec = 0;
5511
// Not positioned on a page: ask the record cache for the page that
// contains the current scan position.
if (!ot->ot_on_page) {
5512
ASSERT_NS(!ot->ot_seq_page);
5513
if (!(ot->ot_on_page = tab->tab_recs.xt_tc_get_page(ot->ot_rec_file, ot->ot_seq_rec_id, TAB_SEQ_LOAD_CACHE, &ot->ot_seq_page, &ot->ot_seq_offset, ot->ot_thread)))
5515
// No cache page was handed back: lock a memory pointer at the file offset
// of the current record instead.
// NOTE(review): the size passed here is tab_rows.tci_page_size while the
// page-end test further down uses tab_recs.tci_page_size; confirm that
// tab_rows is intended.
if (!ot->ot_seq_page) {
5516
XT_LOCK_MEMORY_PTR(ot->ot_seq_data, ot->ot_rec_file, xt_rec_id_to_rec_offset(tab, ot->ot_seq_rec_id), tab->tab_rows.tci_page_size, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread);
5517
if (!ot->ot_seq_data)
5519
ot->ot_on_page = TRUE;
5520
ot->ot_seq_offset = 0;
5525
/* [(10)] The current EOF is used: */
5526
if (ot->ot_seq_rec_id >= ot->ot_seq_eof_id) {
5531
// The offset has moved past the end of the page: release the page or the
// memory pointer and clear the on-page flag.
if (ot->ot_seq_offset >= tab->tab_recs.tci_page_size) {
5532
if (ot->ot_seq_page) {
5533
tab->tab_recs.xt_tc_release_page(ot->ot_rec_file, ot->ot_seq_page, ot->ot_thread);
5534
ot->ot_seq_page = NULL;
5536
if (ot->ot_seq_data) {
5537
/* NULL here means that in the case of non-memory mapped
5538
* files we "keep" the lock.
5540
XT_UNLOCK_MEMORY_PTR(ot->ot_rec_file, ot->ot_seq_data, ot->ot_thread);
5541
ot->ot_seq_data = NULL;
5543
ot->ot_on_page = FALSE;
5547
// Address of the current record within the page or the locked memory.
if (ot->ot_seq_page)
5548
buff_ptr = ot->ot_seq_page->tcp_data + ot->ot_seq_offset;
5550
buff_ptr = ot->ot_seq_data + ot->ot_seq_offset;
5552
/* This is the current record: */
5553
ot->ot_curr_rec_id = ot->ot_seq_rec_id;
5554
ot->ot_curr_row_id = 0;
5556
/* Move to the next record: */
5557
ot->ot_seq_rec_id++;
5558
ot->ot_seq_offset += rec_size;
5561
// tab_visible() can hand back a different record ID in new_rec_id; in that
// case the other record is read into ot_row_rbuffer and becomes the
// current record.
switch (tab_visible(ot, (XTTabRecHeadDPtr) buff_ptr, &new_rec_id)) {
5567
buff_ptr = ot->ot_row_rbuffer;
5568
if (!xt_tab_get_rec_data(ot, new_rec_id, rec_size, ot->ot_row_rbuffer))
5570
ot->ot_curr_rec_id = new_rec_id;
5575
if (invalid_rec != ot->ot_curr_rec_id) {
5576
/* Don't re-read for the same record twice: */
5577
invalid_rec = ot->ot_curr_rec_id;
5579
/* Undo move to next: */
5580
ot->ot_seq_rec_id--;
5581
ot->ot_seq_offset -= rec_size;
5583
/* Prepare to reread the page: */
5584
if (ot->ot_seq_page) {
5585
tab->tab_recs.xt_tc_release_page(ot->ot_rec_file, ot->ot_seq_page, ot->ot_thread);
5586
ot->ot_seq_page = NULL;
5588
ot->ot_on_page = FALSE;
5591
#ifdef XT_CRASH_DEBUG
5592
/* Should not happen! */
5595
/* Continue, and skip the record... */
5602
// The first byte of the record selects how the row is unpacked.
switch (*buff_ptr) {
5603
case XT_TAB_STATUS_FIXED:
5604
case XT_TAB_STATUS_FIX_CLEAN:
// The data that follows the fixed header is copied to buffer unchanged.
5605
memcpy(buffer, buff_ptr + XT_REC_FIX_HEADER_SIZE, rec_size - XT_REC_FIX_HEADER_SIZE);
5607
case XT_TAB_STATUS_VARIABLE:
5608
case XT_TAB_STATUS_VAR_CLEAN:
// The data that follows the fixed header is unpacked by myxt_load_row()
// for the requested number of columns.
5609
if (!myxt_load_row(ot, buff_ptr + XT_REC_FIX_HEADER_SIZE, buffer, ot->ot_cols_req))
5612
case XT_TAB_STATUS_EXT_DLOG:
5613
case XT_TAB_STATUS_EXT_CLEAN: {
5614
u_int cols_req = ot->ot_cols_req;
5616
ASSERT_NS(cols_req);
5617
// When the requested column count does not exceed dic_fix_col_count the
// row is loaded from the record itself (after the extended header).
// Otherwise, it appears, the record is first copied to ot_row_rbuffer
// (unless it is already there) and xt_tab_load_ext_data() is used.
if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
5618
if (!myxt_load_row(ot, buff_ptr + XT_REC_EXT_HEADER_SIZE, buffer, cols_req))
5622
if (buff_ptr != ot->ot_row_rbuffer)
5623
memcpy(ot->ot_row_rbuffer, buff_ptr, rec_size);
5624
if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, buffer, cols_req))
5641
* -----------------------------------------------------------------------
5649
// Look up, add or remove a table name in the repair-pending file of a
// database.
//
// The file path is built from db->db_main_path with xt_add_pbxt_file() and
// the name repair-pending. The whole file is read into a heap buffer and
// scanned entry by entry, entries being delimited by newline or carriage
// return characters; each entry is compared with table_name using
// xt_tab_compare_names(). The visible write paths either append table_name
// at the end of the file (preceded by a newline when the content does not
// already end in one) or write the buffer back from offset 0 after the
// matching entry has been cut out with memmove(); in both cases the file
// EOF is set to the new length.
//
// db         - database whose main path holds the file.
// what       - operation selector; REP_ADD is tested here, and callers in
//              this file also pass REP_FIND and REP_DEL.
// table_name - name to look for, add or remove.
//
// Returns an xtBool; xt_tab_is_table_repair_pending() returns this value
// for REP_FIND, so presumably it reports whether the name was found
// (TODO confirm).
static xtBool tab_exec_repair_pending(XTDatabaseHPtr db, int what, char *table_name)
5651
XTThreadPtr thread = xt_get_self();
5652
char file_path[PATH_MAX];
5653
XTOpenFilePtr of = NULL;
5655
char *buffer = NULL, *ptr, *name;
5657
xtBool found = FALSE;
5659
xt_strcpy(PATH_MAX, file_path, db->db_main_path);
5660
xt_add_pbxt_file(PATH_MAX, file_path, "repair-pending");
5662
// Adding opens the file with XT_FS_CREATE and XT_FS_MAKE_PATH; the other
// open call uses XT_FS_MISSING_OK instead.
if (what == REP_ADD) {
5663
if (!xt_open_file_ns(&of, file_path, XT_FT_STANDARD, XT_FS_CREATE | XT_FS_MAKE_PATH, 0))
5667
if (!xt_open_file_ns(&of, file_path, XT_FT_STANDARD, XT_FS_DEFAULT | XT_FS_MISSING_OK, 0))
5673
// The file length is narrowed to int; the buffer gets one extra byte,
// presumably for a terminating zero (TODO confirm).
len = (int) xt_seek_eof_file(NULL, of);
5675
if (!(buffer = (char *) xt_malloc_ns(len + 1)))
5678
if (!xt_pread_file(of, 0, len, len, buffer, NULL, &thread->st_statistics.st_x, thread))
5685
// Advance to the end of the current entry.
while (*ptr && *ptr != '\n' && *ptr != '\r')
5690
if (xt_tab_compare_names(name, table_name) == 0) {
5705
/* Remove any trailing empty lines: */
5707
if (buffer[len-1] != '\n' && buffer[len-1] != '\r')
5712
if (!xt_pwrite_file(of, len, 1, (void *) "\n", &thread->st_statistics.st_x, thread))
5716
if (!xt_pwrite_file(of, len, strlen(table_name), table_name, &thread->st_statistics.st_x, thread))
5718
len += strlen(table_name);
5719
if (!xt_set_eof_file(NULL, of, len))
5727
// Close the gap left by the entry: everything from ptr to the end of the
// buffer is moved down to name, and len shrinks by the removed span.
memmove(name, ptr, len - (ptr - buffer));
5728
len = len - (ptr - name);
5730
/* Remove trailing empty lines: */
5732
if (buffer[len-1] != '\n' && buffer[len-1] != '\r')
5738
if (!xt_pwrite_file(of, 0, len, buffer, &thread->st_statistics.st_x, thread))
5740
if (!xt_set_eof_file(NULL, of, len))
5747
xt_close_file_ns(of);
5751
// The file itself is deleted on this path; presumably when no entries
// remain (TODO confirm the condition).
xt_fs_delete(NULL, file_path);
5756
xt_close_file_ns(of);
5759
xt_log_and_clear_exception(thread);
5763
// Build a printable table name from a table path.
//
// The result is written to table_name (capacity size) and consists of the
// second-last path element, a dot, and the last path element. For names
// that start with #sql both elements are used as they are; otherwise they
// are run through myxt_static_convert_file_name(). When the last element
// contains a #P# marker, only the text before the marker is used as the
// table part, and the partition text is appended in parentheses; a #SP#
// marker inside the partition text adds a second part after a dash
// separator.
//
// tab_path   - path of the table; only tab_path->ps_path is read.
// table_name - output buffer.
// size       - size of table_name in bytes.
xtPublic void xt_tab_make_table_name(XTPathStrPtr tab_path, char *table_name, size_t size)
5767
nptr = xt_last_name_of_path(tab_path->ps_path);
5768
if (xt_starts_with(nptr, "#sql")) {
5769
/* {INVALID-OLD-TABLE-FIX}
5770
* Temporary files can have strange paths, for example
5771
* ..../var/tmp/mysqld.1/#sqldaec_1_6
5772
* This occurs, for example, when the temp_table.test is
5773
* run using the PBXT suite in MariaDB:
5774
* ./mtr --suite=pbxt --do-test=temp_table
5776
* Calling myxt_static_convert_file_name, with a '.', in the name
5778
* [ERROR] Invalid (old?) table or database name 'mysqld.1'
5779
* To prevent this, we do not convert the temporary
5780
* table names using the mysql functions.
5782
* Note, this bug was found by Monty, and fixed by modifying
5783
* xt_2nd_last_name_of_path(), see {INVALID-OLD-TABLE-FIX}.
5786
xt_2nd_last_name_of_path(size, table_name, tab_path->ps_path);
5787
xt_strcat(size, table_name, ".");
5788
xt_strcat(size, table_name, nptr);
5791
// Scratch buffer for one unconverted name element.
char name_buf[XT_TABLE_NAME_SIZE*3+3];
5795
xt_2nd_last_name_of_path(sizeof(name_buf), name_buf, tab_path->ps_path);
5796
myxt_static_convert_file_name(name_buf, table_name, size);
5797
xt_strcat(size, table_name, ".");
5799
/* Handle partition extensions to table names: */
5800
if ((part_ptr = strstr(nptr, "#P#")))
5801
xt_strncpy(sizeof(name_buf), name_buf, nptr, part_ptr - nptr);
5803
xt_strcpy(sizeof(name_buf), name_buf, nptr);
5805
// Convert the table part directly behind what is already in table_name.
len = strlen(table_name);
5806
myxt_static_convert_file_name(name_buf, table_name + len, size - len);
5809
/* Add the partition extension (which is relevant to the engine). */
5813
if ((sub_part_ptr = strstr(part_ptr, "#SP#")))
5814
xt_strncpy(sizeof(name_buf), name_buf, part_ptr, sub_part_ptr - part_ptr);
5816
xt_strcpy(sizeof(name_buf), name_buf, part_ptr);
5818
xt_strcat(size, table_name, " (");
5819
len = strlen(table_name);
5820
myxt_static_convert_file_name(name_buf, table_name + len, size - len);
5825
xt_strcat(size, table_name, " - ");
5826
len = strlen(table_name);
5827
myxt_static_convert_file_name(sub_part_ptr, table_name + len, size - len);
5830
xt_strcat(size, table_name, ")");
5835
// Check the repair-pending file of the table database for this table.
//
// The name is built with xt_tab_make_table_name() and looked up with
// tab_exec_repair_pending(REP_FIND).
//
// tab - table to look up; tab_name and tab_db are read.
//
// Returns the result of tab_exec_repair_pending() unchanged.
xtPublic xtBool xt_tab_is_table_repair_pending(XTTableHPtr tab)
5837
char table_name[XT_TABLE_NAME_BUF_SIZE];
5839
xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
5840
return tab_exec_repair_pending(tab->tab_db, REP_FIND, table_name);
5843
// Clear the repair-pending state of a table.
//
// Does nothing unless tab->tab_repair_pending is set. Otherwise the flag is
// cleared and tab_exec_repair_pending(REP_DEL) is called with the table
// name; the result of that call is not checked.
//
// tab - table that has been repaired.
xtPublic void xt_tab_table_repaired(XTTableHPtr tab)
5845
if (tab->tab_repair_pending) {
5846
char table_name[XT_TABLE_NAME_BUF_SIZE];
5848
tab->tab_repair_pending = FALSE;
5849
xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
5850
tab_exec_repair_pending(tab->tab_db, REP_DEL, table_name);
5854
// Mark a table as needing repair.
//
// Does nothing when tab->tab_repair_pending is already set. Otherwise the
// flag is set and tab_exec_repair_pending(REP_ADD) is called with the table
// name; the result of that call is not checked.
//
// tab - table to mark.
xtPublic void xt_tab_set_table_repair_pending(XTTableHPtr tab)
5856
if (!tab->tab_repair_pending) {
5857
char table_name[XT_TABLE_NAME_BUF_SIZE];
5859
tab->tab_repair_pending = TRUE;
5860
xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
5861
tab_exec_repair_pending(tab->tab_db, REP_ADD, table_name);
5866
* -----------------------------------------------------------------------
5867
* EXTENDED DATA FOR RAM TABLES
5870
// Reserve a slot in the in-memory extended record index of a table.
//
// Free slots form a list threaded through the index itself:
// tab_mem_ind_free holds the number of the first free slot plus one (0 when
// there is none), and a free slot stores the next such value cast to a
// pointer. When no free slot exists the next unused slot is taken, growing
// the index by 100 entries when it is full. The chosen slot is cleared and
// reported to the caller as slot number plus one in log_offset.
//
// tab        - table owning the index; tab_mem_lock guards the bookkeeping.
// log_id     - output parameter.
// log_offset - output; receives the slot number plus one.
// (unnamed)  - requested size; not used.
//
// Returns an xtBool status.
xtPublic xtBool xt_tab_get_ext_slot(XTTableHPtr tab, xtLogID *log_id, xtLogOffset *log_offset, size_t /*req_size*/)
5874
xt_spinlock_lock(&tab->tab_mem_lock);
5875
// Pop the head of the free list.
if (tab->tab_mem_ind_free) {
5876
new_slot = tab->tab_mem_ind_free - 1;
5877
tab->tab_mem_ind_free = (size_t) tab->tab_mem_index[new_slot];
5880
if (tab->tab_mem_ind_usage == tab->tab_mem_ind_size) {
5881
/* Grow the index: */
5882
if (!xt_realloc_ns((void **) &tab->tab_mem_index, (tab->tab_mem_ind_size + 100) * sizeof(xtWord1 *)))
5884
tab->tab_mem_ind_size += 100;
5886
new_slot = tab->tab_mem_ind_usage;
5887
tab->tab_mem_ind_usage++;
5889
xt_spinlock_unlock(&tab->tab_mem_lock);
5890
// NOTE(review): this store happens after the spinlock has been released,
// while the index can be reallocated under the lock by another caller;
// confirm that this cannot write through a stale tab_mem_index pointer.
tab->tab_mem_index[new_slot] = NULL;
5892
*log_offset = new_slot + 1;
5896
// Store a copy of an extended record in a slot of the in-memory index.
//
// A heap block of size bytes is allocated and filled from data before the
// spinlock is taken; under the lock the table memory total is increased by
// size and the block is stored in the slot. Whatever the slot held before
// is overwritten without being freed.
//
// tab        - table owning the index.
// log_id     - unused.
// log_offset - slot number plus one.
// size       - number of bytes to copy.
// data       - source bytes.
//
// Returns an xtBool status.
xtPublic xtBool xt_tab_save_ext_record(XTTableHPtr tab, xtLogID XT_UNUSED(log_id), xtLogOffset log_offset, size_t size, xtWord1 *data)
5898
size_t slot = ((size_t) log_offset) - 1;
5901
if (!(rec_data = (xtWord1 *) xt_malloc_ns(size)))
5903
memcpy(rec_data, data, size);
5904
xt_spinlock_lock(&tab->tab_mem_lock);
5905
tab->tab_mem_total += size;
5906
tab->tab_mem_index[slot] = rec_data;
5907
xt_spinlock_unlock(&tab->tab_mem_lock);
5911
// Copy an extended record out of the in-memory index.
//
// When the slot lies below tab_mem_ind_usage and holds a non-NULL pointer,
// size bytes are copied from it to data; the memset() below zero-fills data
// instead, presumably as the alternative for all other cases (TODO confirm).
//
// tab        - table owning the index.
// log_id     - unused.
// log_offset - slot number plus one.
// size       - number of bytes to deliver.
// data       - destination buffer of at least size bytes.
//
// NOTE(review): no lock is taken in the visible code, and a free slot holds
// a free-list link rather than record data; confirm that callers only pass
// offsets of slots that hold a saved record or NULL.
xtPublic void xt_tab_read_ext_record(XTTableHPtr tab, xtLogID XT_UNUSED(log_id), xtLogOffset log_offset, size_t size, xtWord1 *data)
5913
size_t slot = ((size_t) log_offset) - 1;
5915
if (slot < tab->tab_mem_ind_usage && tab->tab_mem_index[slot])
5916
memcpy(data, tab->tab_mem_index[slot], size);
5918
memset(data, 0, size);
5921
// Release a slot of the in-memory extended record index.
//
// Under the spinlock, a record stored in the slot is freed and size is
// subtracted from the table memory total. The slot is then pushed onto the
// free list: it receives the old tab_mem_ind_free value cast to a pointer,
// and tab_mem_ind_free becomes the slot number plus one.
//
// tab        - table owning the index.
// log_id     - unused.
// log_offset - slot number plus one.
// size       - size that was accounted for the record.
xtPublic void xt_tab_free_ext_slot(XTTableHPtr tab, xtLogID XT_UNUSED(log_id), xtLogOffset log_offset, size_t size)
5923
size_t slot = ((size_t) log_offset) - 1;
5925
xt_spinlock_lock(&tab->tab_mem_lock);
5926
if (tab->tab_mem_index[slot]) {
5927
xt_free_ns(tab->tab_mem_index[slot]);
5928
tab->tab_mem_total -= size;
5930
// Link the slot into the free list (values are slot number plus one).
tab->tab_mem_index[slot] = (xtWord1 *) tab->tab_mem_ind_free;
5931
tab->tab_mem_ind_free = slot + 1;
5932
xt_spinlock_unlock(&tab->tab_mem_lock);
5935
static void tab_free_ext_records(XTTableHPtr tab)
5939
if (!tab->tab_mem_index)
5942
i = tab->tab_mem_ind_free;
5944
next = (size_t) tab->tab_mem_index[i-1];
5945
tab->tab_mem_index[i-1] = NULL;
5949
for (i=0; i<tab->tab_mem_ind_usage; i++) {
5950
if (tab->tab_mem_index[i])
5951
xt_free_ns(tab->tab_mem_index[i]);
5954
xt_free_ns(tab->tab_mem_index);