~posulliv/drizzle/optimizer-style-cleanup

« back to all changes in this revision

Viewing changes to plugin/pbxt/src/table_xt.cc

  • Committer: Padraig O'Sullivan
  • Date: 2010-04-17 01:38:47 UTC
  • mfrom: (1237.9.238 bad-staging)
  • Revision ID: osullivan.padraig@gmail.com-20100417013847-ibjioqsfbmf5yg4g
Merge trunk.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/* Copyright (c) 2005 PrimeBase Technologies GmbH
 
2
 *
 
3
 * PrimeBase XT
 
4
 *
 
5
 * This program is free software; you can redistribute it and/or modify
 
6
 * it under the terms of the GNU General Public License as published by
 
7
 * the Free Software Foundation; either version 2 of the License, or
 
8
 * (at your option) any later version.
 
9
 *
 
10
 * This program is distributed in the hope that it will be useful,
 
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
13
 * GNU General Public License for more details.
 
14
 *
 
15
 * You should have received a copy of the GNU General Public License
 
16
 * along with this program; if not, write to the Free Software
 
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 
18
 *
 
19
 * 2005-02-08   Paul McCullagh
 
20
 *
 
21
 * H&G2JCtL
 
22
 */
 
23
 
 
24
#include "xt_config.h"
 
25
 
 
26
#include <string.h>
 
27
#include <stdio.h>
 
28
#ifndef XT_WIN
 
29
#include <strings.h>
 
30
#endif
 
31
#include <ctype.h>
 
32
#include <time.h>
 
33
 
 
34
#ifdef DRIZZLED
 
35
#include <drizzled/common.h>
 
36
#include <drizzled/dtcollation.h>
 
37
#else
 
38
#include "mysql_priv.h"
 
39
#endif
 
40
 
 
41
#include "table_xt.h"
 
42
#include "database_xt.h"
 
43
#include "heap_xt.h"
 
44
#include "strutil_xt.h"
 
45
#include "myxt_xt.h"
 
46
#include "cache_xt.h"
 
47
#include "trace_xt.h"
 
48
#include "index_xt.h"
 
49
#include "systab_xt.h"
 
50
 
 
51
#ifdef DEBUG
 
52
//#define TRACE_VARIATIONS
 
53
//#define TRACE_VARIATIONS_IN_DUP_CHECK
 
54
//#define DUMP_CHECK_TABLE
 
55
//#define CHECK_INDEX_ON_CHECK_TABLE
 
56
//#define TRACE_TABLE_IDS
 
57
//#define TRACE_FLUSH_TABLE
 
58
//#define TRACE_CREATE_TABLES
 
59
#endif
 
60
 
 
61
#define CHECK_TABLE_STATS
 
62
 
 
63
#ifdef TRACE_TABLE_IDS
 
64
//#define PRINTF                xt_ftracef
 
65
#define PRINTF          xt_trace
 
66
#endif
 
67
 
 
68
static void tab_init_row_file(XTThreadPtr self, XTOpenFilePtr of_row, XTTableHPtr tab, XTDictionaryPtr dic);
 
69
static void tab_init_ind_file(XTThreadPtr self, XTOpenFilePtr of_ind, XTTableHPtr tab, XTDictionaryPtr dic);
 
70
static void tab_init_data_file(XTThreadPtr self, XTOpenFilePtr of_rec, XTTableHPtr tab, XTDictionaryPtr dic, size_t def_len, XTStringBufferPtr tab_def);
 
71
static void tab_free_ext_records(XTTableHPtr tab);
 
72
 
 
73
/*
 
74
 * -----------------------------------------------------------------------
 
75
 * Internal structures
 
76
 */
 
77
 
 
78
#define XT_MAX_TABLE_FILE_NAME_SIZE             (XT_TABLE_NAME_SIZE+6+40)
 
79
 
 
80
/*
 
81
 * -----------------------------------------------------------------------
 
82
 * Compare paths:
 
83
 */
 
84
 
 
85
/* GOTCHA! The problem:
 
86
 *
 
87
 * The server uses names like: "./test/my_tab",
 
88
 * the BLOB streaming engine uses: "test/my_tab"
 
89
 * which leads to the same table being loaded twice.
 
90
 */
 
91
xtPublic int xt_tab_compare_paths(char *n1, char *n2)
 
92
{
 
93
        n1 = xt_last_2_names_of_path(n1);
 
94
        n2 = xt_last_2_names_of_path(n2);
 
95
        if (pbxt_ignore_case)
 
96
                return strcasecmp(n1, n2);
 
97
        return strcmp(n1, n2);
 
98
}
 
99
 
 
100
/*
 
101
 * This function only compares only the last 2 components of
 
102
 * the path because table names must differ in this area.
 
103
 */
 
104
xtPublic int xt_tab_compare_names(const char *n1, const char *n2)
 
105
{
 
106
        n1 = xt_last_2_names_of_path(n1);
 
107
        n2 = xt_last_2_names_of_path(n2);
 
108
        if (pbxt_ignore_case)
 
109
                return strcasecmp(n1, n2);
 
110
        return strcmp(n1, n2);
 
111
}
 
112
 
 
113
/*
 
114
 * -----------------------------------------------------------------------
 
115
 * Private utilities
 
116
 */
 
117
 
 
118
static xtBool tab_list_comp(void *key, void *data)
 
119
{
 
120
        XTTableHPtr     tab = (XTTableHPtr) data;
 
121
 
 
122
        return strcmp(xt_last_2_names_of_path((char *) key), xt_last_2_names_of_path(tab->tab_name->ps_path)) == 0;
 
123
}
 
124
 
 
125
static xtHashValue tab_list_hash(xtBool is_key, void *key_data)
 
126
{
 
127
        XTTableHPtr     tab = (XTTableHPtr) key_data;
 
128
 
 
129
        if (is_key)
 
130
                return xt_ht_hash(xt_last_2_names_of_path((char *) key_data));
 
131
        return xt_ht_hash(xt_last_2_names_of_path(tab->tab_name->ps_path));
 
132
}
 
133
 
 
134
static xtBool tab_list_comp_ci(void *key, void *data)
 
135
{
 
136
        XTTableHPtr     tab = (XTTableHPtr) data;
 
137
 
 
138
        return strcasecmp(xt_last_2_names_of_path((char *) key), xt_last_2_names_of_path(tab->tab_name->ps_path)) == 0;
 
139
}
 
140
 
 
141
static xtHashValue tab_list_hash_ci(xtBool is_key, void *key_data)
 
142
{
 
143
        XTTableHPtr     tab = (XTTableHPtr) key_data;
 
144
 
 
145
        if (is_key)
 
146
                return xt_ht_casehash(xt_last_2_names_of_path((char *) key_data));
 
147
        return xt_ht_casehash(xt_last_2_names_of_path(tab->tab_name->ps_path));
 
148
}
 
149
 
 
150
static void tab_list_free(XTThreadPtr self, void *data)
 
151
{
 
152
        XTTableHPtr             tab = (XTTableHPtr) data;
 
153
        XTDatabaseHPtr  db = tab->tab_db;
 
154
        XTTableEntryPtr te_ptr;
 
155
 
 
156
        /* Remove the reference from the ID list, whem the table is
 
157
         * removed from the name list:
 
158
         */
 
159
        if ((te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab->tab_id)))
 
160
                te_ptr->te_table = NULL;
 
161
 
 
162
        if (tab->tab_dic.dic_table)
 
163
                tab->tab_dic.dic_table->removeReferences(self);
 
164
        xt_heap_release(self, tab);
 
165
}
 
166
 
 
167
static void tab_close_files(XTThreadPtr self, XTTableHPtr tab)
 
168
{
 
169
        if (tab->tab_rec_file) {
 
170
                xt_fs_release_file(self, tab->tab_rec_file);
 
171
                tab->tab_rec_file = NULL;
 
172
        }
 
173
        if (tab->tab_row_file) {
 
174
                xt_fs_release_file(self, tab->tab_row_file);
 
175
                tab->tab_row_file = NULL;
 
176
        }
 
177
        if (tab->tab_ind_file) {
 
178
                xt_fs_release_file(self, tab->tab_ind_file);
 
179
                tab->tab_ind_file = NULL;
 
180
        }
 
181
}
 
182
 
 
183
static void tab_finalize(XTThreadPtr self, void *x)
 
184
{
 
185
        XTTableHPtr     tab = (XTTableHPtr) x;
 
186
 
 
187
        xt_exit_row_locks(&tab->tab_locks);
 
188
 
 
189
        xt_xres_exit_tab(self, tab);
 
190
 
 
191
        if (tab->tab_ind_free_list) {
 
192
                XTIndFreeListPtr list, flist;
 
193
                
 
194
                list = tab->tab_ind_free_list;
 
195
                while (list) {
 
196
                        flist = list;
 
197
                        list = list->fl_next_list;
 
198
                        xt_free(self, flist);
 
199
                }
 
200
                tab->tab_ind_free_list = NULL;
 
201
        }
 
202
 
 
203
        tab_close_files(self, tab);
 
204
 
 
205
        if (tab->tab_index_head) {
 
206
                xt_free(self, tab->tab_index_head);
 
207
                tab->tab_index_head = NULL;
 
208
        }
 
209
 
 
210
        tab_free_ext_records(tab);
 
211
 
 
212
#ifdef TRACE_TABLE_IDS
 
213
        PRINTF("%s: free TABLE: db=%d tab=%d %s\n", self->t_name, (int) tab->tab_db ? tab->tab_db->db_id : 0, (int) tab->tab_id, 
 
214
                tab->tab_name ? xt_last_2_names_of_path(tab->tab_name->ps_path) : "?");
 
215
#endif
 
216
        if (tab->tab_name) {
 
217
                xt_free(self, tab->tab_name);
 
218
                tab->tab_name = NULL;
 
219
        }
 
220
        myxt_free_dictionary(self, &tab->tab_dic);
 
221
        if (tab->tab_free_locks) {
 
222
                tab->tab_seq.xt_op_seq_exit(self);
 
223
                xt_spinlock_free(self, &tab->tab_mem_lock);
 
224
                xt_spinlock_free(self, &tab->tab_ainc_lock);
 
225
                xt_free_mutex(&tab->tab_rec_flush_lock);
 
226
                xt_free_mutex(&tab->tab_ind_flush_lock);
 
227
                xt_free_mutex(&tab->tab_ind_stat_lock);
 
228
                xt_free_mutex(&tab->tab_dic_field_lock);
 
229
                xt_free_mutex(&tab->tab_row_lock);
 
230
                xt_free_mutex(&tab->tab_ind_lock);
 
231
                xt_free_mutex(&tab->tab_rec_lock);
 
232
                for (u_int i=0; i<XT_ROW_RWLOCKS; i++)
 
233
                        XT_TAB_ROW_FREE_LOCK(self, &tab->tab_row_rwlock[i]);
 
234
        }
 
235
#ifdef XT_SORT_REC_WRITES
 
236
        if (tab->tab_rec_dw_writes) {
 
237
                xt_free_sortedlist(self, tab->tab_rec_dw_writes);
 
238
                tab->tab_rec_dw_writes = NULL;
 
239
        }
 
240
        if (tab->tab_rec_dw_data)
 
241
                xt_free_ns(tab->tab_rec_dw_data);
 
242
#endif
 
243
        if (tab->tab_rec_flush_task)
 
244
                tab->tab_rec_flush_task->tk_exit();
 
245
        if (tab->tab_ind_flush_task)
 
246
                tab->tab_ind_flush_task->tk_exit();
 
247
}
 
248
 
 
249
static void tab_onrelease(void *x)
 
250
{
 
251
        XTTableHPtr     tab = (XTTableHPtr) x;
 
252
 
 
253
        /* Signal threads waiting for exclusive use of the table: */
 
254
        if (tab->tab_db->db_tables)
 
255
                xt_ht_signal(NULL, tab->tab_db->db_tables);
 
256
}
 
257
 
 
258
/*
 
259
 * -----------------------------------------------------------------------
 
260
 * PUBLIC METHODS
 
261
 */
 
262
 
 
263
/*
 
264
 * This function sets the table name to "", if the file
 
265
 * does not belong to XT.
 
266
 */
 
267
xtPublic char *xt_tab_file_to_name(size_t size, char *tab_name, char *file_name)
 
268
{
 
269
        char    *cptr;
 
270
        size_t  len;
 
271
 
 
272
        file_name = xt_last_name_of_path(file_name);
 
273
        cptr = file_name + strlen(file_name) - 1;
 
274
        while (cptr > file_name && *cptr != '.')
 
275
                cptr--;
 
276
        if (cptr > file_name && *cptr == '.') {
 
277
                if (strcmp(cptr, ".xtl") == 0 || strcmp(cptr, ".xtr") == 0) {
 
278
                        cptr--;
 
279
                        while (cptr > file_name && isdigit(*cptr))
 
280
                                cptr--;
 
281
                }
 
282
                else {
 
283
                        const char **ext = pbxt_extensions;
 
284
                        
 
285
                        while (*ext) {
 
286
                                if (strcmp(cptr, *ext) == 0)
 
287
                                        goto ret_name;
 
288
                                ext++;
 
289
                        }
 
290
                        cptr = file_name;
 
291
                }
 
292
        }
 
293
 
 
294
        ret_name:
 
295
        len = cptr - file_name;
 
296
        if (len > size-1)
 
297
                len = size-1;
 
298
 
 
299
        memcpy(tab_name, file_name, len);
 
300
        tab_name[len] = 0;
 
301
 
 
302
        /* Return a pointer to what was removed! */
 
303
        return file_name + len;
 
304
}
 
305
 
 
306
static void tab_get_row_file_name(char *table_name, char *name, xtTableID tab_id)
 
307
{
 
308
        sprintf(table_name, "%s-%lu.xtr", name, (u_long) tab_id);
 
309
}
 
310
 
 
311
static void tab_get_data_file_name(char *table_name, char *name, xtTableID XT_UNUSED(tab_id))
 
312
{
 
313
        sprintf(table_name, "%s.xtd", name);
 
314
}
 
315
 
 
316
static void tab_get_index_file_name(char *table_name, char *name, xtTableID XT_UNUSED(tab_id))
 
317
{
 
318
        sprintf(table_name, "%s.xti", name);
 
319
}
 
320
 
 
321
static void tab_free_by_id(XTThreadPtr self, void *XT_UNUSED(thunk), void *item)
 
322
{
 
323
        XTTableEntryPtr te_ptr = (XTTableEntryPtr) item;
 
324
 
 
325
        if (te_ptr->te_tab_name) {
 
326
                xt_free(self, te_ptr->te_tab_name);
 
327
                te_ptr->te_tab_name = NULL;
 
328
        }
 
329
        te_ptr->te_tab_id = 0;
 
330
        te_ptr->te_heap_tab = FALSE;
 
331
        te_ptr->te_table = NULL;
 
332
}
 
333
 
 
334
static int tab_comp_by_id(XTThreadPtr XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
 
335
{
 
336
        xtTableID               te_id = *((xtTableID *) a);
 
337
        XTTableEntryPtr te_ptr = (XTTableEntryPtr) b;
 
338
 
 
339
        if (te_id < te_ptr->te_tab_id)
 
340
                return -1;
 
341
        if (te_id == te_ptr->te_tab_id)
 
342
                return 0;
 
343
        return 1;
 
344
}
 
345
 
 
346
static void tab_free_path(XTThreadPtr self, void *XT_UNUSED(thunk), void *item)
 
347
{
 
348
        XTTablePathPtr  tp_ptr = *((XTTablePathPtr *) item);
 
349
 
 
350
        xt_free(self, tp_ptr);
 
351
}
 
352
 
 
353
static int tab_comp_path(XTThreadPtr XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
 
354
{
 
355
        char                    *path = (char *) a;
 
356
        XTTablePathPtr  tp_ptr = *((XTTablePathPtr *) b);
 
357
 
 
358
        return xt_tab_compare_paths(path, tp_ptr->tp_path);
 
359
}
 
360
 
 
361
static xtBool tab_get_name_value(XTTableDescPtr td, char **ret_name, char **ret_value)
 
362
{
 
363
        char *ptr = td->x.z.td_curr_ptr;
 
364
 
 
365
        while (*ptr && isspace(*ptr)) ptr++;
 
366
        if (!*ptr) {
 
367
                td->x.z.td_curr_ptr = ptr;
 
368
                return FALSE;
 
369
        }
 
370
 
 
371
        *ret_name = ptr;
 
372
        while (*ptr && *ptr != '=' && *ptr != '\r' && *ptr != '\n') ptr++;
 
373
        if (*ptr == '=') {
 
374
                *ptr = 0;
 
375
                ptr++;
 
376
                *ret_value = ptr;
 
377
                while (*ptr && *ptr != '\r' && *ptr != '\n') ptr++;
 
378
                if (*ptr) {
 
379
                        *ptr = 0;
 
380
                        ptr++;
 
381
                }
 
382
        }
 
383
        else {
 
384
                if (*ptr) {
 
385
                        *ptr = 0;
 
386
                        ptr++;
 
387
                }
 
388
                *ret_value = NULL;
 
389
        }
 
390
        td->x.z.td_curr_ptr = ptr;
 
391
        return TRUE;
 
392
}
 
393
 
 
394
xtPublic void xt_describe_tables_init(XTThreadPtr self, XTDatabaseHPtr db, XTTableDescPtr td)
 
395
{
 
396
        char pbuf[PATH_MAX];
 
397
 
 
398
        td->td_db = db;
 
399
        xt_strcpy(PATH_MAX, pbuf, db->db_main_path);
 
400
        xt_add_tables_file(PATH_MAX, pbuf);
 
401
        if (xt_fs_exists(pbuf))
 
402
                td->td_type = XT_TD_FROM_TAB_FILE;
 
403
        else
 
404
                td->td_type = XT_TD_FROM_DIRECTORY;
 
405
 
 
406
        switch (td->td_type) {
 
407
                case XT_TD_FROM_DIRECTORY:
 
408
                        td->x.y.td_path_idx = 0;
 
409
                        if (td->x.y.td_path_idx < xt_sl_get_size(db->db_table_paths)) {
 
410
                                XTTablePathPtr *tp_ptr;
 
411
 
 
412
                                tp_ptr = (XTTablePathPtr *) xt_sl_item_at(db->db_table_paths, td->x.y.td_path_idx);
 
413
                                td->td_tab_path = *tp_ptr;
 
414
                                td->x.y.td_open_dir = xt_dir_open(self, td->td_tab_path->tp_path, "*.xtr");
 
415
                        }
 
416
                        else
 
417
                                td->x.y.td_open_dir = NULL;
 
418
                        break;
 
419
                case XT_TD_FROM_TAB_FILE:
 
420
                        XTOpenFilePtr   of;
 
421
                        int                             len;
 
422
                        char                    *buffer;
 
423
                        char                    *name;
 
424
                        char                    *value;
 
425
 
 
426
                        of = xt_open_file(self, pbuf, XT_FT_STANDARD, XT_FS_DEFAULT, 1024);
 
427
                        pushr_(xt_close_file, of);
 
428
                        len = (int) xt_seek_eof_file(self, of);
 
429
                        buffer = (char *) xt_malloc(self, len + 1);
 
430
                        pushr_(xt_free, buffer);
 
431
                        if (!xt_pread_file(of, 0, len, len, buffer, NULL, &self->st_statistics.st_x, self))
 
432
                                xt_throw(self);
 
433
                        buffer[len] = 0;
 
434
                        popr_(); // Discard xt_free(buffer)
 
435
                        freer_(); // xt_close_file(of)
 
436
 
 
437
                        td->x.z.td_table_info = buffer;
 
438
                        td->x.z.td_curr_ptr = buffer;
 
439
                        while (tab_get_name_value(td, &name, &value)) {
 
440
                                if (strcmp(name, "[table]") == 0)
 
441
                                        break;
 
442
                        }
 
443
                        break;
 
444
        }
 
445
}
 
446
 
 
447
xtPublic xtBool xt_describe_tables_next(XTThreadPtr self, XTTableDescPtr td)
 
448
{
 
449
        char    *tab_name;
 
450
        xtBool  r = FALSE;
 
451
 
 
452
        enter_();
 
453
 
 
454
        switch (td->td_type) {
 
455
                case XT_TD_FROM_DIRECTORY:
 
456
                        retry:
 
457
                        if (!td->x.y.td_open_dir)
 
458
                                return_(FALSE);
 
459
                        try_(a) {
 
460
                                r = xt_dir_next(self, td->x.y.td_open_dir);
 
461
                        }
 
462
                        catch_(a) {
 
463
                                xt_describe_tables_exit(self, td);
 
464
                                throw_();
 
465
                        }
 
466
                        cont_(a);
 
467
                        if (!r) {
 
468
                                XTTablePathPtr *tp_ptr;
 
469
 
 
470
                                if (td->x.y.td_path_idx+1 >= xt_sl_get_size(td->td_db->db_table_paths))
 
471
                                        return_(FALSE);
 
472
 
 
473
                                if (td->x.y.td_open_dir)
 
474
                                        xt_dir_close(NULL, td->x.y.td_open_dir);
 
475
                                td->x.y.td_open_dir = NULL;
 
476
 
 
477
                                td->x.y.td_path_idx++;
 
478
                                tp_ptr = (XTTablePathPtr *) xt_sl_item_at(td->td_db->db_table_paths, td->x.y.td_path_idx);
 
479
                                td->td_tab_path = *tp_ptr;
 
480
                                td->x.y.td_open_dir = xt_dir_open(self, td->td_tab_path->tp_path, "*.xtr");
 
481
                                goto retry;
 
482
                        }
 
483
 
 
484
                        tab_name = xt_dir_name(self, td->x.y.td_open_dir);
 
485
                        td->td_tab_id = (xtTableID) xt_file_name_to_id(tab_name);
 
486
                        xt_tab_file_to_name(XT_TABLE_NAME_SIZE, td->td_tab_name, tab_name);
 
487
                        td->td_heap_tab = FALSE;
 
488
                        break;
 
489
                case XT_TD_FROM_TAB_FILE:
 
490
                        char *name;
 
491
                        char *value;
 
492
 
 
493
                        td->td_tab_id = 0;
 
494
                        while (tab_get_name_value(td, &name, &value)) {
 
495
                                if (strcmp(name, "name") == 0)
 
496
                                        xt_strcpy(XT_TABLE_NAME_SIZE, td->td_tab_name, value);
 
497
                                else if (strcmp(name, "id") == 0) {
 
498
                                        u_long lvalue = 0;
 
499
 
 
500
                                        sscanf(value, "%lu", &lvalue);
 
501
                                        td->td_tab_id = (xtTableID) lvalue;
 
502
                                }
 
503
                                else if (strcmp(name, "storage") == 0) {
 
504
                                        if (strcmp(value, "heap") == 0)
 
505
                                                td->td_heap_tab = TRUE;
 
506
                                        else /* disk */
 
507
                                                td->td_heap_tab = FALSE;
 
508
                                }
 
509
                                else if (strcmp(name, "location") == 0) {
 
510
                                        XTTablePathPtr  *tp;
 
511
                                        XTTablePathPtr  db_path;
 
512
 
 
513
#ifdef XT_WIN
 
514
                                        char *ptr = value;
 
515
 
 
516
                                        /* Convert path to WIN path: */
 
517
                                        while (*ptr) {
 
518
                                                if (*ptr == '/')
 
519
                                                        *ptr = '\\';
 
520
                                                ptr++;
 
521
                                        }
 
522
#endif
 
523
                                        if ((tp = (XTTablePathPtr *) xt_sl_find(self, td->td_db->db_table_paths, value)))
 
524
                                                db_path = *tp;
 
525
                                        else {
 
526
                                                size_t                  len;
 
527
 
 
528
                                                len = strlen(value);
 
529
                                                db_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
 
530
                                                db_path->tp_tab_count = 0;
 
531
                                                memcpy(db_path->tp_path, value, len);
 
532
                                                db_path->tp_path[len] = 0;
 
533
                                                xt_sl_insert(self, td->td_db->db_table_paths, db_path->tp_path, &db_path);
 
534
                                        }
 
535
                                        td->td_tab_path = db_path;
 
536
                                }
 
537
                                else if (strcmp(name, "type") == 0) {
 
538
                                        u_long lvalue = 0;
 
539
 
 
540
                                        sscanf(value, "%lu", &lvalue);
 
541
                                        td->td_tab_type = (xtWord1) lvalue;
 
542
                                } 
 
543
                                else if (strcmp(name, "[table]") == 0)
 
544
                                        break;
 
545
                        }
 
546
                        if (!td->td_tab_id)
 
547
                                return_(FALSE);
 
548
                        break;
 
549
        }
 
550
        return_(TRUE);
 
551
}
 
552
 
 
553
xtPublic void xt_describe_tables_exit(XTThreadPtr self, XTTableDescPtr td)
 
554
{
 
555
        switch (td->td_type) {
 
556
                case XT_TD_FROM_DIRECTORY:
 
557
                        if (td->x.y.td_open_dir)
 
558
                                xt_dir_close(NULL, td->x.y.td_open_dir);
 
559
                        td->x.y.td_open_dir = NULL;
 
560
                        break;
 
561
                case XT_TD_FROM_TAB_FILE:
 
562
                        if (td->x.z.td_table_info) {
 
563
                                xt_free(self, td->x.z.td_table_info);
 
564
                                td->x.z.td_table_info = NULL;
 
565
                        }
 
566
                        td->x.z.td_curr_ptr = NULL;
 
567
                        break;
 
568
        }
 
569
        td->td_tab_path = NULL;
 
570
}
 
571
 
 
572
xtPublic void xt_tab_init_db(XTThreadPtr self, XTDatabaseHPtr db)
 
573
{
 
574
        XTTableDescRec          desc;
 
575
        XTTableEntryRec         te_tab;
 
576
        XTTableEntryPtr         te_ptr;
 
577
        XTTablePathPtr          db_path;
 
578
        char                            pbuf[PATH_MAX];
 
579
        int                                     len;
 
580
        u_int                           edx;
 
581
 
 
582
        enter_();
 
583
        pushr_(xt_tab_exit_db, db);
 
584
        if (pbxt_ignore_case)
 
585
                db->db_tables = xt_new_hashtable(self, tab_list_comp_ci, tab_list_hash_ci, tab_list_free, TRUE, TRUE);
 
586
        else
 
587
                db->db_tables = xt_new_hashtable(self, tab_list_comp, tab_list_hash, tab_list_free, TRUE, TRUE);
 
588
        db->db_table_by_id = xt_new_sortedlist(self, sizeof(XTTableEntryRec), 20, 20, tab_comp_by_id, db, tab_free_by_id, FALSE, FALSE);
 
589
        db->db_table_paths = xt_new_sortedlist(self, sizeof(XTTablePathPtr), 20, 20, tab_comp_path, db, tab_free_path, FALSE, FALSE);
 
590
 
 
591
        if (db->db_multi_path) {
 
592
                XTOpenFilePtr   of;
 
593
                char                    *buffer, *ptr, *path;
 
594
 
 
595
                xt_strcpy(PATH_MAX, pbuf, db->db_main_path);
 
596
                xt_add_tables_file(PATH_MAX, pbuf);
 
597
                if (!xt_fs_exists(pbuf)) {
 
598
                        /* Load the location file, if a tables file does not
 
599
                         * exists:
 
600
                         */
 
601
                        xt_strcpy(PATH_MAX, pbuf, db->db_main_path);
 
602
                        xt_add_location_file(PATH_MAX, pbuf);
 
603
                        if (xt_fs_exists(pbuf)) {
 
604
                                of = xt_open_file(self, pbuf, XT_FT_STANDARD, XT_FS_DEFAULT, 1024);
 
605
                                pushr_(xt_close_file, of);
 
606
                                len = (int) xt_seek_eof_file(self, of);
 
607
                                buffer = (char *) xt_malloc(self, len + 1);
 
608
                                pushr_(xt_free, buffer);
 
609
                                if (!xt_pread_file(of, 0, len, len, buffer, NULL, &self->st_statistics.st_x, self))
 
610
                                        xt_throw(self);
 
611
                                buffer[len] = 0;
 
612
                                ptr = buffer;
 
613
                                while (*ptr) {
 
614
                                        /* Ignore preceeding space: */
 
615
                                        while (*ptr && isspace(*ptr))
 
616
                                                ptr++;
 
617
                                        path = ptr;
 
618
                                        while (*ptr && *ptr != '\n' && *ptr != '\r') {
 
619
#ifdef XT_WIN
 
620
                                                /* Undo the conversion below: */
 
621
                                                if (*ptr == '/')
 
622
                                                        *ptr = '\\';
 
623
#endif
 
624
                                                ptr++;
 
625
                                        }
 
626
                                        if (*path != '#' && ptr > path) {
 
627
                                                len = (int) (ptr - path);
 
628
                                                db_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
 
629
                                                db_path->tp_tab_count = 0;
 
630
                                                memcpy(db_path->tp_path, path, len);
 
631
                                                db_path->tp_path[len] = 0;
 
632
                                                xt_sl_insert(self, db->db_table_paths, db_path->tp_path, &db_path);
 
633
                                        }
 
634
                                        ptr++;
 
635
                                }
 
636
                                freer_(); // xt_free(buffer)
 
637
                                freer_(); // xt_close_file(of)
 
638
                        }
 
639
                }
 
640
        }
 
641
        else {
 
642
                len = (int) strlen(db->db_main_path);
 
643
                db_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
 
644
                db_path->tp_tab_count = 0;
 
645
                strcpy(db_path->tp_path, db->db_main_path);
 
646
                xt_sl_insert(self, db->db_table_paths, db_path->tp_path, &db_path);
 
647
        }
 
648
 
 
649
        xt_describe_tables_init(self, db, &desc);
 
650
        pushr_(xt_describe_tables_exit, &desc);
 
651
        while (xt_describe_tables_next(self, &desc)) {
 
652
                te_tab.te_tab_id = desc.td_tab_id;
 
653
                te_tab.te_heap_tab = desc.td_heap_tab;
 
654
 
 
655
                if (te_tab.te_tab_id > db->db_curr_tab_id)
 
656
                        db->db_curr_tab_id = te_tab.te_tab_id;
 
657
 
 
658
                te_tab.te_tab_name = xt_dup_string(self, desc.td_tab_name);
 
659
                te_tab.te_tab_path = desc.td_tab_path;
 
660
                desc.td_tab_path->tp_tab_count++;
 
661
                te_tab.te_table = NULL;
 
662
                te_tab.te_type = desc.td_tab_type;
 
663
                xt_sl_insert(self, db->db_table_by_id, &desc.td_tab_id, &te_tab);
 
664
        }
 
665
        freer_(); // xt_describe_tables_exit(&desc)
 
666
 
 
667
        /* 
 
668
         * The purpose of this code is to ensure that all tables are opened and cached,
 
669
         * which is actually only required if tables have foreign key references.
 
670
         *
 
671
         * In other words, a side affect of this code is that FK references between tables
 
672
         * are registered, and checked.
 
673
         *
 
674
         * Unfortunately we don't know if a table is referenced by a FK, so we have to open
 
675
         * all tables.
 
676
         * 
 
677
         * Cannot open tables in the loop above because db->db_table_by_id which is built 
 
678
         * above is used by xt_use_table_no_lock() 
 
679
         *
 
680
         * {TABLE-STATS}
 
681
         * NOTE: The code also lead to the statistics failing to work because 
 
682
         * the tables were already open when the handler was opened.
 
683
         * Previously we only caclulated statistics when a handler was opened
 
684
         * and the underlying table was also opened.
 
685
         */
 
686
        xt_enum_tables_init(&edx);
 
687
        while ((te_ptr = xt_enum_tables_next(self, db, &edx))) {
 
688
                xt_strcpy(PATH_MAX, pbuf, te_ptr->te_tab_path->tp_path);
 
689
                xt_add_dir_char(PATH_MAX, pbuf);
 
690
                xt_strcat(PATH_MAX, pbuf, te_ptr->te_tab_name);
 
691
                try_(a) {
 
692
                        xt_heap_release(self, xt_use_table_no_lock(self, db, (XTPathStrPtr)pbuf, FALSE, FALSE, NULL));
 
693
                } catch_(a) {
 
694
                        /* ignore errors */
 
695
                        xt_log_and_clear_warning(self);
 
696
                } cont_(a);
 
697
        }
 
698
 
 
699
        popr_(); // Discard xt_tab_exit_db(db)
 
700
        exit_();
 
701
}
 
702
 
 
703
static void tab_save_tables(XTThreadPtr self, XTDatabaseHPtr db)
 
704
{
 
705
        XTTableEntryPtr         te_ptr;
 
706
        XTStringBufferRec       buffer;
 
707
        XTOpenFilePtr           of;
 
708
        char                            path[PATH_MAX];
 
709
 
 
710
        memset(&buffer, 0, sizeof(buffer));
 
711
 
 
712
        xt_strcpy(PATH_MAX, path, db->db_main_path);
 
713
        xt_add_tables_file(PATH_MAX, path);
 
714
 
 
715
        if (xt_sl_get_size(db->db_table_by_id)) {
 
716
                pushr_(xt_sb_free, &buffer);
 
717
                for (u_int i=0; i<xt_sl_get_size(db->db_table_by_id); i++) {
 
718
                        te_ptr = (XTTableEntryPtr) xt_sl_item_at(db->db_table_by_id, i);
 
719
                        xt_sb_concat(self, &buffer, "[table]\n");
 
720
                        xt_sb_concat(self, &buffer, "id=");
 
721
                        xt_sb_concat_int8(self, &buffer, (xtInt8) te_ptr->te_tab_id);
 
722
                        xt_sb_concat(self, &buffer, "\n");
 
723
                        xt_sb_concat(self, &buffer, "name=");
 
724
                        xt_sb_concat(self, &buffer, te_ptr->te_tab_name);
 
725
                        xt_sb_concat(self, &buffer, "\n");
 
726
                        xt_sb_concat(self, &buffer, "location=");
 
727
                        xt_sb_concat(self, &buffer, te_ptr->te_tab_path->tp_path);
 
728
                        xt_sb_concat(self, &buffer, "\n");
 
729
                        xt_sb_concat(self, &buffer, "storage=");
 
730
                        if (te_ptr->te_heap_tab)
 
731
                                xt_sb_concat(self, &buffer, "heap\n");
 
732
                        else
 
733
                                xt_sb_concat(self, &buffer, "disk\n");
 
734
                        xt_sb_concat(self, &buffer, "type=");
 
735
                        xt_sb_concat_int8(self, &buffer, (xtInt8) te_ptr->te_type);
 
736
                        xt_sb_concat(self, &buffer, "\n");
 
737
                }
 
738
 
 
739
#ifdef XT_WIN
 
740
                /* To make the location file cross-platform (at least
 
741
                 * as long as relative paths are used) we replace all '\' 
 
742
                 * with '/': */
 
743
                char *ptr;
 
744
                
 
745
                ptr = buffer.sb_cstring;
 
746
                while (*ptr) {
 
747
                        if (*ptr == '\\')
 
748
                                *ptr = '/';
 
749
                        ptr++;
 
750
                }
 
751
#endif
 
752
                of = xt_open_file(self, path, XT_FT_STANDARD, XT_FS_CREATE | XT_FS_MAKE_PATH, 1024);
 
753
                pushr_(xt_close_file, of);
 
754
                if (!xt_pwrite_file(of, 0, strlen(buffer.sb_cstring), buffer.sb_cstring, &self->st_statistics.st_x, self))
 
755
                        xt_throw(self);
 
756
                xt_set_eof_file(self, of, strlen(buffer.sb_cstring));
 
757
                freer_(); // xt_close_file(of)
 
758
                
 
759
                freer_(); // xt_sb_free(&buffer);
 
760
        }
 
761
        else
 
762
                xt_fs_delete(NULL, path);
 
763
}
 
764
 
 
765
static void tab_save_table_paths(XTThreadPtr self, XTDatabaseHPtr db)
 
766
{
 
767
        XTTablePathPtr          *tp_ptr;
 
768
        XTStringBufferRec       buffer;
 
769
        XTOpenFilePtr           of;
 
770
        char                            path[PATH_MAX];
 
771
 
 
772
        memset(&buffer, 0, sizeof(buffer));
 
773
 
 
774
        xt_strcpy(PATH_MAX, path, db->db_main_path);
 
775
        xt_add_location_file(PATH_MAX, path);
 
776
 
 
777
        if (xt_sl_get_size(db->db_table_paths)) {
 
778
                pushr_(xt_sb_free, &buffer);
 
779
                for (u_int i=0; i<xt_sl_get_size(db->db_table_paths); i++) {
 
780
                        tp_ptr = (XTTablePathPtr *) xt_sl_item_at(db->db_table_paths, i);
 
781
                        xt_sb_concat(self, &buffer, (*tp_ptr)->tp_path);
 
782
                        xt_sb_concat(self, &buffer, "\n");
 
783
                }
 
784
 
 
785
#ifdef XT_WIN
 
786
                /* To make the location file cross-platform (at least
 
787
                 * as long as relative paths are used) we replace all '\' 
 
788
                 * with '/': */
 
789
                char *ptr;
 
790
                
 
791
                ptr = buffer.sb_cstring;
 
792
                while (*ptr) {
 
793
                        if (*ptr == '\\')
 
794
                                *ptr = '/';
 
795
                        ptr++;
 
796
                }
 
797
#endif
 
798
 
 
799
                of = xt_open_file(self, path, XT_FT_STANDARD, XT_FS_CREATE | XT_FS_MAKE_PATH, 1024);
 
800
                pushr_(xt_close_file, of);
 
801
                if (!xt_pwrite_file(of, 0, strlen(buffer.sb_cstring), buffer.sb_cstring, &self->st_statistics.st_x, self))
 
802
                        xt_throw(self);
 
803
                xt_set_eof_file(self, of, strlen(buffer.sb_cstring));
 
804
                freer_(); // xt_close_file(of)
 
805
                
 
806
                freer_(); // xt_sb_free(&buffer);
 
807
        }
 
808
        else
 
809
                xt_fs_delete(NULL, path);
 
810
}
 
811
 
 
812
static XTTablePathPtr tab_get_table_path(XTThreadPtr self, XTDatabaseHPtr db, XTPathStrPtr tab_name, xtBool save_it)
 
813
{
 
814
        XTTablePathPtr  *tp, tab_path;
 
815
        char                    path[PATH_MAX];
 
816
 
 
817
        xt_strcpy(PATH_MAX, path, tab_name->ps_path);
 
818
        xt_remove_last_name_of_path(path);
 
819
        xt_remove_dir_char(path);
 
820
        tp = (XTTablePathPtr *) xt_sl_find(self, db->db_table_paths, path);
 
821
        if (tp)
 
822
                tab_path = *tp;
 
823
        else {
 
824
                int len = (int) strlen(path);
 
825
 
 
826
                tab_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
 
827
                tab_path->tp_tab_count = 0;
 
828
                memcpy(tab_path->tp_path, path, len);
 
829
                tab_path->tp_path[len] = 0;
 
830
                xt_sl_insert(self, db->db_table_paths, tab_path->tp_path, &tab_path);
 
831
                if (save_it) {
 
832
                        tab_save_table_paths(self, db);
 
833
                        if (xt_sl_get_size(db->db_table_paths) == 1) {
 
834
                                XTSystemTableShare::createSystemTables(self, db);
 
835
                        }
 
836
                }
 
837
        }
 
838
        tab_path->tp_tab_count++;
 
839
        return tab_path;
 
840
}
 
841
 
 
842
static void tab_remove_table_path(XTThreadPtr self, XTDatabaseHPtr db, XTTablePathPtr tab_path)
 
843
{
 
844
        if (tab_path->tp_tab_count > 0) {
 
845
                tab_path->tp_tab_count--;
 
846
                if (tab_path->tp_tab_count == 0) {
 
847
                        xt_sl_delete(self, db->db_table_paths, tab_path->tp_path);
 
848
                        tab_save_table_paths(self, db);
 
849
                }
 
850
        }
 
851
}
 
852
 
 
853
static void tab_free_table_path(XTThreadPtr self, XTTablePathPtr tab_path)
 
854
{
 
855
        XTDatabaseHPtr db = self->st_database;
 
856
 
 
857
        tab_remove_table_path(self, db, tab_path);
 
858
}
 
859
 
 
860
xtPublic void xt_tab_exit_db(XTThreadPtr self, XTDatabaseHPtr db)
 
861
{
 
862
        if (db->db_tables) {
 
863
                xt_free_hashtable(self, db->db_tables);
 
864
                db->db_tables = NULL;
 
865
        }
 
866
        if (db->db_table_by_id) {
 
867
                xt_free_sortedlist(self, db->db_table_by_id);
 
868
                db->db_table_by_id = NULL;
 
869
        }
 
870
        if (db->db_table_paths) {
 
871
                xt_free_sortedlist(self, db->db_table_paths);
 
872
                db->db_table_paths = NULL;
 
873
        }
 
874
}
 
875
 
 
876
 
 
877
xtPublic xtBool xt_table_exists(XTDatabaseHPtr db)
 
878
{
 
879
        return xt_sl_get_size(db->db_table_by_id) > 0;
 
880
}
 
881
 
 
882
/*
 
883
 * Enumerate all tables in the current database.
 
884
 */
 
885
 
 
886
xtPublic void xt_enum_tables_init(u_int *edx)
 
887
{
 
888
        *edx = 0;
 
889
}
 
890
 
 
891
xtPublic XTTableEntryPtr xt_enum_tables_next(XTThreadPtr XT_UNUSED(self), XTDatabaseHPtr db, u_int *edx)
 
892
{
 
893
        XTTableEntryPtr en_ptr;
 
894
 
 
895
        if (*edx >= xt_sl_get_size(db->db_table_by_id))
 
896
                return NULL;
 
897
        en_ptr = (XTTableEntryPtr) xt_sl_item_at(db->db_table_by_id, *edx);
 
898
        (*edx)++;
 
899
        return en_ptr;
 
900
}
 
901
 
 
902
xtPublic void xt_enum_files_of_tables_init(XTPathStrPtr tab_name, xtTableID tab_id, XTFilesOfTablePtr ft)
 
903
{
 
904
        ft->ft_state = 0;
 
905
        ft->ft_tab_name = tab_name;
 
906
        ft->ft_tab_id = tab_id;
 
907
}
 
908
 
 
909
xtPublic xtBool xt_enum_files_of_tables_next(XTFilesOfTablePtr ft)
 
910
{
 
911
        char file_name[XT_MAX_TABLE_FILE_NAME_SIZE];
 
912
 
 
913
        retry:
 
914
        switch (ft->ft_state) {
 
915
                case 0:
 
916
                        tab_get_row_file_name(file_name, xt_last_name_of_path(ft->ft_tab_name->ps_path), ft->ft_tab_id);
 
917
                        break;
 
918
                case 1:
 
919
                        tab_get_data_file_name(file_name, xt_last_name_of_path(ft->ft_tab_name->ps_path), ft->ft_tab_id);
 
920
                        break;
 
921
                case 2:
 
922
                        tab_get_index_file_name(file_name, xt_last_name_of_path(ft->ft_tab_name->ps_path), ft->ft_tab_id);
 
923
                        break;
 
924
                default:
 
925
                        return FAILED;
 
926
        }
 
927
 
 
928
        ft->ft_state++;
 
929
        xt_strcpy(PATH_MAX, ft->ft_file_path, ft->ft_tab_name->ps_path);
 
930
        xt_remove_last_name_of_path(ft->ft_file_path);
 
931
        xt_strcat(PATH_MAX, ft->ft_file_path, file_name);
 
932
        if (!xt_fs_exists(ft->ft_file_path))
 
933
                goto retry;
 
934
 
 
935
        return TRUE;
 
936
}
 
937
 
 
938
static xtBool tab_find_table(XTThreadPtr self, XTDatabaseHPtr db, XTPathStrPtr name, xtTableID *tab_id)
 
939
{
 
940
        u_int                   edx;
 
941
        XTTableEntryPtr te_ptr;
 
942
        char                    path[PATH_MAX];
 
943
 
 
944
        xt_enum_tables_init(&edx);
 
945
        while ((te_ptr = xt_enum_tables_next(self, db, &edx))) {
 
946
                xt_strcpy(PATH_MAX, path, te_ptr->te_tab_path->tp_path);
 
947
                xt_add_dir_char(PATH_MAX, path);
 
948
                xt_strcat(PATH_MAX, path, te_ptr->te_tab_name);
 
949
                if (xt_tab_compare_names(path, name->ps_path) == 0) {
 
950
                        *tab_id = te_ptr->te_tab_id;
 
951
                        return TRUE;
 
952
                }
 
953
        }
 
954
        return FALSE;
 
955
}
 
956
 
 
957
xtPublic void xt_tab_disable_index(XTTableHPtr tab, u_int ind_error)
 
958
{
 
959
        tab->tab_dic.dic_disable_index = ind_error;
 
960
        xt_tab_set_table_repair_pending(tab);
 
961
}
 
962
 
 
963
xtPublic void xt_tab_set_index_error(XTTableHPtr tab)
 
964
{
 
965
        switch (tab->tab_dic.dic_disable_index) {
 
966
                case XT_INDEX_OK:
 
967
                        break;
 
968
                case XT_INDEX_TOO_OLD:
 
969
                        xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_OLD_VERSION, tab->tab_name);
 
970
                        break;
 
971
                case XT_INDEX_TOO_NEW:
 
972
                        xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_NEW_VERSION, tab->tab_name);
 
973
                        break;
 
974
                case XT_INDEX_BAD_BLOCK:
 
975
                        char number[40];
 
976
 
 
977
                        sprintf(number, "%d", (int) tab->tab_index_page_size);
 
978
                        xt_register_i2xterr(XT_REG_CONTEXT, XT_ERR_BAD_IND_BLOCK_SIZE, xt_last_name_of_path(tab->tab_name->ps_path), number);
 
979
                        break;
 
980
                case XT_INDEX_CORRUPTED:
 
981
                        xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, tab->tab_name);
 
982
                        break;
 
983
                case XT_INDEX_MISSING:
 
984
                        xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_MISSING, tab->tab_name);
 
985
                        break;
 
986
                case XT_INDEX_NOT_RECOVERED:
 
987
                        xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_NOT_RECOVERED, tab->tab_name);
 
988
                        break;
 
989
        }
 
990
}
 
991
 
 
992
static void tab_load_index_header(XTThreadPtr self, XTTableHPtr tab, XTOpenFilePtr file, XTPathStrPtr table_name)
 
993
{
 
994
        XT_NODE_TEMP;
 
995
        XTIndexPtr                      *ind;
 
996
        xtWord1                         *data;
 
997
        XTIndexFormatDPtr       index_fmt;
 
998
 
 
999
        /* Load the pointers: */
 
1000
        if (tab->tab_index_head)
 
1001
                xt_free_ns(tab->tab_index_head);
 
1002
        tab->tab_index_head = (XTIndexHeadDPtr) xt_calloc(self, XT_INDEX_HEAD_SIZE);
 
1003
 
 
1004
        if (file) {
 
1005
                if (!xt_pread_file(file, 0, XT_INDEX_HEAD_SIZE, 0, tab->tab_index_head, NULL, &self->st_statistics.st_ind, self))
 
1006
                        xt_throw(self);
 
1007
 
 
1008
                tab->tab_index_format_offset = XT_GET_DISK_4(tab->tab_index_head->tp_format_offset_4);
 
1009
                index_fmt = (XTIndexFormatDPtr) (((xtWord1 *) tab->tab_index_head) + tab->tab_index_format_offset);
 
1010
 
 
1011
                /* If the table version is less than or equal to an incompatible (unsupported
 
1012
                 * version), or greater than the current version, then we cannot open this table
 
1013
                 */
 
1014
                if (XT_GET_DISK_2(index_fmt->if_tab_version_2) <= XT_TAB_INCOMPATIBLE_VERSION ||
 
1015
                        XT_GET_DISK_2(index_fmt->if_tab_version_2) > XT_TAB_CURRENT_VERSION) {
 
1016
                        switch (XT_GET_DISK_2(index_fmt->if_tab_version_2)) {
 
1017
                                case 4: 
 
1018
                                        xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.91 Beta");
 
1019
                                        break;
 
1020
                                case 3: 
 
1021
                                        xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.85 Beta");
 
1022
                                        break;
 
1023
                                default:
 
1024
                                        xt_throw_taberr(XT_CONTEXT, XT_ERR_BAD_TABLE_VERSION, table_name);
 
1025
                                        break;
 
1026
                        }
 
1027
                        return;
 
1028
                }
 
1029
 
 
1030
                tab->tab_dic.dic_index_ver = XT_GET_DISK_2(index_fmt->if_ind_version_2);
 
1031
                tab->tab_dic.dic_disable_index = XT_INDEX_OK;
 
1032
 
 
1033
                if (tab->tab_dic.dic_index_ver == 1) {
 
1034
                        tab->tab_index_header_size = 1024 * 16;
 
1035
                        tab->tab_index_page_size = 1024 * 16;
 
1036
                }
 
1037
                else {
 
1038
                        tab->tab_index_header_size = XT_GET_DISK_4(tab->tab_index_head->tp_header_size_4);
 
1039
                        tab->tab_index_page_size = XT_GET_DISK_4(index_fmt->if_page_size_4);
 
1040
                }       
 
1041
 
 
1042
#ifdef XT_USE_LAZY_DELETE
 
1043
                if (tab->tab_dic.dic_index_ver <= XT_IND_NO_LAZY_DELETE)
 
1044
                        tab->tab_dic.dic_no_lazy_delete = TRUE;
 
1045
                else
 
1046
                        tab->tab_dic.dic_no_lazy_delete = FALSE;
 
1047
#else
 
1048
                tab->tab_dic.dic_no_lazy_delete = TRUE;
 
1049
#endif
 
1050
 
 
1051
                /* Incorrect version of index is handled by allowing a sequential scan, but no index access.
 
1052
                 * Recovery with the wrong index type will not recover the indexes, a REPAIR TABLE
 
1053
                 * will be required!
 
1054
                 */
 
1055
                if (tab->tab_dic.dic_index_ver != XT_IND_CURRENT_VERSION) {
 
1056
                        switch (tab->tab_dic.dic_index_ver) {
 
1057
                                case XT_IND_NO_LAZY_DELETE:
 
1058
                                case XT_IND_LAZY_DELETE_OK:
 
1059
                                        /* I can handle this type of index. */
 
1060
                                        break;
 
1061
                                default:
 
1062
                                        if (tab->tab_dic.dic_index_ver < XT_IND_CURRENT_VERSION)
 
1063
                                                xt_tab_disable_index(tab, XT_INDEX_TOO_OLD);
 
1064
                                        else
 
1065
                                                xt_tab_disable_index(tab, XT_INDEX_TOO_NEW);
 
1066
                                        break;
 
1067
                        }
 
1068
                }
 
1069
                else if (tab->tab_index_page_size != XT_INDEX_PAGE_SIZE)
 
1070
                        xt_tab_disable_index(tab, XT_INDEX_BAD_BLOCK);
 
1071
        }
 
1072
        else {
 
1073
                memset(tab->tab_index_head, 0, XT_INDEX_HEAD_SIZE);
 
1074
                xt_tab_disable_index(tab, XT_INDEX_MISSING);
 
1075
                tab->tab_index_header_size = XT_INDEX_HEAD_SIZE;
 
1076
                tab->tab_index_page_size = XT_INDEX_PAGE_SIZE;
 
1077
                tab->tab_dic.dic_index_ver = 0;
 
1078
                tab->tab_index_format_offset = 0;
 
1079
        }
 
1080
 
 
1081
        
 
1082
        if (tab->tab_dic.dic_disable_index) {
 
1083
                xt_tab_set_index_error(tab);
 
1084
                xt_log_and_clear_exception_ns();
 
1085
        }
 
1086
 
 
1087
        if (tab->tab_dic.dic_disable_index) {
 
1088
                /* Reset, as if we have empty indexes.
 
1089
                 * Flush will wipe things out, of course.
 
1090
                 * REPAIR TABLE will be required...
 
1091
                 */
 
1092
                XT_NODE_ID(tab->tab_ind_eof) = 1;
 
1093
                XT_NODE_ID(tab->tab_ind_free) = 0;
 
1094
 
 
1095
                ind = tab->tab_dic.dic_keys;
 
1096
                for (u_int i=0; i<tab->tab_dic.dic_key_count; i++, ind++)
 
1097
                        XT_NODE_ID((*ind)->mi_root) = 0;
 
1098
        }
 
1099
        else {
 
1100
                XT_NODE_ID(tab->tab_ind_eof) = (xtIndexNodeID) XT_GET_DISK_6(tab->tab_index_head->tp_ind_eof_6);
 
1101
                XT_NODE_ID(tab->tab_ind_free) = (xtIndexNodeID) XT_GET_DISK_6(tab->tab_index_head->tp_ind_free_6);
 
1102
 
 
1103
                data = tab->tab_index_head->tp_data;
 
1104
                ind = tab->tab_dic.dic_keys;
 
1105
                for (u_int i=0; i<tab->tab_dic.dic_key_count; i++, ind++) {
 
1106
                        (*ind)->mi_root = XT_GET_NODE_REF(tab, data);
 
1107
                        data += XT_NODE_REF_SIZE;
 
1108
                }
 
1109
        }
 
1110
}
 
1111
 
 
1112
static void tab_load_table_format(XTThreadPtr self, XTOpenFilePtr file, XTPathStrPtr table_name, size_t *ret_format_offset, size_t *ret_head_size, XTDictionaryPtr dic)
 
1113
{
 
1114
        XTDiskValue4            size_buf;
 
1115
        size_t                          head_size;
 
1116
        XTTableFormatDRec       tab_fmt;
 
1117
        size_t                          fmt_size;
 
1118
 
 
1119
        if (!xt_pread_file(file, 0, 4, 4, &size_buf, NULL, &self->st_statistics.st_rec, self))
 
1120
                xt_throw(self);
 
1121
 
 
1122
        head_size = XT_GET_DISK_4(size_buf);
 
1123
        *ret_format_offset = head_size;
 
1124
 
 
1125
        /* Load the table format information: */
 
1126
        if (!xt_pread_file(file, head_size, offsetof(XTTableFormatDRec, tf_definition), offsetof(XTTableFormatDRec, tf_tab_version_2) + 2, &tab_fmt, NULL, &self->st_statistics.st_rec, self))
 
1127
                xt_throw(self);
 
1128
 
 
1129
        /* If the table version is less than or equal to an incompatible (unsupported
 
1130
         * version), or greater than the current version, then we cannot open this table
 
1131
         */
 
1132
        if (XT_GET_DISK_2(tab_fmt.tf_tab_version_2) <= XT_TAB_INCOMPATIBLE_VERSION ||
 
1133
                XT_GET_DISK_2(tab_fmt.tf_tab_version_2) > XT_TAB_CURRENT_VERSION) {
 
1134
                switch (XT_GET_DISK_2(tab_fmt.tf_tab_version_2)) {
 
1135
                        case 4: 
 
1136
                                xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.91 Beta");
 
1137
                                break;
 
1138
                        case 3: 
 
1139
                                xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.85 Beta");
 
1140
                                break;
 
1141
                        default:
 
1142
                                xt_throw_taberr(XT_CONTEXT, XT_ERR_BAD_TABLE_VERSION, table_name);
 
1143
                                break;
 
1144
                }
 
1145
                return;
 
1146
        }
 
1147
 
 
1148
        fmt_size = XT_GET_DISK_4(tab_fmt.tf_format_size_4);
 
1149
        *ret_head_size = XT_GET_DISK_4(tab_fmt.tf_tab_head_size_4);
 
1150
        dic->dic_rec_size = XT_GET_DISK_4(tab_fmt.tf_rec_size_4);
 
1151
        dic->dic_rec_fixed = XT_GET_DISK_1(tab_fmt.tf_rec_fixed_1);
 
1152
        dic->dic_min_auto_inc = XT_GET_DISK_8(tab_fmt.tf_min_auto_inc_8);
 
1153
        if (fmt_size > offsetof(XTTableFormatDRec, tf_definition)) {
 
1154
                size_t  def_size = fmt_size - offsetof(XTTableFormatDRec, tf_definition);
 
1155
                char    *def_sql;
 
1156
 
 
1157
                pushsr_(def_sql, xt_free, (char *) xt_malloc(self, def_size));
 
1158
                if (!xt_pread_file(file, head_size+offsetof(XTTableFormatDRec, tf_definition), def_size, def_size, def_sql, NULL, &self->st_statistics.st_rec, self))
 
1159
                        xt_throw(self);
 
1160
                dic->dic_table = xt_ri_create_table(self, false, table_name, def_sql, myxt_create_table_from_table(self, dic->dic_my_table), NULL);
 
1161
                freer_(); // xt_free(def_sql)
 
1162
        }
 
1163
        else
 
1164
                dic->dic_table = myxt_create_table_from_table(self, dic->dic_my_table);
 
1165
}
 
1166
 
 
1167
static void tab_load_table_header(XTThreadPtr self, XTTableHPtr tab, XTOpenFilePtr file)
 
1168
{
 
1169
        XTTableHeadDRec rec_head;
 
1170
 
 
1171
        if (!xt_pread_file(file, 0, sizeof(XTTableHeadDRec), sizeof(XTTableHeadDRec), (xtWord1 *) &rec_head, NULL, &self->st_statistics.st_rec, self))
 
1172
                xt_throw(self);
 
1173
 
 
1174
        tab->tab_head_op_seq = XT_GET_DISK_4(rec_head.th_op_seq_4);
 
1175
        tab->tab_head_row_free_id = (xtRowID) XT_GET_DISK_6(rec_head.th_row_free_6);
 
1176
        tab->tab_head_row_eof_id = (xtRowID) XT_GET_DISK_6(rec_head.th_row_eof_6);
 
1177
        tab->tab_head_row_fnum = (xtWord4) XT_GET_DISK_6(rec_head.th_row_fnum_6);
 
1178
        tab->tab_head_rec_free_id = (xtRecordID) XT_GET_DISK_6(rec_head.th_rec_free_6);
 
1179
        tab->tab_head_rec_eof_id = (xtRecordID) XT_GET_DISK_6(rec_head.th_rec_eof_6);
 
1180
        tab->tab_head_rec_fnum = (xtWord4) XT_GET_DISK_6(rec_head.th_rec_fnum_6);
 
1181
        tab->tab_wr_op_seq = tab->tab_head_op_seq;
 
1182
}
 
1183
 
 
1184
xtPublic void xt_tab_store_header(XTOpenTablePtr ot, XTTableHeadDPtr rec_head)
 
1185
{
 
1186
        XTTableHPtr tab = ot->ot_table;
 
1187
 
 
1188
        XT_SET_DISK_4(rec_head->th_op_seq_4, tab->tab_head_op_seq);
 
1189
        XT_SET_DISK_6(rec_head->th_row_free_6, tab->tab_head_row_free_id);
 
1190
        XT_SET_DISK_6(rec_head->th_row_eof_6, tab->tab_head_row_eof_id);
 
1191
        XT_SET_DISK_6(rec_head->th_row_fnum_6, tab->tab_head_row_fnum);
 
1192
        XT_SET_DISK_6(rec_head->th_rec_free_6, tab->tab_head_rec_free_id);
 
1193
        XT_SET_DISK_6(rec_head->th_rec_eof_6, tab->tab_head_rec_eof_id);
 
1194
        XT_SET_DISK_6(rec_head->th_rec_fnum_6, tab->tab_head_rec_fnum);
 
1195
}
 
1196
 
 
1197
static xtBool tab_write_header(XTOpenTablePtr ot, XTTableHeadDPtr rec_head)
 
1198
{
 
1199
        if (!xt_tab_write_rec(ot, offsetof(XTTableHeadDRec, th_op_seq_4), 40, (xtWord1 *) rec_head->th_op_seq_4))
 
1200
                return FAILED;
 
1201
        if (!XT_FLUSH_RR_FILE(ot->ot_rec_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread))
 
1202
                return FAILED;
 
1203
        return OK;
 
1204
}
 
1205
 
 
1206
xtPublic xtBool xt_tab_write_min_auto_inc(XTOpenTablePtr ot)
 
1207
{
 
1208
        xtWord1         value[8];
 
1209
        off_t           offset;
 
1210
 
 
1211
        XT_SET_DISK_8(value, ot->ot_table->tab_dic.dic_min_auto_inc);
 
1212
        offset = ot->ot_table->tab_table_format_offset + offsetof(XTTableFormatDRec, tf_min_auto_inc_8);
 
1213
        if (!xt_tab_write_rec(ot, offset, 8, value))
 
1214
                return FAILED;
 
1215
        if (!XT_FLUSH_RR_FILE(ot->ot_rec_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread))
 
1216
                return FAILED;
 
1217
        return OK;
 
1218
}
 
1219
 
 
1220
/* A helper function that removes a table from the open tables hash on
 * exception, used in tab_new_handle() below.
 * It is only compiled when NO_LONGER_REQ is defined.
 */
#ifdef NO_LONGER_REQ
static void xt_del_from_db_tables_ht(XTThreadPtr self, XTTableHPtr tab)
{
	/* The database and the ID are copied out of the table before the
	 * table is removed from the hash:
	 */
	XTDatabaseHPtr	db = tab->tab_db;
	xtTableID		tab_id = tab->tab_id;
	XTTableEntryPtr	entry;

	/* The key to delete by is tab->tab_name, not tab itself! */
	xt_ht_del(self, db->db_tables, tab->tab_name);

	/* When a table is removed from the table name list, the
	 * reference to it in the ID list must be removed as well:
	 */
	entry = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id);
	if (entry)
		entry->te_table = NULL;
}
#endif
 
1240
 
 
1241
xtPublic XTFileType xt_rec_file_type(xtBool heap_tab)
 
1242
{
 
1243
        if (heap_tab)
 
1244
                return XT_FT_HEAP;
 
1245
        if (XT_REC_FILE_TYPE == XT_FT_STANDARD && xt_db_rewrite_flushing)
 
1246
                return XT_FT_REWRITE_FLUSH;
 
1247
        return XT_REC_FILE_TYPE;
 
1248
}
 
1249
 
 
1250
xtPublic XTFileType xt_row_file_type(xtBool heap_tab)
 
1251
{
 
1252
        if (heap_tab)
 
1253
                return XT_FT_HEAP;
 
1254
        if (XT_REC_FILE_TYPE == XT_FT_STANDARD && xt_db_rewrite_flushing)
 
1255
                return XT_FT_REWRITE_FLUSH;
 
1256
        return XT_ROW_FILE_TYPE;
 
1257
}
 
1258
 
 
1259
xtPublic XTFileType xt_ind_file_type(xtBool heap_tab)
 
1260
{
 
1261
        if (heap_tab)
 
1262
                return XT_FT_HEAP;
 
1263
        if (XT_IND_FILE_TYPE == XT_FT_STANDARD && xt_db_rewrite_flushing)
 
1264
                return XT_FT_REWRITE_FLUSH;
 
1265
        return XT_IND_FILE_TYPE;
 
1266
}
 
1267
 
 
1268
#ifdef XT_SORT_REC_WRITES
/*
 * Comparison function that orders delayed writes by record ID.
 * 'a' points to the record ID searched for, 'b' to a delayed write item.
 * Returns -1, 0 or 1 if the ID is less than, equal to or greater than the
 * record ID of the item.
 */
static int tab_cmp_dw_rec_id(struct XTThread *XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
{
	xtRecordID		key_id = *((xtRecordID *) a);
	XTDelayWritePtr	item = (XTDelayWritePtr) b;

	if (key_id < item->dw_rec_id)
		return -1;
	if (key_id == item->dw_rec_id)
		return 0;
	return 1;
}
#endif
 
1281
 
 
1282
/*
 
1283
 * Create a new table handle (i.e. open a table).
 
1284
 * Return NULL if the table is missing, and it is OK for the table
 
1285
 * to be missing.
 
1286
 */
 
1287
static int tab_new_handle(XTThreadPtr self, XTTableHPtr *r_tab, XTDatabaseHPtr db, xtTableID tab_id, XTPathStrPtr tab_path, xtBool missing_ok, XTDictionaryPtr dic)
 
1288
{
 
1289
        char                    path[PATH_MAX];
 
1290
        XTTableHPtr             tab;
 
1291
        char                    file_name[XT_MAX_TABLE_FILE_NAME_SIZE];
 
1292
        XTOpenFilePtr   of_rec, of_ind;
 
1293
        XTTableEntryPtr te_ptr;
 
1294
        size_t                  tab_format_offset;
 
1295
        size_t                  tab_head_size= 0;
 
1296
 
 
1297
        enter_();
 
1298
 
 
1299
        te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id);
 
1300
        ASSERT(te_ptr);
 
1301
 
 
1302
        tab = (XTTableHPtr) xt_heap_new(self, sizeof(XTTableHRec), tab_finalize);
 
1303
        pushr_(xt_heap_release, tab);
 
1304
 
 
1305
        tab->tab_name = (XTPathStrPtr) xt_dup_string(self, tab_path->ps_path);
 
1306
        tab->tab_db = db;
 
1307
        tab->tab_id = tab_id;
 
1308
        tab->tab_dic.dic_table_type = te_ptr->te_type;
 
1309
#ifdef TRACE_TABLE_IDS
 
1310
        PRINTF("%s: allocated TABLE: db=%d tab=%d %s\n", self->t_name, (int) db->db_id, (int) tab->tab_id, xt_last_2_names_of_path(tab->tab_name->ps_path));
 
1311
#endif
 
1312
 
 
1313
        if (dic) {
 
1314
                myxt_move_dictionary(&tab->tab_dic, dic);
 
1315
                myxt_setup_dictionary(self, &tab->tab_dic);
 
1316
        }
 
1317
        else {
 
1318
                if (!myxt_load_dictionary(self, &tab->tab_dic, db, tab_path)) {
 
1319
                        freer_(); // xt_heap_release(tab)
 
1320
                        return_(XT_TAB_NO_DICTIONARY);
 
1321
                }
 
1322
        }
 
1323
 
 
1324
        /* Do not use the XT_TF_DDL_TEMP_TABLE bit from the given dic
 
1325
         * This bit depends only on the
 
1326
         * name of the table, and must be set explicitly.
 
1327
         */
 
1328
        if (myxt_temp_table_name(tab_path->ps_path))
 
1329
                tab->tab_dic.dic_tab_flags |= XT_TF_DDL_TEMP_TABLE;
 
1330
        else
 
1331
                tab->tab_dic.dic_tab_flags &= ~XT_TF_DDL_TEMP_TABLE;
 
1332
 
 
1333
        tab->tab_seq.xt_op_seq_init(self);
 
1334
        xt_spinlock_init_with_autoname(self, &tab->tab_ainc_lock);
 
1335
        xt_init_mutex_with_autoname(self, &tab->tab_rec_flush_lock);
 
1336
        xt_init_mutex_with_autoname(self, &tab->tab_ind_flush_lock);
 
1337
        xt_init_mutex_with_autoname(self, &tab->tab_ind_stat_lock);
 
1338
        xt_init_mutex_with_autoname(self, &tab->tab_dic_field_lock);
 
1339
        xt_init_mutex_with_autoname(self, &tab->tab_row_lock);
 
1340
        xt_init_mutex_with_autoname(self, &tab->tab_ind_lock);
 
1341
        xt_init_mutex_with_autoname(self, &tab->tab_rec_lock);
 
1342
        xt_spinlock_init_with_autoname(self, &tab->tab_mem_lock);
 
1343
        if (!(tab->tab_rec_flush_task = new XTFlushRecRowTask()))
 
1344
                xt_throw_errno(XT_CONTEXT, XT_ENOMEM);
 
1345
        tab->tab_rec_flush_task->tk_init(self);
 
1346
        tab->tab_rec_flush_task->frt_table = tab;
 
1347
        if (!(tab->tab_ind_flush_task = new XTFlushIndexTask()))
 
1348
                xt_throw_errno(XT_CONTEXT, XT_ENOMEM);
 
1349
        tab->tab_ind_flush_task->tk_init(self);
 
1350
        tab->tab_ind_flush_task->fit_table = tab;
 
1351
        for (u_int i=0; i<XT_ROW_RWLOCKS; i++)
 
1352
                XT_TAB_ROW_INIT_LOCK(self, &tab->tab_row_rwlock[i]);
 
1353
        tab->tab_free_locks = TRUE;
 
1354
 
 
1355
        xt_strcpy(PATH_MAX, path, tab_path->ps_path);
 
1356
        xt_remove_last_name_of_path(path);
 
1357
        tab_get_row_file_name(file_name, xt_last_name_of_path(tab_path->ps_path), tab_id);
 
1358
        xt_strcat(PATH_MAX, path, file_name);
 
1359
        tab->tab_row_file = xt_fs_get_file(self, path, xt_row_file_type(te_ptr->te_heap_tab));
 
1360
 
 
1361
        xt_remove_last_name_of_path(path);
 
1362
        tab_get_data_file_name(file_name, xt_last_name_of_path(tab_path->ps_path), tab_id);
 
1363
        xt_strcat(PATH_MAX, path, file_name);
 
1364
        tab->tab_rec_file = xt_fs_get_file(self, path, xt_rec_file_type(te_ptr->te_heap_tab));
 
1365
 
 
1366
        xt_remove_last_name_of_path(path);
 
1367
        tab_get_index_file_name(file_name, xt_last_name_of_path(tab_path->ps_path), tab_id);
 
1368
        xt_strcat(PATH_MAX, path, file_name);
 
1369
        tab->tab_ind_file = xt_fs_get_file(self, path, xt_ind_file_type(te_ptr->te_heap_tab));
 
1370
 
 
1371
        if (te_ptr->te_heap_tab) {
 
1372
                XTOpenFilePtr   of_row;
 
1373
 
 
1374
                tab->tab_dic.dic_tab_flags |= XT_TF_MEMORY_TABLE;
 
1375
                of_row = xt_open_file(self, tab->tab_row_file->fil_path, xt_row_file_type(TRUE), XT_FS_CREATE, xt_db_row_file_grow_size);
 
1376
                pushr_(xt_close_file, of_row);
 
1377
                if (xt_seek_eof_file(self, of_row) == 0)
 
1378
                        tab_init_row_file(self, of_row, tab, &tab->tab_dic);
 
1379
                freer_(); // xt_close_file(of_row)
 
1380
 
 
1381
                of_ind = xt_open_file(self, tab->tab_ind_file->fil_path, xt_ind_file_type(TRUE), XT_FS_CREATE, XT_INDEX_PAGE_SIZE*256);
 
1382
                if (xt_seek_eof_file(self, of_ind) == 0)
 
1383
                        tab_init_ind_file(self, of_ind, tab, &tab->tab_dic);
 
1384
                pushr_(xt_close_file, of_ind);
 
1385
                tab_load_index_header(self, tab, of_ind, tab_path);
 
1386
                freer_(); // xt_close_file(of_ind)
 
1387
 
 
1388
                of_rec = xt_open_file(self, tab->tab_rec_file->fil_path, xt_rec_file_type(te_ptr->te_heap_tab), XT_FS_CREATE, xt_db_data_file_grow_size);
 
1389
                pushr_(xt_close_file, of_rec);
 
1390
                if (xt_seek_eof_file(self, of_rec) == 0)
 
1391
                        tab_init_data_file(self, of_rec, tab, &tab->tab_dic, 0, NULL);
 
1392
        }
 
1393
        else {
 
1394
#ifdef XT_SORT_REC_WRITES
 
1395
                tab->tab_rec_dw_writes = xt_new_sortedlist(self, sizeof(XTDelayWriteRec), 20, 10, tab_cmp_dw_rec_id, NULL, NULL, TRUE, FALSE);
 
1396
#endif
 
1397
                of_ind = xt_open_file(self, tab->tab_ind_file->fil_path, xt_ind_file_type(FALSE), XT_FS_MISSING_OK, XT_INDEX_PAGE_SIZE*256);
 
1398
                if (of_ind) {
 
1399
                        pushr_(xt_close_file, of_ind);
 
1400
                        tab_load_index_header(self, tab, of_ind, tab_path);
 
1401
                        freer_(); // xt_close_file(of_ind)
 
1402
                }
 
1403
                else
 
1404
                        tab_load_index_header(self, tab, of_ind, tab_path);
 
1405
 
 
1406
                of_rec = xt_open_file(self, tab->tab_rec_file->fil_path, xt_rec_file_type(FALSE), missing_ok ? XT_FS_MISSING_OK : XT_FS_DEFAULT, xt_db_data_file_grow_size);
 
1407
                if (!of_rec) {
 
1408
                        freer_(); // xt_heap_release(tab)
 
1409
                        return_(XT_TAB_NOT_FOUND);
 
1410
                }
 
1411
                pushr_(xt_close_file, of_rec);
 
1412
        }
 
1413
 
 
1414
        tab_load_table_format(self, of_rec, tab_path, &tab_format_offset, &tab_head_size, &tab->tab_dic);
 
1415
        tab->tab_table_format_offset = tab_format_offset;
 
1416
        tab->tab_table_head_size = tab_head_size;
 
1417
        tab->tab_dic.dic_table->dt_table = tab;
 
1418
        tab_load_table_header(self, tab, of_rec);
 
1419
        freer_(); // xt_close_file(of_rec)
 
1420
 
 
1421
        tab->tab_seq.xt_op_seq_set(self, tab->tab_wr_op_seq+1);
 
1422
        tab->tab_row_eof_id = tab->tab_head_row_eof_id;
 
1423
        tab->tab_row_free_id = tab->tab_head_row_free_id;
 
1424
        tab->tab_row_fnum = tab->tab_head_row_fnum;
 
1425
        tab->tab_rec_eof_id = tab->tab_head_rec_eof_id;
 
1426
        tab->tab_rec_free_id = tab->tab_head_rec_free_id;
 
1427
        tab->tab_rec_fnum = tab->tab_head_rec_fnum;
 
1428
 
 
1429
        tab->tab_rows.xt_tc_setup(tab, FALSE, sizeof(XTTabRowHeadDRec), sizeof(XTTabRowRefDRec));
 
1430
        tab->tab_recs.xt_tc_setup(tab, TRUE, tab_head_size, tab->tab_dic.dic_rec_size);
 
1431
 
 
1432
        xt_xres_init_tab(self, tab);
 
1433
 
 
1434
        if (!xt_init_row_locks(&tab->tab_locks))
 
1435
                xt_throw(self);
 
1436
 
 
1437
        xt_heap_set_release_callback(tab, tab_onrelease);
 
1438
 
 
1439
        tab->tab_repair_pending = xt_tab_is_table_repair_pending(tab);
 
1440
 
 
1441
        popr_(); // Discard xt_heap_release(tab)
 
1442
 
 
1443
        xt_ht_put(self, db->db_tables, tab);
 
1444
 
 
1445
        /* Add a reference to the ID list, when a table is
 
1446
         * added to the table name list:
 
1447
         */
 
1448
        te_ptr->te_table = tab;
 
1449
 
 
1450
    /* Moved from after xt_init_row_locks() above, so that calling
 
1451
     * xt_use_table_no_lock() with no_load == FALSE from attachReferences()
 
1452
     * will work if we have cyclic foreign key references.
 
1453
     */ 
 
1454
        if (tab->tab_dic.dic_table) {
 
1455
                try_(a) {
 
1456
                        tab->tab_dic.dic_table->attachReferences(self, db);
 
1457
                } catch_(a) {
 
1458
                        /* ignore problems of referenced tables */
 
1459
                        xt_log_and_clear_warning(self);
 
1460
                } cont_(a);
 
1461
        }
 
1462
 
 
1463
        *r_tab = tab;
 
1464
        return_(XT_TAB_OK);
 
1465
}
 
1466
 
 
1467
 
 
1468
/*
 
1469
 * Get a reference to a table in the current database. The table reference is valid,
 
1470
 * as long as the thread is using the database!!!
 
1471
 */
 
1472
xtPublic XTTableHPtr xt_use_table_no_lock(XTThreadPtr self, XTDatabaseHPtr db, XTPathStrPtr name, xtBool no_load, xtBool missing_ok, XTDictionaryPtr dic)
 
1473
{
 
1474
        XTTableHPtr tab;
 
1475
 
 
1476
        if (!db)
 
1477
                xt_throw_xterr(XT_CONTEXT, XT_ERR_NO_DATABASE_IN_USE);
 
1478
 
 
1479
        tab = (XTTableHPtr) xt_ht_get(self, db->db_tables, name);
 
1480
        if (!tab && !no_load) {
 
1481
                xtTableID       tab_id = 0;
 
1482
 
 
1483
                if (!tab_find_table(self, db, name, &tab_id)) {
 
1484
                        if (missing_ok)
 
1485
                                return NULL;
 
1486
                        xt_throw_taberr(XT_CONTEXT, XT_ERR_TABLE_NOT_FOUND, name);
 
1487
                }
 
1488
 
 
1489
                switch (tab_new_handle(self, &tab, db, tab_id, name, missing_ok, dic)) {
 
1490
                        case XT_TAB_NO_DICTIONARY:
 
1491
                                xt_throw_taberr(XT_CONTEXT, XT_ERR_NO_DICTIONARY, name);
 
1492
                        case XT_TAB_POOL_CLOSED:
 
1493
                                xt_throw_ulxterr(XT_CONTEXT, XT_ERR_TABLE_LOCKED, (u_long) tab_id);
 
1494
                        case XT_TAB_NOT_FOUND:
 
1495
                                if (missing_ok)
 
1496
                                        return NULL;
 
1497
                                xt_throw_taberr(XT_CONTEXT, XT_ERR_TABLE_NOT_FOUND, name);
 
1498
                        default:
 
1499
                                break;
 
1500
                }
 
1501
        }
 
1502
        
 
1503
        if (tab)
 
1504
                xt_heap_reference(self, tab);
 
1505
 
 
1506
        return tab;
 
1507
}
 
1508
 
 
1509
static void tab_close_table(XTOpenTablePtr ot)
 
1510
{
 
1511
        xt_ind_free_reserved(ot);
 
1512
 
 
1513
        if (ot->ot_rec_file) {
 
1514
                XT_CLOSE_RR_FILE_NS(ot->ot_rec_file);
 
1515
                ot->ot_rec_file = NULL;
 
1516
                
 
1517
        }
 
1518
        if (ot->ot_ind_file) {
 
1519
                xt_close_file_ns(ot->ot_ind_file);
 
1520
                ot->ot_ind_file = NULL;
 
1521
                
 
1522
        }
 
1523
        if (ot->ot_row_file) {
 
1524
                XT_CLOSE_RR_FILE_NS(ot->ot_row_file);
 
1525
                ot->ot_row_file = NULL;
 
1526
                
 
1527
        }
 
1528
        if (ot->ot_table) {
 
1529
                xt_heap_release(xt_get_self(), ot->ot_table);
 
1530
                ot->ot_table = NULL;
 
1531
        }
 
1532
        if (ot->ot_ind_rhandle) {
 
1533
                xt_ind_release_handle(ot->ot_ind_rhandle, FALSE, ot->ot_thread);
 
1534
                ot->ot_ind_rhandle = NULL;
 
1535
        }
 
1536
        if (ot->ot_row_rbuffer) {
 
1537
                xt_free_ns(ot->ot_row_rbuffer);
 
1538
                ot->ot_row_rbuf_size = 0;
 
1539
                ot->ot_row_rbuffer = NULL;
 
1540
        }
 
1541
        if (ot->ot_row_wbuffer) {
 
1542
                xt_free_ns(ot->ot_row_wbuffer);
 
1543
                ot->ot_row_wbuf_size = 0;
 
1544
                ot->ot_row_wbuffer = NULL;
 
1545
        }
 
1546
#ifdef XT_TRACK_RETURNED_ROWS
 
1547
        if (ot->ot_rows_returned) {
 
1548
                xt_free_ns(ot->ot_rows_returned);
 
1549
                ot->ot_rows_returned = NULL;
 
1550
        }
 
1551
        ot->ot_rows_ret_curr = 0;
 
1552
        ot->ot_rows_ret_max = 0;
 
1553
#endif
 
1554
        xt_free(NULL, ot);
 
1555
}
 
1556
 
 
1557
static void tab_delete_table_files(XTThreadPtr self, XTPathStrPtr tab_name, xtTableID tab_id)
 
1558
{
 
1559
        XTFilesOfTableRec       ft;
 
1560
 
 
1561
        xt_enum_files_of_tables_init(tab_name, tab_id, &ft);
 
1562
        while (xt_enum_files_of_tables_next(&ft)) {
 
1563
                if (!xt_fs_delete(NULL, ft.ft_file_path))
 
1564
                        xt_log_and_clear_exception(self);
 
1565
        }
 
1566
}
 
1567
 
 
1568
static void tab_init_row_file(XTThreadPtr self, XTOpenFilePtr of_row, XTTableHPtr tab, XTDictionaryPtr)
 
1569
{
 
1570
        XTTabRowHeadDRec        row_head;
 
1571
 
 
1572
        tab->tab_row_eof_id = 1;
 
1573
        tab->tab_row_free_id = 0;
 
1574
        tab->tab_row_fnum = 0;
 
1575
 
 
1576
        tab->tab_head_row_eof_id = 1;
 
1577
        tab->tab_head_row_free_id = 0;
 
1578
        tab->tab_head_row_fnum  = 0;
 
1579
 
 
1580
        XT_SET_DISK_4(row_head.rh_magic_4, XT_TAB_ROW_MAGIC);
 
1581
        if (!xt_pwrite_file(of_row, 0, sizeof(row_head), &row_head, &self->st_statistics.st_rec, self))
 
1582
                xt_throw(self);
 
1583
}
 
1584
 
 
1585
static void tab_init_data_file(XTThreadPtr self, XTOpenFilePtr of_rec, XTTableHPtr tab, XTDictionaryPtr dic, size_t def_len, XTStringBufferPtr tab_def)
 
1586
{
 
1587
        off_t                           eof;
 
1588
        XTTableHeadDRec         rec_head;
 
1589
        XTTableFormatDRec       table_fmt;
 
1590
 
 
1591
        /* Calculate the offset of the first record in the data handle file. */
 
1592
        eof = sizeof(XTTableHeadDRec) + offsetof(XTTableFormatDRec, tf_definition) + def_len + XT_FORMAT_DEF_SPACE;
 
1593
        eof = (eof + 1024 - 1) / 1024 * 1024;           // Round to a value divisible by 1024
 
1594
 
 
1595
        tab->tab_table_format_offset = sizeof(XTTableHeadDRec);
 
1596
        tab->tab_table_head_size = (size_t) eof;
 
1597
 
 
1598
        tab->tab_rec_eof_id = 1;                                                // This is the first record ID!
 
1599
        tab->tab_rec_free_id = 0;
 
1600
        tab->tab_rec_fnum = 0;
 
1601
        
 
1602
        tab->tab_head_rec_eof_id = 1;                                   // The first record ID
 
1603
        tab->tab_head_rec_free_id = 0;
 
1604
        tab->tab_head_rec_fnum = 0;
 
1605
 
 
1606
        tab->tab_dic.dic_rec_size = dic->dic_rec_size;
 
1607
        tab->tab_dic.dic_rec_fixed = dic->dic_rec_fixed;
 
1608
        tab->tab_dic.dic_tab_flags = dic->dic_tab_flags;
 
1609
        tab->tab_dic.dic_min_auto_inc = dic->dic_min_auto_inc;
 
1610
        tab->tab_dic.dic_def_ave_row_size = dic->dic_def_ave_row_size;
 
1611
        tab->tab_dic.dic_table_type = dic->dic_table_type;
 
1612
 
 
1613
        XT_SET_DISK_4(rec_head.th_head_size_4, sizeof(XTTableHeadDRec));
 
1614
        XT_SET_DISK_4(rec_head.th_op_seq_4, tab->tab_head_op_seq);
 
1615
        XT_SET_DISK_6(rec_head.th_row_free_6, tab->tab_head_row_free_id);
 
1616
        XT_SET_DISK_6(rec_head.th_row_eof_6, tab->tab_head_row_eof_id);
 
1617
        XT_SET_DISK_6(rec_head.th_row_fnum_6, tab->tab_head_row_fnum);
 
1618
        XT_SET_DISK_6(rec_head.th_rec_free_6, tab->tab_head_rec_free_id);
 
1619
        XT_SET_DISK_6(rec_head.th_rec_eof_6, tab->tab_head_rec_eof_id);
 
1620
        XT_SET_DISK_6(rec_head.th_rec_fnum_6, tab->tab_head_rec_fnum);
 
1621
 
 
1622
        if (!xt_pwrite_file(of_rec, 0, sizeof(XTTableHeadDRec), &rec_head, &self->st_statistics.st_rec, self))
 
1623
                xt_throw(self);
 
1624
 
 
1625
        /* Store the table format: */
 
1626
        memset(&table_fmt, 0, offsetof(XTTableFormatDRec, tf_definition));
 
1627
        XT_SET_DISK_4(table_fmt.tf_format_size_4, offsetof(XTTableFormatDRec, tf_definition) + def_len);
 
1628
        XT_SET_DISK_4(table_fmt.tf_tab_head_size_4, eof);
 
1629
        XT_SET_DISK_2(table_fmt.tf_tab_version_2, XT_TAB_CURRENT_VERSION);
 
1630
        XT_SET_DISK_4(table_fmt.tf_rec_size_4, tab->tab_dic.dic_rec_size);
 
1631
        XT_SET_DISK_1(table_fmt.tf_rec_fixed_1, tab->tab_dic.dic_rec_fixed);
 
1632
        XT_SET_DISK_2(table_fmt.tf_tab_unused_2, 0);
 
1633
        XT_SET_DISK_8(table_fmt.tf_min_auto_inc_8, tab->tab_dic.dic_min_auto_inc);
 
1634
 
 
1635
        if (!xt_pwrite_file(of_rec, sizeof(XTTableHeadDRec), offsetof(XTTableFormatDRec, tf_definition), &table_fmt, &self->st_statistics.st_rec, self))
 
1636
                xt_throw(self);
 
1637
        if (def_len) {
 
1638
                if (!xt_pwrite_file(of_rec, sizeof(XTTableHeadDRec) + offsetof(XTTableFormatDRec, tf_definition), def_len, tab_def->sb_cstring, &self->st_statistics.st_rec, self))
 
1639
                        xt_throw(self);
 
1640
        }
 
1641
}
 
1642
 
 
1643
static void tab_init_ind_file(XTThreadPtr self, XTOpenFilePtr of_ind, XTTableHPtr tab, XTDictionaryPtr dic)
 
1644
{
 
1645
        XTIndexFormatDPtr       index_fmt;
 
1646
 
 
1647
        /* This is the size of the index header: */
 
1648
        tab->tab_index_format_offset = offsetof(XTIndexHeadDRec, tp_data) + dic->dic_key_count * XT_NODE_REF_SIZE;
 
1649
        if (!(tab->tab_index_head = (XTIndexHeadDPtr) xt_calloc_ns(XT_INDEX_HEAD_SIZE)))
 
1650
                xt_throw(self);
 
1651
 
 
1652
        XT_NODE_ID(tab->tab_ind_eof) = 1;
 
1653
        XT_NODE_ID(tab->tab_ind_free) = 0;
 
1654
 
 
1655
        XT_SET_DISK_4(tab->tab_index_head->tp_header_size_4, XT_INDEX_HEAD_SIZE);
 
1656
        XT_SET_DISK_4(tab->tab_index_head->tp_format_offset_4, tab->tab_index_format_offset);
 
1657
        XT_SET_DISK_6(tab->tab_index_head->tp_ind_eof_6, XT_NODE_ID(tab->tab_ind_eof));
 
1658
        XT_SET_DISK_6(tab->tab_index_head->tp_ind_free_6, XT_NODE_ID(tab->tab_ind_free));
 
1659
 
 
1660
        /* Store the index format: */
 
1661
        index_fmt = (XTIndexFormatDPtr) (((xtWord1 *) tab->tab_index_head) + tab->tab_index_format_offset);
 
1662
        XT_SET_DISK_4(index_fmt->if_format_size_4, sizeof(XTIndexFormatDRec));
 
1663
        XT_SET_DISK_2(index_fmt->if_tab_version_2, XT_TAB_CURRENT_VERSION);
 
1664
        XT_SET_DISK_2(index_fmt->if_ind_version_2, XT_IND_CURRENT_VERSION);
 
1665
        XT_SET_DISK_1(index_fmt->if_node_ref_size_1, XT_NODE_REF_SIZE);
 
1666
        XT_SET_DISK_1(index_fmt->if_rec_ref_size_1, XT_RECORD_REF_SIZE);
 
1667
        XT_SET_DISK_4(index_fmt->if_page_size_4, XT_INDEX_PAGE_SIZE);
 
1668
 
 
1669
        /* Save the header: */
 
1670
        if (!xt_pwrite_file(of_ind, 0, XT_INDEX_HEAD_SIZE, tab->tab_index_head, &self->st_statistics.st_ind, self))
 
1671
                xt_throw(self);
 
1672
}
 
1673
 
 
1674
xtPublic void xt_create_table(XTThreadPtr self, XTPathStrPtr name, XTDictionaryPtr dic)
 
1675
{
 
1676
        char                            table_name[XT_MAX_TABLE_FILE_NAME_SIZE];
 
1677
        char                            path[PATH_MAX];
 
1678
        XTDatabaseHPtr          db = self->st_database;
 
1679
        XTOpenTablePoolPtr      table_pool;
 
1680
        XTTableHPtr                     tab;
 
1681
        XTTableHPtr                     old_tab = NULL;
 
1682
        xtTableID                       old_tab_id = 0;
 
1683
        xtTableID                       tab_id = 0;
 
1684
        XTStringBufferRec       tab_def = { 0, 0, 0 };
 
1685
        XTTableEntryRec         te_tab;
 
1686
        XTSortedListInfoRec     li_undo;
 
1687
 
 
1688
#ifdef TRACE_CREATE_TABLES
 
1689
        printf("CREATE %s\n", name->ps_path);
 
1690
#endif
 
1691
        enter_();
 
1692
        if (strlen(xt_last_name_of_path(name->ps_path)) > XT_TABLE_NAME_SIZE-1)
 
1693
                xt_throw_taberr(XT_CONTEXT, XT_ERR_NAME_TOO_LONG, name);
 
1694
        if (!db)
 
1695
                xt_throw_xterr(XT_CONTEXT, XT_ERR_NO_DATABASE_IN_USE);
 
1696
 
 
1697
        /* Lock to prevent table list change during creation. */
 
1698
        table_pool = xt_db_lock_table_pool_by_name(self, self->st_database, name, FALSE, TRUE, TRUE, &old_tab);
 
1699
        pushr_(xt_db_unlock_table_pool, table_pool);
 
1700
        xt_ht_lock(self, db->db_tables);
 
1701
        pushr_(xt_ht_unlock, db->db_tables);
 
1702
        pushr_(xt_heap_release, old_tab);
 
1703
 
 
1704
        /* This must be done before we remove the old table
 
1705
         * from the directory, or we will not be able
 
1706
         * to find the table, which could is require
 
1707
         * for TRUNCATE!
 
1708
         */
 
1709
        if (xt_sl_get_size(db->db_table_by_id) >= XT_MAX_TABLES)
 
1710
                xt_throw_ulxterr(XT_CONTEXT, XT_ERR_TOO_MANY_TABLES, (u_long) XT_MAX_TABLES);
 
1711
 
 
1712
        tab_id = db->db_curr_tab_id + 1;                
 
1713
 
 
1714
        if (old_tab) {
 
1715
                old_tab_id = old_tab->tab_id;           
 
1716
                xt_dl_delete_ext_data(self, old_tab, FALSE, TRUE);
 
1717
                freer_(); // xt_heap_release(self, old_tab)
 
1718
 
 
1719
                /* For the Windows version this must be done before we
 
1720
                 * start to delete the underlying files!
 
1721
                 */
 
1722
                tab_close_files(self, old_tab);
 
1723
 
 
1724
                tab_delete_table_files(self, name, old_tab_id);
 
1725
 
 
1726
                /* Remove the PBMS table: */
 
1727
                ASSERT(xt_get_self() == self);
 
1728
 
 
1729
                /* Remove the table from the directory. It will get a new
 
1730
                 * ID so the handle in the directory will no longer be valid.
 
1731
                 */
 
1732
                xt_ht_del(self, db->db_tables, name);
 
1733
        }
 
1734
        else {
 
1735
                freer_(); // xt_heap_release(self, old_tab)
 
1736
        }
 
1737
 
 
1738
        /* Add the table to the directory, well remove on error! */
 
1739
        li_undo.li_sl = db->db_table_by_id;
 
1740
        li_undo.li_key = &tab_id;
 
1741
        te_tab.te_tab_id = tab_id;
 
1742
        te_tab.te_heap_tab = dic->dic_tab_flags & XT_TF_MEMORY_TABLE;
 
1743
        te_tab.te_tab_name = xt_dup_string(self, xt_last_name_of_path(name->ps_path));
 
1744
        te_tab.te_tab_path = tab_get_table_path(self, db, name, TRUE);
 
1745
        te_tab.te_table = NULL;
 
1746
        te_tab.te_type = dic->dic_table_type;  
 
1747
        xt_sl_insert(self, db->db_table_by_id, &tab_id, &te_tab);
 
1748
 
 
1749
        *path = 0;
 
1750
        try_(a) {
 
1751
                XTOpenFilePtr   of_row, of_rec, of_ind;
 
1752
                size_t                  def_len = 0;
 
1753
 
 
1754
                tab_save_tables(self, db);
 
1755
 
 
1756
                tab = (XTTableHPtr) xt_heap_new(self, sizeof(XTTableHRec), tab_finalize);
 
1757
                pushr_(xt_heap_release, tab);
 
1758
 
 
1759
                /* The length of the foreign key definition: */
 
1760
                if (dic->dic_table) {
 
1761
                        dic->dic_table->loadString(self, &tab_def);
 
1762
                        def_len = tab_def.sb_len + 1;
 
1763
                }
 
1764
 
 
1765
                tab->tab_head_op_seq = 0;
 
1766
                tab->tab_wr_op_seq = 0;
 
1767
#ifdef DEBUG
 
1768
                /* This tests operation number overflow. */
 
1769
                //tab->tab_head_op_seq = 0xFFFFFFFF - 12;
 
1770
                //tab->tab_wr_op_seq = 0xFFFFFFFF - 12;
 
1771
#endif
 
1772
 
 
1773
                /* ------- ROW FILE: */
 
1774
                xt_strcpy(PATH_MAX, path, name->ps_path);
 
1775
                xt_remove_last_name_of_path(path);
 
1776
                tab_get_row_file_name(table_name, xt_last_name_of_path(name->ps_path), tab_id);
 
1777
                xt_strcat(PATH_MAX, path, table_name);
 
1778
                of_row = xt_open_file(self, path, xt_row_file_type(dic->dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_CREATE | XT_FS_EXCLUSIVE, xt_db_row_file_grow_size);
 
1779
                pushr_(xt_close_file, of_row);
 
1780
                tab_init_row_file(self, of_row, tab, dic);
 
1781
                freer_(); // xt_close_file(of_row)
 
1782
 
 
1783
                (void) ASSERT(sizeof(XTTabRowHeadDRec) == sizeof(XTTabRowRefDRec));
 
1784
                (void) ASSERT(sizeof(XTTabRowRefDRec) == 1 << XT_TAB_ROW_SHIFTS);
 
1785
 
 
1786
                /* ------------ DATA FILE: */
 
1787
                xt_remove_last_name_of_path(path);
 
1788
                tab_get_data_file_name(table_name, xt_last_name_of_path(name->ps_path), tab_id);
 
1789
                xt_strcat(PATH_MAX, path, table_name);
 
1790
                of_rec = xt_open_file(self, path, xt_rec_file_type(dic->dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_CREATE | XT_FS_EXCLUSIVE, xt_db_data_file_grow_size);
 
1791
                pushr_(xt_close_file, of_rec);
 
1792
                tab_init_data_file(self, of_rec, tab, dic, def_len, &tab_def);
 
1793
                freer_(); // xt_close_file(of_rec)
 
1794
 
 
1795
                /* ----------- INDEX FILE: */
 
1796
                xt_remove_last_name_of_path(path);
 
1797
                tab_get_index_file_name(table_name, xt_last_name_of_path(name->ps_path), tab_id);
 
1798
                xt_strcat(PATH_MAX, path, table_name);
 
1799
                of_ind = xt_open_file(self, path, xt_ind_file_type(dic->dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_CREATE | XT_FS_EXCLUSIVE, XT_INDEX_PAGE_SIZE*256);
 
1800
                pushr_(xt_close_file, of_ind);
 
1801
                tab_init_ind_file(self, of_ind, tab, dic);
 
1802
                freer_(); // xt_close_file(of_ind)
 
1803
 
 
1804
                /* ------------ */
 
1805
                /* Log the new table ID! */
 
1806
                db->db_curr_tab_id = tab_id;
 
1807
                if (!xt_xn_log_tab_id(self, tab_id)) {
 
1808
                        db->db_curr_tab_id = tab_id - 1;
 
1809
                        xt_throw(self);
 
1810
                }
 
1811
 
 
1812
                freer_(); // xt_heap_release(tab)
 
1813
 
 
1814
                /* {LOAD-FOR-FKS}
 
1815
                 * 2008-12-10: Note, there is another problem, example:
 
1816
                 * set storage_engine = pbxt;
 
1817
                 * 
 
1818
                 * CREATE TABLE t1 (s1 INT PRIMARY KEY, s2 INT);
 
1819
                 * CREATE TABLE t2 (s1 INT PRIMARY KEY, FOREIGN KEY (s1) REFERENCES t1 (s1) ON UPDATE CASCADE);
 
1820
                 * CREATE TABLE t3 (s1 INT PRIMARY KEY, FOREIGN KEY (s1) REFERENCES t2 (s1) ON UPDATE CASCADE);
 
1821
                 * 
 
1822
                 * DROP TABLE IF EXISTS t2,t1;
 
1823
                 * CREATE TABLE t1 (s1 ENUM('a','b') PRIMARY KEY);
 
1824
                 * CREATE TABLE t2 (s1 ENUM('A','B'), FOREIGN KEY (s1) REFERENCES t1 (s1));
 
1825
                 * 
 
1826
                 * DROP TABLE IF EXISTS t2,t1;
 
1827
                 * 
 
1828
                 * In the example above. The second create t2 does not fail, although t3 references it,
 
1829
                 * and the data types do not match.
 
1830
                 * 
 
1831
                 * The main problem is that this error comes on DROP TABLE IF EXISTS t2! Which prevents
 
1832
                 * the table from being dropped - not good.
 
1833
                 *
 
1834
                 * So my idea here is to open the table, and if it fails, then the create table fails
 
1835
                 * as well.
 
1836
                 */
 
1837
                /*
 
1838
                 * Drizzle-specific:
 
1839
                 * We pass table type separately and provide NULL for the dic parameter, this is because
 
1840
                 * we want to force loading table (which is triggered by dic == NULL) but we still need table type
 
1841
                 */
 
1842
                if (!old_tab_id) {
 
1843
#ifndef DRIZZLED
 
1844
                        tab = xt_use_table_no_lock(self, db, name, FALSE, FALSE, NULL);
 
1845
                        xt_heap_release(self, tab);
 
1846
#endif
 
1847
                }
 
1848
        }
 
1849
        catch_(a) {
 
1850
                /* Creation failed, delete the table files: */
 
1851
                XTException e;
 
1852
 
 
1853
                xt_enter_exception_handler(self, &e);
 
1854
                if (*path)
 
1855
                        tab_delete_table_files(self, name, tab_id);
 
1856
                tab_remove_table_path(self, db, te_tab.te_tab_path);
 
1857
                xt_sl_delete(NULL, db->db_table_by_id, &tab_id);
 
1858
                tab_save_tables(self, db);
 
1859
                xt_sb_set_size(self, &tab_def, 0);
 
1860
                xt_exit_exception_handler(self, &e);
 
1861
                xt_throw(self);
 
1862
        }
 
1863
        cont_(a);
 
1864
 
 
1865
        xt_sb_set_size(self, &tab_def, 0);
 
1866
 
 
1867
        if (old_tab_id) {
 
1868
                try_(b) {
 
1869
                        XTTableEntryPtr te_ptr;
 
1870
 
 
1871
                        if ((te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &old_tab_id))) {
 
1872
                                tab_remove_table_path(self, db, te_ptr->te_tab_path);
 
1873
                                xt_sl_delete(self, db->db_table_by_id, &old_tab_id);
 
1874
                                tab_save_tables(self, db);
 
1875
                        }
 
1876
 
 
1877
                        /* Same purpose as above {LOAD-FOR-FKS} (although this should work, 
 
1878
                         * beacuse this is a TRUNCATE TABLE.
 
1879
                         */
 
1880
                        tab = xt_use_table_no_lock(self, db, name, FALSE, FALSE, NULL);
 
1881
                        xt_heap_release(self, tab);
 
1882
                }
 
1883
                catch_(b) {
 
1884
                        /* Log this error, but do not return it, because
 
1885
                         * it just involves the cleanup of the old table,
 
1886
                         * the new table has been successfully created.
 
1887
                         */
 
1888
                        xt_log_and_clear_exception(self);
 
1889
                }
 
1890
                cont_(b);
 
1891
        }
 
1892
 
 
1893
        freer_(); // xt_ht_unlock(db->db_tables)
 
1894
        freer_(); // xt_db_unlock_table_pool(table_pool)
 
1895
 
 
1896
        /* I open the table here, because I cannot rely on MySQL to do
 
1897
         * it after a create. This is normally OK, but with foreign keys
 
1898
         * tables can be referenced and then they are not opened
 
1899
         * before use. In this example, the INSERT opens t2, but t1 is
 
1900
         * not opened of the create. As a result the foreign key
 
1901
         * reference is not resolved.
 
1902
         *
 
1903
         * drop table t1, t2;
 
1904
         * CREATE TABLE t1
 
1905
         * (
 
1906
         *  id INT PRIMARY KEY
 
1907
         * ) ENGINE=pbxt;
 
1908
         * 
 
1909
         * CREATE TABLE t2
 
1910
         * (
 
1911
         *  v INT,
 
1912
         *  CONSTRAINT c1 FOREIGN KEY (v) REFERENCES t1(id)
 
1913
         * ) ENGINE=pbxt;
 
1914
         * 
 
1915
         * --error 1452
 
1916
         * INSERT INTO t2 VALUES(2);
 
1917
         */
 
1918
        /* this code is not needed anymore as we open tables referred by FKs as necessary during checks
 
1919
        xt_ht_lock(self, db->db_tables);
 
1920
        pushr_(xt_ht_unlock, db->db_tables);
 
1921
        tab = xt_use_table_no_lock(self, db, name, FALSE, FALSE, NULL);
 
1922
        freer_(); // xt_ht_unlock(db->db_tables)
 
1923
        xt_heap_release(self, tab);
 
1924
        * CHANGED see {LOAD-FOR-FKS} above.
 
1925
        */
 
1926
 
 
1927
        exit_();
 
1928
}
 
1929
 
 
1930
xtPublic void xt_drop_table(XTThreadPtr self, XTPathStrPtr tab_name, xtBool drop_db)
 
1931
{
 
1932
        XTDatabaseHPtr          db = self->st_database;
 
1933
        XTOpenTablePoolPtr      table_pool;
 
1934
        XTTableHPtr                     tab = NULL;
 
1935
        xtTableID                       tab_id = 0;
 
1936
        xtBool                          can_drop = TRUE;
 
1937
 
 
1938
        enter_();
 
1939
 
 
1940
#ifdef TRACE_CREATE_TABLES
 
1941
        printf("DROP %s\n", tab_name->ps_path);
 
1942
#endif
 
1943
 
 
1944
        table_pool = xt_db_lock_table_pool_by_name(self, self->st_database, tab_name, FALSE, FALSE, TRUE, &tab);
 
1945
        pushr_(xt_db_unlock_table_pool, table_pool);
 
1946
        xt_ht_lock(self, db->db_tables);
 
1947
        pushr_(xt_ht_unlock, db->db_tables);
 
1948
        pushr_(xt_heap_release, tab);
 
1949
 
 
1950
        if (table_pool) {
 
1951
                tab_id = tab->tab_id;   /* tab is not null if returned table_pool is not null */
 
1952
                /* check if other tables refer this */
 
1953
                if (!self->st_ignore_fkeys) 
 
1954
                        can_drop = tab->tab_dic.dic_table->checkCanDrop(drop_db);
 
1955
        }
 
1956
#ifdef DRIZZLED 
 
1957
        /* See the comment in ha_pbxt::delete_table regarding different implmentation of DROP TABLE
 
1958
         * in MySQL and Drizzle
 
1959
         */
 
1960
        else {
 
1961
                xt_throw_xterr(XT_CONTEXT, XT_ERR_TABLE_NOT_FOUND);
 
1962
        }
 
1963
#endif
 
1964
 
 
1965
        if (can_drop) {
 
1966
                if (tab_id) {
 
1967
                        XTTableEntryPtr te_ptr;
 
1968
 
 
1969
                        xt_dl_delete_ext_data(self, tab, FALSE, TRUE);
 
1970
                        freer_(); // xt_heap_release(self, tab)
 
1971
 
 
1972
                        /* For the Windows version this must be done before we
 
1973
                         * start to delete the underlying files!
 
1974
                         */
 
1975
                        tab_close_files(self, tab);
 
1976
 
 
1977
                        tab_delete_table_files(self, tab_name, tab_id);
 
1978
 
 
1979
                        ASSERT(xt_get_self() == self);
 
1980
                        if ((te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id))) {
 
1981
                                tab_remove_table_path(self, db, te_ptr->te_tab_path);
 
1982
                                xt_sl_delete(self, db->db_table_by_id, &tab_id);
 
1983
                                tab_save_tables(self, db);
 
1984
                        }
 
1985
                }
 
1986
                else {
 
1987
                        freer_(); // xt_heap_release(self, tab)
 
1988
                }
 
1989
 
 
1990
                xt_ht_del(self, db->db_tables, tab_name);
 
1991
        }
 
1992
        else {  /* cannot drop table because of FK dependencies */
 
1993
                xt_throw_xterr(XT_CONTEXT, XT_ERR_ROW_IS_REFERENCED);
 
1994
        }
 
1995
 
 
1996
        freer_(); // xt_ht_unlock(db->db_tables)
 
1997
        freer_(); // xt_db_unlock_table_pool(table_pool)
 
1998
        exit_();
 
1999
}
 
2000
 
 
2001
/*
 
2002
 * Record buffer size:
 
2003
 * -------------------
 
2004
 * The size of the record buffer used to hold the row
 
2005
 * in memory. This buffer size does not include the BLOB data.
 
2006
 * About 8 bytes (a pointer and a size) is reserved for each BLOB
 
2007
 * in this buffer.
 
2008
 *
 
2009
 * The buffer size includes a number of "NULL" bytes followed by
 
2010
 * the data area. The NULL bytes contain 1 bit for every column,
 
2011
 * to indicate whether the column is NULL or not.
 
2012
 *
 
2013
 * The size of the buffer is 4/8-byte aligned, so it may be padded
 
2014
 * at the end.
 
2015
 *
 
2016
 * Fixed length rec. len.:
 
2017
 * -----------------------
 
2018
 * If the record does not include any BLOBs then this is the size of the
 
2019
 * fixed length record. The size of the data in the data handle record
 
2020
 * need never be bigger than this length, if the record does not
 
2021
 * contain BLOBs. So this should be the maximum size set for
 
2022
 * AVG_ROW_LENGTH in this case.
 
2023
 *
 
2024
 * Handle data record size:
 
2025
 * ------------------------
 
2026
 * This is the size of the handle data record. It is the data size
 
2027
 * plus the "max header size".
 
2028
 *
 
2029
 * Min/max header size:
 
2030
 * The min and max header size of the header in the data handle file.
 
2031
 * The larger header is used if a record has an extended data (data log
 
2032
 * file) component.
 
2033
 *
 
2034
 * Min/avg/max record size:
 
2035
 * ------------------------
 
2036
 * These are variable length records sizes. That is, the size of records
 
2037
 * when stored in the variable length format. Variable length records
 
2038
 * do not have fixed fields sizes, instead the fields are packed one
 
2039
 * after the other, prefixed by a number of size indicator bytes.
 
2040
 *
 
2041
 * The average is an estimate of the average record size. This estimate
 
2042
 * is used if no AVG_ROW_LENGTH is specifically given.
 
2043
 *
 
2044
 * If the average estimate is within 20% of the maximum size of the record,
 
2045
 * then the record will be handled as a fixed length record.
 
2046
 *
 
2047
 * Avg row len set for tab:
 
2048
 * ------------------------
 
2049
 * This is the value set using AVG_ROW_LENGTH when the table is declared.
 
2050
 *
 
2051
 * Rows fixed length:
 
2052
 * ------------------
 
2053
 * YES if the records of this table are handled as a fixed length records.
 
2054
 * In this case the table records will never have an extended record
 
2055
 * component.
 
2056
 *
 
2057
 * The size of the data area in the handle data record is set to the
 
2058
 * size of the MySQL data record ("Fixed length rec. len.").
 
2059
 *
 
2060
 * It also means that the record format used is identical to the MySQL
 
2061
 * record format.
 
2062
 *
 
2063
 * If the records are not fixed, then the variable length record format
 
2064
 * is used. Records size are then in the range specified by
 
2065
 * "Min/avg/max record size".
 
2066
 *
 
2067
 * Maximum fixed size:
 
2068
 * -------------------
 
2069
 * This is the maximum size of a data log record.
 
2070
 *
 
2071
 * Minimum variable size:
 
2072
 * ------------------------
 
2073
 * Records below this size are handled as a fixed length record size, unless
 
2074
 * the AVG_ROW_LENGTH is specifically set.
 
2075
 */
 
2076
xtPublic void xt_check_table(XTThreadPtr self, XTOpenTablePtr ot)
 
2077
{
 
2078
        XTTableHPtr                             tab = ot->ot_table;
 
2079
        xtRecordID                              prec_id;
 
2080
        XTTabRecExtDPtr                 rec_buf = (XTTabRecExtDPtr) ot->ot_row_rbuffer;
 
2081
        XTactExtRecEntryDRec    ext_rec;
 
2082
        size_t                                  log_size;
 
2083
        xtLogID                                 log_id;
 
2084
        xtLogOffset                             log_offset;
 
2085
        xtRecordID                              rec_id;
 
2086
        xtRecordID                              prev_rec_id;
 
2087
        xtXactID                                xn_id;
 
2088
        xtRowID                                 row_id;
 
2089
        u_llong                                 free_rec_count = 0, free_count2 = 0;
 
2090
        u_llong                                 delete_rec_count = 0;
 
2091
        u_llong                                 alloc_rec_count = 0;
 
2092
        u_llong                                 alloc_rec_bytes = 0;
 
2093
        u_llong                                 min_comp_rec_len = 0;
 
2094
        u_llong                                 max_comp_rec_len = 0;
 
2095
        size_t                                  rec_size;
 
2096
        size_t                                  row_size;
 
2097
        u_llong                                 ext_data_len = 0;
 
2098
 
 
2099
#if defined(DUMP_CHECK_TABLE) || defined(CHECK_TABLE_STATS)
 
2100
        printf("\nCHECK TABLE: %s\n", tab->tab_name->ps_path);
 
2101
#endif
 
2102
 
 
2103
        xt_lock_mutex(self, &tab->tab_db->db_co_ext_lock);
 
2104
        pushr_(xt_unlock_mutex, &tab->tab_db->db_co_ext_lock);
 
2105
 
 
2106
        xt_lock_mutex(self, &tab->tab_rec_lock);
 
2107
        pushr_(xt_unlock_mutex, &tab->tab_rec_lock);
 
2108
 
 
2109
#ifdef CHECK_TABLE_STATS
 
2110
        printf("Record buffer size      = %lu\n", (u_long) tab->tab_dic.dic_mysql_buf_size);
 
2111
        printf("Fixed length rec. len.  = %lu\n", (u_long) tab->tab_dic.dic_mysql_rec_size);
 
2112
        printf("Handle data record size = %lu\n", (u_long) tab->tab_dic.dic_rec_size);
 
2113
        printf("Min/max header size     = %d/%d\n", (int) offsetof(XTTabRecFix, rf_data), tab->tab_dic.dic_rec_fixed ? (int) offsetof(XTTabRecFix, rf_data) : (int) offsetof(XTTabRecExtDRec, re_data));
 
2114
        printf("Min/avg/max record size = %llu/%llu/%llu\n", (u_llong) tab->tab_dic.dic_min_row_size, (u_llong) tab->tab_dic.dic_ave_row_size, (u_llong) tab->tab_dic.dic_max_row_size);
 
2115
        if (tab->tab_dic.dic_def_ave_row_size)
 
2116
                printf("Avg row len set for tab = %lu\n", (u_long) tab->tab_dic.dic_def_ave_row_size);
 
2117
        else
 
2118
                printf("Avg row len set for tab = not specified\n");
 
2119
        printf("Rows fixed length       = %s\n", tab->tab_dic.dic_rec_fixed ? "YES" : "NO");
 
2120
        if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
 
2121
                printf("Table type              = MEMORY\n");
 
2122
        else if (tab->tab_dic.dic_tab_flags & XT_TF_REAL_TEMP_TABLE)
 
2123
                printf("Table type              = TEMPORARY\n");
 
2124
        else if (tab->tab_dic.dic_tab_flags & XT_TF_DDL_TEMP_TABLE)
 
2125
                printf("Table type              = DDL-TEMPORARY\n");
 
2126
        if (tab->tab_dic.dic_def_ave_row_size)
 
2127
                printf("Maximum fixed size      = %lu\n", (u_long) XT_TAB_MAX_FIX_REC_LENGTH_SPEC);
 
2128
        else
 
2129
                printf("Maximum fixed size      = %lu\n", (u_long) XT_TAB_MAX_FIX_REC_LENGTH);
 
2130
        printf("Minimum variable size   = %lu\n", (u_long) XT_TAB_MIN_VAR_REC_LENGTH);
 
2131
        printf("Minimum auto-increment  = %llu\n", (u_llong) tab->tab_dic.dic_min_auto_inc);
 
2132
        printf("Number of columns       = %lu\n", (u_long) tab->tab_dic.dic_no_of_cols);
 
2133
        printf("Number of fixed columns = %lu\n", (u_long) tab->tab_dic.dic_fix_col_count);
 
2134
        printf("Columns req. for index  = %lu\n", (u_long) tab->tab_dic.dic_ind_cols_req);
 
2135
        if (tab->tab_dic.dic_ind_rec_len)
 
2136
                printf("Rec len req. for index  = %llu\n", (u_llong) tab->tab_dic.dic_ind_rec_len);
 
2137
        printf("Columns req. for blobs  = %lu\n", (u_long) tab->tab_dic.dic_blob_cols_req);
 
2138
        printf("Number of blob columns  = %lu\n", (u_long) tab->tab_dic.dic_blob_count);
 
2139
        printf("Number of indices       = %lu\n", (u_long) tab->tab_dic.dic_key_count);
 
2140
#endif
 
2141
 
 
2142
#ifdef DUMP_CHECK_TABLE
 
2143
        printf("Records:-\n");
 
2144
        printf("Free list: %llu (%llu)\n", (u_llong) tab->tab_rec_free_id, (u_llong) tab->tab_rec_fnum);
 
2145
        printf("EOF:       %llu\n", (u_llong) tab->tab_rec_eof_id);
 
2146
#endif
 
2147
 
 
2148
        rec_size = XT_REC_EXT_HEADER_SIZE;
 
2149
        if (rec_size > tab->tab_recs.tci_rec_size)
 
2150
                rec_size = tab->tab_recs.tci_rec_size;
 
2151
        rec_id = 1;
 
2152
        while (rec_id < tab->tab_rec_eof_id) {
 
2153
                if (!xt_tab_get_rec_data(ot, rec_id, tab->tab_dic.dic_rec_size, ot->ot_row_rbuffer))
 
2154
                        xt_throw(self);
 
2155
 
 
2156
#ifdef DUMP_CHECK_TABLE
 
2157
                printf("%-4llu ", (u_llong) rec_id);
 
2158
#endif
 
2159
                switch (rec_buf->tr_rec_type_1 & XT_TAB_STATUS_MASK) {
 
2160
                        case XT_TAB_STATUS_FREED:
 
2161
#ifdef DUMP_CHECK_TABLE
 
2162
                                printf("======== ");
 
2163
#endif
 
2164
                                free_rec_count++;
 
2165
                                break;
 
2166
                        case XT_TAB_STATUS_DELETE:
 
2167
#ifdef DUMP_CHECK_TABLE
 
2168
                                printf("delete   ");
 
2169
#endif
 
2170
                                delete_rec_count++;
 
2171
                                break;
 
2172
                        case XT_TAB_STATUS_FIXED:
 
2173
#ifdef DUMP_CHECK_TABLE
 
2174
                                printf("record-F ");
 
2175
#endif
 
2176
                                alloc_rec_count++;
 
2177
                                row_size = myxt_store_row_length(ot, (char *) ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE);
 
2178
                                alloc_rec_bytes += row_size;
 
2179
                                if (!min_comp_rec_len || row_size < min_comp_rec_len)
 
2180
                                        min_comp_rec_len = row_size;
 
2181
                                if (row_size > max_comp_rec_len)
 
2182
                                        max_comp_rec_len = row_size;
 
2183
                                break;
 
2184
                        case XT_TAB_STATUS_VARIABLE:
 
2185
#ifdef DUMP_CHECK_TABLE
 
2186
                                printf("record-V ");
 
2187
#endif
 
2188
                                alloc_rec_count++;
 
2189
                                row_size = myxt_load_row_length(ot, tab->tab_dic.dic_rec_size, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, NULL);
 
2190
                                alloc_rec_bytes += row_size;
 
2191
                                if (!min_comp_rec_len || row_size < min_comp_rec_len)
 
2192
                                        min_comp_rec_len = row_size;
 
2193
                                if (row_size > max_comp_rec_len)
 
2194
                                        max_comp_rec_len = row_size;
 
2195
                                break;
 
2196
                        case XT_TAB_STATUS_EXT_DLOG:
 
2197
#ifdef DUMP_CHECK_TABLE
 
2198
                                printf("record-X ");
 
2199
#endif
 
2200
                                alloc_rec_count++;
 
2201
                                ext_data_len += XT_GET_DISK_4(rec_buf->re_log_dat_siz_4);
 
2202
                                row_size = XT_GET_DISK_4(rec_buf->re_log_dat_siz_4) + ot->ot_rec_size - XT_REC_EXT_HEADER_SIZE;
 
2203
                                alloc_rec_bytes += row_size;
 
2204
                                if (!min_comp_rec_len || row_size < min_comp_rec_len)
 
2205
                                        min_comp_rec_len = row_size;
 
2206
                                if (row_size > max_comp_rec_len)
 
2207
                                        max_comp_rec_len = row_size;
 
2208
                                break;
 
2209
                }
 
2210
#ifdef DUMP_CHECK_TABLE
 
2211
                if (rec_buf->tr_rec_type_1 & XT_TAB_STATUS_CLEANED_BIT)
 
2212
                        printf("C");
 
2213
                else
 
2214
                        printf(" ");
 
2215
#endif
 
2216
                prev_rec_id = XT_GET_DISK_4(rec_buf->tr_prev_rec_id_4);
 
2217
                xn_id = XT_GET_DISK_4(rec_buf->tr_xact_id_4);
 
2218
                row_id = XT_GET_DISK_4(rec_buf->tr_row_id_4);
 
2219
                switch (rec_buf->tr_rec_type_1 & XT_TAB_STATUS_MASK) {
 
2220
                        case XT_TAB_STATUS_FREED:
 
2221
#ifdef DUMP_CHECK_TABLE
 
2222
                                printf(" prev=%-3llu (xact=%-3llu row=%lu)\n", (u_llong) prev_rec_id, (u_llong) xn_id, (u_long) row_id);
 
2223
#endif
 
2224
                                break;
 
2225
                        case XT_TAB_STATUS_EXT_DLOG:
 
2226
                                xtBool ok;
 
2227
 
 
2228
#ifdef DUMP_CHECK_TABLE
 
2229
                                printf(" prev=%-3llu  xact=%-3llu row=%lu  Xlog=%lu Xoff=%llu Xsiz=%lu\n", (u_llong) prev_rec_id, (u_llong) xn_id, (u_long) row_id, (u_long) XT_GET_DISK_2(rec_buf->re_log_id_2), (u_llong) XT_GET_DISK_6(rec_buf->re_log_offs_6), (u_long) XT_GET_DISK_4(rec_buf->re_log_dat_siz_4));
 
2230
#endif
 
2231
 
 
2232
                                log_size = XT_GET_DISK_4(rec_buf->re_log_dat_siz_4);
 
2233
                                XT_GET_LOG_REF(log_id, log_offset, rec_buf);
 
2234
                                if (ot->ot_table->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
 
2235
                                        xt_tab_read_ext_record(tab, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data), (xtWord1 *) &ext_rec);
 
2236
                                        ok = TRUE;
 
2237
                                }
 
2238
                                else {
 
2239
                                        if (!(ok = self->st_dlog_buf.dlb_read_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data), (xtWord1 *) &ext_rec, self)))
 
2240
                                                xt_log_and_clear_exception(self);
 
2241
                                }
 
2242
                                if (ok) {
 
2243
                                        size_t          log_size2;
 
2244
                                        xtTableID       curr_tab_id;
 
2245
                                        xtRecordID      curr_rec_id;
 
2246
 
 
2247
                                        log_size2 = XT_GET_DISK_4(ext_rec.er_data_size_4);
 
2248
                                        curr_tab_id = XT_GET_DISK_4(ext_rec.er_tab_id_4);
 
2249
                                        curr_rec_id = XT_GET_DISK_4(ext_rec.er_rec_id_4);
 
2250
                                        if (log_size2 != log_size || curr_tab_id != tab->tab_id || curr_rec_id != rec_id) {
 
2251
                                                xt_logf(XT_INFO, "Table %s: record %llu, extended record %lu:%llu not valid\n", tab->tab_name, (u_llong) rec_id, (u_long) log_id, (u_llong) log_offset);
 
2252
                                        }
 
2253
                                }
 
2254
                                break;
 
2255
                        default:
 
2256
#ifdef DUMP_CHECK_TABLE
 
2257
                                printf(" prev=%-3llu  xact=%-3llu row=%lu\n", (u_llong) prev_rec_id, (u_llong) xn_id, (u_long) row_id);
 
2258
#endif
 
2259
                                break;
 
2260
                }
 
2261
                rec_id++;
 
2262
        }
 
2263
        
 
2264
#ifdef CHECK_TABLE_STATS
 
2265
        u_long  rec, row, ind;
 
2266
        char    value[50];
 
2267
 
 
2268
        rec = xt_seek_eof_file(self, ot->ot_rec_file);
 
2269
        row = xt_seek_eof_file(self, ot->ot_row_file);
 
2270
        ind = xt_seek_eof_file(self, ot->ot_ind_file);
 
2271
        if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
 
2272
                if (!tab->tab_dic.dic_rec_fixed) {
 
2273
                        xt_int8_to_byte_size((xtInt8) tab->tab_mem_total, value);
 
2274
                        printf("Ext. record memory used = %s\n", value);
 
2275
                }
 
2276
                xt_int8_to_byte_size((xtInt8) ind, value);
 
2277
                printf("Index data memory used  = %s\n", value);
 
2278
                xt_int8_to_byte_size((xtInt8) rec + row, value);
 
2279
                printf("Table data memory used  = %s\n", value);
 
2280
                xt_int8_to_byte_size((xtInt8) tab->tab_mem_total + rec + row + ind, value);
 
2281
                printf("Total memory used       = %s\n", value);
 
2282
        }
 
2283
        else {
 
2284
                if (!tab->tab_dic.dic_rec_fixed) {
 
2285
                        xt_int8_to_byte_size((xtInt8) ext_data_len, value);
 
2286
                        printf("Ext. record disk used   = %s\n", value);                
 
2287
                }
 
2288
                xt_int8_to_byte_size((xtInt8) ind, value);
 
2289
                printf("Index disk space used   = %s\n", value);
 
2290
                xt_int8_to_byte_size((xtInt8) rec + row, value);
 
2291
                printf("Table disk space used   = %s\n", value);
 
2292
                xt_int8_to_byte_size((xtInt8) ext_data_len + rec + row + ind, value);
 
2293
                printf("Total disk space used   = %s\n", value);
 
2294
        }
 
2295
        
 
2296
        if (alloc_rec_count) {
 
2297
                printf("Minumum comp. rec. len. = %llu\n", (u_llong) min_comp_rec_len);
 
2298
                printf("Average comp. rec. len. = %llu\n", (u_llong) ((double) alloc_rec_bytes / (double) alloc_rec_count + (double) 0.5));
 
2299
                printf("Maximum comp. rec. len. = %llu\n", (u_llong) max_comp_rec_len);
 
2300
        }
 
2301
        printf("Free record count       = %llu\n", (u_llong) free_rec_count);
 
2302
        printf("Deleted record count    = %llu\n", (u_llong) delete_rec_count);
 
2303
        printf("Allocated record count  = %llu\n", (u_llong) alloc_rec_count);
 
2304
 
 
2305
#endif
 
2306
        if (tab->tab_rec_fnum != free_rec_count)
 
2307
                xt_logf(XT_INFO, "Table %s: incorrect number of free blocks, %llu, should be: %llu\n", tab->tab_name, (u_llong) free_rec_count, (u_llong) tab->tab_rec_fnum);
 
2308
 
 
2309
        /* Checking the free list: */
 
2310
        prec_id = 0;
 
2311
        rec_id = tab->tab_rec_free_id;
 
2312
        while (rec_id) {
 
2313
                if (rec_id >= tab->tab_rec_eof_id) {
 
2314
                        xt_logf(XT_INFO, "Table %s: invalid reference on free list: %llu, ", tab->tab_name, (u_llong) rec_id);
 
2315
                        if (prec_id)
 
2316
                                xt_logf(XT_INFO, "reference by: %llu\n", (u_llong) prec_id);
 
2317
                        else
 
2318
                                xt_logf(XT_INFO, "reference by list head pointer\n");
 
2319
                        break;
 
2320
                }
 
2321
                if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_FIX_HEADER_SIZE, (xtWord1 *) rec_buf)) {
 
2322
                        xt_log_and_clear_exception(self);
 
2323
                        break;
 
2324
                }
 
2325
                if ((rec_buf->tr_rec_type_1 & XT_TAB_STATUS_MASK) != XT_TAB_STATUS_FREED)
 
2326
                        xt_logf(XT_INFO, "Table %s: record, %llu, on free list is not free\n", tab->tab_name, (u_llong) rec_id);
 
2327
                free_count2++;
 
2328
                prec_id = rec_id;
 
2329
                rec_id = XT_GET_DISK_4(rec_buf->tr_prev_rec_id_4);
 
2330
        }
 
2331
        if (free_count2 < free_rec_count)
 
2332
                xt_logf(XT_INFO, "Table %s: not all free blocks (%llu) on free list: %llu\n", tab->tab_name, (u_llong) free_rec_count, (u_llong) free_count2);
 
2333
 
 
2334
        freer_(); // xt_unlock_mutex_ns(&tab->tab_rec_lock);
 
2335
 
 
2336
        xtRefID ref_id;
 
2337
 
 
2338
        xt_lock_mutex(self, &tab->tab_row_lock);
 
2339
        pushr_(xt_unlock_mutex, &tab->tab_row_lock);
 
2340
 
 
2341
#ifdef DUMP_CHECK_TABLE
 
2342
        printf("Rows:-\n");
 
2343
        printf("Free list: %llu (%llu)\n", (u_llong) tab->tab_row_free_id, (u_llong) tab->tab_row_fnum);
 
2344
        printf("EOF:       %llu\n", (u_llong) tab->tab_row_eof_id);
 
2345
#endif
 
2346
 
 
2347
        rec_id = 1;
 
2348
        while (rec_id < tab->tab_row_eof_id) {
 
2349
                if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, rec_id, &ref_id, self))
 
2350
                        xt_throw(self);
 
2351
#ifdef DUMP_CHECK_TABLE
 
2352
                printf("%-3llu ", (u_llong) rec_id);
 
2353
#endif
 
2354
#ifdef DUMP_CHECK_TABLE
 
2355
                if (ref_id == 0)
 
2356
                        printf("====== 0\n");
 
2357
                else
 
2358
                        printf("in use %llu\n", (u_llong) ref_id);
 
2359
#endif
 
2360
                rec_id++;
 
2361
        }
 
2362
 
 
2363
        freer_(); // xt_unlock_mutex(&tab->tab_row_lock);
 
2364
 
 
2365
#ifdef CHECK_INDEX_ON_CHECK_TABLE
 
2366
        xt_check_indices(ot);
 
2367
#endif
 
2368
        freer_(); // xt_unlock_mutex(&tab->tab_db->db_co_ext_lock);
 
2369
}
 
2370
 
 
2371
xtPublic void xt_rename_table(XTThreadPtr self, XTPathStrPtr old_name, XTPathStrPtr new_name)
 
2372
{
 
2373
        XTDatabaseHPtr          db = self->st_database;
 
2374
        XTOpenTablePoolPtr      table_pool;
 
2375
        XTTableHPtr                     tab = NULL;
 
2376
        char                            table_name[XT_MAX_TABLE_FILE_NAME_SIZE];
 
2377
        char                            *postfix;
 
2378
        XTFilesOfTableRec       ft;
 
2379
        XTDictionaryRec         dic;
 
2380
        xtTableID                       tab_id;
 
2381
        XTTableEntryPtr         te_ptr;
 
2382
        char                            *te_new_name;
 
2383
        XTTablePathPtr          te_new_path;
 
2384
        XTTablePathPtr          te_old_path;
 
2385
        char                            to_path[PATH_MAX];
 
2386
 
 
2387
        memset(&dic, 0, sizeof(dic));
 
2388
 
 
2389
#ifdef TRACE_CREATE_TABLES
 
2390
        printf("RENAME %s --> %s\n", old_name->ps_path, new_name->ps_path);
 
2391
#endif
 
2392
        if (strlen(xt_last_name_of_path(new_name->ps_path)) > XT_TABLE_NAME_SIZE-1)
 
2393
                xt_throw_taberr(XT_CONTEXT, XT_ERR_NAME_TOO_LONG, new_name);
 
2394
 
 
2395
        /* MySQL renames the table while it is in use. Here is
 
2396
         * the sequence:
 
2397
         *
 
2398
         * OPEN tab1
 
2399
         * CREATE tmp_tab
 
2400
         * OPEN tmp_tab
 
2401
         * COPY tab1 -> tmp_tab
 
2402
         * CLOSE tmp_tab
 
2403
         * RENAME tab1 -> tmp2_tab
 
2404
         * RENAME tmp_tab -> tab1
 
2405
         * CLOSE tab1 (tmp2_tab)
 
2406
         * DELETE tmp2_tab
 
2407
         * OPEN tab1
 
2408
         *
 
2409
         * Since the table is open when it is renamed, I cannot
 
2410
         * get exclusive use of the table for this operation.
 
2411
         *
 
2412
         * So instead we just make sure that the sweeper is not
 
2413
         * using the table.
 
2414
         */
 
2415
        table_pool = xt_db_lock_table_pool_by_name(self, self->st_database, old_name, FALSE, TRUE, FALSE, &tab);
 
2416
        pushr_(xt_db_unlock_table_pool, table_pool);
 
2417
        xt_ht_lock(self, db->db_tables);
 
2418
        pushr_(xt_ht_unlock, db->db_tables);
 
2419
        tab_id = tab->tab_id;
 
2420
        myxt_move_dictionary(&dic, &tab->tab_dic);
 
2421
        pushr_(myxt_free_dictionary, &dic);
 
2422
        pushr_(xt_heap_release, tab);
 
2423
 
 
2424
        /* Unmap the memory mapped table files: 
 
2425
         * For windows this must be done before we
 
2426
         * can rename the files.
 
2427
         */
 
2428
        tab_close_files(self, tab);
 
2429
 
 
2430
        freer_(); // xt_heap_release(self, old_tab)
 
2431
 
 
2432
        /* Create the new name and path: */
 
2433
        te_new_name = xt_dup_string(self, xt_last_name_of_path(new_name->ps_path));
 
2434
        pushr_(xt_free, te_new_name);
 
2435
        te_new_path = tab_get_table_path(self, db, new_name, FALSE);
 
2436
        pushr_(tab_free_table_path, te_new_path);
 
2437
 
 
2438
        te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id);
 
2439
 
 
2440
        /* Remove the table from the Database directory: */
 
2441
        xt_ht_del(self, db->db_tables, old_name);
 
2442
 
 
2443
        xt_enum_files_of_tables_init(old_name, tab_id, &ft);
 
2444
        while (xt_enum_files_of_tables_next(&ft)) {
 
2445
                postfix = xt_tab_file_to_name(XT_MAX_TABLE_FILE_NAME_SIZE, table_name, ft.ft_file_path);
 
2446
 
 
2447
                xt_strcpy(PATH_MAX, to_path, new_name->ps_path);
 
2448
                xt_strcat(PATH_MAX, to_path, postfix);
 
2449
 
 
2450
                if (!xt_fs_rename(NULL, ft.ft_file_path, to_path))
 
2451
                        xt_log_and_clear_exception(self);
 
2452
        }
 
2453
 
 
2454
        /* Switch the table name and path: */
 
2455
        xt_free(self, te_ptr->te_tab_name);
 
2456
        te_ptr->te_tab_name = te_new_name;
 
2457
        te_old_path = te_ptr->te_tab_path;
 
2458
        te_ptr->te_tab_path = te_new_path;
 
2459
        tab_remove_table_path(self, db, te_old_path);
 
2460
        tab_save_tables(self, db);
 
2461
 
 
2462
        popr_(); // Discard tab_free_table_path(te_new_path);
 
2463
        popr_(); // Discard xt_free(te_new_name);
 
2464
 
 
2465
        tab = xt_use_table_no_lock(self, db, new_name, FALSE, FALSE, &dic);
 
2466
        /* All renamed tables are considered repaired! */
 
2467
        xt_tab_table_repaired(tab);
 
2468
        xt_heap_release(self, tab);
 
2469
 
 
2470
        freer_(); // myxt_free_dictionary(&dic)
 
2471
        freer_(); // xt_ht_unlock(db->db_tables)
 
2472
        freer_(); // xt_db_unlock_table_pool(table_pool)
 
2473
}
 
2474
 
 
2475
xtPublic XTTableHPtr xt_use_table(XTThreadPtr self, XTPathStrPtr name, xtBool no_load, xtBool missing_ok)
 
2476
{
 
2477
        XTTableHPtr             tab;
 
2478
        XTDatabaseHPtr  db = self->st_database;
 
2479
 
 
2480
        xt_ht_lock(self, db->db_tables);
 
2481
        pushr_(xt_ht_unlock, db->db_tables);
 
2482
        tab = xt_use_table_no_lock(self, db, name, no_load, missing_ok, NULL);
 
2483
        freer_();
 
2484
        return tab;
 
2485
}
 
2486
 
 
2487
xtPublic void xt_sync_flush_table(XTThreadPtr self, XTOpenTablePtr ot, int timeout)
 
2488
{
 
2489
        XTTableHPtr             tab = ot->ot_table;
 
2490
        XTDatabaseHPtr  db = tab->tab_db;
 
2491
 
 
2492
        /* Wakeup the sweeper:
 
2493
         * We want the sweeper to check if there is anything to do,
 
2494
         * so we must wake it up.
 
2495
         * Once it has done all it can, it will go back to sleep.
 
2496
         * This should be good enough.
 
2497
         *
 
2498
         * NOTE: I all cases, we do not wait if the sweeper is in
 
2499
         * error state.
 
2500
         */
 
2501
        if (db->db_sw_idle) {
 
2502
                u_int check_count = db->db_sw_check_count;
 
2503
 
 
2504
                for (;;) {
 
2505
                        xt_wakeup_sweeper(db);
 
2506
                        if (!db->db_sw_thread || db->db_sw_idle != XT_THREAD_IDLE || check_count != db->db_sw_check_count)
 
2507
                                break;
 
2508
                        xt_sleep_milli_second(10);
 
2509
                }
 
2510
        }
 
2511
 
 
2512
        /* Wait for the sweeper to become idle: */
 
2513
        xt_lock_mutex(self, &db->db_sw_lock);
 
2514
        pushr_(xt_unlock_mutex, &db->db_sw_lock);
 
2515
        while (db->db_sw_thread && !db->db_sw_idle) {
 
2516
                xt_timed_wait_cond(self, &db->db_sw_cond, &db->db_sw_lock, 10);
 
2517
        }
 
2518
        freer_(); // xt_unlock_mutex(&db->db_sw_lock)
 
2519
 
 
2520
        /* Wait for the writer to write out all operations on the table:
 
2521
         * We also do not wait for the writer if it is in
 
2522
         * error state.
 
2523
         */
 
2524
        time_t start_time = time(NULL);
 
2525
        while (db->db_wr_thread && 
 
2526
                db->db_wr_idle != XT_THREAD_INERR &&
 
2527
                XTTableSeq::xt_op_is_before(tab->tab_head_op_seq+1, tab->tab_seq.ts_next_seq)) {
 
2528
                if (timeout && time(NULL) > start_time + timeout) {
 
2529
                        char    name_buf[XT_TABLE_NAME_BUF_SIZE];
 
2530
 
 
2531
                        xt_tab_make_table_name(tab->tab_name, name_buf, XT_TABLE_NAME_BUF_SIZE);
 
2532
                        xt_logf(XT_WARNING, "Timeout waiting for writer while flushing %s\n", name_buf);
 
2533
                        break;
 
2534
                }
 
2535
 
 
2536
                /* Flush the log, in case this is holding up the
 
2537
                 * writer!
 
2538
                 */
 
2539
                if (!db->db_xlog.xlog_flush(self))
 
2540
                        xt_throw(self);
 
2541
 
 
2542
                xt_lock_mutex(self, &db->db_wr_lock);
 
2543
                pushr_(xt_unlock_mutex, &db->db_wr_lock);
 
2544
                db->db_wr_thread_waiting++;
 
2545
                /*
 
2546
                 * Wake the writer if it is sleeping. In order to
 
2547
                 * flush a table we must wait for the writer to complete
 
2548
                 * committing all the changes in the table to the database.
 
2549
                 */
 
2550
                if (db->db_wr_idle) {
 
2551
                        if (!xt_broadcast_cond_ns(&db->db_wr_cond))
 
2552
                                xt_log_and_clear_exception_ns();
 
2553
                }
 
2554
 
 
2555
                freer_(); // xt_unlock_mutex(&db->db_wr_lock)
 
2556
                xt_sleep_milli_second(10);
 
2557
 
 
2558
                xt_lock_mutex(self, &db->db_wr_lock);
 
2559
                pushr_(xt_unlock_mutex, &db->db_wr_lock);
 
2560
                db->db_wr_thread_waiting--;
 
2561
                freer_(); // xt_unlock_mutex(&db->db_wr_lock)
 
2562
        }
 
2563
 
 
2564
        xt_flush_table(self, ot);
 
2565
}
 
2566
 
 
2567
xtBool XTFlushRecRowTask::tk_task(XTThreadPtr thread)
 
2568
{
 
2569
        XTOpenTablePtr ot;
 
2570
 
 
2571
        /* {TASK-TABLE-GONE}
 
2572
         * If this task was scheduled before the table was deleted
 
2573
         * or renamed, then we may be caught holding an invalid
 
2574
         * table (frt_table) object.
 
2575
         *
 
2576
         * As a result we just use the ID, to get the open table
 
2577
         * pointer.
 
2578
         *
 
2579
         * If the tables are not identical, then there is no point
 
2580
         * in proceeding...
 
2581
         */
 
2582
        if (!(xt_db_open_pool_table_ns(&ot, frt_table->tab_db, frt_table->tab_id)))
 
2583
                return FAILED;
 
2584
 
 
2585
        if (!ot) {
 
2586
                /* Can happen if the table has been dropped: */
 
2587
                if (thread->t_exception.e_xt_err)
 
2588
                        xt_log_and_clear_exception(thread);
 
2589
                xt_logf(XT_NT_WARNING, "Checkpoint skipping table (ID) %lu: table was not found\n", (u_long) frt_table->tab_id);
 
2590
                xt_checkpoint_set_flush_state(frt_table->tab_db, frt_table->tab_id, XT_CPT_STATE_DONE_ALL);
 
2591
                return OK;
 
2592
        }
 
2593
 
 
2594
        if (ot->ot_table != frt_table) {
 
2595
                /* Can happen if the table has been renamed: */
 
2596
                if (thread->t_exception.e_xt_err)
 
2597
                        xt_log_and_clear_exception(thread);
 
2598
                xt_logf(XT_NT_WARNING, "Checkpoint skipping table (ID) %lu: table has been renamed\n", (u_long) frt_table->tab_id);
 
2599
                xt_checkpoint_set_flush_state(frt_table->tab_db, frt_table->tab_id, XT_CPT_STATE_DONE_ALL);
 
2600
                goto table_gone;
 
2601
        }
 
2602
 
 
2603
        if (!xt_flush_record_row(ot, NULL, FALSE)) {
 
2604
                xt_db_return_table_to_pool_ns(ot);
 
2605
                return FAILED;
 
2606
        }
 
2607
 
 
2608
        table_gone:
 
2609
        xt_db_return_table_to_pool_ns(ot);
 
2610
        return OK;
 
2611
}
 
2612
 
 
2613
void XTFlushRecRowTask::tk_reference()
 
2614
{
 
2615
        xt_heap_reference_ns(frt_table);
 
2616
}
 
2617
 
 
2618
void XTFlushRecRowTask::tk_release()
 
2619
{
 
2620
        xt_heap_release_ns(frt_table);
 
2621
}
 
2622
 
 
2623
/*
 
2624
 * Start a flush of this file in background.
 
2625
 */
 
2626
xtPublic xtBool xt_async_flush_record_row(XTTableHPtr tab, xtBool notify_complete, XTThreadPtr thread)
 
2627
{
 
2628
        if (tab->tab_rec_flush_task->tk_is_running())
 
2629
                return OK;
 
2630
 
 
2631
        /* Run the task: */
 
2632
        return xt_run_async_task(tab->tab_rec_flush_task, notify_complete, FALSE, thread, tab->tab_db);
 
2633
}
 
2634
 
 
2635
xtPublic xtBool xt_flush_record_row(XTOpenTablePtr ot, off_t *bytes_flushed, xtBool have_table_lock)
 
2636
{
 
2637
        XTTableHeadDRec                 rec_head;
 
2638
        XTTableHPtr                             tab = ot->ot_table;
 
2639
        off_t                                   to_flush;
 
2640
#ifdef TRACE_FLUSH_TABLE
 
2641
        time_t                                  tnow = 0;
 
2642
#endif
 
2643
 
 
2644
        if (!xt_begin_checkpoint(tab->tab_db, have_table_lock, ot->ot_thread))
 
2645
                return FAILED;
 
2646
 
 
2647
        xt_lock_mutex_ns(&tab->tab_rec_flush_lock);
 
2648
#ifdef XT_SORT_REC_WRITES
 
2649
        if (!xt_xres_delay_flush(ot, TRUE))
 
2650
                goto failed;
 
2651
#endif
 
2652
        xt_checkpoint_set_flush_state(tab->tab_db, tab->tab_id, XT_CPT_STATE_START_REC_ROW);
 
2653
 
 
2654
        ASSERT_NS(ot->ot_thread == xt_get_self());
 
2655
        /* Make sure that the table recovery point, in
 
2656
         * particular the operation ID is recorded
 
2657
         * before all other flush activity!
 
2658
         *
 
2659
         * This is because only operations after the
 
2660
         * recovery point in the header are applied
 
2661
         * to the table on recovery.
 
2662
         *
 
2663
         * So the operation ID is recorded before the
 
2664
         * flush activity, and written after all is done.
 
2665
         */
 
2666
        xt_tab_store_header(ot, &rec_head);
 
2667
 
 
2668
        /* Write the table header: */
 
2669
        if (tab->tab_flush_pending) {
 
2670
                tab->tab_flush_pending = FALSE;
 
2671
 
 
2672
#ifdef TRACE_FLUSH_TABLE
 
2673
                tnow = time(NULL);
 
2674
                printf("FLUSH TABLE bytes=%lu %s\n", (u_long) tab->tab_bytes_to_flush, tab->tab_name->ps_path);
 
2675
                fflush(stdout);
 
2676
#endif
 
2677
                // Want to see how much was to be flushed in the debugger:
 
2678
                to_flush = tab->tab_bytes_to_flush;
 
2679
                tab->tab_bytes_to_flush = 0;
 
2680
                if (bytes_flushed)
 
2681
                        *bytes_flushed += to_flush;
 
2682
                
 
2683
#ifdef XT_REC_FLUSH_THRESHOLD
 
2684
                XTThreadPtr writer;
 
2685
 
 
2686
                /* Reset the writer's byte level: */
 
2687
                if ((writer = ot->ot_table->tab_db->db_wr_thread))
 
2688
                        tab->tab_rec_wr_last_flush = writer->st_statistics.st_rec.ts_write;
 
2689
#endif
 
2690
 
 
2691
                /* Flush the table data: */
 
2692
                if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags)) {
 
2693
                        if (!XT_FLUSH_RR_FILE(ot->ot_rec_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread) ||
 
2694
                                !XT_FLUSH_RR_FILE(ot->ot_row_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread)) {
 
2695
                                tab->tab_flush_pending = TRUE;
 
2696
                                goto failed;
 
2697
                        }
 
2698
                }
 
2699
 
 
2700
                /* The header includes the operation number which
 
2701
                 * must be written AFTER all other data,
 
2702
                 * because operations will not be applied again.
 
2703
                 */
 
2704
                if (!tab_write_header(ot, &rec_head)) {
 
2705
                        tab->tab_flush_pending = TRUE;
 
2706
                        goto failed;
 
2707
                }
 
2708
        }
 
2709
 
 
2710
        /* Flush the auto-increment: */
 
2711
        if (xt_db_auto_increment_mode == 1) {
 
2712
                if (tab->tab_auto_inc != tab->tab_dic.dic_min_auto_inc) {
 
2713
                        tab->tab_dic.dic_min_auto_inc = tab->tab_auto_inc;
 
2714
                        if (!xt_tab_write_min_auto_inc(ot))
 
2715
                                goto failed;
 
2716
                }
 
2717
        }
 
2718
 
 
2719
        /* Mark this table as record/row flushed: */
 
2720
        xt_checkpoint_set_flush_state(tab->tab_db, tab->tab_id, XT_CPT_STATE_DONE_REC_ROW);
 
2721
 
 
2722
#ifdef TRACE_FLUSH_TABLE
 
2723
        if (tnow) {
 
2724
                printf("flush table (%d) %s DONE\n", (int) (time(NULL) - tnow), tab->tab_name->ps_path);
 
2725
                fflush(stdout);
 
2726
        }
 
2727
#endif
 
2728
 
 
2729
        xt_unlock_mutex_ns(&tab->tab_rec_flush_lock);
 
2730
 
 
2731
        if (!xt_end_checkpoint(tab->tab_db, ot->ot_thread, NULL))
 
2732
                return FAILED;
 
2733
        return OK;
 
2734
        
 
2735
        failed:
 
2736
        xt_checkpoint_set_flush_state(tab->tab_db, tab->tab_id, XT_CPT_STATE_STOP_REC_ROW);
 
2737
 
 
2738
#ifdef TRACE_FLUSH_TABLE
 
2739
        if (tnow) {
 
2740
                printf("flush table (%d) %s FAILED\n", (int) (time(NULL) - tnow), tab->tab_name->ps_path);
 
2741
                fflush(stdout);
 
2742
        }
 
2743
#endif
 
2744
 
 
2745
        xt_unlock_mutex_ns(&tab->tab_rec_flush_lock);
 
2746
        return FAILED;
 
2747
}
 
2748
 
 
2749
xtPublic void xt_flush_table(XTThreadPtr self, XTOpenTablePtr ot)
 
2750
{
 
2751
        /* GOTCHA {FLUSH-BUG}: This bug was difficult to find.
 
2752
         * It occured on Windows in the multi_update
 
2753
         * test, sometimes.
 
2754
         *
 
2755
         * What happens is the checkpointer starts to
 
2756
         * flush the table, and gets to the 
 
2757
         * XT_FLUSH_RR_FILE part.
 
2758
         *
 
2759
         * Then a rename occurs, and the user thread
 
2760
         * flushes the table, and goes through and
 
2761
         * writes the table header, with the most
 
2762
         * recent table operation (the last operation
 
2763
         * that occurred).
 
2764
         *
 
2765
         * The checkpointer the completes and
 
2766
         * also writes the header, but with old
 
2767
         * values (as read in xt_tab_store_header()).
 
2768
         *
 
2769
         * The then user thread continues, and
 
2770
         * reopens the table after rename.
 
2771
         * On reopen, it reads the old value from the header,
 
2772
         * and sets the current operation number.
 
2773
         *
 
2774
         * Now there is a problem in the table cache,
 
2775
         * because some cache pages have operation numbers
 
2776
         * that are greater than current operation
 
2777
         * number!
 
2778
         *
 
2779
         * This later lead to the free-er hanging while
 
2780
         * it waited for an operation to be 
 
2781
         * written to the disk that never would be.
 
2782
         * This is because a page can only be freed when
 
2783
         * the head operation number has passed the
 
2784
         * page operation number.
 
2785
         *
 
2786
         * Which indicates that the page has been written
 
2787
         * to disk.
 
2788
         *
 
2789
         * THE BUG FIX:
 
2790
         * As a result I now use mutex so that only one
 
2791
         * thread can flush at a time.
 
2792
         */
 
2793
 
 
2794
        if (!xt_flush_record_row(ot, NULL, FALSE))
 
2795
                xt_throw(self);
 
2796
 
 
2797
        /* This was before the table data flush,
 
2798
         * (after xt_tab_store_header() above,
 
2799
         * but I don't think it makes any difference.
 
2800
         * Because in the checkpointer it was at this
 
2801
         * position.
 
2802
         */
 
2803
        if (!xt_flush_indices(ot, NULL, FALSE, NULL))
 
2804
                xt_throw(self);
 
2805
 
 
2806
}
 
2807
 
 
2808
static XTOpenTablePtr tab_open_table(XTTableHPtr tab)
 
2809
{
 
2810
        volatile XTOpenTablePtr ot;
 
2811
        XTThreadPtr                             self;
 
2812
 
 
2813
        if (!(ot = (XTOpenTablePtr) xt_malloc_ns(sizeof(XTOpenTableRec))))
 
2814
                return NULL;
 
2815
        memset(ot, 0, offsetof(XTOpenTableRec, ot_ind_wbuf));
 
2816
 
 
2817
        ot->ot_seq_page = NULL;
 
2818
        ot->ot_seq_data = NULL;
 
2819
 
 
2820
        self = xt_get_self();
 
2821
        try_(a) {
 
2822
                xt_heap_reference(self, tab);
 
2823
                ot->ot_table = tab;
 
2824
                ot->ot_row_file = xt_open_file(self, ot->ot_table->tab_row_file->fil_path, xt_row_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_DEFAULT, xt_db_row_file_grow_size);
 
2825
                ot->ot_rec_file = xt_open_file(self, ot->ot_table->tab_rec_file->fil_path, xt_rec_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_DEFAULT, xt_db_data_file_grow_size);
 
2826
#ifdef XT_USE_DIRECT_IO_ON_INDEX
 
2827
                ot->ot_ind_file = xt_open_file(self, ot->ot_table->tab_ind_file->fil_path, xt_ind_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_MISSING_OK | XT_FS_DIRECT_IO, XT_INDEX_PAGE_SIZE*256);
 
2828
#else
 
2829
                ot->ot_ind_file = xt_open_file(self, ot->ot_table->tab_ind_file->fil_path, xt_ind_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_MISSING_OK, XT_INDEX_PAGE_SIZE*256);
 
2830
#endif
 
2831
        }
 
2832
        catch_(a) {
 
2833
                ;
 
2834
        }
 
2835
        cont_(a);
 
2836
 
 
2837
        if (!ot->ot_table || !ot->ot_row_file || !ot->ot_rec_file)
 
2838
                goto failed;
 
2839
 
 
2840
        if (!(ot->ot_row_rbuffer = (xtWord1 *) xt_malloc_ns(ot->ot_table->tab_dic.dic_rec_size)))
 
2841
                goto failed;
 
2842
        ot->ot_row_rbuf_size = ot->ot_table->tab_dic.dic_rec_size;
 
2843
        if (!(ot->ot_row_wbuffer = (xtWord1 *) xt_malloc_ns(ot->ot_table->tab_dic.dic_rec_size)))
 
2844
                goto failed;
 
2845
        ot->ot_row_wbuf_size = ot->ot_table->tab_dic.dic_rec_size;
 
2846
 
 
2847
        /* Cache this stuff to speed access a bit: */
 
2848
        ot->ot_rec_fixed = ot->ot_table->tab_dic.dic_rec_fixed;
 
2849
        ot->ot_rec_size = ot->ot_table->tab_dic.dic_rec_size;
 
2850
 
 
2851
        return ot;
 
2852
 
 
2853
        failed:
 
2854
        tab_close_table(ot);
 
2855
        return NULL;
 
2856
}
 
2857
 
 
2858
xtPublic XTOpenTablePtr xt_open_table(XTTableHPtr tab)
 
2859
{
 
2860
        return tab_open_table(tab);
 
2861
}
 
2862
 
 
2863
xtPublic void xt_close_table(XTOpenTablePtr ot, xtBool flush, xtBool have_table_lock)
 
2864
{
 
2865
        if (flush) {
 
2866
                if (!xt_flush_record_row(ot, NULL, have_table_lock))
 
2867
                        xt_log_and_clear_exception_ns();
 
2868
 
 
2869
                if (!xt_flush_indices(ot, NULL, have_table_lock, NULL))
 
2870
                        xt_log_and_clear_exception_ns();
 
2871
        }
 
2872
        tab_close_table(ot);
 
2873
}
 
2874
 
 
2875
static int tab_use_table_by_id(XTThreadPtr self, XTTableHPtr *r_tab, XTDatabaseHPtr db, xtTableID tab_id)
 
2876
{
 
2877
        XTTableEntryPtr te_ptr;
 
2878
        XTTableHPtr             tab = NULL;
 
2879
        int                             r = XT_TAB_OK;
 
2880
        char                    path[PATH_MAX];
 
2881
 
 
2882
        if (!db)
 
2883
                xt_throw_xterr(XT_CONTEXT, XT_ERR_NO_DATABASE_IN_USE);
 
2884
        xt_ht_lock(self, db->db_tables);
 
2885
        pushr_(xt_ht_unlock, db->db_tables);
 
2886
 
 
2887
        te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id);
 
2888
        if (te_ptr) {
 
2889
                if (!(tab = te_ptr->te_table)) {
 
2890
                        /* Open the table: */
 
2891
                        xt_strcpy(PATH_MAX, path, te_ptr->te_tab_path->tp_path);
 
2892
                        xt_add_dir_char(PATH_MAX, path);
 
2893
                        xt_strcat(PATH_MAX, path, te_ptr->te_tab_name);
 
2894
                        r = tab_new_handle(self, &tab, db, tab_id, (XTPathStrPtr) path, TRUE, NULL);
 
2895
                }
 
2896
        }
 
2897
        else
 
2898
                r = XT_TAB_NOT_FOUND;
 
2899
 
 
2900
        if (tab)
 
2901
                xt_heap_reference(self, tab);
 
2902
        *r_tab = tab;
 
2903
 
 
2904
        freer_(); // xt_ht_unlock(db->db_tables)
 
2905
        return r;
 
2906
}
 
2907
 
 
2908
xtPublic XTTableHPtr xt_use_table_by_id(XTThreadPtr self, XTDatabaseHPtr db, xtTableID tab_id, int *result)
 
2909
{
 
2910
        XTTableHPtr tab;
 
2911
        int                     r;
 
2912
 
 
2913
        r = tab_use_table_by_id(self, &tab, db, tab_id);
 
2914
        if (result) {
 
2915
                if (r != XT_TAB_OK) {
 
2916
                        *result = r;
 
2917
                        return NULL;
 
2918
                }
 
2919
        }
 
2920
        else {
 
2921
                switch (r) {
 
2922
                        case XT_TAB_NOT_FOUND:
 
2923
                                return NULL;
 
2924
                        case XT_TAB_NO_DICTIONARY:
 
2925
                                xt_throw_ulxterr(XT_CONTEXT, XT_ERR_NO_DICTIONARY, (u_long) tab_id);
 
2926
                        case XT_TAB_POOL_CLOSED:
 
2927
                                xt_throw_ulxterr(XT_CONTEXT, XT_ERR_TABLE_LOCKED, (u_long) tab_id);
 
2928
                        default:
 
2929
                                break;
 
2930
                }
 
2931
        }
 
2932
        
 
2933
        return tab;
 
2934
}
 
2935
 
 
2936
xtPublic XTTableHPtr xt_use_table_by_id_ns(XTDatabaseHPtr db, xtTableID tab_id)
 
2937
{
 
2938
        XTTableHPtr     tab;
 
2939
        XTThreadPtr     self = xt_get_self();
 
2940
 
 
2941
        try_(a) {
 
2942
                tab = xt_use_table_by_id(self, db, tab_id, NULL);
 
2943
        }
 
2944
        catch_(a) {
 
2945
                tab = NULL;
 
2946
        }
 
2947
        cont_(a);
 
2948
        return tab;
 
2949
 
 
2950
}
 
2951
 
 
2952
/* The fixed part of the record is already in the row buffer.
 
2953
 * This function loads the extended part, expanding the row
 
2954
 * buffer if necessary.
 
2955
 */
 
2956
xtPublic xtBool xt_tab_load_ext_data(XTOpenTablePtr ot, xtRecordID load_rec_id, xtWord1 *buffer, u_int cols_req)
 
2957
{
 
2958
        size_t                                  log_size;
 
2959
        xtLogID                                 log_id;
 
2960
        xtLogOffset                             log_offset;
 
2961
        xtWord1                                 save_buffer[offsetof(XTactExtRecEntryDRec, er_data)];
 
2962
        xtBool                                  retried = FALSE;
 
2963
        XTactExtRecEntryDPtr    ext_data_ptr;
 
2964
        size_t                                  log_size2;
 
2965
        xtTableID                               curr_tab_id;
 
2966
        xtRecordID                              curr_rec_id;
 
2967
 
 
2968
        log_size = XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->re_log_dat_siz_4);
 
2969
        XT_GET_LOG_REF(log_id, log_offset, (XTTabRecExtDPtr) ot->ot_row_rbuffer);
 
2970
 
 
2971
        if (ot->ot_rec_size + log_size > ot->ot_row_rbuf_size) {
 
2972
                if (!xt_realloc_ns((void **) &ot->ot_row_rbuffer, ot->ot_rec_size + log_size))
 
2973
                        return FAILED;
 
2974
                ot->ot_row_rbuf_size = ot->ot_rec_size + log_size;
 
2975
        }
 
2976
 
 
2977
        /* Read the extended part first: */
 
2978
        ext_data_ptr = (XTactExtRecEntryDPtr) (ot->ot_row_rbuffer + ot->ot_rec_size - offsetof(XTactExtRecEntryDRec, er_data));
 
2979
 
 
2980
        /* Save the data which the header will overwrite: */
 
2981
        memcpy(save_buffer, ext_data_ptr, offsetof(XTactExtRecEntryDRec, er_data));
 
2982
        
 
2983
        reread:
 
2984
        if (ot->ot_table->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
 
2985
                xt_tab_read_ext_record(ot->ot_table, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + log_size, (xtWord1 *) ext_data_ptr);
 
2986
        else {
 
2987
                if (!ot->ot_thread->st_dlog_buf.dlb_read_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + log_size, (xtWord1 *) ext_data_ptr, ot->ot_thread))
 
2988
                        goto retry_read;
 
2989
        }
 
2990
 
 
2991
        log_size2 = XT_GET_DISK_4(ext_data_ptr->er_data_size_4);
 
2992
        curr_tab_id = XT_GET_DISK_4(ext_data_ptr->er_tab_id_4);
 
2993
        curr_rec_id = XT_GET_DISK_4(ext_data_ptr->er_rec_id_4);
 
2994
 
 
2995
        if (log_size2 != log_size || curr_tab_id != ot->ot_table->tab_id || curr_rec_id != load_rec_id) {
 
2996
                /* [(3)] This can happen in the following circumstances:
 
2997
                 * - A new record is created, but the data log is not
 
2998
                 * flushed.
 
2999
                 * - The server quits.
 
3000
                 * - On restart the transaction is rolled back, but the data record
 
3001
                 *   was not written, so later a new record could be written at this
 
3002
                 *   location.
 
3003
                 * - Later the sweeper tries to cleanup this record, and finds
 
3004
                 *   that a different record has been written at this position.
 
3005
                 *
 
3006
                 * NOTE: Index entries can only be written to disk for records
 
3007
                 *       that have been committed to the disk, because uncommitted
 
3008
                 *       records may not exist in order to remove the index entry
 
3009
                 *       on cleanup.
 
3010
                 */
 
3011
                xt_register_xterr(XT_REG_CONTEXT, XT_ERR_BAD_EXT_RECORD);
 
3012
                goto retry_read;
 
3013
        }
 
3014
 
 
3015
        /* Restore the saved area: */
 
3016
        memcpy(ext_data_ptr, save_buffer, offsetof(XTactExtRecEntryDRec, er_data));
 
3017
 
 
3018
        if (retried)
 
3019
                xt_unlock_mutex_ns(&ot->ot_table->tab_db->db_co_ext_lock);
 
3020
        return myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, buffer, cols_req);
 
3021
 
 
3022
        retry_read:
 
3023
        if (!retried) {
 
3024
                /* (1) It may be that reading the log fails because the garbage collector
 
3025
                 * has moved the record since we determined the location.
 
3026
                 * We handle this here, by re-reading the data the garbage collector
 
3027
                 * would have updated.
 
3028
                 *
 
3029
                 * (2) It may also happen that a new record is just being updated or
 
3030
                 * inserted. It is possible that the handle part of the record
 
3031
                 * has been written, but not yet the overflow.
 
3032
                 * This means that repeating the read attempt could work.
 
3033
                 *
 
3034
                 * (3) The extended data has been written by another handler and not yet
 
3035
                 * flushed. This should not happen because on committed extended
 
3036
                 * records are read, and all data should be flushed before
 
3037
                 * commit!
 
3038
                 *
 
3039
                 * NOTE: (2) above is not a problem when versioning is working
 
3040
                 * correctly. In this case, we should never try to read the extended
 
3041
                 * part of an uncommitted record (belonging to some other thread/
 
3042
                 * transaction).
 
3043
                 */
 
3044
                XTTabRecExtDRec rec_buf;
 
3045
 
 
3046
                xt_lock_mutex_ns(&ot->ot_table->tab_db->db_co_ext_lock);
 
3047
                retried = TRUE;
 
3048
 
 
3049
                if (!xt_tab_get_rec_data(ot, load_rec_id, XT_REC_EXT_HEADER_SIZE, (xtWord1 *) &rec_buf))
 
3050
                        goto failed;
 
3051
 
 
3052
                XT_GET_LOG_REF(log_id, log_offset, &rec_buf);
 
3053
                goto reread;
 
3054
        }
 
3055
 
 
3056
        failed:
 
3057
        if (retried)
 
3058
                xt_unlock_mutex_ns(&ot->ot_table->tab_db->db_co_ext_lock);
 
3059
        return FAILED;
 
3060
}
 
3061
 
 
3062
xtPublic xtBool xt_tab_put_rec_data(XTOpenTablePtr ot, xtRecordID rec_id, size_t size, xtWord1 *buffer, xtOpSeqNo *op_seq)
 
3063
{
 
3064
        register XTTableHPtr    tab = ot->ot_table;
 
3065
 
 
3066
        ASSERT_NS(rec_id);
 
3067
 
 
3068
        return tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, size, buffer, op_seq, TRUE, ot->ot_thread);
 
3069
}
 
3070
 
 
3071
xtPublic xtBool xt_tab_put_log_op_rec_data(XTOpenTablePtr ot, u_int status, xtRecordID free_rec_id, xtRecordID rec_id, size_t size, xtWord1 *buffer)
 
3072
{
 
3073
        register XTTableHPtr    tab = ot->ot_table;
 
3074
        xtOpSeqNo                               op_seq;
 
3075
 
 
3076
        ASSERT_NS(rec_id);
 
3077
 
 
3078
        if (status == XT_LOG_ENT_REC_MOVED) {
 
3079
                if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, offsetof(XTTabRecExtDRec, re_log_id_2), size, buffer, &op_seq, TRUE, ot->ot_thread))
 
3080
                        return FAILED;
 
3081
        }
 
3082
#ifdef DEBUG
 
3083
        else if (status == XT_LOG_ENT_REC_CLEANED_1) {
 
3084
                ASSERT_NS(0);   // shouldn't be used anymore
 
3085
        }
 
3086
#endif
 
3087
        else {
 
3088
                if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, size, buffer, &op_seq, TRUE, ot->ot_thread))
 
3089
                        return FAILED;
 
3090
        }
 
3091
 
 
3092
        return xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, free_rec_id, rec_id, size, buffer, ot->ot_thread);
 
3093
}
 
3094
 
 
3095
xtPublic xtBool xt_tab_put_log_rec_data(XTOpenTablePtr ot, u_int status, xtRecordID free_rec_id, xtRecordID rec_id, size_t size, xtWord1 *buffer, xtOpSeqNo *op_seq)
 
3096
{
 
3097
        register XTTableHPtr    tab = ot->ot_table;
 
3098
 
 
3099
        ASSERT_NS(rec_id);
 
3100
 
 
3101
        if (status == XT_LOG_ENT_REC_MOVED) {
 
3102
                if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, offsetof(XTTabRecExtDRec, re_log_id_2), size, buffer, op_seq, TRUE, ot->ot_thread))
 
3103
                        return FAILED;
 
3104
        }
 
3105
        else {
 
3106
                if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, size, buffer, op_seq, TRUE, ot->ot_thread))
 
3107
                        return FAILED;
 
3108
        }
 
3109
 
 
3110
        return xt_xlog_modify_table(tab->tab_id, status, *op_seq, 0, free_rec_id, rec_id, size, buffer, ot->ot_thread);
 
3111
}
 
3112
 
 
3113
xtPublic xtBool xt_tab_get_rec_data(XTOpenTablePtr ot, xtRecordID rec_id, size_t size, xtWord1 *buffer)
 
3114
{
 
3115
        register XTTableHPtr    tab = ot->ot_table;
 
3116
 
 
3117
        ASSERT_NS(rec_id);
 
3118
 
 
3119
        return tab->tab_recs.xt_tc_read(ot->ot_rec_file, rec_id, (size_t) size, buffer, ot->ot_thread);
 
3120
}
 
3121
 
 
3122
/*
 
3123
 * Note: this function grants locks even to transactions that
 
3124
 * are not specifically waiting for this transaction.
 
3125
 * This is required, because all threads waiting for 
 
3126
 * a lock should be considered "equal". In other words,
 
3127
 * they should not have to wait for the "right" transaction
 
3128
 * before they get the lock, or it will turn into a
 
3129
 * race to wait for the correct transaction.
 
3130
 *
 
3131
 * A transaction T1 can end up waiting for the wrong transaction
 
3132
 * T2, because T2 has released the lock, and given it to T3.
 
3133
 * Of course, T1 will wake up soon and realize this, but
 
3134
 * it is a matter of timing.
 
3135
 *
 
3136
 * The main point is that T2 has released the lock because
 
3137
 * it has ended (see {RELEASING-LOCKS} for more details)
 
3138
 * and therefore, there is no danger of it claiming the
 
3139
 * lock again, which can lead to a deadlock if T1 is
 
3140
 * given the lock instead of T3 in the example above.
 
3141
 * Then, if T2 tries to regain the lock before T1
 
3142
 * realizes that it has the lock.
 
3143
 */
 
3144
//static xtBool tab_get_lock_after_wait(XTThreadPtr thread, XTLockWaitPtr lw)
 
3145
//{
 
3146
//      register XTTableHPtr    tab = lw->lw_ot->ot_table;
 
3147
 
 
3148
        /* {ROW-LIST-LOCK}
 
3149
         * I don't believe this lock is required. If it is, please explain why!!
 
3150
         * XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[gl->lw_row_id % XT_ROW_RWLOCKS], thread);
 
3151
         *
 
3152
         * With the old row lock implementation a XT_TAB_ROW_WRITE_LOCK was required because
 
3153
         * the row locking did not have its own locks.
 
3154
         * The new list locking has its own locks. I was using XT_TAB_ROW_READ_LOCK,
 
3155
         * but i don't think this is required.
 
3156
         */
 
3157
//      return tab->tab_locks.xt_set_temp_lock(lw->lw_ot, lw, &lw->lw_thread->st_lock_list);
 
3158
//}
 
3159
 
 
3160
/*
 
3161
 * NOTE: Previously this function did not gain the row lock.
 
3162
 * If this change is a problem, please document why!
 
3163
 * The previous implementation did wait until no lock was on the
 
3164
 * row.
 
3165
 *
 
3166
 * I am thinking that it is simply a good idea to grab the lock,
 
3167
 * instead of waiting for no lock, before the retry. But it could
 
3168
 * result in locking more than required!
 
3169
 */
 
3170
static xtBool tab_wait_for_update(register XTOpenTablePtr ot, xtRowID row_id, xtXactID xn_id, XTThreadPtr thread)
 
3171
{
 
3172
        XTLockWaitRec   lw;
 
3173
        XTXactWaitRec   xw;
 
3174
        xtBool                  ok;
 
3175
                                
 
3176
        xw.xw_xn_id = xn_id;
 
3177
 
 
3178
        lw.lw_thread = thread;
 
3179
        lw.lw_ot = ot;
 
3180
        lw.lw_row_id = row_id;
 
3181
        lw.lw_row_updated = FALSE;
 
3182
 
 
3183
        /* First try to get the lock: */
 
3184
        if (!ot->ot_table->tab_locks.xt_set_temp_lock(ot, &lw, &thread->st_lock_list))
 
3185
                return FAILED;
 
3186
        if (lw.lw_curr_lock != XT_NO_LOCK)
 
3187
                /* Wait for the lock, then the transaction: */
 
3188
                ok = xt_xn_wait_for_xact(thread, &xw, &lw);
 
3189
        else
 
3190
                /* Just wait for the transaction: */
 
3191
                ok = xt_xn_wait_for_xact(thread, &xw, NULL);
 
3192
        
 
3193
#ifdef DEBUG_LOCK_QUEUE
 
3194
        ot->ot_table->tab_locks.rl_check(&lw);
 
3195
#endif
 
3196
        return ok;
 
3197
}
 
3198
 
 
3199
/* {WAIT-FOR}
 
3200
 * XT_OLD - The record is old. No longer visible because there is
 
3201
 * newer committed record before it in the record list.
 
3202
 * This is a special case of FALSE (the record is not visible).
 
3203
 * (see {WAIT-FOR} for details).
 
3204
 * It is significant because if we find too many of these when
 
3205
 * searching for records, then we have reason to believe the
 
3206
 * sweeper is far behind. This can happen in a test like this:
 
3207
 * runTest(INCREMENT_TEST, 2, INCREMENT_TEST_UPDATE_COUNT);
 
3208
 * What happens is T1 detects an updated row by T2,
 
3209
 * but T2 has not committed yet.
 
3210
 * It waits for T2. T2 commits and updates again before T1
 
3211
 * can update.
 
3212
 *
 
3213
 * Of course if we got a lock on the row when T2 quits, then
 
3214
 * this would not happen!
 
3215
 */
 
3216
 
 
3217
/*
 
3218
 * Is a record visible?
 
3219
 * Returns TRUE, FALSE, XT_ERR.
 
3220
 *
 
3221
 * TRUE - The record is visible.
 
3222
 * FALSE - The record is not visible.
 
3223
 * XT_ERR - An exception (error) occurred.
 
3224
 * XT_NEW - The most recent variation of this row has been returned
 
3225
 * and is to be used instead of the input!
 
3226
 * XT_REREAD - Re-read the record, and try again.
 
3227
 *
 
3228
 * Basically, a record is visible if it was committed on or before
 
3229
 * the transactions "visible time" (st_visible_time), and there
 
3230
 * are no other visible records before this record in the
 
3231
 * variation chain for the record.
 
3232
 *
 
3233
 * This holds in general, but you don't always get to see the
 
3234
 * visible record (as defined in this sense).
 
3235
 *
 
3236
 * On any kind of update (SELECT FOR UPDATE, UPDATE or DELETE), you
 
3237
 * get to see the most recent variation of the row!
 
3238
 *
 
3239
 * So on update, this function will wait if necessary for a recent
 
3240
 * update to be committed.
 
3241
 *
 
3242
 * So an update is a kind of "committed read" with a wait for
 
3243
 * uncommitted records.
 
3244
 *
 
3245
 * The result:
 
3246
 * - INSERTS may not be seen by the update read, depending on when
 
3247
 *   they occur.
 
3248
 * - Records may be returned in non-index order.
 
3249
 * - New records returned must be checked again by an index scan
 
3250
 *   to make sure they conform to the condition!
 
3251
 * 
 
3252
 * CREATE TABLE test_tab (ID int primary key, Value int, Name varchar(20), 
 
3253
 * index(Value, Name)) ENGINE=pbxt;
 
3254
 * INSERT test_tab values(4, 2, 'D');
 
3255
 * INSERT test_tab values(5, 2, 'E');
 
3256
 * INSERT test_tab values(6, 2, 'F');
 
3257
 * INSERT test_tab values(7, 2, 'G');
 
3258
 * 
 
3259
 * -- C1
 
3260
 * begin;
 
3261
 * select * from test_tab where id = 6 for update;
 
3262
 * -- C2
 
3263
 * begin;
 
3264
 * select * from test_tab where value = 2 order by value, name for update;
 
3265
 * -- C1
 
3266
 * update test_tab set Name = 'A' where id = 7;
 
3267
 * commit;
 
3268
 * -- C2
 
3269
 * Result order D, E, F, A.
 
3270
 *
 
3271
 * But Jim does it like this, so it should be OK.
 
3272
 */
 
3273
static int tab_visible(register XTOpenTablePtr ot, XTTabRecHeadDPtr rec_head, xtRecordID *new_rec_id)
 
3274
{
 
3275
        XTThreadPtr                             thread = ot->ot_thread;
 
3276
        xtXactID                                xn_id;
 
3277
        XTTabRecHeadDRec                var_head;
 
3278
        xtRowID                                 row_id;
 
3279
        xtRecordID                              var_rec_id;
 
3280
        register XTTableHPtr    tab;
 
3281
        xtBool                                  wait = FALSE;
 
3282
        xtXactID                                wait_xn_id = 0;
 
3283
#ifdef TRACE_VARIATIONS
 
3284
        char                                    t_buf[500];
 
3285
        int                                             len;
 
3286
#endif
 
3287
        int                                             result = TRUE;
 
3288
        xtBool                                  rec_clean;
 
3289
        xtRecordID                              invalid_rec;
 
3290
 
 
3291
        retry:
 
3292
        /* It can be that between the time that I read the index,
 
3293
         * and the time that I try to access the
 
3294
         * record, that the record is removed by
 
3295
         * the sweeper!
 
3296
         */
 
3297
        if (XT_REC_NOT_VALID(rec_head->tr_rec_type_1))
 
3298
                return FALSE;
 
3299
 
 
3300
        row_id = XT_GET_DISK_4(rec_head->tr_row_id_4);
 
3301
 
 
3302
        /* This can happen if the row has been removed, and
 
3303
         * reused:
 
3304
         */
 
3305
        if (ot->ot_curr_row_id && row_id != ot->ot_curr_row_id)
 
3306
                return FALSE;
 
3307
 
 
3308
#ifdef TRACE_VARIATIONS
 
3309
        len = sprintf(t_buf, "row=%d rec=%d ", (int) row_id, (int) ot->ot_curr_rec_id);
 
3310
#endif
 
3311
        if (!(rec_clean = XT_REC_IS_CLEAN(rec_head->tr_rec_type_1))) {
 
3312
                /* The record is not clean, which means it has not been swept.
 
3313
                 * So we have to check if it is visible.
 
3314
                 */
 
3315
                xn_id = XT_GET_DISK_4(rec_head->tr_xact_id_4);
 
3316
                switch (xt_xn_status(ot, xn_id, ot->ot_curr_rec_id)) {
 
3317
                        case XT_XN_VISIBLE:
 
3318
                                break;
 
3319
                        case XT_XN_NOT_VISIBLE:
 
3320
                                if (ot->ot_for_update) {
 
3321
                                        /* It is visible, only if it is an insert,
 
3322
                                         * which means if has no previous variation.
 
3323
                                         * Note, if an insert is updated, the record
 
3324
                                         * should be overwritten (TODO - check this).
 
3325
                                         */
 
3326
                                        var_rec_id = XT_GET_DISK_4(rec_head->tr_prev_rec_id_4);
 
3327
                                        if (!var_rec_id)
 
3328
                                                break;
 
3329
#ifdef TRACE_VARIATIONS
 
3330
                                        if (len <= 450)
 
3331
                                                len += sprintf(t_buf+len, "OTHER COMMIT (OVERWRITTEN) T%d\n", (int) xn_id);
 
3332
                                        xt_ttracef(thread, "%s", t_buf);
 
3333
#endif
 
3334
                                }
 
3335
#ifdef TRACE_VARIATIONS
 
3336
                                else {
 
3337
                                        if (len <= 450)
 
3338
                                                len += sprintf(t_buf+len, "OTHER COMMIT T%d\n", (int) xn_id);
 
3339
                                        xt_ttracef(thread, "%s", t_buf);
 
3340
                                }
 
3341
#endif
 
3342
                                /* {WAKE-SW}
 
3343
                                 * The record is not visible, although it has been committed.
 
3344
                                 * Clean the transaction ASAP.
 
3345
                                 */
 
3346
                                ot->ot_table->tab_db->db_sw_faster |= XT_SW_DIRTY_RECORD_FOUND;
 
3347
                                return FALSE;
 
3348
                        case XT_XN_ABORTED:
 
3349
                                /* {WAKE-SW}
 
3350
                                 * Reading an aborted record, this transaction
 
3351
                                 * must be cleaned up ASAP!
 
3352
                                 */
 
3353
                                ot->ot_table->tab_db->db_sw_faster |= XT_SW_DIRTY_RECORD_FOUND;
 
3354
#ifdef TRACE_VARIATIONS
 
3355
                                if (len <= 450)
 
3356
                                        len += sprintf(t_buf+len, "ABORTED T%d\n", (int) xn_id);
 
3357
                                xt_ttracef(thread, "%s", t_buf);
 
3358
#endif
 
3359
                                return FALSE;
 
3360
                        case XT_XN_MY_UPDATE:
 
3361
                                /* This is a record written by this transaction. */
 
3362
                                if (thread->st_is_update) {
 
3363
                                        /* Check that it was not written by the current update statement: */
 
3364
                                        if (XT_STAT_ID_MASK(ot->ot_update_id) == rec_head->tr_stat_id_1) {
 
3365
#ifdef TRACE_VARIATIONS
 
3366
                                                if (len <= 450)
 
3367
                                                        len += sprintf(t_buf+len, "MY UPDATE IN THIS STATEMENT T%d\n", (int) xn_id);
 
3368
                                                xt_ttracef(thread, "%s", t_buf);
 
3369
#endif
 
3370
                                                return FALSE;
 
3371
                                        }
 
3372
                                }
 
3373
                                ot->ot_curr_row_id = row_id;
 
3374
                                ot->ot_curr_updated = TRUE;
 
3375
                                if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
 
3376
                                        return XT_ERR;
 
3377
                                /* It is visible if it is at the front of the list.
 
3378
                                 * An update can end up not being at the front of the list
 
3379
                                 * if it is deleted afterwards!
 
3380
                                 */
 
3381
#ifdef TRACE_VARIATIONS
 
3382
                                if (len <= 450) {
 
3383
                                        if (var_rec_id == ot->ot_curr_rec_id)
 
3384
                                                len += sprintf(t_buf+len, "MY UPDATE T%d\n", (int) xn_id);
 
3385
                                        else
 
3386
                                                len += sprintf(t_buf+len, "MY UPDATE (OVERWRITTEN) T%d\n", (int) xn_id);
 
3387
                                }
 
3388
                                xt_ttracef(thread, "%s", t_buf);
 
3389
#endif
 
3390
                                return var_rec_id == ot->ot_curr_rec_id;
 
3391
                        case XT_XN_OTHER_UPDATE:
 
3392
                                if (ot->ot_for_update) {
 
3393
                                        /* If this is an insert, we are interested!
 
3394
                                         * Updated values are handled below. This is because
 
3395
                                         * the changed (new) records returned below are always
 
3396
                                         * followed (in the version chain) by the record
 
3397
                                         * we would have returned (if nothing had changed).
 
3398
                                         *
 
3399
                                         * As a result, we only return records here which have
 
3400
                                         * no "history". 
 
3401
                                         */
 
3402
                                        var_rec_id = XT_GET_DISK_4(rec_head->tr_prev_rec_id_4);
 
3403
                                        if (!var_rec_id) {
 
3404
#ifdef TRACE_VARIATIONS
 
3405
                                                if (len <= 450)
 
3406
                                                        len += sprintf(t_buf+len, "OTHER INSERT (WAIT FOR) T%d\n", (int) xn_id);
 
3407
                                                xt_ttracef(thread, "%s", t_buf);
 
3408
#endif
 
3409
                                                if (!tab_wait_for_update(ot, row_id, xn_id, thread))
 
3410
                                                        return XT_ERR;
 
3411
                                                if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &var_head))
 
3412
                                                        return XT_ERR;
 
3413
                                                rec_head = &var_head;
 
3414
                                                goto retry;
 
3415
                                        }
 
3416
                                }
 
3417
#ifdef TRACE_VARIATIONS
 
3418
                                if (len <= 450)
 
3419
                                        len += sprintf(t_buf+len, "OTHER UPDATE T%d\n", (int) xn_id);
 
3420
                                xt_ttracef(thread, "%s", t_buf);
 
3421
#endif
 
3422
                                return FALSE;
 
3423
                        case XT_XN_REREAD:
 
3424
#ifdef TRACE_VARIATIONS
 
3425
                                if (len <= 450)
 
3426
                                        len += sprintf(t_buf+len, "REREAD?! T%d\n", (int) xn_id);
 
3427
                                xt_ttracef(thread, "%s", t_buf);
 
3428
#endif
 
3429
                                return XT_REREAD;
 
3430
                }
 
3431
        }
 
3432
 
 
3433
        /* Follow the variation chain until we come to this record.
 
3434
         * If it is not the first visible variation then
 
3435
         * it is not visible at all. If it is not found on the
 
3436
         * variation chain, it is also not visible.
 
3437
         */
 
3438
        tab = ot->ot_table;
 
3439
 
 
3440
        retry_2:
 
3441
 
 
3442
#ifdef XT_USE_LIST_BASED_ROW_LOCKS
 
3443
        /* The list-based row locks use their own locks, so
 
3444
         * it is not necessary to get a write lock here.
 
3445
         */
 
3446
        XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
 
3447
#else
 
3448
        if (ot->ot_for_update)
 
3449
                XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
 
3450
        else
 
3451
                XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
 
3452
#endif
 
3453
 
 
3454
        invalid_rec = 0;
 
3455
        retry_3:
 
3456
        if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
 
3457
                goto failed;
 
3458
#ifdef TRACE_VARIATIONS
 
3459
        len += sprintf(t_buf+len, "ROW=%d", (int) row_id);
 
3460
#endif
 
3461
        while (var_rec_id != ot->ot_curr_rec_id) {
 
3462
                if (!var_rec_id) {
 
3463
#ifdef TRACE_VARIATIONS
 
3464
                        xt_ttracef(thread, "row=%d rec=%d NOT VISI not found in list\n", (int) row_id, (int) ot->ot_curr_rec_id);
 
3465
#endif
 
3466
                        goto not_found;
 
3467
                }
 
3468
                if (!xt_tab_get_rec_data(ot, var_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &var_head))
 
3469
                        goto failed;
 
3470
#ifdef TRACE_VARIATIONS
 
3471
                if (len <= 450)
 
3472
                        len += sprintf(t_buf+len, " -> %d(%d)", (int) var_rec_id, (int) var_head.tr_rec_type_1);
 
3473
#endif
 
3474
                /* All clean records are visible, by all transactions: */
 
3475
                if (XT_REC_IS_CLEAN(var_head.tr_rec_type_1)) {
 
3476
#ifdef TRACE_VARIATIONS
 
3477
                        xt_ttracef(thread, "row=%d rec=%d NOT VISI clean rec found\n", (int) row_id, (int) ot->ot_curr_rec_id);
 
3478
#endif
 
3479
                        goto not_found;
 
3480
                }
 
3481
                if (XT_REC_IS_FREE(var_head.tr_rec_type_1)) {
 
3482
#ifdef TRACE_VARIATIONS
 
3483
                        xt_ttracef(thread, "row=%d rec=%d NOT VISI free rec found?!\n", (int) row_id, (int) ot->ot_curr_rec_id);
 
3484
#endif
 
3485
                        /*
 
3486
                         * After an analysis we came to conclusion that this situation is
 
3487
                         * possible and valid. It can happen if index scan and row deletion
 
3488
                         * go in parallel:
 
3489
                         *
 
3490
                         *      Client Thread                                Sweeper
 
3491
                         *      -------------                                -------
 
3492
                         *   1. start index scan, lock the index file.
 
3493
                         *                                                2. start row deletion, wait for index lock
 
3494
                         *   3. unlock the index file, start search for 
 
3495
                         *      the valid version of the record
 
3496
                         *                                                4. delete the row, mark record as freed, 
 
3497
                         *                                                   but not yet cleaned by sweeper
 
3498
                         *   5. observe the record being freed
 
3499
                         *
 
3500
                         * after these steps we can get here, if the record was marked as free after
 
3501
                         * the tab_visible was entered by the scanning thread. 
 
3502
                         *
 
3503
                         */
 
3504
                        if (invalid_rec != var_rec_id) {
 
3505
                                /* This was "var_rec_id = invalid_rec", caused an infinite loop (bug #310184!) */
 
3506
                                invalid_rec = var_rec_id;
 
3507
                                goto retry_3;
 
3508
                        }
 
3509
                        /* Assume end of list. */
 
3510
                        goto not_found;
 
3511
                }
 
3512
 
 
3513
                /* This can happen if the row has been removed, and
 
3514
                 * reused:
 
3515
                 */
 
3516
                if (row_id != XT_GET_DISK_4(var_head.tr_row_id_4))
 
3517
                        goto not_found;
 
3518
 
 
3519
                xn_id = XT_GET_DISK_4(var_head.tr_xact_id_4);
 
3520
                /* This variation is visible if committed before this
 
3521
                 * transaction started, or updated by this transaction.
 
3522
                 *
 
3523
                 * We now know that this is the valid variation for
 
3524
                 * this record (for this table) for this transaction!
 
3525
                 * This will not change, unless the transaction
 
3526
                 * updates the record (again).
 
3527
                 *
 
3528
                 * So we can store this information as a hint, if
 
3529
                 * we see other variations belonging to this record,
 
3530
                 * then we can ignore them immediately!
 
3531
                 */
 
3532
                switch (xt_xn_status(ot, xn_id, var_rec_id)) {
 
3533
                        case XT_XN_VISIBLE:
 
3534
                                /* {WAKE-SW}
 
3535
                                 * We have encountered a record that has been overwritten, if the
 
3536
                                 * record has not been cleaned, then the sweeper is too far
 
3537
                                 * behind!
 
3538
                                 */
 
3539
                                if (!rec_clean)
 
3540
                                        ot->ot_table->tab_db->db_sw_faster |= XT_SW_DIRTY_RECORD_FOUND;
 
3541
#ifdef TRACE_VARIATIONS
 
3542
                                xt_ttracef(thread, "row=%d rec=%d NOT VISI committed rec found\n", (int) row_id, (int) ot->ot_curr_rec_id);
 
3543
#endif
 
3544
                                goto not_found;
 
3545
                        case XT_XN_NOT_VISIBLE:
 
3546
                                if (ot->ot_for_update) {
 
3547
                                        /* Substitute this record for the one we
 
3548
                                         * are reading!!
 
3549
                                         */
 
3550
                                        if (result == TRUE) {
 
3551
                                                if (XT_REC_IS_DELETE(var_head.tr_rec_type_1))
 
3552
                                                        result = FALSE;
 
3553
                                                else {
 
3554
                                                        *new_rec_id = var_rec_id;
 
3555
                                                        result = XT_NEW;
 
3556
                                                }
 
3557
                                        }
 
3558
                                }
 
3559
                                break;
 
3560
                        case XT_XN_ABORTED:
 
3561
                                /* Ignore the record, it will be removed. */
 
3562
                                break;
 
3563
                        case XT_XN_MY_UPDATE:
 
3564
#ifdef TRACE_VARIATIONS
 
3565
                                xt_ttracef(thread, "row=%d rec=%d NOT VISI my update found\n", (int) row_id, (int) ot->ot_curr_rec_id);
 
3566
#endif
 
3567
                                goto not_found;
 
3568
                        case XT_XN_OTHER_UPDATE:
 
3569
                                /* Wait for this update to commit or abort: */
 
3570
                                if (!wait) {
 
3571
                                        wait = TRUE;
 
3572
                                        wait_xn_id = xn_id;
 
3573
                                }
 
3574
#ifdef TRACE_VARIATIONS
 
3575
                                if (len <= 450)
 
3576
                                        len += sprintf(t_buf+len, "-T%d", (int) wait_xn_id);
 
3577
#endif
 
3578
                                break;
 
3579
                        case XT_XN_REREAD:
 
3580
                                if (invalid_rec != var_rec_id) {
 
3581
                                        invalid_rec = var_rec_id;
 
3582
                                        goto retry_3;
 
3583
                                }
 
3584
                                /* Assume end of list. */
 
3585
#ifdef XT_CRASH_DEBUG
 
3586
                                /* Should not happen! */
 
3587
                                xt_crash_me();
 
3588
#endif
 
3589
                                goto not_found;
 
3590
                }
 
3591
                var_rec_id = XT_GET_DISK_4(var_head.tr_prev_rec_id_4);
 
3592
        }
 
3593
#ifdef TRACE_VARIATIONS
 
3594
        if (len <= 450)
 
3595
                sprintf(t_buf+len, " -> %d(%d)\n", (int) var_rec_id, (int) rec_head->tr_rec_type_1);
 
3596
        else
 
3597
                sprintf(t_buf+len, " ...\n");
 
3598
        //xt_ttracef(thread, "%s", t_buf);
 
3599
#endif
 
3600
 
 
3601
        if (ot->ot_for_update) {
 
3602
                xtBool                  ok;
 
3603
                XTLockWaitRec   lw;
 
3604
 
 
3605
                if (wait) {
 
3606
                        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
 
3607
#ifdef TRACE_VARIATIONS
 
3608
                        xt_ttracef(thread, "T%d WAIT FOR T%d (will retry)\n", (int) thread->st_xact_data->xd_start_xn_id, (int) wait_xn_id);
 
3609
#endif
 
3610
                        if (!tab_wait_for_update(ot, row_id, wait_xn_id, thread))
 
3611
                                return XT_ERR;
 
3612
                        wait = FALSE;
 
3613
                        wait_xn_id = 0;
 
3614
                        /*
 
3615
                         * Retry in order to try to avoid missing
 
3616
                         * any records that we should see in FOR UPDATE
 
3617
                         * mode.
 
3618
                         *
 
3619
                         * We also want to take another look at the record
 
3620
                         * we just tried to read.
 
3621
                         *
 
3622
                         * If it has been updated, then a new record has
 
3623
                         * been created. This will be detected when we
 
3624
                         * try to read it again, and XT_NEW will be returned.
 
3625
                         */
 
3626
                        thread->st_statistics.st_retry_index_scan++;
 
3627
                        return XT_RETRY;
 
3628
                }
 
3629
 
 
3630
                /* {ROW-LIST-LOCK} */
 
3631
                lw.lw_thread = thread;
 
3632
                lw.lw_ot = ot;
 
3633
                lw.lw_row_id = row_id;
 
3634
                lw.lw_row_updated = FALSE;
 
3635
                ok = tab->tab_locks.xt_set_temp_lock(ot, &lw, &thread->st_lock_list);
 
3636
                XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
 
3637
                if (!ok) {
 
3638
#ifdef DEBUG_LOCK_QUEUE
 
3639
                        ot->ot_table->tab_locks.rl_check(&lw);
 
3640
#endif
 
3641
                        return XT_ERR;
 
3642
                }
 
3643
                if (lw.lw_curr_lock != XT_NO_LOCK) {
 
3644
#ifdef TRACE_VARIATIONS
 
3645
                        xt_ttracef(thread, "T%d WAIT FOR LOCK(%s) T%d\n", (int) thread->st_xact_data->xd_start_xn_id, (int) lw.lw_curr_lock == XT_TEMP_LOCK ? "temp" : "perm", (int) xn_id);
 
3646
#endif
 
3647
                        if (!xt_xn_wait_for_xact(thread, NULL, &lw)) {
 
3648
#ifdef DEBUG_LOCK_QUEUE
 
3649
                                ot->ot_table->tab_locks.rl_check(&lw);
 
3650
#endif
 
3651
                                return XT_ERR;
 
3652
                        }
 
3653
#ifdef DEBUG_LOCK_QUEUE
 
3654
                        ot->ot_table->tab_locks.rl_check(&lw);
 
3655
#endif
 
3656
#ifdef TRACE_VARIATIONS
 
3657
                        len = sprintf(t_buf, "(retry): row=%d rec=%d ", (int) row_id, (int) ot->ot_curr_rec_id);
 
3658
#endif
 
3659
                        /* GOTCHA!
 
3660
                         * Reset the result before we go down the list again, to make sure we
 
3661
                         * get the latest record!!
 
3662
                         */
 
3663
                        result = TRUE;
 
3664
                        thread->st_statistics.st_reread_record_list++;
 
3665
                        goto retry_2;
 
3666
                }
 
3667
#ifdef DEBUG_LOCK_QUEUE
 
3668
                ot->ot_table->tab_locks.rl_check(&lw);
 
3669
#endif
 
3670
        }
 
3671
        else {
 
3672
                XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
 
3673
        }
 
3674
 
 
3675
#ifdef TRACE_VARIATIONS
 
3676
        if (result == XT_NEW)
 
3677
                xt_ttracef(thread, "row=%d rec=%d RETURN NEW %d\n", (int) row_id, (int) ot->ot_curr_rec_id, (int) *new_rec_id);
 
3678
        else if (result)
 
3679
                xt_ttracef(thread, "row=%d rec=%d VISIBLE\n", (int) row_id, (int) ot->ot_curr_rec_id);
 
3680
        else
 
3681
                xt_ttracef(thread, "row=%d rec=%d RETURN NOT VISIBLE (NEW)\n", (int) row_id, (int) ot->ot_curr_rec_id);
 
3682
#endif
 
3683
 
 
3684
        ot->ot_curr_row_id = row_id;
 
3685
        ot->ot_curr_updated = FALSE;
 
3686
        return result;
 
3687
 
 
3688
        not_found:
 
3689
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
 
3690
        return FALSE;
 
3691
 
 
3692
        failed:
 
3693
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
 
3694
        return XT_ERR;
 
3695
}
 
3696
 
 
3697
/*
 
3698
 * Return TRUE if the record has been read, and is visible.
 
3699
 * Return FALSE if the record is not visible.
 
3700
 * Return XT_ERR if an error occurs.
 
3701
 */
 
3702
xtPublic int xt_tab_visible(XTOpenTablePtr ot)
 
3703
{
 
3704
        xtRowID                         row_id;
 
3705
        XTTabRecHeadDRec        rec_head;
 
3706
        xtRecordID                      new_rec_id;
 
3707
        xtBool                          read_again = FALSE;
 
3708
        int                                     r;
 
3709
 
 
3710
        if ((row_id = ot->ot_curr_row_id)) {
 
3711
                /* Fast track, do a quick check.
 
3712
                 * Row ID is only set if this record has been committed,
 
3713
                 * (and swept).
 
3714
                 * Check if it is the first on the list!
 
3715
                 */
 
3716
                xtRecordID var_rec_id;
 
3717
 
 
3718
                retry:
 
3719
                if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
 
3720
                        return XT_ERR;
 
3721
                if (ot->ot_curr_rec_id == var_rec_id) {
 
3722
                        /* Looks good.. */
 
3723
                        if (ot->ot_for_update) {
 
3724
                                XTThreadPtr             thread = ot->ot_thread;
 
3725
                                XTTableHPtr             tab = ot->ot_table;
 
3726
                                XTLockWaitRec   lw;
 
3727
 
 
3728
                                /* {ROW-LIST-LOCK} */
 
3729
                                lw.lw_thread = thread;
 
3730
                                lw.lw_ot = ot;
 
3731
                                lw.lw_row_id = row_id;
 
3732
                                lw.lw_row_updated = FALSE;
 
3733
                                if (!tab->tab_locks.xt_set_temp_lock(ot, &lw, &thread->st_lock_list)) {
 
3734
#ifdef DEBUG_LOCK_QUEUE
 
3735
                                        ot->ot_table->tab_locks.rl_check(&lw);
 
3736
#endif
 
3737
                                        return XT_ERR;
 
3738
                                }
 
3739
                                if (lw.lw_curr_lock != XT_NO_LOCK) {
 
3740
                                        if (!xt_xn_wait_for_xact(thread, NULL, &lw)) {
 
3741
#ifdef DEBUG_LOCK_QUEUE
 
3742
                                                ot->ot_table->tab_locks.rl_check(&lw);
 
3743
#endif
 
3744
                                                return XT_ERR;
 
3745
                                        }
 
3746
#ifdef DEBUG_LOCK_QUEUE
 
3747
                                        ot->ot_table->tab_locks.rl_check(&lw);
 
3748
#endif
 
3749
                                        goto retry;
 
3750
                                }
 
3751
#ifdef DEBUG_LOCK_QUEUE
 
3752
                                ot->ot_table->tab_locks.rl_check(&lw);
 
3753
#endif
 
3754
                        }
 
3755
                        return TRUE;
 
3756
                }
 
3757
        }
 
3758
 
 
3759
        reread:
 
3760
        if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &rec_head))
 
3761
                return XT_ERR;
 
3762
 
 
3763
        switch ((r = tab_visible(ot, &rec_head, &new_rec_id))) {
 
3764
                case XT_NEW:
 
3765
                        ot->ot_curr_rec_id = new_rec_id;
 
3766
                        break;
 
3767
                case XT_REREAD:
 
3768
                        /* Avoid infinite loop: */
 
3769
                        if (read_again) {
 
3770
                                /* Should not happen! */
 
3771
#ifdef XT_CRASH_DEBUG
 
3772
                                /* Generate a core dump! */
 
3773
                                xt_crash_me();
 
3774
#endif
 
3775
                                return FALSE;
 
3776
                        }
 
3777
                        read_again = TRUE;
 
3778
                        goto reread;
 
3779
                default:
 
3780
                        break;
 
3781
        }
 
3782
        return r;
 
3783
}
 
3784
 
 
3785
/*
 
3786
 * Read a record, and return one of the following:
 
3787
 * TRUE - the record has been read, and is visible.
 
3788
 * FALSE - the record is not visible.
 
3789
 * XT_ERR - an error occurs.
 
3790
 * XT_NEW - Means the expected record has been changed.
 
3791
 * When doing an index scan, the conditions must be checked again!
 
3792
 */
 
3793
xtPublic int xt_tab_read_record(register XTOpenTablePtr ot, xtWord1 *buffer)
 
3794
{
 
3795
        register XTTableHPtr    tab = ot->ot_table;
 
3796
        size_t                                  rec_size = tab->tab_dic.dic_rec_size;
 
3797
        xtRecordID                              new_rec_id;
 
3798
        int                                             result;
 
3799
        xtBool                                  read_again = FALSE;
 
3800
 
 
3801
        if (!(ot->ot_thread->st_xact_data)) {
 
3802
                xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_TRANSACTION);
 
3803
                return XT_ERR;
 
3804
        }
 
3805
 
 
3806
        reread:
 
3807
        if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, rec_size, ot->ot_row_rbuffer))
 
3808
                return XT_ERR;
 
3809
 
 
3810
        switch (tab_visible(ot, (XTTabRecHeadDPtr) ot->ot_row_rbuffer, &new_rec_id)) {
 
3811
                case FALSE:
 
3812
                        return FALSE;
 
3813
                case XT_ERR:
 
3814
                        return XT_ERR;
 
3815
                case XT_NEW:
 
3816
                        if (!xt_tab_get_rec_data(ot, new_rec_id, rec_size, ot->ot_row_rbuffer))
 
3817
                                return XT_ERR;
 
3818
                        ot->ot_curr_rec_id = new_rec_id;
 
3819
                        result = XT_NEW;
 
3820
                        break;
 
3821
                case XT_RETRY:
 
3822
                        return XT_RETRY;
 
3823
                case XT_REREAD:
 
3824
                        /* Avoid infinite loop: */
 
3825
                        if (read_again) {
 
3826
                                /* Should not happen! */
 
3827
#ifdef XT_CRASH_DEBUG
 
3828
                                /* Generate a core dump! */
 
3829
                                xt_crash_me();
 
3830
#endif
 
3831
                                return FALSE;
 
3832
                        }
 
3833
                        read_again = TRUE;
 
3834
                        goto reread;
 
3835
                default:
 
3836
                        result = OK;
 
3837
                        break;
 
3838
        }
 
3839
 
 
3840
        if (ot->ot_rec_fixed)
 
3841
                memcpy(buffer, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_size - XT_REC_FIX_HEADER_SIZE);
 
3842
        else if (ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VARIABLE || ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VAR_CLEAN) {
 
3843
                if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, buffer, ot->ot_cols_req))
 
3844
                        return XT_ERR;
 
3845
        }
 
3846
        else {
 
3847
                u_int cols_req = ot->ot_cols_req;
 
3848
 
 
3849
                ASSERT_NS(cols_req);
 
3850
                if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
 
3851
                        if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, buffer, cols_req))
 
3852
                                return XT_ERR;
 
3853
                }
 
3854
                else {
 
3855
                        if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, buffer, cols_req))
 
3856
                                return XT_ERR;
 
3857
                }
 
3858
        }
 
3859
 
 
3860
        return result;
 
3861
}
 
3862
 
 
3863
/*
 
3864
 * Returns:
 
3865
 *
 
3866
 * TRUE/OK - record was read.
 
3867
 * FALSE/FAILED - An error occurred.
 
3868
 */
 
3869
xtPublic int xt_tab_dirty_read_record(register XTOpenTablePtr ot, xtWord1 *buffer)
 
3870
{
 
3871
        register XTTableHPtr    tab = ot->ot_table;
 
3872
        size_t                                  rec_size = tab->tab_dic.dic_rec_size;
 
3873
 
 
3874
        if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, rec_size, ot->ot_row_rbuffer))
 
3875
                return FAILED;
 
3876
 
 
3877
        if (XT_REC_NOT_VALID(ot->ot_row_rbuffer[0])) {
 
3878
                /* Should not happen! */
 
3879
                xt_register_xterr(XT_REG_CONTEXT, XT_ERR_RECORD_DELETED);
 
3880
                return FAILED;
 
3881
        }
 
3882
 
 
3883
        ot->ot_curr_row_id = XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_row_id_4);
 
3884
        ot->ot_curr_updated =
 
3885
                (XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_xact_id_4) == ot->ot_thread->st_xact_data->xd_start_xn_id);
 
3886
 
 
3887
        if (ot->ot_rec_fixed)
 
3888
                memcpy(buffer, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_size - XT_REC_FIX_HEADER_SIZE);
 
3889
        else if (ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VARIABLE || ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VAR_CLEAN) {
 
3890
                if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, buffer, ot->ot_cols_req))
 
3891
                        return FAILED;
 
3892
        }
 
3893
        else {
 
3894
                u_int cols_req = ot->ot_cols_req;
 
3895
 
 
3896
                ASSERT_NS(cols_req);
 
3897
                if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
 
3898
                        if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, buffer, cols_req))
 
3899
                                return FAILED;
 
3900
                }
 
3901
                else {
 
3902
                        if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, buffer, cols_req))
 
3903
                                return FAILED;
 
3904
                }
 
3905
        }
 
3906
 
 
3907
        return OK;
 
3908
}
 
3909
 
 
3910
/*
 * TAB_ROW_LOAD_CACHE is the "load" argument given to
 * xt_tc_get_page() when the row pointer file is pulled into memory.
 *
 * If XT_USE_ROW_REC_MMAP_FILES is defined, loading into cache is
 * not required. Instead the memory map is copied to load the data.
 */
#ifndef XT_USE_ROW_REC_MMAP_FILES
#define TAB_ROW_LOAD_CACHE		TRUE
#else
#define TAB_ROW_LOAD_CACHE		FALSE
#endif
 
3919
 
 
3920
/*
 
3921
 * Pull the entire row pointer file into memory.
 
3922
 */
 
3923
xtPublic void xt_tab_load_row_pointers(XTThreadPtr self, XTOpenTablePtr ot)
 
3924
{
 
3925
        XTTableHPtr     tab = ot->ot_table;
 
3926
        xtRecordID      eof_rec_id = tab->tab_row_eof_id;
 
3927
        xtInt8          usage;
 
3928
        xtWord1         *buffer = NULL;
 
3929
 
 
3930
        /* Check if there is enough cache: */
 
3931
        usage = xt_tc_get_usage();
 
3932
        if (xt_tc_get_high() > usage)
 
3933
                usage = xt_tc_get_high();
 
3934
        if (usage + ((xtInt8) eof_rec_id * (xtInt8) tab->tab_rows.tci_rec_size) < xt_tc_get_size()) {
 
3935
                xtRecordID                      rec_id;
 
3936
                size_t                          poffset, tfer;
 
3937
                off_t                           offset, end_offset;
 
3938
                XTTabCachePagePtr       page;
 
3939
                
 
3940
                end_offset = xt_row_id_to_row_offset(tab, eof_rec_id);
 
3941
                rec_id = 1;
 
3942
                while (rec_id < eof_rec_id) {
 
3943
                        if (!tab->tab_rows.xt_tc_get_page(ot->ot_row_file, rec_id, TAB_ROW_LOAD_CACHE, &page, &poffset, self))
 
3944
                                xt_throw(self);
 
3945
                        if (page)
 
3946
                                tab->tab_rows.xt_tc_release_page(ot->ot_row_file, page, self);
 
3947
                        else {
 
3948
                                xtWord1 *buff_ptr;
 
3949
 
 
3950
                                if (!buffer)
 
3951
                                        buffer = (xtWord1 *) xt_malloc(self, tab->tab_rows.tci_page_size);
 
3952
                                offset = xt_row_id_to_row_offset(tab, rec_id);
 
3953
                                tfer = tab->tab_rows.tci_page_size;
 
3954
                                if (offset + (off_t) tfer > end_offset)
 
3955
                                        tfer = (size_t) (end_offset - offset);
 
3956
                                XT_LOCK_MEMORY_PTR(buff_ptr, ot->ot_row_file, offset, tfer, &self->st_statistics.st_rec, self);
 
3957
                                if (buff_ptr) {
 
3958
                                        memcpy(buffer, buff_ptr, tfer);
 
3959
                                        XT_UNLOCK_MEMORY_PTR(ot->ot_row_file, buff_ptr, self);
 
3960
                                }
 
3961
                        }
 
3962
                        rec_id += tab->tab_rows.tci_rows_per_page;
 
3963
                }
 
3964
                if (buffer)
 
3965
                        xt_free(self, buffer);
 
3966
        }
 
3967
}
 
3968
 
 
3969
xtPublic void xt_tab_load_table(XTThreadPtr self, XTOpenTablePtr ot)
 
3970
{
 
3971
        xt_load_pages(self, ot);
 
3972
        xt_load_indices(self, ot);
 
3973
}
 
3974
 
 
3975
xtPublic xtBool xt_tab_load_record(register XTOpenTablePtr ot, xtRecordID rec_id, XTInfoBufferPtr rec_buf)
 
3976
{
 
3977
        register XTTableHPtr    tab = ot->ot_table;
 
3978
        size_t                                  rec_size = tab->tab_dic.dic_rec_size;
 
3979
 
 
3980
        if (!xt_tab_get_rec_data(ot, rec_id, rec_size, ot->ot_row_rbuffer))
 
3981
                return FAILED;
 
3982
 
 
3983
        if (XT_REC_NOT_VALID(ot->ot_row_rbuffer[0])) {
 
3984
                /* Should not happen! */
 
3985
                XTThreadPtr self = ot->ot_thread;
 
3986
 
 
3987
                xt_log(XT_WARNING, "Recently updated record invalid\n");
 
3988
                return OK;
 
3989
        }
 
3990
 
 
3991
        ot->ot_curr_row_id = XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_row_id_4);
 
3992
        ot->ot_curr_updated =
 
3993
                (XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_xact_id_4) == ot->ot_thread->st_xact_data->xd_start_xn_id);
 
3994
 
 
3995
        if (ot->ot_rec_fixed) {
 
3996
                size_t size = rec_size - XT_REC_FIX_HEADER_SIZE;
 
3997
                if (!xt_ib_alloc(NULL, rec_buf, size))
 
3998
                        return FAILED;
 
3999
                memcpy(rec_buf->ib_db.db_data, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, size);
 
4000
        }
 
4001
        else {
 
4002
                if (!xt_ib_alloc(NULL, rec_buf, tab->tab_dic.dic_mysql_buf_size))
 
4003
                        return FAILED;
 
4004
                if (ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VARIABLE || ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VAR_CLEAN) {
 
4005
                        if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_buf->ib_db.db_data, ot->ot_cols_req))
 
4006
                                return FAILED;
 
4007
                }
 
4008
                else {
 
4009
                        u_int cols_req = ot->ot_cols_req;
 
4010
 
 
4011
                        ASSERT_NS(cols_req);
 
4012
                        if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
 
4013
                                if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, rec_buf->ib_db.db_data, cols_req))
 
4014
                                        return FAILED;
 
4015
                        }
 
4016
                        else {
 
4017
                                if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, rec_buf->ib_db.db_data, cols_req))
 
4018
                                        return FAILED;
 
4019
                        }
 
4020
                }
 
4021
        }
 
4022
 
 
4023
        return OK;
 
4024
}
 
4025
 
 
4026
xtPublic xtBool xt_tab_free_row(XTOpenTablePtr ot, XTTableHPtr tab, xtRowID row_id)
 
4027
{
 
4028
        XTTabRowRefDRec free_row;
 
4029
        xtRowID                 prev_row;
 
4030
        xtOpSeqNo               op_seq;
 
4031
 
 
4032
        ASSERT_NS(row_id); // Cannot free the header!
 
4033
 
 
4034
        xt_lock_mutex_ns(&tab->tab_row_lock);
 
4035
        prev_row = tab->tab_row_free_id;
 
4036
        XT_SET_DISK_4(free_row.rr_ref_id_4, prev_row);
 
4037
        if (!tab->tab_rows.xt_tc_write(ot->ot_row_file, row_id, 0, sizeof(XTTabRowRefDRec), (xtWord1 *) &free_row, &op_seq, TRUE, ot->ot_thread)) {
 
4038
                xt_unlock_mutex_ns(&tab->tab_row_lock);
 
4039
                return FAILED;
 
4040
        }
 
4041
        tab->tab_row_free_id = row_id;
 
4042
        tab->tab_row_fnum++;
 
4043
        xt_unlock_mutex_ns(&tab->tab_row_lock);
 
4044
 
 
4045
        if (!xt_xlog_modify_table(tab->tab_id, XT_LOG_ENT_ROW_FREED, op_seq, 0, 0, row_id, sizeof(XTTabRowRefDRec), (xtWord1 *) &free_row, ot->ot_thread))
 
4046
                return FAILED;
 
4047
 
 
4048
        return OK;
 
4049
}
 
4050
 
 
4051
static void tab_free_ext_record_on_fail(XTOpenTablePtr ot, xtRecordID rec_id, XTTabRecExtDPtr ext_rec, xtBool log_err)
 
4052
{
 
4053
        xtWord4         log_over_size = XT_GET_DISK_4(ext_rec->re_log_dat_siz_4);
 
4054
        xtLogID         log_id;
 
4055
        xtLogOffset     log_offset;
 
4056
 
 
4057
        XT_GET_LOG_REF(log_id, log_offset, ext_rec);
 
4058
 
 
4059
        if (ot->ot_table->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
 
4060
                xt_tab_free_ext_slot(ot->ot_table, log_id, log_offset, log_over_size);
 
4061
        else {
 
4062
                if (!ot->ot_thread->st_dlog_buf.dlb_delete_log(log_id, log_offset, log_over_size, ot->ot_table->tab_id, rec_id, ot->ot_thread)) {
 
4063
                        if (log_err)
 
4064
                                xt_log_and_clear_exception_ns();
 
4065
                }
 
4066
        }
 
4067
}
 
4068
 
 
4069
static void tab_save_exception(XTExceptionPtr e)
 
4070
{
 
4071
        XTThreadPtr self = xt_get_self();
 
4072
 
 
4073
        *e = self->t_exception;
 
4074
}
 
4075
 
 
4076
static void tab_restore_exception(XTExceptionPtr e)
 
4077
{
 
4078
        XTThreadPtr self = xt_get_self();
 
4079
 
 
4080
        self->t_exception = *e;
 
4081
}
 
4082
 
 
4083
/*
 
4084
 * This function assumes that a record may be partially written.
 
4085
 * It removes all associated data and references to the record.
 
4086
 *
 
4087
 * This function return XT_ERR if an error occurs.
 
4088
 * TRUE if the record has been removed, and may be freed.
 
4089
 * FALSE if the record has already been freed. 
 
4090
 *
 
4091
 */
 
4092
xtPublic int xt_tab_remove_record(XTOpenTablePtr ot, xtRecordID rec_id, xtWord1 *rec_data, xtRecordID *prev_var_id, xtBool clean_delete, xtRowID row_id, xtXactID XT_UNUSED(xn_id))
 
4093
{
 
4094
        register XTTableHPtr    tab = ot->ot_table;
 
4095
        size_t                                  rec_size;
 
4096
        xtWord1                                 old_rec_type;
 
4097
        u_int                                   cols_req;
 
4098
        u_int                                   cols_in_buffer;
 
4099
 
 
4100
        *prev_var_id = 0;
 
4101
 
 
4102
        if (!rec_id)
 
4103
                return FALSE;
 
4104
 
 
4105
        /*
 
4106
         * NOTE: This function uses the read buffer. This should be OK because
 
4107
         * the function is only called by the sweeper. The read buffer
 
4108
         * is REQUIRED because of the call to xt_tab_load_ext_data()!!!
 
4109
         */
 
4110
        rec_size = tab->tab_dic.dic_rec_size;
 
4111
        if (!xt_tab_get_rec_data(ot, rec_id, rec_size, ot->ot_row_rbuffer))
 
4112
                return XT_ERR;
 
4113
        old_rec_type = ot->ot_row_rbuffer[0];
 
4114
 
 
4115
        /* Check of the record has not already been freed: */
 
4116
        if (XT_REC_IS_FREE(old_rec_type))
 
4117
                return FALSE;
 
4118
 
 
4119
        /* This record must belong to the given row: */
 
4120
        if (XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->tr_row_id_4) != row_id)
 
4121
                return FALSE;
 
4122
 
 
4123
        /* The transaction ID of the record must be BEFORE or equal to the given
 
4124
         * transaction ID.
 
4125
         *
 
4126
         * No, this does not always hold. Because we wait for updates now,
 
4127
         * a "younger" transaction can update before an older
 
4128
         * transaction.
 
4129
         * Commit order determined the actual order in which the transactions
 
4130
         * should be replicated. This is determined by the log number of
 
4131
         * the commit record!
 
4132
        if (db->db_xn_curr_id(xn_id, XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->tr_xact_id_4)))
 
4133
                return FALSE;
 
4134
         */
 
4135
 
 
4136
        *prev_var_id = XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->tr_prev_rec_id_4);
 
4137
 
 
4138
        if (tab->tab_dic.dic_key_count) {
 
4139
                XTIndexPtr      *ind;
 
4140
 
 
4141
                switch (old_rec_type) {
 
4142
                        case XT_TAB_STATUS_DELETE:
 
4143
                        case XT_TAB_STATUS_DEL_CLEAN:
 
4144
                                rec_size = sizeof(XTTabRecHeadDRec);
 
4145
                                goto set_removed;
 
4146
                        case XT_TAB_STATUS_FIXED:
 
4147
                        case XT_TAB_STATUS_FIX_CLEAN:
 
4148
                                /* We know that for a fixed length record, 
 
4149
                                 * dic_ind_rec_len <= dic_rec_size! */
 
4150
                                rec_size = (size_t) tab->tab_dic.dic_ind_rec_len + XT_REC_FIX_HEADER_SIZE;
 
4151
                                rec_data = ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE;
 
4152
                                break;
 
4153
                        case XT_TAB_STATUS_VARIABLE:
 
4154
                        case XT_TAB_STATUS_VAR_CLEAN:
 
4155
                                cols_req = tab->tab_dic.dic_ind_cols_req;
 
4156
 
 
4157
                                cols_in_buffer = cols_req;
 
4158
                                rec_size = myxt_load_row_length(ot, rec_size - XT_REC_FIX_HEADER_SIZE, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, &cols_in_buffer);
 
4159
                                if (cols_in_buffer < cols_req)
 
4160
                                        rec_size = tab->tab_dic.dic_rec_size;
 
4161
                                else 
 
4162
                                        rec_size += XT_REC_FIX_HEADER_SIZE;
 
4163
                                if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_data, cols_req)) {
 
4164
                                        xt_log_and_clear_exception_ns();
 
4165
                                        goto set_removed;
 
4166
                                }
 
4167
                                break;
 
4168
                        case XT_TAB_STATUS_EXT_DLOG:
 
4169
                        case XT_TAB_STATUS_EXT_CLEAN:
 
4170
                                cols_req = tab->tab_dic.dic_ind_cols_req;
 
4171
 
 
4172
                                ASSERT_NS(cols_req);
 
4173
                                cols_in_buffer = cols_req;
 
4174
                                rec_size = myxt_load_row_length(ot, rec_size - XT_REC_EXT_HEADER_SIZE, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, &cols_in_buffer);
 
4175
                                if (cols_in_buffer < cols_req) {
 
4176
                                        rec_size = tab->tab_dic.dic_rec_size;
 
4177
                                        if (!xt_tab_load_ext_data(ot, rec_id, rec_data, cols_req)) {
 
4178
                                                /* This is actually quite possible after recovery, see [(3)] */
 
4179
                                                if (ot->ot_thread->t_exception.e_xt_err != XT_ERR_BAD_EXT_RECORD &&
 
4180
                                                        ot->ot_thread->t_exception.e_xt_err != XT_ERR_DATA_LOG_NOT_FOUND)
 
4181
                                                        xt_log_and_clear_exception_ns();
 
4182
                                                goto set_removed;
 
4183
                                        }
 
4184
                                }
 
4185
                                else {
 
4186
                                        /* All the records we require are in the buffer... */
 
4187
                                        rec_size += XT_REC_EXT_HEADER_SIZE;
 
4188
                                        if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, rec_data, cols_req)) {
 
4189
                                                xt_log_and_clear_exception_ns();
 
4190
                                                goto set_removed;
 
4191
                                        }
 
4192
                                }
 
4193
                                break;
 
4194
                        default:
 
4195
                                break;
 
4196
                }
 
4197
 
 
4198
                /* Could this be the case?: This change may only be flushed after the
 
4199
                 * operation below has been flushed to the log.
 
4200
                 *
 
4201
                 * No, remove records are never "undone". The sweeper will delete
 
4202
                 * the record again if it does not land in the log.
 
4203
                 *
 
4204
                 * The fact that the index entries have already been removed is not
 
4205
                 * a problem.
 
4206
                 */
 
4207
                if (!tab->tab_dic.dic_disable_index) {
 
4208
                        ind = tab->tab_dic.dic_keys;
 
4209
                        for (u_int i=0; i<tab->tab_dic.dic_key_count; i++, ind++) {
 
4210
                                if (!xt_idx_delete(ot, *ind, rec_id, rec_data))
 
4211
                                        xt_log_and_clear_exception_ns();
 
4212
                        }
 
4213
                }
 
4214
        }
 
4215
        else {
 
4216
                /* No indices: */
 
4217
                switch (old_rec_type) {
 
4218
                        case XT_TAB_STATUS_DELETE:
 
4219
                        case XT_TAB_STATUS_DEL_CLEAN:
 
4220
                                rec_size = XT_REC_FIX_HEADER_SIZE;
 
4221
                                break;
 
4222
                        case XT_TAB_STATUS_FIXED:
 
4223
                        case XT_TAB_STATUS_FIX_CLEAN:
 
4224
                        case XT_TAB_STATUS_VARIABLE:
 
4225
                        case XT_TAB_STATUS_VAR_CLEAN:
 
4226
                                rec_size = XT_REC_FIX_HEADER_SIZE;
 
4227
                                break;
 
4228
                        case XT_TAB_STATUS_EXT_DLOG:
 
4229
                        case XT_TAB_STATUS_EXT_CLEAN:
 
4230
                                rec_size = XT_REC_EXT_HEADER_SIZE;
 
4231
                                break;
 
4232
                }
 
4233
        }
 
4234
 
 
4235
        set_removed:
 
4236
        if (XT_REC_IS_EXT_DLOG(old_rec_type)) {
 
4237
                /* {LOCK-EXT-REC} Lock, and read again to make sure that the
 
4238
                 * compactor does not change this record, while
 
4239
                 * we are removing it! */
 
4240
                xt_lock_mutex_ns(&tab->tab_db->db_co_ext_lock);
 
4241
                if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_EXT_HEADER_SIZE, ot->ot_row_rbuffer)) {
 
4242
                        xt_unlock_mutex_ns(&tab->tab_db->db_co_ext_lock);
 
4243
                        return FAILED;
 
4244
                }
 
4245
                xt_unlock_mutex_ns(&tab->tab_db->db_co_ext_lock);
 
4246
 
 
4247
        }
 
4248
 
 
4249
 
 
4250
        /* A record is "clean" deleted if the record was
 
4251
         * XT_TAB_STATUS_DELETE which was comitted.
 
4252
         * This makes sure that the record will still invalidate
 
4253
         * following records in a row.
 
4254
         *
 
4255
         * Example:
 
4256
         *
 
4257
         * 1. INSERT A ROW, then DELETE it, assume the sweeper is delayed.
 
4258
         *
 
4259
         * We now have the sequence row X --> del rec A --> valid rec B.
 
4260
         *
 
4261
         * 2. A SELECT can still find B. Assume it now goes to check
 
4262
         *    if the record is valid, it reads row X, and gets A.
 
4263
         *
 
4264
         * 3. Now the sweeper gets control and removes X, A and B.
 
4265
         *    It frees A with the clean bit.
 
4266
         *
 
4267
         * 4. Now the SELECT gets control and reads A. Normally a freed record
 
4268
         *    would be ignored, and it would go onto B, which would then
 
4269
         *    be considered valid (note, even after the free, the next
 
4270
         *    pointer is not affected).
 
4271
         *
 
4272
         * However, because the clean bit has been set, it will stop at A
 
4273
         * and consider B invalid (which is the desired result).
 
4274
         *
 
4275
         * NOTE: We assume it is not possible for A to be allocated and refer
 
4276
         * to B, because B is freed before A. This means that B may refer to
 
4277
         * A after the next allocation.
 
4278
         */
 
4279
 
 
4280
        xtOpSeqNo                       op_seq;
 
4281
        XTTabRecFreeDPtr        free_rec = (XTTabRecFreeDPtr) ot->ot_row_rbuffer;
 
4282
        xtRecordID                      prev_rec_id;
 
4283
        xtWord1                         new_rec_type = XT_TAB_STATUS_FREED | (clean_delete ? XT_TAB_STATUS_CLEANED_BIT : 0);
 
4284
        u_int                           status = XT_LOG_ENT_REC_REMOVED_BI;
 
4285
 
 
4286
        xt_lock_mutex_ns(&tab->tab_rec_lock);
 
4287
        free_rec->rf_rec_type_1 = new_rec_type;
 
4288
#ifdef XT_CLUSTER_FREE_RECORDS
 
4289
        XTTabCachePagePtr       page;
 
4290
        size_t                          offset;
 
4291
 
 
4292
        if (!tab->tab_recs.xt_tc_lock_page(ot->ot_rec_file, &page, rec_id, &offset, ot->ot_thread)) {
 
4293
                xt_unlock_mutex_ns(&tab->tab_rec_lock);
 
4294
                return FAILED;
 
4295
        }
 
4296
 
 
4297
        if (page->tcp_free_rec == 0xFFFF) {
 
4298
                /* There is no free record on this page. */
 
4299
                prev_rec_id = tab->tab_rec_free_id;
 
4300
                XT_SET_DISK_4(free_rec->rf_next_rec_id_4, prev_rec_id);
 
4301
                memcpy(page->tcp_data + offset, ot->ot_row_rbuffer, sizeof(XTTabRecFreeDRec));
 
4302
                tab->tab_rec_free_id = rec_id;
 
4303
        }
 
4304
        else {
 
4305
                XTTabRecFreeDPtr        prev_free_rec = (XTTabRecFreeDPtr) (page->tcp_data + page->tcp_free_rec);
 
4306
 
 
4307
                status = XT_LOG_ENT_REC_REMOVED_BI_L;
 
4308
                XT_COPY_DISK_4(free_rec->rf_next_rec_id_4, prev_free_rec->rf_next_rec_id_4);
 
4309
                memcpy(page->tcp_data + offset, ot->ot_row_rbuffer, sizeof(XTTabRecFreeDRec));
 
4310
 
 
4311
                /* The previous now references the next: */
 
4312
                XT_SET_DISK_4(prev_free_rec->rf_next_rec_id_4, rec_id);
 
4313
 
 
4314
                /* This is the record ID of the previous record: */
 
4315
                ASSERT_NS((page->tcp_free_rec % tab->tab_recs.tci_rec_size) == 0);
 
4316
                prev_rec_id = (page->tcp_page_idx * tab->tab_recs.tci_rows_per_page) + (page->tcp_free_rec / tab->tab_recs.tci_rec_size) + 1;
 
4317
                ASSERT_NS(prev_rec_id != rec_id);
 
4318
        }
 
4319
 
 
4320
        /* Link after this page in future! */
 
4321
        ASSERT_NS((offset % tab->tab_recs.tci_rec_size) == 0);
 
4322
        page->tcp_free_rec = offset;
 
4323
        tab->tab_recs.xt_tc_unlock_page(ot->ot_rec_file, page, &op_seq, ot->ot_thread);
 
4324
#else
 
4325
        prev_rec_id = tab->tab_rec_free_id;
 
4326
        XT_SET_DISK_4(free_rec->rf_next_rec_id_4, prev_rec_id);
 
4327
        if (!xt_tab_put_rec_data(ot, rec_id, sizeof(XTTabRecFreeDRec), ot->ot_row_rbuffer, &op_seq)) {
 
4328
                xt_unlock_mutex_ns(&tab->tab_rec_lock);
 
4329
                return FAILED;
 
4330
        }
 
4331
        tab->tab_rec_free_id = rec_id;
 
4332
        ASSERT_NS(tab->tab_rec_free_id < tab->tab_rec_eof_id);
 
4333
#endif
 
4334
        tab->tab_rec_fnum++;
 
4335
        xt_unlock_mutex_ns(&tab->tab_rec_lock);
 
4336
 
 
4337
        free_rec->rf_rec_type_1 = old_rec_type;
 
4338
        return xt_xlog_modify_table(tab->tab_id, status, op_seq, new_rec_type, prev_rec_id, rec_id, rec_size, ot->ot_row_rbuffer, ot->ot_thread);
 
4339
}
 
4340
 
 
4341
static xtRowID tab_new_row(XTOpenTablePtr ot, XTTableHPtr tab)
 
4342
{
 
4343
        xtRowID                 row_id;
 
4344
        xtOpSeqNo               op_seq;
 
4345
        xtRowID                 next_row_id = 0;
 
4346
        u_int                   status;
 
4347
 
 
4348
        xt_lock_mutex_ns(&tab->tab_row_lock);
 
4349
        if ((row_id = tab->tab_row_free_id)) {
 
4350
                status = XT_LOG_ENT_ROW_NEW_FL;
 
4351
 
 
4352
                if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, &next_row_id, ot->ot_thread)) {
 
4353
                        xt_unlock_mutex_ns(&tab->tab_row_lock);
 
4354
                        return 0;
 
4355
                }
 
4356
                tab->tab_row_free_id = next_row_id;
 
4357
                tab->tab_row_fnum--;
 
4358
        }
 
4359
        else {
 
4360
                status = XT_LOG_ENT_ROW_NEW;
 
4361
                row_id = tab->tab_row_eof_id;
 
4362
                if (row_id == 0xFFFFFFFF) {
 
4363
                        xt_unlock_mutex_ns(&tab->tab_row_lock);
 
4364
                        xt_register_xterr(XT_REG_CONTEXT, XT_ERR_MAX_ROW_COUNT);
 
4365
                        return 0;
 
4366
                }
 
4367
                if (((row_id - 1) % tab->tab_rows.tci_rows_per_page) == 0) {
 
4368
                        /* By fetching the page now, we avoid reading it later... */
 
4369
                        XTTabCachePagePtr       page;
 
4370
                        XTTabCacheSegPtr        seg;
 
4371
                        size_t                          poffset;
 
4372
 
 
4373
                        if (!tab->tab_rows.tc_fetch(ot->ot_row_file, row_id, &seg, &page, &poffset, FALSE, ot->ot_thread)) {
 
4374
                                xt_unlock_mutex_ns(&tab->tab_row_lock);
 
4375
                                return 0;
 
4376
                        }
 
4377
                        TAB_CAC_UNLOCK(&seg->tcs_lock, ot->ot_thread->t_id);
 
4378
                }
 
4379
                tab->tab_row_eof_id++;
 
4380
        }
 
4381
        op_seq = tab->tab_seq.ts_get_op_seq();
 
4382
        xt_unlock_mutex_ns(&tab->tab_row_lock);
 
4383
 
 
4384
        if (!xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, next_row_id, row_id, 0, NULL, ot->ot_thread))
 
4385
                return 0;
 
4386
 
 
4387
        XT_DISABLED_TRACE(("new row tx=%d row=%d\n", (int) ot->ot_thread->st_xact_data->xd_start_xn_id, (int) row_id));
 
4388
        ASSERT_NS(row_id);
 
4389
        return row_id;
 
4390
}
 
4391
 
 
4392
xtPublic xtBool xt_tab_get_row(register XTOpenTablePtr ot, xtRowID row_id, xtRecordID *var_rec_id)
 
4393
{
 
4394
        register XTTableHPtr    tab = ot->ot_table;
 
4395
 
 
4396
        (void) ASSERT_NS(sizeof(XTTabRowRefDRec) == 4);
 
4397
 
 
4398
        if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, var_rec_id, ot->ot_thread))
 
4399
                return FAILED;
 
4400
        return OK;
 
4401
}
 
4402
 
 
4403
xtPublic xtBool xt_tab_set_row(XTOpenTablePtr ot, u_int status, xtRowID row_id, xtRecordID var_rec_id)
 
4404
{
 
4405
        register XTTableHPtr    tab = ot->ot_table;
 
4406
        XTTabRowRefDRec                 row_buf;
 
4407
        xtOpSeqNo                               op_seq;
 
4408
 
 
4409
        ASSERT_NS(var_rec_id < tab->tab_rec_eof_id);
 
4410
        XT_SET_DISK_4(row_buf.rr_ref_id_4, var_rec_id);
 
4411
 
 
4412
        if (!tab->tab_rows.xt_tc_write(ot->ot_row_file, row_id, 0, sizeof(XTTabRowRefDRec), (xtWord1 *) &row_buf, &op_seq, TRUE, ot->ot_thread))
 
4413
                return FAILED;
 
4414
 
 
4415
        return xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, 0, row_id, sizeof(XTTabRowRefDRec), (xtWord1 *) &row_buf, ot->ot_thread);
 
4416
}
 
4417
 
 
4418
static void tab_free_row_on_fail(XTOpenTablePtr ot, XTTableHPtr tab, xtRowID row_id)
 
4419
{
 
4420
        XTExceptionRec e;
 
4421
 
 
4422
        tab_save_exception(&e);
 
4423
        xt_tab_free_row(ot, tab, row_id);
 
4424
        tab_restore_exception(&e);
 
4425
}
 
4426
 
 
4427
#ifdef CHECK_CLUSTER_EFFICIENCY
/*
 * Debug counters, updated when a record is allocated from the free
 * list: how often the next free record was on the same page as the
 * record allocated, and how often it was on a different page.
 */
u_int		next_on_page = 0;
u_int		next_off_page = 0;
#endif
 
4431
 
 
4432
static xtBool tab_add_record(XTOpenTablePtr ot, XTTabRecInfoPtr rec_info, u_int status)
 
4433
{
 
4434
        register XTTableHPtr    tab = ot->ot_table;
 
4435
        XTThreadPtr                             thread = ot->ot_thread;
 
4436
        xtRecordID                              rec_id;
 
4437
        xtLogID                                 log_id;
 
4438
        xtLogOffset                             log_offset;
 
4439
        xtOpSeqNo                               op_seq;
 
4440
        xtRecordID                              next_rec_id = 0;
 
4441
 
 
4442
        if (rec_info->ri_ext_rec) {
 
4443
                /* Determine where the overflow will go... */
 
4444
                if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
 
4445
                        if (!xt_tab_get_ext_slot(tab, &log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data)))
 
4446
                                return FAILED;
 
4447
                }
 
4448
                else {
 
4449
                        if (!thread->st_dlog_buf.dlb_get_log_offset(&log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data), ot->ot_thread))
 
4450
                                return FAILED;
 
4451
                }
 
4452
                XT_SET_LOG_REF(rec_info->ri_ext_rec, log_id, log_offset);
 
4453
        }
 
4454
 
 
4455
        /* Write the record to disk: */
 
4456
        xt_lock_mutex_ns(&tab->tab_rec_lock);
 
4457
        if ((rec_id = tab->tab_rec_free_id)) {
 
4458
                ASSERT_NS(rec_id < tab->tab_rec_eof_id);
 
4459
#ifdef XT_CLUSTER_FREE_RECORDS
 
4460
                XTTabCachePagePtr       page;
 
4461
                size_t                          offset;
 
4462
                XTTabRecFreeDPtr        free_block;
 
4463
 
 
4464
                if (!tab->tab_recs.xt_tc_lock_page(ot->ot_rec_file, &page, rec_id, &offset, ot->ot_thread)) {
 
4465
                        xt_unlock_mutex_ns(&tab->tab_rec_lock);
 
4466
                        return FAILED;
 
4467
                }
 
4468
 
 
4469
                /* Read the data from the old record: */
 
4470
                free_block = (XTTabRecFreeDPtr) (page->tcp_data + offset);
 
4471
                next_rec_id = XT_GET_DISK_4(free_block->rf_next_rec_id_4);
 
4472
 
 
4473
#ifdef CHECK_CLUSTER_EFFICIENCY
 
4474
                xtRecordID      dbg_rec_id;
 
4475
 
 
4476
                dbg_rec_id = next_rec_id-1;
 
4477
                if (page->tcp_page_idx == dbg_rec_id / tab->tab_recs.tci_rows_per_page)
 
4478
                        next_on_page++;
 
4479
                else
 
4480
                        next_off_page++;
 
4481
                if ((next_on_page % 1000) == 0)
 
4482
                        printf("Next on: %d off: %d\n", next_on_page, next_off_page);
 
4483
#endif
 
4484
 
 
4485
                if (page->tcp_free_rec == offset) {
 
4486
                        /* Adjust the free record: */
 
4487
                        xtRecordID      tmp_rec_id;
 
4488
 
 
4489
                        /* Check if the next record is on the same page: */
 
4490
                        tmp_rec_id = next_rec_id-1;
 
4491
                        if (page->tcp_page_idx == tmp_rec_id / tab->tab_recs.tci_rows_per_page)
 
4492
                                /* This is the next free record on this page: */
 
4493
                                page->tcp_free_rec = (xtWord2) ((tmp_rec_id % tab->tab_recs.tci_rows_per_page) * tab->tab_recs.tci_rec_size);
 
4494
                        else
 
4495
                                /* Not on the same page, so there are no more free records on this page: */
 
4496
                                page->tcp_free_rec = 0xFFFF;
 
4497
                }
 
4498
 
 
4499
                /* Write the new record: */
 
4500
                memcpy(free_block, rec_info->ri_fix_rec_buf, rec_info->ri_rec_buf_size);
 
4501
                tab->tab_recs.xt_tc_unlock_page(ot->ot_rec_file, page, &op_seq, ot->ot_thread);
 
4502
#else
 
4503
                XTTabRecFreeDRec free_block;
 
4504
 
 
4505
                if (!xt_tab_get_rec_data(ot, rec_id, sizeof(XTTabRecFreeDRec), (xtWord1 *) &free_block)) {
 
4506
                        xt_unlock_mutex_ns(&tab->tab_rec_lock);
 
4507
                        return FAILED;
 
4508
                }
 
4509
                next_rec_id = XT_GET_DISK_4(free_block.rf_next_rec_id_4);
 
4510
                if (!xt_tab_put_rec_data(ot, rec_id, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf, &op_seq)) {
 
4511
                        xt_unlock_mutex_ns(&tab->tab_rec_lock);
 
4512
                        return FAILED;
 
4513
                }
 
4514
#endif
 
4515
                tab->tab_rec_free_id = next_rec_id;                     
 
4516
                tab->tab_rec_fnum--;
 
4517
                
 
4518
                /* XT_LOG_ENT_UPDATE --> XT_LOG_ENT_UPDATE_FL */
 
4519
                /* XT_LOG_ENT_INSERT --> XT_LOG_ENT_INSERT_FL */
 
4520
                /* XT_LOG_ENT_DELETE --> XT_LOG_ENT_DELETE_FL */
 
4521
                status += 2;
 
4522
        }
 
4523
        else {
 
4524
                xtBool read;
 
4525
 
 
4526
                rec_id = tab->tab_rec_eof_id;
 
4527
                tab->tab_rec_eof_id++;
 
4528
 
 
4529
                /* If we are writing to a new page (at the EOF)
 
4530
                 * then we do not need to read the page from the
 
4531
                 * file because it is new.
 
4532
                 *
 
4533
                 * Note that this only works because we are holding
 
4534
                 * a lock on the record file.
 
4535
                 */
 
4536
                read = ((rec_id - 1) % tab->tab_recs.tci_rows_per_page) != 0;
 
4537
 
 
4538
                if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf, &op_seq, read, ot->ot_thread)) {
 
4539
                        xt_unlock_mutex_ns(&tab->tab_rec_lock);
 
4540
                        return FAILED;
 
4541
                }
 
4542
        }
 
4543
        xt_unlock_mutex_ns(&tab->tab_rec_lock);
 
4544
 
 
4545
        if (!xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, next_rec_id, rec_id,  rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf, ot->ot_thread))
 
4546
                return FAILED;
 
4547
 
 
4548
        if (rec_info->ri_ext_rec) {
 
4549
                /* Write the log buffer overflow: */            
 
4550
                rec_info->ri_log_buf->er_status_1 = XT_LOG_ENT_EXT_REC_OK;
 
4551
                XT_SET_DISK_4(rec_info->ri_log_buf->er_data_size_4, rec_info->ri_log_data_size);
 
4552
                XT_SET_DISK_4(rec_info->ri_log_buf->er_tab_id_4, tab->tab_id);
 
4553
                XT_SET_DISK_4(rec_info->ri_log_buf->er_rec_id_4, rec_id);
 
4554
                if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
 
4555
                        if (!xt_tab_save_ext_record(tab, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info->ri_log_data_size, (xtWord1 *) rec_info->ri_log_buf))
 
4556
                                return FAILED;
 
4557
                }
 
4558
                else {
 
4559
                        if (!thread->st_dlog_buf.dlb_append_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info->ri_log_data_size, (xtWord1 *) rec_info->ri_log_buf, ot->ot_thread)) {
 
4560
                                /* Failed to write the overflow, free the record allocated above: */
 
4561
                                return FAILED;
 
4562
                        }
 
4563
                }
 
4564
        }
 
4565
 
 
4566
        XT_DISABLED_TRACE(("new rec tx=%d val=%d\n", (int) thread->st_xact_data->xd_start_xn_id, (int) rec_id));
 
4567
        rec_info->ri_rec_id = rec_id;
 
4568
        return OK;
 
4569
}
 
4570
 
 
4571
static void tab_delete_record_on_fail(XTOpenTablePtr ot, xtRowID row_id, xtRecordID rec_id, XTTabRecHeadDPtr row_ptr, xtWord1 *rec_data, u_int key_count)
 
4572
{
 
4573
        XTExceptionRec  e;
 
4574
        xtBool                  log_err = TRUE;
 
4575
        XTTabRecInfoRec rec_info;
 
4576
 
 
4577
        tab_save_exception(&e);
 
4578
        
 
4579
        if (e.e_xt_err == XT_ERR_DUPLICATE_KEY || 
 
4580
                e.e_xt_err == XT_ERR_DUPLICATE_FKEY) {
 
4581
                /* If the error does not cause rollback, then we will ignore the
 
4582
                 * error if an error occurs in the UNDO!
 
4583
                 */
 
4584
                log_err = FALSE;
 
4585
                tab_restore_exception(&e);
 
4586
        }
 
4587
        if (key_count) {
 
4588
                XTIndexPtr      *ind;
 
4589
 
 
4590
                ind = ot->ot_table->tab_dic.dic_keys;
 
4591
                for (u_int i=0; i<key_count; i++, ind++) {
 
4592
                        if (!xt_idx_delete(ot, *ind, rec_id, rec_data)) {
 
4593
                                if (log_err)
 
4594
                                        xt_log_and_clear_exception_ns();
 
4595
                        }
 
4596
                }
 
4597
        }
 
4598
 
 
4599
        if (row_ptr->tr_rec_type_1 == XT_TAB_STATUS_EXT_DLOG || row_ptr->tr_rec_type_1 == XT_TAB_STATUS_EXT_CLEAN)
 
4600
                tab_free_ext_record_on_fail(ot, rec_id, (XTTabRecExtDPtr) row_ptr, log_err);
 
4601
 
 
4602
        rec_info.ri_fix_rec_buf = (XTTabRecFixDPtr) ot->ot_row_wbuffer;
 
4603
        rec_info.ri_rec_buf_size = offsetof(XTTabRecFixDRec, rf_data);
 
4604
        rec_info.ri_ext_rec = NULL;
 
4605
        rec_info.ri_fix_rec_buf->tr_rec_type_1 = XT_TAB_STATUS_DELETE;
 
4606
        rec_info.ri_fix_rec_buf->tr_stat_id_1 = 0;
 
4607
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
 
4608
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, rec_id);
 
4609
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, ot->ot_thread->st_xact_data->xd_start_xn_id);
 
4610
 
 
4611
        if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_DELETE))
 
4612
                goto failed;
 
4613
 
 
4614
        if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
 
4615
                goto failed;
 
4616
 
 
4617
        if (log_err)
 
4618
                tab_restore_exception(&e);
 
4619
        return;
 
4620
 
 
4621
        failed:
 
4622
        if (log_err)
 
4623
                xt_log_and_clear_exception_ns();
 
4624
        else
 
4625
                tab_restore_exception(&e);
 
4626
}
 
4627
 
 
4628
/*
 
4629
 * Wait until all the variations between the start of the chain, and
 
4630
 * the given record have been rolled-back.
 
4631
 * If any is committed, register a locked error, and return FAILED.
 
4632
 */
 
4633
static xtBool tab_wait_for_rollback(XTOpenTablePtr ot, xtRowID row_id, xtRecordID commit_rec_id)
 
4634
{
 
4635
        register XTTableHPtr    tab = ot->ot_table;
 
4636
        xtRecordID                              var_rec_id;
 
4637
        XTTabRecHeadDRec                var_head;
 
4638
        xtXactID                                xn_id;
 
4639
        xtRecordID                              invalid_rec = 0;
 
4640
        XTXactWaitRec                   xw;
 
4641
 
 
4642
        retry:
 
4643
        if (!xt_tab_get_row(ot, row_id, &var_rec_id))
 
4644
                return FAILED;
 
4645
 
 
4646
        while (var_rec_id != commit_rec_id) {
 
4647
                if (!var_rec_id)
 
4648
                        goto locked;
 
4649
                if (!xt_tab_get_rec_data(ot, var_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &var_head))
 
4650
                        return FAILED;
 
4651
                if (XT_REC_IS_CLEAN(var_head.tr_rec_type_1))
 
4652
                        goto locked;
 
4653
                if (XT_REC_IS_FREE(var_head.tr_rec_type_1))
 
4654
                        /* Should not happen: */
 
4655
                        goto record_invalid;
 
4656
                xn_id = XT_GET_DISK_4(var_head.tr_xact_id_4);
 
4657
                switch (xt_xn_status(ot, xn_id, var_rec_id)) {
 
4658
                        case XT_XN_VISIBLE:
 
4659
                        case XT_XN_NOT_VISIBLE:
 
4660
                                goto locked;
 
4661
                        case XT_XN_ABORTED:
 
4662
                                /* Ingore the record, it will be removed. */
 
4663
                                break;
 
4664
                        case XT_XN_MY_UPDATE:
 
4665
                                /* Should not happen: */
 
4666
                                goto locked;
 
4667
                        case XT_XN_OTHER_UPDATE:
 
4668
                                /* Wait for the transaction to commit or rollback: */
 
4669
                                XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
 
4670
                                xw.xw_xn_id = xn_id;
 
4671
                                if (!xt_xn_wait_for_xact(ot->ot_thread, &xw, NULL)) {
 
4672
                                        XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
 
4673
                                        return FAILED;
 
4674
                                }
 
4675
                                XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
 
4676
                                goto retry;
 
4677
                        case XT_XN_REREAD:
 
4678
                                goto record_invalid;
 
4679
                }
 
4680
                var_rec_id = XT_GET_DISK_4(var_head.tr_prev_rec_id_4);
 
4681
        }
 
4682
        return OK;
 
4683
 
 
4684
        locked:
 
4685
        xt_register_xterr(XT_REG_CONTEXT, XT_ERR_RECORD_CHANGED);
 
4686
        return FAILED;
 
4687
        
 
4688
        record_invalid:
 
4689
        /* Prevent an infinite loop due to a bad record: */
 
4690
        if (invalid_rec != var_rec_id) {
 
4691
                var_rec_id = invalid_rec;
 
4692
                goto retry;
 
4693
        }
 
4694
        /* The record is invalid, it will be "overwritten"... */
 
4695
#ifdef XT_CRASH_DEBUG
 
4696
        /* Should not happen! */
 
4697
        xt_crash_me();
 
4698
#endif
 
4699
        return OK;
 
4700
}
 
4701
 
 
4702
/* Check if a record may be visible:
 
4703
 * Return TRUE of the record may be visible now.
 
4704
 * Return XT_MAYBE if the record may be visible in the future (set out_xn_id).
 
4705
 * Return FALSE of the record is not valid (freed or is a delete record).
 
4706
 * Return XT_ERR if an error occurred.
 
4707
 */
 
4708
xtPublic int xt_tab_maybe_committed(XTOpenTablePtr ot, xtRecordID rec_id, xtXactID *out_xn_id, xtRowID *out_rowid, xtBool *out_updated)
 
4709
{
 
4710
        XTTabRecHeadDRec                rec_head;
 
4711
        xtXactID                                rec_xn_id = 0;
 
4712
        xtBool                                  wait = FALSE;
 
4713
        xtXactID                                wait_xn_id = 0;
 
4714
        xtRowID                                 row_id;
 
4715
        xtRecordID                              var_rec_id;
 
4716
        xtXactID                                xn_id;
 
4717
        register XTTableHPtr    tab;
 
4718
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
4719
        char                                    t_buf[500];
 
4720
        int                                             len;
 
4721
        char                                    *t_type = "C";
 
4722
#endif
 
4723
        xtRecordID                              invalid_rec = 0;
 
4724
 
 
4725
        reread:
 
4726
        if (!xt_tab_get_rec_data(ot, rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &rec_head))
 
4727
                return XT_ERR;
 
4728
 
 
4729
        if (XT_REC_NOT_VALID(rec_head.tr_rec_type_1))
 
4730
                return FALSE;
 
4731
 
 
4732
        if (!XT_REC_IS_CLEAN(rec_head.tr_rec_type_1)) {
 
4733
                rec_xn_id = XT_GET_DISK_4(rec_head.tr_xact_id_4);
 
4734
                switch (xt_xn_status(ot, rec_xn_id, rec_id)) {
 
4735
                        case XT_XN_VISIBLE:
 
4736
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
4737
                                t_type="V";
 
4738
#endif
 
4739
                                break;
 
4740
                        case XT_XN_NOT_VISIBLE:
 
4741
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
4742
                                t_type="NV";
 
4743
#endif
 
4744
                                break;
 
4745
                        case XT_XN_ABORTED:
 
4746
                                return FALSE;
 
4747
                        case XT_XN_MY_UPDATE:
 
4748
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
4749
                                t_type="My-Upd";
 
4750
#endif
 
4751
                                break;
 
4752
                        case XT_XN_OTHER_UPDATE:
 
4753
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
4754
                                t_type="Wait";
 
4755
#endif
 
4756
                                wait = TRUE;
 
4757
                                wait_xn_id = rec_xn_id;
 
4758
                                break;
 
4759
                        case XT_XN_REREAD:
 
4760
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
4761
                                t_type="Re-read";
 
4762
#endif
 
4763
                                /* Avoid infinite loop: */
 
4764
                                if (invalid_rec == rec_id) {
 
4765
                                        /* Should not happen! */
 
4766
#ifdef XT_CRASH_DEBUG
 
4767
                                        /* Generate a core dump! */
 
4768
                                        xt_crash_me();
 
4769
#endif
 
4770
                                        return FALSE;
 
4771
                                }
 
4772
                                invalid_rec = rec_id;
 
4773
                                goto reread;
 
4774
                }
 
4775
        }
 
4776
 
 
4777
        /* Follow the variation chain until we come to this record.
 
4778
         * If it is not the first visible variation then
 
4779
         * it is not visible at all. If it in not found on the
 
4780
         * variation chain, it is also not visible.
 
4781
         */
 
4782
        row_id = XT_GET_DISK_4(rec_head.tr_row_id_4);
 
4783
 
 
4784
        tab = ot->ot_table;
 
4785
        XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
 
4786
 
 
4787
        invalid_rec = 0;
 
4788
        retry:
 
4789
        if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
 
4790
                goto failed;
 
4791
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
4792
        len = sprintf(t_buf, "dup row=%d", (int) row_id);
 
4793
#endif
 
4794
        while (var_rec_id != rec_id) {
 
4795
                if (!var_rec_id)
 
4796
                        goto not_found;
 
4797
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
4798
                if (len <= 450)
 
4799
                        len += sprintf(t_buf+len, " -> %d", (int) var_rec_id);
 
4800
#endif
 
4801
                if (!xt_tab_get_rec_data(ot, var_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &rec_head))
 
4802
                        goto failed;
 
4803
                /* All clean records are visible, by all transactions: */
 
4804
                if (XT_REC_IS_CLEAN(rec_head.tr_rec_type_1))
 
4805
                        goto not_found;
 
4806
 
 
4807
                if (XT_REC_IS_FREE(rec_head.tr_rec_type_1)) {
 
4808
                        /* Should not happen: */
 
4809
                        if (invalid_rec != var_rec_id) {
 
4810
                                var_rec_id = invalid_rec;
 
4811
                                goto retry;
 
4812
                        }
 
4813
                        /* Assume end of list. */
 
4814
#ifdef XT_CRASH_DEBUG
 
4815
                        /* Should not happen! */
 
4816
                        xt_crash_me();
 
4817
#endif
 
4818
                        goto not_found;
 
4819
                }
 
4820
 
 
4821
                xn_id = XT_GET_DISK_4(rec_head.tr_xact_id_4);
 
4822
                switch (xt_xn_status(ot, xn_id, var_rec_id)) {
 
4823
                        case XT_XN_VISIBLE:
 
4824
                        case XT_XN_NOT_VISIBLE:
 
4825
                                goto not_found;
 
4826
                        case XT_XN_ABORTED:
 
4827
                                /* Ingore the record, it will be removed. */
 
4828
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
4829
                                if (len <= 450)
 
4830
                                        len += sprintf(t_buf+len, "(T%d-A)", (int) xn_id);
 
4831
#endif
 
4832
                                break;
 
4833
                        case XT_XN_MY_UPDATE:
 
4834
                                goto not_found;
 
4835
                        case XT_XN_OTHER_UPDATE:
 
4836
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
4837
                                if (len <= 450)
 
4838
                                        len += sprintf(t_buf+len, "(T%d-wait)", (int) xn_id);
 
4839
#endif
 
4840
                                /* Wait for this update to commit or abort: */
 
4841
                                if (!wait) {
 
4842
                                        wait = TRUE;
 
4843
                                        wait_xn_id = xn_id;
 
4844
                                }
 
4845
                                break;
 
4846
                        case XT_XN_REREAD:
 
4847
                                if (invalid_rec != var_rec_id) {
 
4848
                                        var_rec_id = invalid_rec;
 
4849
                                        goto retry;
 
4850
                                }
 
4851
                                /* Assume end of list. */
 
4852
#ifdef XT_CRASH_DEBUG
 
4853
                                /* Should not happen! */
 
4854
                                xt_crash_me();
 
4855
#endif
 
4856
                                goto not_found;
 
4857
                }
 
4858
                var_rec_id = XT_GET_DISK_4(rec_head.tr_prev_rec_id_4);
 
4859
        }
 
4860
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
4861
        if (len <= 450)
 
4862
                sprintf(t_buf+len, " -> %d(T%d-%s)\n", (int) var_rec_id, (int) rec_xn_id, t_type);
 
4863
        else
 
4864
                sprintf(t_buf+len, " ...(T%d-%s)\n", (int) rec_xn_id, t_type);
 
4865
#endif
 
4866
 
 
4867
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
 
4868
        if (wait) {
 
4869
                *out_xn_id = wait_xn_id;
 
4870
                return XT_MAYBE;
 
4871
        }
 
4872
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
4873
        xt_ttracef(thread, "%s", t_buf);
 
4874
#endif
 
4875
        if (out_rowid) {
 
4876
                *out_rowid = row_id;
 
4877
                *out_updated = (rec_xn_id == ot->ot_thread->st_xact_data->xd_start_xn_id);
 
4878
        }
 
4879
        return TRUE;
 
4880
 
 
4881
        not_found:
 
4882
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
 
4883
        return FALSE;
 
4884
 
 
4885
        failed:
 
4886
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
 
4887
        return XT_ERR;
 
4888
}
 
4889
 
 
4890
xtPublic xtBool xt_tab_new_record(XTOpenTablePtr ot, xtWord1 *rec_buf)
 
4891
{
 
4892
        register XTTableHPtr    tab = ot->ot_table;
 
4893
        register XTThreadPtr    self = ot->ot_thread;
 
4894
        XTTabRecInfoRec                 rec_info;
 
4895
        xtRowID                                 row_id;
 
4896
        u_int                                   idx_cnt = 0;
 
4897
        XTIndexPtr                              *ind;
 
4898
 
 
4899
        if (!myxt_store_row(ot, &rec_info, (char *) rec_buf))
 
4900
                goto failed_0;
 
4901
 
 
4902
        /* Get a new row ID: */
 
4903
        if (!(row_id = tab_new_row(ot, tab)))
 
4904
                goto failed_0;
 
4905
 
 
4906
        rec_info.ri_fix_rec_buf->tr_stat_id_1 = ot->ot_update_id;
 
4907
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
 
4908
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, 0);
 
4909
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, self->st_xact_data->xd_start_xn_id);
 
4910
 
 
4911
        /* Note, it is important that this record is written BEFORE the row
 
4912
         * due to the problem distributed here [(5)]
 
4913
         */
 
4914
        if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_INSERT))
 
4915
                goto failed_1;
 
4916
 
 
4917
#ifdef TRACE_VARIATIONS
 
4918
        xt_ttracef(self, "insert: row=%d rec=%d T%d\n", (int) row_id, (int) rec_info.ri_rec_id, (int) self->st_xact_data->xd_start_xn_id);
 
4919
#endif
 
4920
        if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
 
4921
                goto failed_1;
 
4922
        XT_DISABLED_TRACE(("set new tx=%d row=%d rec=%d\n", (int) self->st_xact_data->xd_start_xn_id, (int) row_id, (int) rec_info.ri_rec_id));
 
4923
 
 
4924
        /* Add the index references: */
 
4925
        for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
 
4926
                if (!xt_idx_insert(ot, *ind, 0, rec_info.ri_rec_id, rec_buf, NULL, FALSE)) {
 
4927
                        ot->ot_err_index_no = (*ind)->mi_index_no;
 
4928
                        goto failed_2;
 
4929
                }
 
4930
        }
 
4931
 
 
4932
        /* Do the foreign key stuff: */
 
4933
        if (ot->ot_table->tab_dic.dic_table->dt_fkeys.size() > 0) {
 
4934
                if (!ot->ot_table->tab_dic.dic_table->insertRow(ot, rec_buf))
 
4935
                        goto failed_2;
 
4936
        }
 
4937
 
 
4938
        self->st_statistics.st_row_insert++;
 
4939
        return OK;      
 
4940
 
 
4941
        failed_2:
 
4942
        /* Once the row has been inserted, it is to late to remove it!
 
4943
         * Now all we can do is delete it!
 
4944
         */
 
4945
        tab_delete_record_on_fail(ot, row_id, rec_info.ri_rec_id, (XTTabRecHeadDPtr) rec_info.ri_fix_rec_buf, rec_buf, idx_cnt);
 
4946
        goto failed_0;
 
4947
 
 
4948
        failed_1:
 
4949
        tab_free_row_on_fail(ot, tab, row_id);
 
4950
 
 
4951
        failed_0:
 
4952
        return FAILED;
 
4953
}
 
4954
 
 
4955
/* We cannot remove a change we have made to a row while a transaction
 
4956
 * is running, so we have to undo what we have done by
 
4957
 * overwriting the record we just created with
 
4958
 * the before image!
 
4959
 */
 
4960
static xtBool tab_overwrite_record_on_fail(XTOpenTablePtr ot, XTTabRecInfoPtr rec_info, xtWord1 *before_buf, xtWord1 *after_buf, u_int idx_cnt)
 
4961
{
 
4962
        register XTTableHPtr    tab = ot->ot_table;
 
4963
        XTTabRecHeadDRec                prev_rec_head;
 
4964
        u_int                                   i;
 
4965
        XTIndexPtr                              *ind;
 
4966
        XTThreadPtr                             thread = ot->ot_thread;
 
4967
        xtLogID                                 log_id;
 
4968
        xtLogOffset                             log_offset;
 
4969
        xtRecordID                              rec_id = rec_info->ri_rec_id;
 
4970
 
 
4971
        /* Remove the new extended record: */
 
4972
        if (rec_info->ri_ext_rec)
 
4973
                tab_free_ext_record_on_fail(ot, rec_id, (XTTabRecExtDPtr) rec_info->ri_fix_rec_buf, TRUE);
 
4974
 
 
4975
        /* Undo index entries of the new record: */
 
4976
        if (after_buf) {
 
4977
                for (i=0, ind=tab->tab_dic.dic_keys; i<idx_cnt; i++, ind++) {
 
4978
                        if (!xt_idx_delete(ot, *ind, rec_id, after_buf))
 
4979
                                return FAILED;
 
4980
                }
 
4981
        }
 
4982
 
 
4983
        memcpy(&prev_rec_head, rec_info->ri_fix_rec_buf, sizeof(XTTabRecHeadDRec));
 
4984
 
 
4985
        if (!before_buf) {
 
4986
                /* Can happen if the delete was called from some cascaded action.
 
4987
                 * And this is better than a crash...
 
4988
                 *
 
4989
                 * TODO: to make sure the change will not be applied in case the 
 
4990
                 * transaction will be commited, we'd need to add a log entry to 
 
4991
                 * restore the record like it's done for top-level operation. In 
 
4992
                 * order to do this we'd need to read the before-image of the 
 
4993
                 * record before modifying it.
 
4994
                 */
 
4995
                if (!ot->ot_thread->t_exception.e_xt_err)
 
4996
                        xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_BEFORE_IMAGE);
 
4997
                return FAILED;
 
4998
        }
 
4999
 
 
5000
        /* Restore the previous record! */
 
5001
        if (!myxt_store_row(ot, rec_info, (char *) before_buf))
 
5002
                return FAILED;
 
5003
 
 
5004
        memcpy(rec_info->ri_fix_rec_buf, &prev_rec_head, sizeof(XTTabRecHeadDRec));
 
5005
 
 
5006
        if (rec_info->ri_ext_rec) {
 
5007
                /* Determine where the overflow will go... */
 
5008
                if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
 
5009
                        if (!xt_tab_get_ext_slot(tab, &log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data)))
 
5010
                                return FAILED;
 
5011
                }
 
5012
                else {
 
5013
                        if (!thread->st_dlog_buf.dlb_get_log_offset(&log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data), ot->ot_thread))
 
5014
                                return FAILED;
 
5015
                }
 
5016
                XT_SET_LOG_REF(rec_info->ri_ext_rec, log_id, log_offset);
 
5017
        }
 
5018
 
 
5019
        if (!xt_tab_put_log_op_rec_data(ot, XT_LOG_ENT_REC_MODIFIED, 0, rec_id, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf))
 
5020
                return FAILED;
 
5021
 
 
5022
        if (rec_info->ri_ext_rec) {
 
5023
                /* Write the log buffer overflow: */            
 
5024
                rec_info->ri_log_buf->er_status_1 = XT_LOG_ENT_EXT_REC_OK;
 
5025
                XT_SET_DISK_4(rec_info->ri_log_buf->er_data_size_4, rec_info->ri_log_data_size);
 
5026
                XT_SET_DISK_4(rec_info->ri_log_buf->er_tab_id_4, tab->tab_id);
 
5027
                XT_SET_DISK_4(rec_info->ri_log_buf->er_rec_id_4, rec_id);
 
5028
 
 
5029
                if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
 
5030
                        if (!xt_tab_save_ext_record(tab, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info->ri_log_data_size, (xtWord1 *) rec_info->ri_log_buf))
 
5031
                                return FAILED;
 
5032
                }
 
5033
                else {
 
5034
                        if (!thread->st_dlog_buf.dlb_append_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info->ri_log_data_size, (xtWord1 *) rec_info->ri_log_buf, ot->ot_thread))
 
5035
                                return FAILED;
 
5036
                }
 
5037
        }
 
5038
 
 
5039
        /* Put the index entries back: */
 
5040
        for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
 
5041
                if (!xt_idx_insert(ot, *ind, 0, rec_id, before_buf, after_buf, TRUE))
 
5042
                        /* Incomplete restore, there will be a rollback... */
 
5043
                        return FAILED;
 
5044
        }
 
5045
 
 
5046
        return OK;
 
5047
}
 
5048
 
 
5049
/*
 
5050
 * GOTCHA:
 
5051
 * If a transaction updates the same record over again, we should update
 
5052
 * in place. This prevents producing unnecessary variations!
 
5053
 */
 
5054
static xtBool tab_overwrite_record(XTOpenTablePtr ot, xtWord1 *before_buf, xtWord1 *after_buf)
 
5055
{
 
5056
        register XTTableHPtr    tab = ot->ot_table;
 
5057
        xtRowID                                 row_id = ot->ot_curr_row_id;
 
5058
        register XTThreadPtr    self = ot->ot_thread;
 
5059
        xtRecordID                              rec_id = ot->ot_curr_rec_id;
 
5060
        XTTabRecExtDRec                 prev_rec_head;
 
5061
        XTTabRecInfoRec                 rec_info;
 
5062
        u_int                                   idx_cnt = 0, i;
 
5063
        XTIndexPtr                              *ind;
 
5064
        xtLogID                                 log_id;
 
5065
        xtLogOffset                             log_offset;
 
5066
        xtBool                                  prev_ext_rec;
 
5067
 
 
5068
        if (!myxt_store_row(ot, &rec_info, (char *) after_buf))
 
5069
                goto failed_0;
 
5070
 
 
5071
        /* Read before we overwrite! */
 
5072
        if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_EXT_HEADER_SIZE, (xtWord1 *) &prev_rec_head))
 
5073
                goto failed_0;
 
5074
 
 
5075
        prev_ext_rec = prev_rec_head.tr_rec_type_1 & XT_TAB_STATUS_EXT_DLOG;
 
5076
 
 
5077
        if (rec_info.ri_ext_rec) {
 
5078
                /* Determine where the overflow will go... */
 
5079
                if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
 
5080
                        if (!xt_tab_get_ext_slot(tab, &log_id, &log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info.ri_log_data_size))
 
5081
                                goto failed_0;
 
5082
                }
 
5083
                else {
 
5084
                        if (!self->st_dlog_buf.dlb_get_log_offset(&log_id, &log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info.ri_log_data_size, ot->ot_thread))
 
5085
                                goto failed_0;
 
5086
                }
 
5087
                XT_SET_LOG_REF(rec_info.ri_ext_rec, log_id, log_offset);
 
5088
        }
 
5089
 
 
5090
        rec_info.ri_fix_rec_buf->tr_stat_id_1 = ot->ot_update_id;
 
5091
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
 
5092
        XT_COPY_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, prev_rec_head.tr_prev_rec_id_4);
 
5093
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, self->st_xact_data->xd_start_xn_id);
 
5094
 
 
5095
        /* Remove the index references, that have changed: */
 
5096
        for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
 
5097
                if (!xt_idx_delete(ot, *ind, rec_id, before_buf)) {
 
5098
                        goto failed_0;
 
5099
                }
 
5100
        }
 
5101
 
 
5102
#ifdef TRACE_VARIATIONS
 
5103
        xt_ttracef(self, "overwrite: row=%d rec=%d T%d\n", (int) row_id, (int) rec_id, (int) self->st_xact_data->xd_start_xn_id);
 
5104
#endif
 
5105
        /* Overwrite the record: */
 
5106
        if (!xt_tab_put_log_op_rec_data(ot, XT_LOG_ENT_REC_MODIFIED, 0, rec_id, rec_info.ri_rec_buf_size, (xtWord1 *) rec_info.ri_fix_rec_buf))
 
5107
                goto failed_0;
 
5108
 
 
5109
        if (rec_info.ri_ext_rec) {
 
5110
                /* Write the log buffer overflow: */            
 
5111
                rec_info.ri_log_buf->er_status_1 = XT_LOG_ENT_EXT_REC_OK;
 
5112
                XT_SET_DISK_4(rec_info.ri_log_buf->er_data_size_4, rec_info.ri_log_data_size);
 
5113
                XT_SET_DISK_4(rec_info.ri_log_buf->er_tab_id_4, tab->tab_id);
 
5114
                XT_SET_DISK_4(rec_info.ri_log_buf->er_rec_id_4, rec_id);
 
5115
                if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
 
5116
                        if (!xt_tab_save_ext_record(tab, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info.ri_log_data_size, (xtWord1 *) rec_info.ri_log_buf))
 
5117
                                goto failed_1;
 
5118
                }
 
5119
                else {
 
5120
                        if (!self->st_dlog_buf.dlb_append_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info.ri_log_data_size, (xtWord1 *) rec_info.ri_log_buf, ot->ot_thread))
 
5121
                                goto failed_1;
 
5122
                }
 
5123
        }
 
5124
 
 
5125
        /* Add the index references that have changed: */
 
5126
        for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
 
5127
                if (!xt_idx_insert(ot, *ind, 0, rec_id, after_buf, before_buf, FALSE)) {
 
5128
                        ot->ot_err_index_no = (*ind)->mi_index_no;
 
5129
                        goto failed_2;
 
5130
                }
 
5131
        }
 
5132
 
 
5133
        /* Do the foreign key stuff: */
 
5134
        if (ot->ot_table->tab_dic.dic_table->dt_trefs || ot->ot_table->tab_dic.dic_table->dt_fkeys.size() > 0) {
 
5135
                if (!ot->ot_table->tab_dic.dic_table->updateRow(ot, before_buf, after_buf))
 
5136
                        goto failed_2;
 
5137
        }
 
5138
        
 
5139
        /* Delete the previous overflow area: */
 
5140
        if (prev_ext_rec)
 
5141
                tab_free_ext_record_on_fail(ot, rec_id, &prev_rec_head, TRUE);
 
5142
 
 
5143
        return OK;
 
5144
 
 
5145
        failed_2:
 
5146
        /* Remove the new extended record: */
 
5147
        if (rec_info.ri_ext_rec)
 
5148
                tab_free_ext_record_on_fail(ot, rec_id, (XTTabRecExtDPtr) rec_info.ri_fix_rec_buf, TRUE);
 
5149
 
 
5150
        /* Restore the previous record! */
 
5151
        /* Undo index entries: */
 
5152
        for (i=0, ind=tab->tab_dic.dic_keys; i<idx_cnt; i++, ind++) {
 
5153
                if (!xt_idx_delete(ot, *ind, rec_id, after_buf))
 
5154
                        goto failed_1;
 
5155
        }
 
5156
 
 
5157
        /* Restore the record: */
 
5158
        if (!myxt_store_row(ot, &rec_info, (char *) before_buf))
 
5159
                goto failed_1;
 
5160
 
 
5161
        if (rec_info.ri_ext_rec)
 
5162
                memcpy(rec_info.ri_fix_rec_buf, &prev_rec_head, XT_REC_EXT_HEADER_SIZE);
 
5163
        else
 
5164
                memcpy(rec_info.ri_fix_rec_buf, &prev_rec_head, sizeof(XTTabRecHeadDRec));
 
5165
 
 
5166
        if (!xt_tab_put_log_op_rec_data(ot, XT_LOG_ENT_REC_MODIFIED, 0, rec_id, rec_info.ri_rec_buf_size, (xtWord1 *) rec_info.ri_fix_rec_buf))
 
5167
                goto failed_1;
 
5168
 
 
5169
        /* Put the index entries back: */
 
5170
        for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
 
5171
                if (!xt_idx_insert(ot, *ind, 0, rec_id, before_buf, after_buf, TRUE))
 
5172
                        /* Incomplete restore, there will be a rollback... */
 
5173
                        goto failed_0;
 
5174
        }
 
5175
 
 
5176
        /* The previous record has now been restored. */
 
5177
        goto failed_0;
 
5178
 
 
5179
        failed_1:
 
5180
        /* The old record is overwritten, I must free the previous extended record: */
 
5181
        if (prev_ext_rec)
 
5182
                tab_free_ext_record_on_fail(ot, rec_id, &prev_rec_head, TRUE);
 
5183
 
 
5184
        failed_0:
 
5185
        return FAILED;
 
5186
}
 
5187
 
 
5188
xtPublic xtBool xt_tab_update_record(XTOpenTablePtr ot, xtWord1 *before_buf, xtWord1 *after_buf)
 
5189
{
 
5190
        register XTTableHPtr    tab;
 
5191
        xtRowID                                 row_id;
 
5192
        register XTThreadPtr    self;
 
5193
        xtRecordID                              curr_var_rec_id;
 
5194
        XTTabRecInfoRec                 rec_info;
 
5195
        u_int                                   idx_cnt = 0;
 
5196
        XTIndexPtr                              *ind;
 
5197
 
 
5198
        /*
 
5199
         * Originally only the flag ot->ot_curr_updated was checked, and if it was on, then
 
5200
         * tab_overwrite_record() was called, but this caused crashes in some cases like:
 
5201
         *
 
5202
         * set @@autocommit = 0;
 
5203
         * create table t1 (s1 int primary key); 
 
5204
         * create table t2 (s1 int primary key, foreign key (s1) references t1 (s1) on update cascade);
 
5205
     * insert into t1 values (1);
 
5206
         * insert into t2 values (1);
 
5207
         * update t1 set s1 = 1;
 
5208
         *
 
5209
         * the last update lead to a crash on t2 cascade update because before_buf argument is NULL 
 
5210
         * in the call below. It is NULL only during cascade update of child table. In that case we 
 
5211
         * cannot pass before_buf value from XTDDTableRef::modifyRow as the before_buf is the original 
 
5212
         * row for the parent (t1) table and it would be used to update any existing indexes
 
5213
         * in the child table which would be wrong of course.
 
5214
         *
 
5215
         * Alternative solution would be to copy the after_info in the XTDDTableRef::modifyRow():
 
5216
         * 
 
5217
         * ...
 
5218
         * if (!xt_tab_load_record(ot, ot->ot_curr_rec_id, &after_info))
 
5219
         *     goto failed_2;
 
5220
         * ...
 
5221
         *
 
5222
         * here the xt_tab_load_record() loads the original row, so we can copy it from there, but in 
 
5223
         * that case we'd need to allocate a new (possibly up to 65536 bytes long) buffer, which makes 
 
5224
         * the optimization questionable
 
5225
         *
 
5226
         */
 
5227
        if (ot->ot_curr_updated && before_buf)
 
5228
                /* This record has already been updated by this transaction.
 
5229
                 * Do the update in place!
 
5230
                 */
 
5231
                return tab_overwrite_record(ot, before_buf, after_buf);
 
5232
 
 
5233
        tab = ot->ot_table;
 
5234
        row_id = ot->ot_curr_row_id;
 
5235
        self = ot->ot_thread;
 
5236
 
 
5237
        if (!myxt_store_row(ot, &rec_info, (char *) after_buf))
 
5238
                goto failed_0;
 
5239
 
 
5240
        rec_info.ri_fix_rec_buf->tr_stat_id_1 = ot->ot_update_id;
 
5241
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
 
5242
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, ot->ot_curr_rec_id);
 
5243
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, self->st_xact_data->xd_start_xn_id);
 
5244
 
 
5245
        /* Create the new record: */
 
5246
        if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_UPDATE))
 
5247
                goto failed_0;
 
5248
 
 
5249
        /* Link the new variation into the list: */
 
5250
        XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
 
5251
 
 
5252
        if (!xt_tab_get_row(ot, row_id, &curr_var_rec_id))
 
5253
                goto failed_1;
 
5254
 
 
5255
        if (curr_var_rec_id != ot->ot_curr_rec_id) {
 
5256
                /* If the transaction does not rollback, I will get an
 
5257
                 * exception here:
 
5258
                 */
 
5259
                if (!tab_wait_for_rollback(ot, row_id, ot->ot_curr_rec_id))
 
5260
                        goto failed_1;
 
5261
                /* [(4)] This is the situation when we overwrite the
 
5262
                 * reference to curr_var_rec_id!
 
5263
                 * When curr_var_rec_id is cleaned up by the sweeper, the
 
5264
                 * sweeper will notice that the record is no longer in
 
5265
                 * the row list.
 
5266
                 */
 
5267
        }
 
5268
 
 
5269
#ifdef TRACE_VARIATIONS
 
5270
        xt_ttracef(self, "update: row=%d rec=%d T%d\n", (int) row_id, (int) rec_info.ri_rec_id, (int) self->st_xact_data->xd_start_xn_id);
 
5271
#endif
 
5272
        if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
 
5273
                goto failed_1;
 
5274
        XT_DISABLED_TRACE(("set upd tx=%d row=%d rec=%d\n", (int) self->st_xact_data->xd_start_xn_id, (int) row_id, (int) rec_info.ri_rec_id));
 
5275
 
 
5276
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
 
5277
 
 
5278
        /* Add the index references: */
 
5279
        for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
 
5280
                if (!xt_idx_insert(ot, *ind, 0, rec_info.ri_rec_id, after_buf, before_buf, FALSE)) {
 
5281
                        ot->ot_err_index_no = (*ind)->mi_index_no;
 
5282
                        goto failed_2;
 
5283
                }
 
5284
        }
 
5285
 
 
5286
        if (ot->ot_table->tab_dic.dic_table->dt_trefs || ot->ot_table->tab_dic.dic_table->dt_fkeys.size() > 0) {
 
5287
                if (!ot->ot_table->tab_dic.dic_table->updateRow(ot, before_buf, after_buf))
 
5288
                        goto failed_2;
 
5289
        }
 
5290
 
 
5291
        ot->ot_thread->st_statistics.st_row_update++;
 
5292
        return OK;
 
5293
 
 
5294
        failed_2:
 
5295
        tab_overwrite_record_on_fail(ot, &rec_info, before_buf, after_buf, idx_cnt);
 
5296
        goto failed_0;
 
5297
 
 
5298
        failed_1:
 
5299
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
 
5300
 
 
5301
        failed_0:
 
5302
        return FAILED;
 
5303
}
 
5304
 
 
5305
xtPublic xtBool xt_tab_delete_record(XTOpenTablePtr ot, xtWord1 *rec_buf)
 
5306
{
 
5307
        register XTTableHPtr    tab = ot->ot_table;
 
5308
        xtRowID                                 row_id = ot->ot_curr_row_id;
 
5309
        xtRecordID                              curr_var_rec_id;
 
5310
        XTTabRecInfoRec                 rec_info;
 
5311
 
 
5312
        /* Setup a delete record: */
 
5313
        rec_info.ri_fix_rec_buf = (XTTabRecFixDPtr) ot->ot_row_wbuffer;
 
5314
        rec_info.ri_rec_buf_size = offsetof(XTTabRecFixDRec, rf_data);
 
5315
        rec_info.ri_ext_rec = NULL;
 
5316
        rec_info.ri_fix_rec_buf->tr_rec_type_1 = XT_TAB_STATUS_DELETE;
 
5317
        rec_info.ri_fix_rec_buf->tr_stat_id_1 = 0;
 
5318
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
 
5319
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, ot->ot_curr_rec_id);
 
5320
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, ot->ot_thread->st_xact_data->xd_start_xn_id);
 
5321
 
 
5322
        if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_DELETE))
 
5323
                return FAILED;
 
5324
 
 
5325
        XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
 
5326
 
 
5327
        if (!xt_tab_get_row(ot, row_id, &curr_var_rec_id))
 
5328
                goto failed_1;
 
5329
 
 
5330
        if (curr_var_rec_id != ot->ot_curr_rec_id) {
 
5331
                if (!tab_wait_for_rollback(ot, row_id, ot->ot_curr_rec_id))
 
5332
                        goto failed_1;          
 
5333
        }
 
5334
 
 
5335
#ifdef TRACE_VARIATIONS
 
5336
        xt_ttracef(ot->ot_thread, "update: row=%d rec=%d T%d\n", (int) row_id, (int) rec_info.ri_rec_id, (int) ot->ot_thread->st_xact_data->xd_start_xn_id);
 
5337
#endif
 
5338
        if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
 
5339
                goto failed_1;
 
5340
        XT_DISABLED_TRACE(("del row tx=%d row=%d rec=%d\n", (int) ot->ot_thread->st_xact_data->xd_start_xn_id, (int) row_id, (int) rec_info.ri_rec_id));
 
5341
 
 
5342
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
 
5343
 
 
5344
        if (ot->ot_table->tab_dic.dic_table->dt_trefs) {
 
5345
                if (!ot->ot_table->tab_dic.dic_table->deleteRow(ot, rec_buf))
 
5346
                        goto failed_2;
 
5347
        }
 
5348
 
 
5349
        ot->ot_thread->st_statistics.st_row_delete++;
 
5350
        return OK;
 
5351
 
 
5352
        failed_2:
 
5353
        tab_overwrite_record_on_fail(ot, &rec_info, rec_buf, NULL, 0);
 
5354
        return FAILED;
 
5355
 
 
5356
        failed_1:
 
5357
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
 
5358
        return FAILED;
 
5359
}
 
5360
 
 
5361
xtPublic xtBool xt_tab_restrict_rows(XTBasicListPtr list, XTThreadPtr thread)
 
5362
{
 
5363
        u_int                           i;
 
5364
        XTRestrictItemPtr       item;
 
5365
        XTOpenTablePtr          pot = NULL;
 
5366
        XTDatabaseHPtr          db = thread->st_database;
 
5367
        xtBool                          ok = TRUE;
 
5368
 
 
5369
        for (i=0; i<list->bl_count; i++) {
 
5370
                item = (XTRestrictItemPtr) xt_bl_item_at(list, i);
 
5371
                if (item)
 
5372
                        if (pot) {
 
5373
                                if (pot->ot_table->tab_id == item->ri_tab_id)
 
5374
                                        goto check_action;
 
5375
                                xt_db_return_table_to_pool_ns(pot);
 
5376
                                pot = NULL;
 
5377
                        }
 
5378
 
 
5379
                        if (!xt_db_open_pool_table_ns(&pot, db, item->ri_tab_id)) {
 
5380
                                /* Should not happen, but just in case, we just don't
 
5381
                                 * remove the lock. We will probably end up with a deadlock
 
5382
                                 * somewhere.
 
5383
                                 */
 
5384
                                xt_log_and_clear_exception_ns();
 
5385
                                goto skip_check_action;
 
5386
                        }
 
5387
                        if (!pot)
 
5388
                                /* Can happen of the table has been dropped: */
 
5389
                                goto skip_check_action;
 
5390
 
 
5391
                        check_action:
 
5392
                        if (!pot->ot_table->tab_dic.dic_table->checkNoAction(pot, item->ri_rec_id)) {
 
5393
                                ok = FALSE;
 
5394
                                break;
 
5395
                        }
 
5396
                        skip_check_action:;
 
5397
        }
 
5398
 
 
5399
        if (pot)
 
5400
                xt_db_return_table_to_pool_ns(pot);
 
5401
        xt_bl_free(NULL, list);
 
5402
        return ok;
 
5403
}
 
5404
 
 
5405
 
 
5406
xtPublic xtBool xt_tab_seq_init(XTOpenTablePtr ot)
 
5407
{
 
5408
        register XTTableHPtr tab = ot->ot_table;
 
5409
        
 
5410
        ASSERT_NS(!ot->ot_seq_page);
 
5411
        ot->ot_seq_page = NULL;
 
5412
        ot->ot_seq_data = NULL;
 
5413
        ot->ot_on_page = FALSE;
 
5414
        ot->ot_seq_offset = 0;
 
5415
 
 
5416
        ot->ot_curr_rec_id = 0;                 // 0 is an invalid position!
 
5417
        ot->ot_curr_row_id = 0;                 // 0 is an invalid row ID!
 
5418
        ot->ot_curr_updated = FALSE;
 
5419
 
 
5420
        /* We note the current EOF before we start a sequential scan.
 
5421
         * It is basically possible to update the same record more than
 
5422
         * once because an updated record creates a new record which
 
5423
         * has a new position which may be in the area that is
 
5424
         * still to be scanned.
 
5425
         *
 
5426
         * By noting the EOF before we start a sequential scan we
 
5427
         * reduce the possibility of this.
 
5428
         *
 
5429
         * However, the possibility still remains, but it should
 
5430
         * not be a problem because a record is not modified
 
5431
         * if there is nothing to change, which is the case
 
5432
         * if the record has already been changed!
 
5433
         *
 
5434
         * NOTE (2008-01-29) There is no longer a problem with updating a
 
5435
         * record twice because records are marked by an update.
 
5436
         *
 
5437
         * [(10)] I have changed this (see below). I now check the
 
5438
         * current EOF of the table.
 
5439
         *
 
5440
         * The reason is that committed read must be able to see the
 
5441
         * changes that occur during table table scan.   * 
 
5442
         */
 
5443
        ot->ot_seq_eof_id = tab->tab_rec_eof_id;
 
5444
 
 
5445
        if (!ot->ot_thread->st_xact_data) {
 
5446
                /* MySQL ignores this error, so we
 
5447
                 * setup the sequential scan so that it will
 
5448
                 * deliver nothing!
 
5449
                 */
 
5450
                ot->ot_seq_rec_id = ot->ot_seq_eof_id;
 
5451
                xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_TRANSACTION);
 
5452
                return FAILED;
 
5453
        }
 
5454
 
 
5455
        ot->ot_seq_rec_id = 1;
 
5456
        ot->ot_thread->st_statistics.st_scan_table++;
 
5457
        return OK;
 
5458
}
 
5459
 
 
5460
xtPublic void xt_tab_seq_reset(XTOpenTablePtr ot)
 
5461
{
 
5462
        ot->ot_seq_rec_id = 0;
 
5463
        ot->ot_seq_eof_id = 0;
 
5464
        ASSERT_NS(!ot->ot_seq_page);
 
5465
        ot->ot_seq_page = NULL;
 
5466
        ot->ot_seq_data = NULL;
 
5467
        ot->ot_on_page = FALSE;
 
5468
        ot->ot_seq_offset = 0;
 
5469
}
 
5470
 
 
5471
xtPublic void xt_tab_seq_exit(XTOpenTablePtr ot)
 
5472
{
 
5473
        register XTTableHPtr    tab = ot->ot_table;
 
5474
 
 
5475
        if (ot->ot_seq_page) {
 
5476
                tab->tab_recs.xt_tc_release_page(ot->ot_rec_file, ot->ot_seq_page, ot->ot_thread);
 
5477
                ot->ot_seq_page = NULL;
 
5478
        }
 
5479
        if (ot->ot_seq_data) {
 
5480
                XT_UNLOCK_MEMORY_PTR(ot->ot_rec_file, ot->ot_seq_data, ot->ot_thread);
 
5481
                ot->ot_seq_data = NULL;
 
5482
        }
 
5483
        ot->ot_on_page = FALSE;
 
5484
}
 
5485
 
 
5486
/*
 * TAB_SEQ_LOAD_CACHE is passed to xt_tc_get_page() by the sequential scan.
 * It is TRUE only when XT_SEQ_SCAN_LOADS_CACHE is defined and memory
 * mapped row/record files (XT_USE_ROW_REC_MMAP_FILES) are not in use.
 */
#if defined(XT_SEQ_SCAN_LOADS_CACHE) && !defined(XT_USE_ROW_REC_MMAP_FILES)
#define TAB_SEQ_LOAD_CACHE		TRUE
#else
#define TAB_SEQ_LOAD_CACHE		FALSE
#endif
 
5495
 
 
5496
xtPublic void xt_tab_seq_repeat(XTOpenTablePtr ot)
 
5497
{
 
5498
        ot->ot_seq_rec_id--;
 
5499
        ot->ot_seq_offset -= ot->ot_table->tab_dic.dic_rec_size;
 
5500
}
 
5501
 
 
5502
xtPublic xtBool xt_tab_seq_next(XTOpenTablePtr ot, xtWord1 *buffer, xtBool *eof)
 
5503
{
 
5504
        register XTTableHPtr    tab = ot->ot_table;
 
5505
        register size_t                 rec_size = tab->tab_dic.dic_rec_size;
 
5506
        xtWord1                                 *buff_ptr;
 
5507
        xtRecordID                              new_rec_id;
 
5508
        xtRecordID                              invalid_rec = 0;
 
5509
 
 
5510
        next_page:
 
5511
        if (!ot->ot_on_page) {
 
5512
                ASSERT_NS(!ot->ot_seq_page);
 
5513
                if (!(ot->ot_on_page = tab->tab_recs.xt_tc_get_page(ot->ot_rec_file, ot->ot_seq_rec_id, TAB_SEQ_LOAD_CACHE, &ot->ot_seq_page, &ot->ot_seq_offset, ot->ot_thread)))
 
5514
                        return FAILED;
 
5515
                if (!ot->ot_seq_page) {
 
5516
                        XT_LOCK_MEMORY_PTR(ot->ot_seq_data, ot->ot_rec_file, xt_rec_id_to_rec_offset(tab, ot->ot_seq_rec_id), tab->tab_rows.tci_page_size, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread);
 
5517
                        if (!ot->ot_seq_data)
 
5518
                                return FAILED;
 
5519
                        ot->ot_on_page = TRUE;
 
5520
                        ot->ot_seq_offset = 0;
 
5521
                }
 
5522
        }
 
5523
 
 
5524
        next_record:
 
5525
        /* [(10)] The current EOF is used: */
 
5526
        if (ot->ot_seq_rec_id >= ot->ot_seq_eof_id) {
 
5527
                *eof = TRUE;
 
5528
                return OK;
 
5529
        }
 
5530
 
 
5531
        if (ot->ot_seq_offset >= tab->tab_recs.tci_page_size) {
 
5532
                if (ot->ot_seq_page) {
 
5533
                        tab->tab_recs.xt_tc_release_page(ot->ot_rec_file, ot->ot_seq_page, ot->ot_thread);
 
5534
                        ot->ot_seq_page = NULL;
 
5535
                }
 
5536
                if (ot->ot_seq_data) {
 
5537
                        /* NULL here means that in the case of non-memory mapped
 
5538
                         * files we "keep" the lock.
 
5539
                         */
 
5540
                        XT_UNLOCK_MEMORY_PTR(ot->ot_rec_file, ot->ot_seq_data, ot->ot_thread);
 
5541
                        ot->ot_seq_data = NULL;
 
5542
                }
 
5543
                ot->ot_on_page = FALSE;
 
5544
                goto next_page;
 
5545
        }
 
5546
 
 
5547
        if (ot->ot_seq_page)
 
5548
                buff_ptr = ot->ot_seq_page->tcp_data + ot->ot_seq_offset;
 
5549
        else
 
5550
                buff_ptr = ot->ot_seq_data + ot->ot_seq_offset;
 
5551
 
 
5552
        /* This is the current record: */
 
5553
        ot->ot_curr_rec_id = ot->ot_seq_rec_id;
 
5554
        ot->ot_curr_row_id = 0;
 
5555
 
 
5556
        /* Move to the next record: */
 
5557
        ot->ot_seq_rec_id++;
 
5558
        ot->ot_seq_offset += rec_size;
 
5559
 
 
5560
        retry:
 
5561
        switch (tab_visible(ot, (XTTabRecHeadDPtr) buff_ptr, &new_rec_id)) {
 
5562
                case FALSE:
 
5563
                        goto next_record;
 
5564
                case XT_ERR:
 
5565
                        goto failed;
 
5566
                case XT_NEW:
 
5567
                        buff_ptr = ot->ot_row_rbuffer;
 
5568
                        if (!xt_tab_get_rec_data(ot, new_rec_id, rec_size, ot->ot_row_rbuffer))
 
5569
                                return XT_ERR;
 
5570
                        ot->ot_curr_rec_id = new_rec_id;
 
5571
                        break;
 
5572
                case XT_RETRY:
 
5573
                        goto retry;
 
5574
                case XT_REREAD:
 
5575
                        if (invalid_rec != ot->ot_curr_rec_id) {
 
5576
                                /* Don't re-read for the same record twice: */
 
5577
                                invalid_rec = ot->ot_curr_rec_id;
 
5578
 
 
5579
                                /* Undo move to next: */
 
5580
                                ot->ot_seq_rec_id--;
 
5581
                                ot->ot_seq_offset -= rec_size;
 
5582
                                
 
5583
                                /* Prepare to reread the page: */
 
5584
                                if (ot->ot_seq_page) {
 
5585
                                        tab->tab_recs.xt_tc_release_page(ot->ot_rec_file, ot->ot_seq_page, ot->ot_thread);
 
5586
                                        ot->ot_seq_page = NULL;
 
5587
                                }
 
5588
                                ot->ot_on_page = FALSE;
 
5589
                                goto next_page;
 
5590
                        }
 
5591
#ifdef XT_CRASH_DEBUG
 
5592
                        /* Should not happen! */
 
5593
                        xt_crash_me();
 
5594
#endif
 
5595
                        /* Continue, and skip the record... */
 
5596
                        invalid_rec = 0;
 
5597
                        goto next_record;
 
5598
                default:
 
5599
                        break;
 
5600
        }
 
5601
 
 
5602
        switch (*buff_ptr) {
 
5603
                case XT_TAB_STATUS_FIXED:
 
5604
                case XT_TAB_STATUS_FIX_CLEAN:
 
5605
                        memcpy(buffer, buff_ptr + XT_REC_FIX_HEADER_SIZE, rec_size - XT_REC_FIX_HEADER_SIZE);
 
5606
                        break;
 
5607
                case XT_TAB_STATUS_VARIABLE:
 
5608
                case XT_TAB_STATUS_VAR_CLEAN:
 
5609
                        if (!myxt_load_row(ot, buff_ptr + XT_REC_FIX_HEADER_SIZE, buffer, ot->ot_cols_req))
 
5610
                                goto failed_1;
 
5611
                        break;
 
5612
                case XT_TAB_STATUS_EXT_DLOG:
 
5613
                case XT_TAB_STATUS_EXT_CLEAN: {
 
5614
                        u_int cols_req = ot->ot_cols_req;
 
5615
 
 
5616
                        ASSERT_NS(cols_req);
 
5617
                        if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
 
5618
                                if (!myxt_load_row(ot, buff_ptr + XT_REC_EXT_HEADER_SIZE, buffer, cols_req))
 
5619
                                        goto failed_1;
 
5620
                        }
 
5621
                        else {
 
5622
                                if (buff_ptr != ot->ot_row_rbuffer)
 
5623
                                        memcpy(ot->ot_row_rbuffer, buff_ptr, rec_size);
 
5624
                                if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, buffer, cols_req))
 
5625
                                        goto failed_1;
 
5626
                        }
 
5627
                        break;
 
5628
                }
 
5629
        }
 
5630
 
 
5631
        *eof = FALSE;
 
5632
        return OK;
 
5633
 
 
5634
        failed_1:
 
5635
 
 
5636
        failed:
 
5637
        return FAILED;
 
5638
}
 
5639
 
 
5640
/*
 
5641
 * -----------------------------------------------------------------------
 
5642
 * REPAIR TABLE
 
5643
 */
 
5644
 
 
5645
/* Values for the 'what' argument of tab_exec_repair_pending().
 * With REP_ADD the "repair-pending" file is created if it does not exist;
 * with any other value the file is opened only if present.
 * NOTE(review): REP_FIND / REP_DEL presumably look up / remove a table
 * name in that file -- confirm against the rest of the function.
 */
#define REP_FIND                0
 
5646
#define REP_ADD                 1
 
5647
#define REP_DEL                 2
 
5648
 
 
5649
static xtBool tab_exec_repair_pending(XTDatabaseHPtr db, int what, char *table_name)
 
5650
{
 
5651
        XTThreadPtr                     thread = xt_get_self();
 
5652
        char                            file_path[PATH_MAX];
 
5653
        XTOpenFilePtr           of = NULL;
 
5654
        int                                     len;
 
5655
        char                            *buffer = NULL, *ptr, *name;
 
5656
        char                            ch;
 
5657
        xtBool                          found = FALSE;
 
5658
 
 
5659
        xt_strcpy(PATH_MAX, file_path, db->db_main_path);
 
5660
        xt_add_pbxt_file(PATH_MAX, file_path, "repair-pending");
 
5661
        
 
5662
        if (what == REP_ADD) {
 
5663
                if (!xt_open_file_ns(&of, file_path, XT_FT_STANDARD, XT_FS_CREATE | XT_FS_MAKE_PATH, 0))
 
5664
                        return FALSE;
 
5665
        }
 
5666
        else {
 
5667
                if (!xt_open_file_ns(&of, file_path, XT_FT_STANDARD, XT_FS_DEFAULT | XT_FS_MISSING_OK, 0))
 
5668
                        return FALSE;
 
5669
        }
 
5670
        if (!of)
 
5671
                return FALSE;
 
5672
 
 
5673
        len = (int) xt_seek_eof_file(NULL, of);
 
5674
        
 
5675
        if (!(buffer = (char *) xt_malloc_ns(len + 1)))
 
5676
                goto failed;
 
5677
 
 
5678
        if (!xt_pread_file(of, 0, len, len, buffer, NULL, &thread->st_statistics.st_x, thread))
 
5679
                goto failed;
 
5680
 
 
5681
        buffer[len] = 0;
 
5682
        ptr = buffer;
 
5683
        for(;;) {
 
5684
                name = ptr;
 
5685
                while (*ptr && *ptr != '\n' && *ptr != '\r')
 
5686
                        ptr++;
 
5687
                if (ptr > name) {
 
5688
                        ch = *ptr;
 
5689
                        *ptr = 0;
 
5690
                        if (xt_tab_compare_names(name, table_name) == 0) {
 
5691
                                *ptr = ch;
 
5692
                                found = TRUE;
 
5693
                                break;
 
5694
                        }       
 
5695
                        *ptr = ch;
 
5696
                }
 
5697
                if (!*ptr)
 
5698
                        break;
 
5699
                ptr++;
 
5700
        }
 
5701
 
 
5702
        switch (what) {
 
5703
                case REP_ADD:
 
5704
                        if (!found) {
 
5705
                                /* Remove any trailing empty lines: */
 
5706
                                while (len > 0) {
 
5707
                                        if (buffer[len-1] != '\n' && buffer[len-1] != '\r')
 
5708
                                                break;
 
5709
                                        len--;
 
5710
                                }
 
5711
                                if (len > 0) {
 
5712
                                        if (!xt_pwrite_file(of, len, 1, (void *) "\n", &thread->st_statistics.st_x, thread))
 
5713
                                                goto failed;
 
5714
                                        len++;
 
5715
                                }
 
5716
                                if (!xt_pwrite_file(of, len, strlen(table_name), table_name, &thread->st_statistics.st_x, thread))
 
5717
                                        goto failed;
 
5718
                                len += strlen(table_name);
 
5719
                                if (!xt_set_eof_file(NULL, of, len))
 
5720
                                        goto failed;
 
5721
                        }
 
5722
                        break;
 
5723
                case REP_DEL:
 
5724
                        if (found) {
 
5725
                                if (*ptr != '\0')
 
5726
                                        ptr++;
 
5727
                                memmove(name, ptr, len - (ptr - buffer));
 
5728
                                len = len - (ptr - name);
 
5729
 
 
5730
                                /* Remove trailing empty lines: */
 
5731
                                while (len > 0) {
 
5732
                                        if (buffer[len-1] != '\n' && buffer[len-1] != '\r')
 
5733
                                                break;
 
5734
                                        len--;
 
5735
                                }
 
5736
 
 
5737
                                if (len > 0) {
 
5738
                                        if (!xt_pwrite_file(of, 0, len, buffer, &thread->st_statistics.st_x, thread))
 
5739
                                                goto failed;
 
5740
                                        if (!xt_set_eof_file(NULL, of, len))
 
5741
                                                goto failed;
 
5742
                                }
 
5743
                        }
 
5744
                        break;
 
5745
        }
 
5746
 
 
5747
        xt_close_file_ns(of);
 
5748
        xt_free_ns(buffer);
 
5749
 
 
5750
        if (len == 0)
 
5751
                xt_fs_delete(NULL, file_path);
 
5752
        return found;
 
5753
 
 
5754
        failed:
 
5755
        if (of)
 
5756
                xt_close_file_ns(of);
 
5757
        if (buffer)
 
5758
                xt_free_ns(buffer);
 
5759
        xt_log_and_clear_exception(thread);
 
5760
        return FALSE;
 
5761
}
 
5762
 
 
5763
xtPublic void xt_tab_make_table_name(XTPathStrPtr tab_path, char *table_name, size_t size)
 
5764
{
 
5765
        char    *nptr;
 
5766
 
 
5767
        nptr = xt_last_name_of_path(tab_path->ps_path);
 
5768
        if (xt_starts_with(nptr, "#sql")) {
 
5769
                /* {INVALID-OLD-TABLE-FIX}
 
5770
                 * Temporary files can have strange paths, for example
 
5771
                 * ..../var/tmp/mysqld.1/#sqldaec_1_6
 
5772
                 * This occurs, for example, occurs when the temp_table.test is
 
5773
                 * run using the PBXT suite in MariaDB:
 
5774
                 * ./mtr --suite=pbxt --do-test=temp_table
 
5775
                 *
 
5776
                 * Calling myxt_static_convert_file_name, with a '.', in the name
 
5777
                 * causes the error:
 
5778
                 * [ERROR] Invalid (old?) table or database name 'mysqld.1'
 
5779
                 * To prevent this, we do not convert the temporary
 
5780
                 * table names using the mysql functions.
 
5781
                 *
 
5782
                 * Note, this bug was found by Monty, and fixed by modifying
 
5783
                 * xt_2nd_last_name_of_path(), see {INVALID-OLD-TABLE-FIX}.
 
5784
                 *
 
5785
                 */
 
5786
                xt_2nd_last_name_of_path(size, table_name, tab_path->ps_path);
 
5787
                xt_strcat(size, table_name, ".");
 
5788
                xt_strcat(size, table_name, nptr);
 
5789
        }
 
5790
        else {
 
5791
                char    name_buf[XT_TABLE_NAME_SIZE*3+3];
 
5792
                char    *part_ptr;
 
5793
                size_t  len;
 
5794
 
 
5795
                xt_2nd_last_name_of_path(sizeof(name_buf), name_buf, tab_path->ps_path);
 
5796
                myxt_static_convert_file_name(name_buf, table_name, size);
 
5797
                xt_strcat(size, table_name, ".");
 
5798
                
 
5799
                /* Handle partition extensions to table names: */
 
5800
                if ((part_ptr = strstr(nptr, "#P#")))
 
5801
                        xt_strncpy(sizeof(name_buf), name_buf, nptr, part_ptr - nptr);
 
5802
                else
 
5803
                        xt_strcpy(sizeof(name_buf), name_buf, nptr);
 
5804
 
 
5805
                len = strlen(table_name);
 
5806
                myxt_static_convert_file_name(name_buf, table_name + len, size - len);
 
5807
 
 
5808
                if (part_ptr) {
 
5809
                        /* Add the partition extension (which is relevant to the engine). */
 
5810
                        char    *sub_part_ptr;
 
5811
 
 
5812
                        part_ptr += 3;
 
5813
                        if ((sub_part_ptr = strstr(part_ptr, "#SP#")))
 
5814
                                xt_strncpy(sizeof(name_buf), name_buf, part_ptr, sub_part_ptr - part_ptr);
 
5815
                        else
 
5816
                                xt_strcpy(sizeof(name_buf), name_buf, part_ptr);
 
5817
                        
 
5818
                        xt_strcat(size, table_name, " (");
 
5819
                        len = strlen(table_name);
 
5820
                        myxt_static_convert_file_name(name_buf, table_name + len, size - len);
 
5821
                        
 
5822
                        if (sub_part_ptr) {
 
5823
                        
 
5824
                                sub_part_ptr += 4;
 
5825
                                xt_strcat(size, table_name, " - ");
 
5826
                                len = strlen(table_name);
 
5827
                                myxt_static_convert_file_name(sub_part_ptr, table_name + len, size - len);
 
5828
                        }
 
5829
 
 
5830
                        xt_strcat(size, table_name, ")");
 
5831
                }
 
5832
        }
 
5833
}
 
5834
 
 
5835
xtPublic xtBool xt_tab_is_table_repair_pending(XTTableHPtr tab)
 
5836
{
 
5837
        char table_name[XT_TABLE_NAME_BUF_SIZE];
 
5838
 
 
5839
        xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
 
5840
        return tab_exec_repair_pending(tab->tab_db, REP_FIND, table_name);
 
5841
}
 
5842
 
 
5843
xtPublic void xt_tab_table_repaired(XTTableHPtr tab)
 
5844
{
 
5845
        if (tab->tab_repair_pending) {
 
5846
                char table_name[XT_TABLE_NAME_BUF_SIZE];
 
5847
 
 
5848
                tab->tab_repair_pending = FALSE;
 
5849
                xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
 
5850
                tab_exec_repair_pending(tab->tab_db, REP_DEL, table_name);
 
5851
        }
 
5852
}
 
5853
 
 
5854
xtPublic void xt_tab_set_table_repair_pending(XTTableHPtr tab)
 
5855
{
 
5856
        if (!tab->tab_repair_pending) {
 
5857
                char table_name[XT_TABLE_NAME_BUF_SIZE];
 
5858
 
 
5859
                tab->tab_repair_pending = TRUE;
 
5860
                xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
 
5861
                tab_exec_repair_pending(tab->tab_db, REP_ADD, table_name);
 
5862
        }
 
5863
}
 
5864
 
 
5865
/*
 
5866
 * -----------------------------------------------------------------------
 
5867
 * EXTENDED DATA FOR RAM TABLES
 
5868
 */
 
5869
 
 
5870
xtPublic xtBool xt_tab_get_ext_slot(XTTableHPtr tab, xtLogID *log_id, xtLogOffset *log_offset, size_t /*req_size*/)
 
5871
{
 
5872
        size_t new_slot;
 
5873
 
 
5874
        xt_spinlock_lock(&tab->tab_mem_lock);
 
5875
        if (tab->tab_mem_ind_free) {
 
5876
                new_slot = tab->tab_mem_ind_free - 1;
 
5877
                tab->tab_mem_ind_free = (size_t) tab->tab_mem_index[new_slot];
 
5878
        }
 
5879
        else {
 
5880
                if (tab->tab_mem_ind_usage == tab->tab_mem_ind_size) {
 
5881
                        /* Grow the index: */
 
5882
                        if (!xt_realloc_ns((void **) &tab->tab_mem_index, (tab->tab_mem_ind_size + 100) * sizeof(xtWord1 *)))
 
5883
                                return FAILED;
 
5884
                        tab->tab_mem_ind_size += 100;
 
5885
                }
 
5886
                new_slot = tab->tab_mem_ind_usage;
 
5887
                tab->tab_mem_ind_usage++;
 
5888
        }
 
5889
        xt_spinlock_unlock(&tab->tab_mem_lock);
 
5890
        tab->tab_mem_index[new_slot] = NULL;
 
5891
        *log_id = 1;
 
5892
        *log_offset = new_slot + 1;
 
5893
        return OK;
 
5894
}
 
5895
 
 
5896
xtPublic xtBool xt_tab_save_ext_record(XTTableHPtr tab, xtLogID XT_UNUSED(log_id), xtLogOffset log_offset, size_t size, xtWord1 *data)
 
5897
{
 
5898
        size_t  slot = ((size_t) log_offset) - 1;
 
5899
        xtWord1 *rec_data;
 
5900
 
 
5901
        if (!(rec_data = (xtWord1 *) xt_malloc_ns(size)))
 
5902
                return FAILED;
 
5903
        memcpy(rec_data, data, size);
 
5904
        xt_spinlock_lock(&tab->tab_mem_lock);
 
5905
        tab->tab_mem_total += size;
 
5906
        tab->tab_mem_index[slot] = rec_data;
 
5907
        xt_spinlock_unlock(&tab->tab_mem_lock);
 
5908
        return OK;
 
5909
}
 
5910
 
 
5911
xtPublic void xt_tab_read_ext_record(XTTableHPtr tab, xtLogID XT_UNUSED(log_id), xtLogOffset log_offset, size_t size, xtWord1 *data)
 
5912
{
 
5913
        size_t  slot = ((size_t) log_offset) - 1;
 
5914
 
 
5915
        if (slot < tab->tab_mem_ind_usage && tab->tab_mem_index[slot])
 
5916
                memcpy(data, tab->tab_mem_index[slot], size);
 
5917
        else
 
5918
                memset(data, 0, size);
 
5919
}
 
5920
 
 
5921
xtPublic void xt_tab_free_ext_slot(XTTableHPtr tab, xtLogID XT_UNUSED(log_id), xtLogOffset log_offset, size_t size)
 
5922
{
 
5923
        size_t  slot = ((size_t) log_offset) - 1;
 
5924
 
 
5925
        xt_spinlock_lock(&tab->tab_mem_lock);
 
5926
        if (tab->tab_mem_index[slot]) {
 
5927
                xt_free_ns(tab->tab_mem_index[slot]);
 
5928
                tab->tab_mem_total -= size;
 
5929
        }
 
5930
        tab->tab_mem_index[slot] = (xtWord1 *) tab->tab_mem_ind_free;
 
5931
        tab->tab_mem_ind_free = slot + 1;
 
5932
        xt_spinlock_unlock(&tab->tab_mem_lock);
 
5933
}
 
5934
 
 
5935
static void tab_free_ext_records(XTTableHPtr tab)
 
5936
{
 
5937
        size_t i, next;
 
5938
        
 
5939
        if (!tab->tab_mem_index)
 
5940
                return;
 
5941
 
 
5942
        i = tab->tab_mem_ind_free;
 
5943
        while (i) {
 
5944
                next = (size_t) tab->tab_mem_index[i-1];
 
5945
                tab->tab_mem_index[i-1] = NULL;
 
5946
                i = next;
 
5947
        }
 
5948
 
 
5949
        for (i=0; i<tab->tab_mem_ind_usage; i++) {
 
5950
                if (tab->tab_mem_index[i])
 
5951
                        xt_free_ns(tab->tab_mem_index[i]);
 
5952
        }
 
5953
        
 
5954
        xt_free_ns(tab->tab_mem_index);
 
5955
}
 
5956
 
 
5957
 
 
5958