1
/* Copyright (C) 2000-2002 MySQL AB
2
Copyright (C) 2008 eBay, Inc
4
This program is free software; you can redistribute it and/or modify
5
it under the terms of the GNU General Public License as published by
6
the Free Software Foundation; version 2 of the License.
8
This program is distributed in the hope that it will be useful,
9
but WITHOUT ANY WARRANTY; without even the implied warranty of
10
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
GNU General Public License for more details.
13
You should have received a copy of the GNU General Public License
14
along with this program; if not, write to the Free Software
15
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
17
/* Implements various base dataspace-related functions - allocate, free, clear */
19
#include "heap_priv.h"
25
MySQL Heap tables keep data in arrays of fixed-size chunks.
26
These chunks are organized into two groups of HP_BLOCK structures:
27
- group1 contains indexes, with one HP_BLOCK per key
29
- group2 contains record data, with single HP_BLOCK
30
for all records, referenced by HP_SHARE.recordspace.block
32
While columns used in index are usually small, other columns
33
in the table may need to accomodate larger data. Typically,
34
larger data is placed into VARCHAR or BLOB columns. With actual
35
sizes varying, Heap Engine has to support variable-sized records
36
in memory. Heap Engine implements the concept of dataspace
37
(HP_DATASPACE), which incorporates HP_BLOCK for the record data,
38
and adds more information for managing variable-sized records.
40
Variable-size records are stored in multiple "chunks",
41
which means that a single record of data (database "row") can
42
consist of multiple chunks organized into one "set". HP_BLOCK
43
contains chunks. In variable-size format, one record
44
is represented as one or many chunks, depending on the actual
45
data, while in fixed-size mode, one record is always represented
46
as one chunk. The index structures would always point to the first
47
chunk in the chunkset.
49
At the time of table creation, Heap Engine attempts to find out
50
if variable-size records are desired. A user can request
51
variable-size records by providing either row_type=dynamic or
52
block_size=NNN table create option. Heap Engine will check
53
whether block_size provides enough space in the first chunk
54
to keep all null bits and columns that are used in indexes.
55
If block_size is too small, table creation will be aborted
56
with an error. Heap Engine will revert to fixed-size allocation
57
mode if block_size provides no memory benefits (no VARCHAR
58
fields extending past first chunk).
60
In order to improve index search performance, Heap Engine needs
61
to keep all null flags and all columns used as keys inside
62
the first chunk of a chunkset. In particular, this means that
63
all columns used as keys should be defined first in the table
64
creation SQL. The length of data used by null bits and key columns
65
is stored as fixed_data_length inside HP_SHARE. fixed_data_length
66
will extend past last key column if more fixed-length fields can
67
fit into the first chunk.
69
Variable-size records are necessary only in the presence
70
of variable-size columns. Heap Engine will be looking for VARCHAR
71
columns, which declare length of 32 or more. If no such columns
72
are found, table will be switched to fixed-size format. You should
73
always try to put such columns at the end of the table definition.
75
Whenever data is being inserted or updated in the table
76
Heap Engine will calculate how many chunks are necessary.
77
For insert operations, Heap Engine allocates new chunkset in
78
the recordspace. For update operations it will modify length of
79
the existing chunkset, unlinking unnecessary chunks at the end,
80
or allocating and adding more if larger length is necessary.
82
When writing data to chunks or copying data back to record,
83
Heap Engine will first copy fixed_data_length of data using single
84
memcpy call. The rest of the columns are processed one-by-one.
85
Non-VARCHAR columns are copied in their full format. VARCHAR's
86
are copied based on their actual length. Any NULL values after
87
fixed_data_length are skipped.
89
The allocation and contents of the actual chunks varies between
90
fixed and variable-size modes. Total chunk length is always
91
aligned to the next sizeof(unsigned char*). Here is the format of
93
unsigned char[] - sizeof=chunk_dataspace_length, but at least
94
sizeof(unsigned char*) bytes. Keeps actual data or pointer
95
to the next deleted chunk.
96
chunk_dataspace_length equals to full record length
97
unsigned char - status field (1 means "in use", 0 means "deleted")
98
Variable-size uses different format:
99
unsigned char[] - sizeof=chunk_dataspace_length, but at least
100
sizeof(unsigned char*) bytes. Keeps actual data or pointer
101
to the next deleted chunk.
102
chunk_dataspace_length is set according to table
104
unsigned char* - pointer to the next chunk in this chunkset,
105
or NULL for the last chunk
106
unsigned char - status field (1 means "first", 0 means "deleted",
109
When allocating a new chunkset of N chunks, Heap Engine will try
110
to allocate chunks one-by-one, linking them as they become
111
allocated. Allocation of a single chunk will attempt to reuse
112
a deleted (freed) chunk. If no free chunks are available,
113
it will attempt to allocate a new area inside HP_BLOCK.
114
Freeing chunks will place them at the front of free list
115
referenced by del_link in HP_DATASPACE. The newly freed chunk
116
will contain reference to the previously freed chunk in its first
117
sizeof(unsigned char*) of the payload space.
120
1. It is not very nice to require people to keep key columns
121
at the beginning of the table creation SQL. There are three
122
proposed resolutions:
123
a. Leave it as is. It's a reasonable limitation
124
b. Add new HA_KEEP_KEY_COLUMNS_TO_FRONT flag to handler.h and
125
make table.cpp align columns when it creates the table
126
c. Make HeapEngine reorder columns in the chunk data, so that
127
key columns go first. Add parallel HA_KEYSEG structures
128
to distinguish positions in record vs. positions in
129
the first chunk. Copy all data field-by-field rather than
130
using single memcpy unless DBA kept key columns to
132
2. heap_check_heap needs verify linked chunks, looking for
133
issues such as orphans, cycles, and bad links. However,
134
Heap Engine today does not do similar things even for
136
3. With new HP_DATASPACE allocation mechaism, BLOB will become
137
increasingly simple to implement, but I may not have time
138
for that. In one approach, BLOB data can be placed at
139
the end of the same record. In another approach (which I
140
prefer) BLOB data would have its own HP_DATASPACE with
141
variable-size entries.
142
4. In a more sophisticated implementation, some space can
143
be saved even with all fixed-size columns if many of them
144
have NULL value, as long as these columns are not used
146
5. In variable-size format status should be moved to lower
147
bits of the "next" pointer. Pointer is always aligned
148
to sizeof(unsigned char*), which is at least 4, leaving 2 lower
149
bits free. This will save 8 bytes per chunk
151
6. As we do not want to modify FRM format, BLOCK_SIZE option
152
of "CREATE TABLE" is saved as "RAID_CHUNKSIZE" for
156
static unsigned char *hp_allocate_one_chunk(HP_DATASPACE *info);
162
Frees memory and zeros-out any relevant counters in the dataspace
164
@param info the dataspace to clear
167
void hp_clear_dataspace(HP_DATASPACE *info)
169
if (info->block.levels)
171
hp_free_level(&info->block,info->block.levels,info->block.root,
174
info->block.levels=0;
175
info->del_chunk_count= info->chunk_count= 0;
177
info->total_data_length= 0;
182
Allocate or reallocate a chunkset in the dataspace
184
Attempts to allocate a new chunkset or change the size of an existing chunkset
186
@param info the hosting dataspace
187
@param chunk_count the number of chunks that we expect as the result
188
@param existing_set non-null value asks function to resize existing chunkset,
189
return value would point to this set
191
@return Pointer to the first chunk in the new or updated chunkset, or NULL if unsuccessful
194
static unsigned char *hp_allocate_variable_chunkset(HP_DATASPACE *info,
195
uint32_t chunk_count, unsigned char* existing_set)
197
int alloc_count= chunk_count, i;
198
unsigned char *first_chunk= 0, *curr_chunk= 0, *prev_chunk= 0, *last_existing_chunk= 0;
204
first_chunk= existing_set;
206
curr_chunk= existing_set;
207
while (curr_chunk && alloc_count)
209
prev_chunk= curr_chunk;
210
curr_chunk= *((unsigned char**)(curr_chunk + info->offset_link));
218
/* We came through all chunks and there is more left, let's truncate the list */
219
*((unsigned char**)(prev_chunk + info->offset_link)) = NULL;
220
hp_free_chunks(info, curr_chunk);
226
last_existing_chunk = prev_chunk;
229
/* We can reach this point only if we're allocating new chunkset or more chunks in existing set */
231
for (i=0; i<alloc_count; i++)
233
curr_chunk= hp_allocate_one_chunk(info);
236
/* no space in the current block */
238
if (last_existing_chunk)
240
/* Truncate whatever was added at the end of the existing chunkset */
241
prev_chunk= last_existing_chunk;
242
curr_chunk= *((unsigned char**)(prev_chunk + info->offset_link));
243
*((unsigned char**)(prev_chunk + info->offset_link)) = NULL;
244
hp_free_chunks(info, curr_chunk);
246
else if (first_chunk)
248
/* free any chunks previously allocated */
249
hp_free_chunks(info, first_chunk);
255
/* mark as if this chunk is last in the chunkset */
256
*((unsigned char**) (curr_chunk + info->offset_link))= 0;
260
/* tie them into a linked list */
261
*((unsigned char**) (prev_chunk + info->offset_link))= curr_chunk;
262
curr_chunk[info->offset_status]= CHUNK_STATUS_LINKED; /* Record linked from active */
266
curr_chunk[info->offset_status]= CHUNK_STATUS_ACTIVE; /* Record active */
271
first_chunk= curr_chunk;
274
prev_chunk= curr_chunk;
282
Allocate a new chunkset in the dataspace
284
Attempts to allocate a new chunkset
286
@param info the hosting dataspace
287
@param chunk_count the number of chunks that we expect as the result
289
@return Pointer to the first chunk in the new or updated chunkset, or NULL if unsuccessful
292
unsigned char *hp_allocate_chunkset(HP_DATASPACE *info, uint32_t chunk_count)
294
unsigned char* result;
297
if (info->is_variable_size)
299
result = hp_allocate_variable_chunkset(info, chunk_count, NULL);
303
result= hp_allocate_one_chunk(info);
306
result[info->offset_status]= CHUNK_STATUS_ACTIVE;
317
Reallocate an existing chunkset in the dataspace
319
Attempts to change the size of an existing chunkset
321
@param info the hosting dataspace
322
@param chunk_count the number of chunks that we expect as the result
323
@param pos pointer to the existing chunkset
325
@return Error code or zero if successful
328
int hp_reallocate_chunkset(HP_DATASPACE *info, uint32_t chunk_count, unsigned char* pos)
331
if (!info->is_variable_size)
333
/* Update should never change chunk_count in fixed-size mode */
334
errno=HA_ERR_WRONG_COMMAND;
338
/* Reallocate never moves the first chunk */
339
if (!hp_allocate_variable_chunkset(info, chunk_count, pos))
347
Allocate a single chunk in the dataspace
349
Attempts to allocate a new chunk or reuse one from deleted list
351
@param info the hosting dataspace
353
@return Pointer to the chunk, or NULL if unsuccessful
356
static unsigned char *hp_allocate_one_chunk(HP_DATASPACE *info)
358
unsigned char* curr_chunk;
359
size_t length, block_pos;
363
curr_chunk=info->del_link;
364
info->del_link= *((unsigned char**) curr_chunk);
365
info->del_chunk_count--;
370
block_pos= (info->chunk_count % info->block.records_in_block);
373
if (hp_get_new_block(&info->block,&length))
375
/* no space in the current block */
379
info->total_data_length+= length;
383
curr_chunk= ((unsigned char*) info->block.level_info[0].last_blocks +
384
block_pos * info->block.recbuffer);
392
Free a list of chunks
394
Reclaims all chunks linked by the pointer,
395
which could be the whole chunkset or a part of an existing chunkset
397
@param info the hosting dataspace
398
@param pos pointer to the head of the chunkset
401
void hp_free_chunks(HP_DATASPACE *info, unsigned char *pos)
403
unsigned char* curr_chunk= pos;
406
info->del_chunk_count++;
407
*((unsigned char**) curr_chunk)= info->del_link;
408
info->del_link= curr_chunk;
410
curr_chunk[info->offset_status]= CHUNK_STATUS_DELETED;
413
if (!info->is_variable_size)
418
/* Delete next chunk in this chunkset */
419
curr_chunk= *((unsigned char**)(curr_chunk + info->offset_link));