1
/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
3
This program is free software; you can redistribute it and/or modify
4
it under the terms of the GNU General Public License as published by
5
the Free Software Foundation; version 2 of the License.
7
This program is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU General Public License for more details.
12
You should have received a copy of the GNU General Public License
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
19
#define USE_MY_FUNC /* We need at least my_malloc */
22
#include "maria_def.h"
25
#include "mysys_err.h"
29
#ifndef __GNU_LIBRARY__
30
#define __GNU_LIBRARY__ /* Skip warnings in getopt.h */
32
#include <my_getopt.h>
35
#if SIZEOF_LONG_LONG > 4
41
#define IS_OFFSET ((uint) 32768) /* Bit if offset or char in tree */
42
#define HEAD_LENGTH 32
43
#define ALLOWED_JOIN_DIFF 256 /* Diff allowed to join trees */
45
#define DATA_TMP_EXT ".TMD"
46
#define OLD_EXT ".OLD"
47
#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE
49
struct st_file_buffer {
51
uchar *buffer,*pos,*end;
58
struct st_huff_element;
60
typedef struct st_huff_counts {
61
uint field_length,max_zero_fill;
63
uint max_end_space,max_pre_space,length_bits,min_space;
65
enum en_fieldtype field_type;
66
struct st_huff_tree *tree; /* Tree for field */
68
my_off_t end_space[8];
69
my_off_t pre_space[8];
70
my_off_t tot_end_space,tot_pre_space,zero_fields,empty_fields,bytes_packed;
71
TREE int_tree; /* Tree for detecting distinct column values. */
72
uchar *tree_buff; /* Column values, 'field_length' each. */
73
uchar *tree_pos; /* Points to end of column values in 'tree_buff'. */
76
typedef struct st_huff_element HUFF_ELEMENT;
79
WARNING: It is crucial for the optimizations in calc_packed_length()
80
that 'count' is the first element of 'HUFF_ELEMENT'.
82
struct st_huff_element {
86
HUFF_ELEMENT *left,*right;
90
uint element_nr; /* Number of element */
96
typedef struct st_huff_tree {
97
HUFF_ELEMENT *root,*element_buffer;
101
my_off_t bytes_packed;
102
uint tree_pack_length;
103
uint min_chr,max_chr,char_bits,offset_bits,max_offset,height;
109
typedef struct st_isam_mrg {
110
MARIA_HA **file,**current,**end;
113
uint min_pack_length; /* Theese is used by packed data */
114
uint max_pack_length;
116
uint max_blob_length;
118
/* true if at least one source file has at least one disabled index */
119
my_bool src_file_has_indexes_disabled;
123
extern int main(int argc,char * *argv);
124
static void get_options(int *argc,char ***argv);
125
static MARIA_HA *open_maria_file(char *name,int mode);
126
static my_bool open_maria_files(PACK_MRG_INFO *mrg,char **names,uint count);
127
static int compress(PACK_MRG_INFO *file,char *join_name);
128
static HUFF_COUNTS *init_huff_count(MARIA_HA *info,my_off_t records);
129
static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees,
131
HUFF_COUNTS *huff_counts,
133
static int compare_tree(void* cmp_arg __attribute__((unused)),
134
const uchar *s,const uchar *t);
135
static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts);
136
static void check_counts(HUFF_COUNTS *huff_counts,uint trees,
138
static int test_space_compress(HUFF_COUNTS *huff_counts,my_off_t records,
139
uint max_space_length,my_off_t *space_counts,
140
my_off_t tot_space_count,
141
enum en_fieldtype field_type);
142
static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts,uint trees);
143
static int make_huff_tree(HUFF_TREE *tree,HUFF_COUNTS *huff_counts);
144
static int compare_huff_elements(void *not_used, uchar *a,uchar *b);
145
static int save_counts_in_queue(uchar *key,element_count count,
147
static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,uint flag);
148
static uint join_same_trees(HUFF_COUNTS *huff_counts,uint trees);
149
static int make_huff_decode_table(HUFF_TREE *huff_tree,uint trees);
150
static void make_traverse_code_tree(HUFF_TREE *huff_tree,
151
HUFF_ELEMENT *element,uint size,
153
static int write_header(PACK_MRG_INFO *isam_file, uint header_length,uint trees,
154
my_off_t tot_elements,my_off_t filelength);
155
static void write_field_info(HUFF_COUNTS *counts, uint fields,uint trees);
156
static my_off_t write_huff_tree(HUFF_TREE *huff_tree,uint trees);
157
static uint *make_offset_code_tree(HUFF_TREE *huff_tree,
158
HUFF_ELEMENT *element,
160
static uint max_bit(uint value);
161
static int compress_maria_file(PACK_MRG_INFO *file,HUFF_COUNTS *huff_counts);
162
static char *make_new_name(char *new_name,char *old_name);
163
static char *make_old_name(char *new_name,char *old_name);
164
static void init_file_buffer(File file,pbool read_buffer);
165
static int flush_buffer(ulong neaded_length);
166
static void end_file_buffer(void);
167
static void write_bits(ulonglong value, uint bits);
168
static void flush_bits(void);
169
static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg,
170
my_off_t new_length, ha_checksum crc);
171
static int save_state_mrg(File file,PACK_MRG_INFO *isam_file,
172
my_off_t new_length, ha_checksum crc);
173
static int mrg_close(PACK_MRG_INFO *mrg);
174
static int mrg_rrnd(PACK_MRG_INFO *info,uchar *buf);
175
static void mrg_reset(PACK_MRG_INFO *mrg);
176
#if !defined(DBUG_OFF)
177
static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count);
178
static int fakecmp(my_off_t **count1, my_off_t **count2);
182
static int error_on_write=0,test_only=0,verbose=0,silent=0,
183
write_loop=0,force_pack=0, isamchk_neaded=0;
184
static int tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL;
185
static my_bool backup, opt_wait;
187
tree_buff_length is somewhat arbitrary. The bigger it is the better
188
the chance to win in terms of compression factor. On the other hand,
189
this table becomes part of the compressed file header. And its length
190
is coded with 16 bits in the header. Hence the limit is 2**16 - 1.
192
static uint tree_buff_length= 65536 - MALLOC_OVERHEAD;
193
static char tmp_dir[FN_REFLEN]={0},*join_table;
194
static my_off_t intervall_length;
195
static ha_checksum glob_crc;
196
static struct st_file_buffer file_buffer;
198
static HUFF_COUNTS *global_count;
199
static char zero_string[]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
200
static const char *load_default_groups[]= { "mariapack",0 };
202
/* The main program */
204
int main(int argc, char **argv)
211
load_defaults("my",load_default_groups,&argc,&argv);
213
get_options(&argc,&argv);
216
error=ok=isamchk_neaded=0;
218
{ /* Join files into one */
219
if (open_maria_files(&merge,argv,(uint) argc) ||
220
compress(&merge,join_table))
226
if (!(isam_file=open_maria_file(*argv++,O_RDWR)))
230
merge.file= &isam_file;
234
if (compress(&merge,0))
240
if (ok && isamchk_neaded && !silent)
241
puts("Remember to run maria_chk -rq on compressed tables");
242
(void)(fflush(stdout));
243
(void)(fflush(stderr));
244
free_defaults(default_argv);
246
my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
249
return 0; /* No compiler warning */
253
enum options_mp {OPT_CHARSETS_DIR_MP=256, OPT_AUTO_CLOSE};
255
static struct my_option my_long_options[] =
258
{"autoclose", OPT_AUTO_CLOSE, "Auto close the screen on exit for Netware.",
259
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
261
{"backup", 'b', "Make a backup of the table as table_name.OLD.",
262
(uchar**) &backup, (uchar**) &backup, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
263
{"character-sets-dir", OPT_CHARSETS_DIR_MP,
264
"Directory where character sets are.", (uchar**) &charsets_dir,
265
(uchar**) &charsets_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
266
{"debug", '#', "Output debug log. Often this is 'd:t:o,filename'.",
267
0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
269
"Force packing of table even if it gets bigger or if tempfile exists.",
270
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
272
"Join all given tables into 'new_table_name'. All tables MUST have identical layouts.",
273
(uchar**) &join_table, (uchar**) &join_table, 0, GET_STR, REQUIRED_ARG, 0, 0, 0,
275
{"help", '?', "Display this help and exit.",
276
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
277
{"silent", 's', "Be more silent.",
278
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
279
{"tmpdir", 'T', "Use temporary directory to store temporary table.",
280
0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
281
{"test", 't', "Don't pack table, only test packing it.",
282
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
283
{"verbose", 'v', "Write info about progress and packing result. Use many -v for more verbosity!",
284
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
285
{"version", 'V', "Output version information and exit.",
286
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
287
{"wait", 'w', "Wait and retry if table is in use.", (uchar**) &opt_wait,
288
(uchar**) &opt_wait, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
289
{ 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
292
#include <help_start.h>
294
static void print_version(void)
296
(void)(printf("%s Ver 1.0 for %s on %s\n",
297
my_progname, SYSTEM_TYPE, MACHINE_TYPE));
298
NETWARE_SET_SCREEN_MODE(1);
302
static void usage(void)
305
puts("Copyright (C) 2002 MySQL AB");
306
puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,");
307
puts("and you are welcome to modify and redistribute it under the GPL license\n");
309
puts("Pack a MARIA-table to take much less space.");
310
puts("Keys are not updated, you must run maria_chk -rq on the index (.MAI) file");
311
puts("afterwards to update the keys.");
312
puts("You should give the .MAI file as the filename argument.");
313
puts("To unpack a packed table, run maria_chk -u on the table");
315
(void)(printf("\nUsage: %s [OPTIONS] filename...\n", my_progname));
316
my_print_help(my_long_options);
317
print_defaults("my", load_default_groups);
318
my_print_variables(my_long_options);
321
#include <help_end.h>
324
get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
332
setscreenmode(SCR_AUTOCLOSE_ON_EXIT);
337
tmpfile_createflag= O_RDWR | O_TRUNC;
340
write_loop= verbose= 0;
345
/* Avoid to reset 'verbose' if it was already set > 1. */
350
length= (uint) (strmov(tmp_dir, argument) - tmp_dir);
351
if (length != dirname_length(tmp_dir))
353
tmp_dir[length]=FN_LIBCHAR;
358
verbose++; /* Allow for selecting the level of verbosity. */
362
DBUG_PUSH(argument ? argument : "d:t:o,/tmp/maria_pack.trace");
376
/* Initiates DEBUG - but no debugging here ! */
378
static void get_options(int *argc,char ***argv)
382
my_progname= argv[0][0];
383
if (isatty(fileno(stdout)))
386
if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option)))
396
backup=0; /* Not needed */
403
static MARIA_HA *open_maria_file(char *name,int mode)
407
DBUG_ENTER("open_maria_file");
409
if (!(isam_file=maria_open(name, mode, HA_OPEN_IGNORE_MOVED_STATE |
410
(opt_wait ? HA_OPEN_WAIT_IF_LOCKED :
411
HA_OPEN_ABORT_IF_LOCKED))))
413
(void)(fprintf(stderr, "%s gave error %d on open\n", name, my_errno));
417
if (share->options & HA_OPTION_COMPRESS_RECORD && !join_table)
421
(void)(fprintf(stderr, "%s is already compressed\n", name));
422
(void)(maria_close(isam_file));
426
puts("Recompressing already compressed table");
427
share->options&= ~HA_OPTION_READ_ONLY_DATA; /* We are modifing it */
429
if (! force_pack && share->state.state.records != 0 &&
430
(share->state.state.records <= 1 ||
431
share->state.state.data_file_length < 1024))
433
(void)(fprintf(stderr, "%s is too small to compress\n", name));
434
(void)(maria_close(isam_file));
437
(void)(maria_lock_database(isam_file,F_WRLCK));
438
maria_ignore_trids(isam_file);
439
DBUG_RETURN(isam_file);
443
static my_bool open_maria_files(PACK_MRG_INFO *mrg,char **names,uint count)
448
mrg->file=(MARIA_HA**) my_malloc(sizeof(MARIA_HA*)*count,MYF(MY_FAE));
450
mrg->src_file_has_indexes_disabled= 0;
451
for (i=0; i < count ; i++)
453
if (!(mrg->file[i]=open_maria_file(names[i],O_RDONLY)))
456
mrg->src_file_has_indexes_disabled|=
457
! maria_is_all_keys_active(mrg->file[i]->s->state.key_map,
458
mrg->file[i]->s->base.keys);
460
/* Check that files are identical */
461
for (j=0 ; j < count-1 ; j++)
463
MARIA_COLUMNDEF *m1,*m2,*end;
464
if (mrg->file[j]->s->base.reclength != mrg->file[j+1]->s->base.reclength ||
465
mrg->file[j]->s->base.fields != mrg->file[j+1]->s->base.fields)
467
m1=mrg->file[j]->s->columndef;
468
end=m1+mrg->file[j]->s->base.fields;
469
m2=mrg->file[j+1]->s->columndef;
470
for ( ; m1 != end ; m1++,m2++)
472
if (m1->type != m2->type || m1->length != m2->length)
480
(void)(fprintf(stderr, "%s: Tables '%s' and '%s' are not identical\n",
481
my_progname, names[j], names[j+1]));
484
maria_close(mrg->file[i]);
485
my_free((uchar*) mrg->file,MYF(0));
490
static int compress(PACK_MRG_INFO *mrg,char *result_table)
493
File new_file,join_maria_file;
496
char org_name[FN_REFLEN],new_name[FN_REFLEN],temp_name[FN_REFLEN];
497
uint i,header_length,fields,trees,used_trees;
498
my_off_t old_length,new_length,tot_elements;
499
HUFF_COUNTS *huff_counts;
500
HUFF_TREE *huff_trees;
501
DBUG_ENTER("compress");
503
isam_file=mrg->file[0]; /* Take this as an example */
505
new_file=join_maria_file= -1;
509
maria_block_size= isam_file->s->block_size;
511
/* Create temporary or join file */
513
(void)(fn_format(org_name,isam_file->s->open_file_name,"",MARIA_NAME_DEXT,
516
(void)(fn_format(org_name,isam_file->s->open_file_name,"",MARIA_NAME_DEXT,
519
if (init_pagecache(maria_pagecache, MARIA_MIN_PAGE_CACHE_SIZE, 0, 0,
520
maria_block_size, MY_WME) == 0)
522
fprintf(stderr, "Can't initialize page cache\n");
526
if (!test_only && result_table)
528
/* Make a new indexfile based on first file in list */
531
strmov(org_name,result_table); /* Fix error messages */
532
(void)(fn_format(new_name,result_table,"",MARIA_NAME_IEXT,2));
533
if ((join_maria_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME)))
536
length=(uint) share->base.keystart;
537
if (!(buff= (uchar*) my_malloc(length,MYF(MY_WME))))
539
if (my_pread(share->kfile.file, buff, length, 0L, MYF(MY_WME | MY_NABP)) ||
540
my_write(join_maria_file,buff,length,
541
MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
543
my_free(buff,MYF(0));
546
my_free(buff,MYF(0));
547
(void)(fn_format(new_name,result_table,"",MARIA_NAME_DEXT,2));
549
else if (!tmp_dir[0])
550
(void)(make_new_name(new_name,org_name));
552
(void)(fn_format(new_name,org_name,tmp_dir,DATA_TMP_EXT,1+2+4));
554
(new_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME))) < 0)
557
/* Start calculating statistics */
560
for (i=0 ; i < mrg->count ; i++)
561
mrg->records+=mrg->file[i]->s->state.state.records;
563
DBUG_PRINT("info", ("Compressing %s: (%lu records)",
564
result_table ? new_name : org_name,
565
(ulong) mrg->records));
566
if (write_loop || verbose)
568
(void)(printf("Compressing %s: (%lu records)\n",
569
result_table ? new_name : org_name, (ulong) mrg->records));
571
trees=fields=share->base.fields;
572
huff_counts=init_huff_count(isam_file,mrg->records);
576
Read the whole data file(s) for statistics.
578
DBUG_PRINT("info", ("- Calculating statistics"));
579
if (write_loop || verbose)
580
(void)(printf("- Calculating statistics\n"));
581
if (get_statistic(mrg,huff_counts))
585
for (i=0; i < mrg->count ; i++)
586
old_length+= (mrg->file[i]->s->state.state.data_file_length -
587
mrg->file[i]->s->state.state.empty);
590
Create a global priority queue in preparation for making
591
temporary Huffman trees.
593
if (init_queue(&queue,256,0,0,compare_huff_elements,0))
597
Check each column if we should use pre-space-compress, end-space-
598
compress, empty-field-compress or zero-field-compress.
600
check_counts(huff_counts,fields,mrg->records);
603
Build a Huffman tree for each column.
605
huff_trees=make_huff_trees(huff_counts,trees);
608
If the packed lengths of combined columns is less then the sum of
609
the non-combined columns, then create common Huffman trees for them.
610
We do this only for uchar compressed columns, not for distinct values
613
if ((int) (used_trees=join_same_trees(huff_counts,trees)) < 0)
617
Assign codes to all uchar or column values.
619
if (make_huff_decode_table(huff_trees,fields))
622
/* Prepare a file buffer. */
623
init_file_buffer(new_file,0);
626
Reserve space in the target file for the fixed compressed file header.
628
file_buffer.pos_in_file=HEAD_LENGTH;
630
(void)(my_seek(new_file,file_buffer.pos_in_file,MY_SEEK_SET,MYF(0)));
633
Write field infos: field type, pack type, length bits, tree number.
635
write_field_info(huff_counts,fields,used_trees);
640
if (!(tot_elements=write_huff_tree(huff_trees,trees)))
644
Calculate the total length of the compression info header.
645
This includes the fixed compressed file header, the column compression
646
type descriptions, and the decode trees.
648
header_length=(uint) file_buffer.pos_in_file+
649
(uint) (file_buffer.pos-file_buffer.buffer);
652
Compress the source file into the target file.
654
DBUG_PRINT("info", ("- Compressing file"));
655
if (write_loop || verbose)
656
(void)(printf("- Compressing file\n"));
657
error=compress_maria_file(mrg,huff_counts);
658
new_length=file_buffer.pos_in_file;
659
if (!error && !test_only)
661
uchar buff[MEMMAP_EXTRA_MARGIN]; /* End marginal for memmap */
662
bzero(buff,sizeof(buff));
663
error=my_write(file_buffer.file,buff,sizeof(buff),
664
MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0;
668
Write the fixed compressed file header.
671
error=write_header(mrg,header_length,used_trees,tot_elements,
674
/* Flush the file buffer. */
677
/* Display statistics. */
678
DBUG_PRINT("info", ("Min record length: %6d Max length: %6d "
679
"Mean total length: %6ld",
680
mrg->min_pack_length, mrg->max_pack_length,
681
(ulong) (mrg->records ? (new_length/mrg->records) : 0)));
682
if (verbose && mrg->records)
683
(void)(printf("Min record length: %6d Max length: %6d "
684
"Mean total length: %6ld\n", mrg->min_pack_length,
685
mrg->max_pack_length, (ulong) (new_length/mrg->records)));
687
/* Close source and target file. */
690
error|=my_close(new_file,MYF(MY_WME));
693
error|=my_close(isam_file->dfile.file, MYF(MY_WME));
694
isam_file->dfile.file= -1; /* Tell maria_close file is closed */
695
isam_file->s->bitmap.file.file= -1;
700
free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields);
701
if (! test_only && ! error)
705
error=save_state_mrg(join_maria_file,mrg,new_length,glob_crc);
711
if (my_rename(org_name,make_old_name(temp_name,
712
isam_file->s->open_file_name),
718
error=my_copy(new_name,org_name,MYF(MY_WME));
720
error=my_rename(new_name,org_name,MYF(MY_WME));
723
(void)(my_copystat(temp_name,org_name,MYF(MY_COPYTIME)));
725
(void)(my_delete(new_name,MYF(MY_WME)));
733
error=my_copy(new_name,org_name,
734
MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_COPYTIME));
736
(void)(my_delete(new_name,MYF(MY_WME)));
739
error=my_redel(org_name,new_name,MYF(MY_WME | MY_COPYTIME));
742
error=save_state(isam_file,mrg,new_length,glob_crc);
745
error|=mrg_close(mrg);
746
if (join_maria_file >= 0)
747
error|=my_close(join_maria_file,MYF(MY_WME));
750
(void)(fprintf(stderr, "Aborting: %s is not compressed\n", org_name));
751
(void)(my_delete(new_name,MYF(MY_WME)));
754
if (write_loop || verbose)
757
(void)(printf("%.4g%% \n",
758
(((longlong) (old_length - new_length)) * 100.0 /
759
(longlong) old_length)));
761
puts("Empty file saved in compressed format");
766
end_pagecache(maria_pagecache, 1);
767
free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields);
769
(void)(my_close(new_file,MYF(0)));
770
if (join_maria_file >= 0)
771
(void)(my_close(join_maria_file,MYF(0)));
773
(void)(fprintf(stderr, "Aborted: %s is not compressed\n", org_name));
777
/* Init a huff_count-struct for each field and init it */
779
static HUFF_COUNTS *init_huff_count(MARIA_HA *info,my_off_t records)
782
reg1 HUFF_COUNTS *count;
783
if ((count = (HUFF_COUNTS*) my_malloc(info->s->base.fields*
785
MYF(MY_ZEROFILL | MY_WME))))
787
for (i=0 ; i < info->s->base.fields ; i++)
789
enum en_fieldtype type;
790
count[i].field_length=info->s->columndef[i].length;
791
type= count[i].field_type= (enum en_fieldtype) info->s->columndef[i].type;
792
if (type == FIELD_INTERVALL ||
793
type == FIELD_CONSTANT ||
796
if (count[i].field_length <= 8 &&
797
(type == FIELD_NORMAL ||
798
type == FIELD_SKIP_ZERO))
799
count[i].max_zero_fill= count[i].field_length;
801
For every column initialize a tree, which is used to detect distinct
802
column values. 'int_tree' works together with 'tree_buff' and
803
'tree_pos'. It's keys are implemented by pointers into 'tree_buff'.
804
This is accomplished by '-1' as the element size.
806
init_tree(&count[i].int_tree,0,0,-1,(qsort_cmp2) compare_tree,0, NULL,
808
if (records && type != FIELD_BLOB && type != FIELD_VARCHAR)
809
count[i].tree_pos=count[i].tree_buff =
810
my_malloc(count[i].field_length > 1 ? tree_buff_length : 2,
818
/* Free memory used by counts and trees */
820
static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees, uint trees,
821
HUFF_COUNTS *huff_counts,
828
for (i=0 ; i < trees ; i++)
830
if (huff_trees[i].element_buffer)
831
my_free((uchar*) huff_trees[i].element_buffer,MYF(0));
832
if (huff_trees[i].code)
833
my_free((uchar*) huff_trees[i].code,MYF(0));
835
my_free((uchar*) huff_trees,MYF(0));
839
for (i=0 ; i < fields ; i++)
841
if (huff_counts[i].tree_buff)
843
my_free((uchar*) huff_counts[i].tree_buff,MYF(0));
844
delete_tree(&huff_counts[i].int_tree);
847
my_free((uchar*) huff_counts,MYF(0));
849
delete_queue(&queue); /* This is safe to free */
853
/* Read through old file and gather some statistics */
855
static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
858
uint length, null_bytes;
859
ulong reclength,max_blob_length;
860
uchar *record,*pos,*next_pos,*end_pos,*start_pos;
861
ha_rows record_count;
862
HUFF_COUNTS *count,*end_count;
863
TREE_ELEMENT *element;
864
ha_checksum(*calc_checksum)(MARIA_HA *, const uchar *);
865
DBUG_ENTER("get_statistic");
867
reclength= mrg->file[0]->s->base.reclength;
868
null_bytes= mrg->file[0]->s->base.null_bytes;
869
record=(uchar*) my_alloca(reclength);
870
end_count=huff_counts+mrg->file[0]->s->base.fields;
871
record_count=0; glob_crc=0;
874
/* Check how to calculate checksum */
875
if (mrg->file[0]->s->data_file_type == STATIC_RECORD)
876
calc_checksum= _ma_static_checksum;
878
calc_checksum= _ma_checksum;
881
while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE)
883
ulong tot_blob_length=0;
886
/* glob_crc is a checksum over all bytes of all records. */
887
glob_crc+= (*calc_checksum)(mrg->file[0],record);
889
/* Count the incidence of values separately for every column. */
890
for (pos=record + null_bytes, count=huff_counts ;
895
next_pos=end_pos=(start_pos=pos)+count->field_length;
898
Put the whole column value in a tree if there is room for it.
899
'int_tree' is used to quickly check for duplicate values.
900
'tree_buff' collects as many distinct column values as
901
possible. If the field length is > 1, it is tree_buff_length,
902
else 2 bytes. Each value is 'field_length' bytes big. If there
903
are more distinct column values than fit into the buffer, we
904
give up with this tree. BLOBs and VARCHARs do not have a
905
tree_buff as it can only be used with fixed length columns.
906
For the special case of field length == 1, we handle only the
907
case that there is only one distinct value in the table(s).
908
Otherwise, we can have a maximum of 256 distinct values. This
909
is then handled by the normal Huffman tree build.
911
Another limit for collecting distinct column values is the
912
number of values itself. Since we would need to build a
913
Huffman tree for the values, we are limited by the 'IS_OFFSET'
914
constant. This constant expresses a bit which is used to
915
determine if a tree element holds a final value or an offset
916
to a child element. Hence, all values and offsets need to be
917
smaller than 'IS_OFFSET'. A tree element is implemented with
918
two integer values, one for the left branch and one for the
919
right branch. For the extreme case that the first element
920
points to the last element, the number of integers in the tree
921
must be less or equal to IS_OFFSET. So the number of elements
922
must be less or equal to IS_OFFSET / 2.
924
WARNING: At first, we insert a pointer into the record buffer
925
as the key for the tree. If we got a new distinct value, which
926
is really inserted into the tree, instead of being counted
927
only, we will copy the column value from the record buffer to
928
'tree_buff' and adjust the key pointer of the tree accordingly.
930
if (count->tree_buff)
933
if (!(element=tree_insert(&count->int_tree,pos, 0,
934
count->int_tree.custom_arg)) ||
935
(element->count == 1 &&
936
(count->tree_buff + tree_buff_length <
937
count->tree_pos + count->field_length)) ||
938
(count->int_tree.elements_in_tree > IS_OFFSET / 2) ||
939
(count->field_length == 1 &&
940
count->int_tree.elements_in_tree > 1))
942
delete_tree(&count->int_tree);
943
my_free(count->tree_buff,MYF(0));
949
If tree_insert() succeeds, it either creates a new element
950
or increments the counter of an existing element.
952
if (element->count == 1)
954
/* Copy the new column value into 'tree_buff'. */
955
memcpy(count->tree_pos,pos,(size_t) count->field_length);
956
/* Adjust the key pointer in the tree. */
957
tree_set_pointer(element,count->tree_pos);
958
/* Point behind the last column value so far. */
959
count->tree_pos+=count->field_length;
964
/* Save character counters and space-counts and zero-field-counts */
965
if (count->field_type == FIELD_NORMAL ||
966
count->field_type == FIELD_SKIP_ENDSPACE)
968
/* Ignore trailing space. */
969
for ( ; end_pos > pos ; end_pos--)
970
if (end_pos[-1] != ' ')
972
/* Empty fields are just counted. Go to the next record. */
975
count->empty_fields++;
976
count->max_zero_fill=0;
980
Count the total of all trailing spaces and the number of
981
short trailing spaces. Remember the longest trailing space.
983
length= (uint) (next_pos-end_pos);
984
count->tot_end_space+=length;
986
count->end_space[length]++;
987
if (count->max_end_space < length)
988
count->max_end_space = length;
991
if (count->field_type == FIELD_NORMAL ||
992
count->field_type == FIELD_SKIP_PRESPACE)
994
/* Ignore leading space. */
995
for (pos=start_pos; pos < end_pos ; pos++)
998
/* Empty fields are just counted. Go to the next record. */
1001
count->empty_fields++;
1002
count->max_zero_fill=0;
1006
Count the total of all leading spaces and the number of
1007
short leading spaces. Remember the longest leading space.
1009
length= (uint) (pos-start_pos);
1010
count->tot_pre_space+=length;
1012
count->pre_space[length]++;
1013
if (count->max_pre_space < length)
1014
count->max_pre_space = length;
1017
/* Calculate pos, end_pos, and max_length for variable length fields. */
1018
if (count->field_type == FIELD_BLOB)
1020
uint field_length=count->field_length -portable_sizeof_char_ptr;
1021
ulong blob_length= _ma_calc_blob_length(field_length, start_pos);
1022
memcpy_fixed((char*) &pos, start_pos+field_length,sizeof(char*));
1023
end_pos=pos+blob_length;
1024
tot_blob_length+=blob_length;
1025
set_if_bigger(count->max_length,blob_length);
1027
else if (count->field_type == FIELD_VARCHAR)
1029
uint pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1);
1030
length= (pack_length == 1 ? (uint) *(uchar*) start_pos :
1031
uint2korr(start_pos));
1032
pos= start_pos+pack_length;
1033
end_pos= pos+length;
1034
set_if_bigger(count->max_length,length);
1037
/* Evaluate 'max_zero_fill' for short fields. */
1038
if (count->field_length <= 8 &&
1039
(count->field_type == FIELD_NORMAL ||
1040
count->field_type == FIELD_SKIP_ZERO))
1043
/* Zero fields are just counted. Go to the next record. */
1044
if (!memcmp((uchar*) start_pos,zero_string,count->field_length))
1046
count->zero_fields++;
1050
max_zero_fill starts with field_length. It is decreased every
1051
time a shorter "zero trailer" is found. It is set to zero when
1052
an empty field is found (see above). This suggests that the
1053
variable should be called 'min_zero_fill'.
1055
for (i =0 ; i < count->max_zero_fill && ! end_pos[-1 - (int) i] ;
1057
if (i < count->max_zero_fill)
1058
count->max_zero_fill=i;
1061
/* Ignore zero fields and check fields. */
1062
if (count->field_type == FIELD_ZERO ||
1063
count->field_type == FIELD_CHECK)
1067
Count the incidence of every uchar value in the
1068
significant field value.
1070
for ( ; pos < end_pos ; pos++)
1071
count->counts[(uchar) *pos]++;
1073
/* Step to next field. */
1076
if (tot_blob_length > max_blob_length)
1077
max_blob_length=tot_blob_length;
1079
if (write_loop && record_count % WRITE_COUNT == 0)
1081
(void)(printf("%lu\r", (ulong) record_count));
1082
(void)(fflush(stdout));
1085
else if (error != HA_ERR_RECORD_DELETED)
1087
(void)(fprintf(stderr, "Got error %d while reading rows\n", error));
1091
/* Step to next record. */
1095
(void)(printf(" \r"));
1096
(void)(fflush(stdout));
1100
If --debug=d,fakebigcodes is set, fake the counts to get big Huffman
1103
DBUG_EXECUTE_IF("fakebigcodes", fakebigcodes(huff_counts, end_count););
1105
DBUG_PRINT("info", ("Found the following number of incidents "
1106
"of the uchar codes:"));
1108
(void)(printf("Found the following number of incidents "
1109
"of the uchar codes:\n"));
1110
for (count= huff_counts ; count < end_count; count++)
1113
my_off_t total_count;
1116
DBUG_PRINT("info", ("column: %3u", (uint) (count - huff_counts + 1)));
1118
(void)(printf("column: %3u\n", (uint) (count - huff_counts + 1)));
1119
if (count->tree_buff)
1121
DBUG_PRINT("info", ("number of distinct values: %u",
1122
(uint) ((count->tree_pos - count->tree_buff) /
1123
count->field_length)));
1125
(void)(printf("number of distinct values: %u\n",
1126
(uint) ((count->tree_pos - count->tree_buff) /
1127
count->field_length)));
1130
for (idx= 0; idx < 256; idx++)
1132
if (count->counts[idx])
1134
total_count+= count->counts[idx];
1135
DBUG_PRINT("info", ("counts[0x%02x]: %12s", idx,
1136
llstr((longlong) count->counts[idx], llbuf)));
1138
(void)(printf("counts[0x%02x]: %12s\n", idx,
1139
llstr((longlong) count->counts[idx], llbuf)));
1142
DBUG_PRINT("info", ("total: %12s", llstr((longlong) total_count,
1144
if ((verbose >= 2) && total_count)
1146
(void)(printf("total: %12s\n",
1147
llstr((longlong) total_count, llbuf)));
1151
mrg->records=record_count;
1152
mrg->max_blob_length=max_blob_length;
1153
my_afree((uchar*) record);
1154
DBUG_RETURN(error != HA_ERR_END_OF_FILE);
1157
static int compare_huff_elements(void *not_used __attribute__((unused)),
1160
return *((my_off_t*) a) < *((my_off_t*) b) ? -1 :
1161
(*((my_off_t*) a) == *((my_off_t*) b) ? 0 : 1);
1164
/* Check each tree if we should use pre-space-compress, end-space-
1165
compress, empty-field-compress or zero-field-compress */
1167
static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
1170
uint space_fields,fill_zero_fields,field_count[(int) FIELD_enum_val_count];
1171
my_off_t old_length,new_length,length;
1172
DBUG_ENTER("check_counts");
1174
bzero((uchar*) field_count,sizeof(field_count));
1175
space_fields=fill_zero_fields=0;
1177
for (; trees-- ; huff_counts++)
1179
if (huff_counts->field_type == FIELD_BLOB)
1181
huff_counts->length_bits=max_bit(huff_counts->max_length);
1184
else if (huff_counts->field_type == FIELD_VARCHAR)
1186
huff_counts->length_bits=max_bit(huff_counts->max_length);
1189
else if (huff_counts->field_type == FIELD_CHECK)
1191
huff_counts->bytes_packed=0;
1192
huff_counts->counts[0]=0;
1196
huff_counts->field_type=FIELD_NORMAL;
1197
huff_counts->pack_type=0;
1199
/* Check for zero-filled records (in this column), or zero records. */
1200
if (huff_counts->zero_fields || ! records)
1202
my_off_t old_space_count;
1204
If there are only zero filled records (in this column),
1205
or no records at all, we are done.
1207
if (huff_counts->zero_fields == records)
1209
huff_counts->field_type= FIELD_ZERO;
1210
huff_counts->bytes_packed=0;
1211
huff_counts->counts[0]=0;
1214
/* Remeber the number of significant spaces. */
1215
old_space_count=huff_counts->counts[' '];
1216
/* Add all leading and trailing spaces. */
1217
huff_counts->counts[' ']+= (huff_counts->tot_end_space +
1218
huff_counts->tot_pre_space +
1219
huff_counts->empty_fields *
1220
huff_counts->field_length);
1221
/* Check, what the compressed length of this would be. */
1222
old_length=calc_packed_length(huff_counts,0)+records/8;
1223
/* Get the number of zero bytes. */
1224
length=huff_counts->zero_fields*huff_counts->field_length;
1225
/* Add it to the counts. */
1226
huff_counts->counts[0]+=length;
1227
/* Check, what the compressed length of this would be. */
1228
new_length=calc_packed_length(huff_counts,0);
1229
/* If the compression without the zeroes would be shorter, we are done. */
1230
if (old_length < new_length && huff_counts->field_length > 1)
1232
huff_counts->field_type=FIELD_SKIP_ZERO;
1233
huff_counts->counts[0]-=length;
1234
huff_counts->bytes_packed=old_length- records/8;
1237
/* Remove the insignificant spaces, but keep the zeroes. */
1238
huff_counts->counts[' ']=old_space_count;
1240
/* Check, what the compressed length of this column would be. */
1241
huff_counts->bytes_packed=calc_packed_length(huff_counts,0);
1244
If there are enough empty records (in this column),
1245
treating them specially may pay off.
1247
if (huff_counts->empty_fields)
1249
if (huff_counts->field_length > 2 &&
1250
huff_counts->empty_fields + (records - huff_counts->empty_fields)*
1251
(1+max_bit(max(huff_counts->max_pre_space,
1252
huff_counts->max_end_space))) <
1253
records * max_bit(huff_counts->field_length))
1255
huff_counts->pack_type |= PACK_TYPE_SPACE_FIELDS;
1259
length=huff_counts->empty_fields*huff_counts->field_length;
1260
if (huff_counts->tot_end_space || ! huff_counts->tot_pre_space)
1262
huff_counts->tot_end_space+=length;
1263
huff_counts->max_end_space=huff_counts->field_length;
1264
if (huff_counts->field_length < 8)
1265
huff_counts->end_space[huff_counts->field_length]+=
1266
huff_counts->empty_fields;
1268
if (huff_counts->tot_pre_space)
1270
huff_counts->tot_pre_space+=length;
1271
huff_counts->max_pre_space=huff_counts->field_length;
1272
if (huff_counts->field_length < 8)
1273
huff_counts->pre_space[huff_counts->field_length]+=
1274
huff_counts->empty_fields;
1280
If there are enough trailing spaces (in this column),
1281
treating them specially may pay off.
1283
if (huff_counts->tot_end_space)
1285
huff_counts->counts[' ']+=huff_counts->tot_pre_space;
1286
if (test_space_compress(huff_counts,records,huff_counts->max_end_space,
1287
huff_counts->end_space,
1288
huff_counts->tot_end_space,FIELD_SKIP_ENDSPACE))
1290
huff_counts->counts[' ']-=huff_counts->tot_pre_space;
1294
If there are enough leading spaces (in this column),
1295
treating them specially may pay off.
1297
if (huff_counts->tot_pre_space)
1299
if (test_space_compress(huff_counts,records,huff_counts->max_pre_space,
1300
huff_counts->pre_space,
1301
huff_counts->tot_pre_space,FIELD_SKIP_PRESPACE))
1305
found_pack: /* Found field-packing */
1307
/* Test if we can use zero-fill */
1309
if (huff_counts->max_zero_fill &&
1310
(huff_counts->field_type == FIELD_NORMAL ||
1311
huff_counts->field_type == FIELD_SKIP_ZERO))
1313
huff_counts->counts[0]-=huff_counts->max_zero_fill*
1314
(huff_counts->field_type == FIELD_SKIP_ZERO ?
1315
records - huff_counts->zero_fields : records);
1316
huff_counts->pack_type|=PACK_TYPE_ZERO_FILL;
1317
huff_counts->bytes_packed=calc_packed_length(huff_counts,0);
1320
/* Test if intervall-field is better */
1322
if (huff_counts->tree_buff)
1326
DBUG_EXECUTE_IF("forceintervall",
1327
huff_counts->bytes_packed= ~ (my_off_t) 0;);
1328
tree.element_buffer=0;
1329
if (!make_huff_tree(&tree,huff_counts) &&
1330
tree.bytes_packed+tree.tree_pack_length < huff_counts->bytes_packed)
1332
if (tree.elements == 1)
1333
huff_counts->field_type=FIELD_CONSTANT;
1335
huff_counts->field_type=FIELD_INTERVALL;
1336
huff_counts->pack_type=0;
1340
my_free((uchar*) huff_counts->tree_buff,MYF(0));
1341
delete_tree(&huff_counts->int_tree);
1342
huff_counts->tree_buff=0;
1344
if (tree.element_buffer)
1345
my_free((uchar*) tree.element_buffer,MYF(0));
1347
if (huff_counts->pack_type & PACK_TYPE_SPACE_FIELDS)
1349
if (huff_counts->pack_type & PACK_TYPE_ZERO_FILL)
1351
field_count[huff_counts->field_type]++;
1353
DBUG_PRINT("info", ("normal: %3d empty-space: %3d "
1354
"empty-zero: %3d empty-fill: %3d",
1355
field_count[FIELD_NORMAL],space_fields,
1356
field_count[FIELD_SKIP_ZERO],fill_zero_fields));
1357
DBUG_PRINT("info", ("pre-space: %3d end-space: %3d "
1358
"intervall-fields: %3d zero: %3d",
1359
field_count[FIELD_SKIP_PRESPACE],
1360
field_count[FIELD_SKIP_ENDSPACE],
1361
field_count[FIELD_INTERVALL],
1362
field_count[FIELD_ZERO]));
1364
(void)(printf("\nnormal: %3d empty-space: %3d "
1365
"empty-zero: %3d empty-fill: %3d\n"
1366
"pre-space: %3d end-space: %3d "
1367
"intervall-fields: %3d zero: %3d\n",
1368
field_count[FIELD_NORMAL],space_fields,
1369
field_count[FIELD_SKIP_ZERO],fill_zero_fields,
1370
field_count[FIELD_SKIP_PRESPACE],
1371
field_count[FIELD_SKIP_ENDSPACE],
1372
field_count[FIELD_INTERVALL],
1373
field_count[FIELD_ZERO]));
1378
/* Test if we can use space-compression and empty-field-compression */
1381
test_space_compress(HUFF_COUNTS *huff_counts, my_off_t records,
1382
uint max_space_length, my_off_t *space_counts,
1383
my_off_t tot_space_count, enum en_fieldtype field_type)
1387
my_off_t space_count,min_space_count,min_pack,new_length,skip;
1389
length_bits=max_bit(max_space_length);
1391
/* Default no end_space-packing */
1392
space_count=huff_counts->counts[(uint) ' '];
1393
min_space_count= (huff_counts->counts[(uint) ' ']+= tot_space_count);
1394
min_pack=calc_packed_length(huff_counts,0);
1396
huff_counts->counts[(uint) ' ']=space_count;
1398
/* Test with allways space-count */
1399
new_length=huff_counts->bytes_packed+length_bits*records/8;
1400
if (new_length+1 < min_pack)
1403
min_pack=new_length;
1404
min_space_count=space_count;
1406
/* Test with length-flag */
1407
for (skip=0L, i=0 ; i < 8 ; i++)
1409
if (space_counts[i])
1412
huff_counts->counts[(uint) ' ']+=space_counts[i];
1413
skip+=huff_counts->pre_space[i];
1414
new_length=calc_packed_length(huff_counts,0)+
1415
(records+(records-skip)*(1+length_bits))/8;
1416
if (new_length < min_pack)
1419
min_pack=new_length;
1420
min_space_count=huff_counts->counts[(uint) ' '];
1425
huff_counts->counts[(uint) ' ']=min_space_count;
1426
huff_counts->bytes_packed=min_pack;
1429
return(0); /* No space-compress */
1430
case -1: /* Always space-count */
1431
huff_counts->field_type=field_type;
1432
huff_counts->min_space=0;
1433
huff_counts->length_bits=max_bit(max_space_length);
1436
huff_counts->field_type=field_type;
1437
huff_counts->min_space=(uint) min_pos;
1438
huff_counts->pack_type|=PACK_TYPE_SELECTED;
1439
huff_counts->length_bits=max_bit(max_space_length);
1442
return(1); /* Using space-compress */
1446
/* Make a huff_tree of each huff_count */
1448
static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts, uint trees)
1451
HUFF_TREE *huff_tree;
1452
DBUG_ENTER("make_huff_trees");
1454
if (!(huff_tree=(HUFF_TREE*) my_malloc(trees*sizeof(HUFF_TREE),
1455
MYF(MY_WME | MY_ZEROFILL))))
1458
for (tree=0 ; tree < trees ; tree++)
1460
if (make_huff_tree(huff_tree+tree,huff_counts+tree))
1463
my_free((uchar*) huff_tree[tree].element_buffer,MYF(0));
1464
my_free((uchar*) huff_tree,MYF(0));
1468
DBUG_RETURN(huff_tree);
1472
Build a Huffman tree.
1476
huff_tree The Huffman tree.
1477
huff_counts The counts.
1480
Build a Huffman tree according to huff_counts->counts or
1481
huff_counts->tree_buff. tree_buff, if non-NULL contains up to
1482
tree_buff_length of distinct column values. In that case, whole
1483
values can be Huffman encoded instead of single bytes.
1490
static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
1492
uint i,found,bits_packed,first,last;
1493
my_off_t bytes_packed;
1494
HUFF_ELEMENT *a,*b,*new_huff_el;
1497
if (huff_counts->tree_buff)
1499
/* Calculate the number of distinct values in tree_buff. */
1500
found= (uint) (huff_counts->tree_pos - huff_counts->tree_buff) /
1501
huff_counts->field_length;
1502
first=0; last=found-1;
1506
/* Count the number of uchar codes found in the column. */
1507
for (i=found=0 ; i < 256 ; i++)
1509
if (huff_counts->counts[i])
1520
/* When using 'tree_buff' we can have more that 256 values. */
1521
if (queue.max_elements < found)
1523
delete_queue(&queue);
1524
if (init_queue(&queue,found,0,0,compare_huff_elements,0))
1528
/* Allocate or reallocate an element buffer for the Huffman tree. */
1529
if (!huff_tree->element_buffer)
1531
if (!(huff_tree->element_buffer=
1532
(HUFF_ELEMENT*) my_malloc(found*2*sizeof(HUFF_ELEMENT),MYF(MY_WME))))
1539
(HUFF_ELEMENT*) my_realloc((uchar*) huff_tree->element_buffer,
1540
found*2*sizeof(HUFF_ELEMENT),
1543
huff_tree->element_buffer=temp;
1546
huff_counts->tree=huff_tree;
1547
huff_tree->counts=huff_counts;
1548
huff_tree->min_chr=first;
1549
huff_tree->max_chr=last;
1550
huff_tree->char_bits=max_bit(last-first);
1551
huff_tree->offset_bits=max_bit(found-1)+1;
1553
if (huff_counts->tree_buff)
1555
huff_tree->elements=0;
1556
huff_tree->tree_pack_length=(1+15+16+5+5+
1557
(huff_tree->char_bits+1)*found+
1558
(huff_tree->offset_bits+1)*
1560
(uint) (huff_tree->counts->tree_pos-
1561
huff_tree->counts->tree_buff);
1563
Put a HUFF_ELEMENT into the queue for every distinct column value.
1565
tree_walk() calls save_counts_in_queue() for every element in
1566
'int_tree'. This takes elements from the target trees element
1567
buffer and places references to them into the buffer of the
1568
priority queue. We insert in column value order, but the order is
1569
in fact irrelevant here. We will establish the correct order
1572
tree_walk(&huff_counts->int_tree,
1573
(int (*)(void*, element_count,void*)) save_counts_in_queue,
1574
(uchar*) huff_tree, left_root_right);
1578
huff_tree->elements=found;
1579
huff_tree->tree_pack_length=(9+9+5+5+
1580
(huff_tree->char_bits+1)*found+
1581
(huff_tree->offset_bits+1)*
1584
Put a HUFF_ELEMENT into the queue for every uchar code found in the column.
1586
The elements are taken from the target trees element buffer.
1587
Instead of using queue_insert(), we just place references to the
1588
elements into the buffer of the priority queue. We insert in byte
1589
value order, but the order is in fact irrelevant here. We will
1590
establish the correct order later.
1592
for (i=first, found=0 ; i <= last ; i++)
1594
if (huff_counts->counts[i])
1596
new_huff_el=huff_tree->element_buffer+(found++);
1597
new_huff_el->count=huff_counts->counts[i];
1598
new_huff_el->a.leaf.null=0;
1599
new_huff_el->a.leaf.element_nr=i;
1600
queue.root[found]=(uchar*) new_huff_el;
1604
If there is only a single uchar value in this field in all records,
1605
add a second element with zero incidence. This is required to enter
1606
the loop, which builds the Huffman tree.
1610
new_huff_el=huff_tree->element_buffer+(found++);
1611
new_huff_el->count=0;
1612
new_huff_el->a.leaf.null=0;
1614
new_huff_el->a.leaf.element_nr=huff_tree->min_chr=last-1;
1616
new_huff_el->a.leaf.element_nr=huff_tree->max_chr=last+1;
1617
queue.root[found]=(uchar*) new_huff_el;
1621
/* Make a queue from the queue buffer. */
1622
queue.elements=found;
1625
Make a priority queue from the queue. Construct its index so that we
1626
have a partially ordered tree.
1628
for (i=found/2 ; i > 0 ; i--)
1629
_downheap(&queue,i);
1631
/* The Huffman algorithm. */
1632
bytes_packed=0; bits_packed=0;
1633
for (i=1 ; i < found ; i++)
1636
Pop the top element from the queue (the one with the least incidence).
1637
Popping from a priority queue includes a re-ordering of the queue,
1638
to get the next least incidence element to the top.
1640
a=(HUFF_ELEMENT*) queue_remove(&queue,0);
1642
Copy the next least incidence element. The queue implementation
1643
reserves root[0] for temporary purposes. root[1] is the top.
1645
b=(HUFF_ELEMENT*) queue.root[1];
1646
/* Get a new element from the element buffer. */
1647
new_huff_el=huff_tree->element_buffer+found+i;
1648
/* The new element gets the sum of the two least incidence elements. */
1649
new_huff_el->count=a->count+b->count;
1651
The Huffman algorithm assigns another bit to the code for a byte
1652
every time that bytes incidence is combined (directly or indirectly)
1653
to a new element as one of the two least incidence elements.
1654
This means that one more bit per incidence of that uchar is required
1655
in the resulting file. So we add the new combined incidence as the
1656
number of bits by which the result grows.
1658
bits_packed+=(uint) (new_huff_el->count & 7);
1659
bytes_packed+=new_huff_el->count/8;
1660
/* The new element points to its children, lesser in left. */
1661
new_huff_el->a.nod.left=a;
1662
new_huff_el->a.nod.right=b;
1664
Replace the copied top element by the new element and re-order the
1667
queue.root[1]=(uchar*) new_huff_el;
1668
queue_replaced(&queue);
1670
huff_tree->root=(HUFF_ELEMENT*) queue.root[1];
1671
huff_tree->bytes_packed=bytes_packed+(bits_packed+7)/8;
1675
static int compare_tree(void* cmp_arg __attribute__((unused)),
1676
register const uchar *s, register const uchar *t)
1679
for (length=global_count->field_length; length-- ;)
1681
return (int) s[-1] - (int) t[-1];
1686
Organize distinct column values and their incidences into a priority queue.
1689
save_counts_in_queue()
1690
key The column value.
1691
count The incidence of this value.
1692
tree The Huffman tree to be built later.
1695
We use the element buffer of the targeted tree. The distinct column
1696
values are organized in a priority queue first. The Huffman
1697
algorithm will later organize the elements into a Huffman tree. For
1698
the time being, we just place references to the elements into the
1699
queue buffer. The buffer will later be organized into a priority
1706
static int save_counts_in_queue(uchar *key, element_count count,
1709
HUFF_ELEMENT *new_huff_el;
1711
new_huff_el=tree->element_buffer+(tree->elements++);
1712
new_huff_el->count=count;
1713
new_huff_el->a.leaf.null=0;
1714
new_huff_el->a.leaf.element_nr= (uint) (key- tree->counts->tree_buff) /
1715
tree->counts->field_length;
1716
queue.root[tree->elements]=(uchar*) new_huff_el;
1722
Calculate length of file if given counts should be used.
1725
calc_packed_length()
1726
huff_counts The counts for a column of the table(s).
1727
add_tree_lenght If the decode tree length should be added.
1730
We need to follow the Huffman algorithm until we know, how many bits
1731
are required for each uchar code. But we do not need the resulting
1732
Huffman tree. Hence, we can leave out some steps which are essential
1733
in make_huff_tree().
1736
Number of bytes required to compress this table column.
1739
static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,
1740
uint add_tree_lenght)
1742
uint i,found,bits_packed,first,last;
1743
my_off_t bytes_packed;
1744
HUFF_ELEMENT element_buffer[256];
1745
DBUG_ENTER("calc_packed_length");
1748
WARNING: We use a small hack for efficiency: Instead of placing
1749
references to HUFF_ELEMENTs into the queue, we just insert
1750
references to the counts of the uchar codes which appeared in this
1751
table column. During the Huffman algorithm they are successively
1752
replaced by references to HUFF_ELEMENTs. This works, because
1753
HUFF_ELEMENTs have the incidence count at their beginning.
1754
Regardless, wether the queue array contains references to counts of
1755
type my_off_t or references to HUFF_ELEMENTs which have the count of
1756
type my_off_t at their beginning, it always points to a count of the
1759
Instead of using queue_insert(), we just copy the references into
1760
the buffer of the priority queue. We insert in uchar value order, but
1761
the order is in fact irrelevant here. We will establish the correct
1765
for (i=found=0 ; i < 256 ; i++)
1767
if (huff_counts->counts[i])
1772
/* We start with root[1], which is the queues top element. */
1773
queue.root[found]=(uchar*) &huff_counts->counts[i];
1777
DBUG_RETURN(0); /* Empty tree */
1779
If there is only a single uchar value in this field in all records,
1780
add a second element with zero incidence. This is required to enter
1781
the loop, which follows the Huffman algorithm.
1784
queue.root[++found]=(uchar*) &huff_counts->counts[last ? 0 : 1];
1786
/* Make a queue from the queue buffer. */
1787
queue.elements=found;
1789
bytes_packed=0; bits_packed=0;
1790
/* Add the length of the coding table, which would become part of the file. */
1791
if (add_tree_lenght)
1792
bytes_packed=(8+9+5+5+(max_bit(last-first)+1)*found+
1793
(max_bit(found-1)+1+1)*(found-2) +7)/8;
1796
Make a priority queue from the queue. Construct its index so that we
1797
have a partially ordered tree.
1799
for (i=(found+1)/2 ; i > 0 ; i--)
1800
_downheap(&queue,i);
1802
/* The Huffman algorithm. */
1803
for (i=0 ; i < found-1 ; i++)
1807
HUFF_ELEMENT *new_huff_el;
1810
Pop the top element from the queue (the one with the least
1811
incidence). Popping from a priority queue includes a re-ordering
1812
of the queue, to get the next least incidence element to the top.
1814
a= (my_off_t*) queue_remove(&queue, 0);
1816
Copy the next least incidence element. The queue implementation
1817
reserves root[0] for temporary purposes. root[1] is the top.
1819
b= (my_off_t*) queue.root[1];
1820
/* Create a new element in a local (automatic) buffer. */
1821
new_huff_el= element_buffer + i;
1822
/* The new element gets the sum of the two least incidence elements. */
1823
new_huff_el->count= *a + *b;
1825
The Huffman algorithm assigns another bit to the code for a byte
1826
every time that bytes incidence is combined (directly or indirectly)
1827
to a new element as one of the two least incidence elements.
1828
This means that one more bit per incidence of that uchar is required
1829
in the resulting file. So we add the new combined incidence as the
1830
number of bits by which the result grows.
1832
bits_packed+=(uint) (new_huff_el->count & 7);
1833
bytes_packed+=new_huff_el->count/8;
1835
Replace the copied top element by the new element and re-order the
1836
queue. This successively replaces the references to counts by
1837
references to HUFF_ELEMENTs.
1839
queue.root[1]=(uchar*) new_huff_el;
1840
queue_replaced(&queue);
1842
DBUG_RETURN(bytes_packed+(bits_packed+7)/8);
1846
/* Remove trees that don't give any compression */
1848
static uint join_same_trees(HUFF_COUNTS *huff_counts, uint trees)
1851
HUFF_COUNTS count,*i,*j,*last_count;
1853
last_count=huff_counts+trees;
1854
for (tree_number=0, i=huff_counts ; i < last_count ; i++)
1856
if (!i->tree->tree_number)
1858
i->tree->tree_number= ++tree_number;
1860
continue; /* Don't join intervall */
1861
for (j=i+1 ; j < last_count ; j++)
1863
if (! j->tree->tree_number && ! j->tree_buff)
1865
for (k=0 ; k < 256 ; k++)
1866
count.counts[k]=i->counts[k]+j->counts[k];
1867
if (calc_packed_length(&count,1) <=
1868
i->tree->bytes_packed + j->tree->bytes_packed+
1869
i->tree->tree_pack_length+j->tree->tree_pack_length+
1872
memcpy_fixed((uchar*) i->counts,(uchar*) count.counts,
1873
sizeof(count.counts[0])*256);
1874
my_free((uchar*) j->tree->element_buffer,MYF(0));
1875
j->tree->element_buffer=0;
1877
bmove((uchar*) i->counts,(uchar*) count.counts,
1878
sizeof(count.counts[0])*256);
1879
if (make_huff_tree(i->tree,i))
1886
DBUG_PRINT("info", ("Original trees: %d After join: %d",
1887
trees, tree_number));
1889
(void)(printf("Original trees: %d After join: %d\n", trees, tree_number));
1890
return tree_number; /* Return trees left */
1895
Fill in huff_tree encode tables.
1898
make_huff_decode_table()
1899
huff_tree An array of HUFF_TREE which are to be encoded.
1900
trees The number of HUFF_TREE in the array.
1907
static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees)
1910
for ( ; trees-- ; huff_tree++)
1912
if (huff_tree->tree_number > 0)
1914
elements=huff_tree->counts->tree_buff ? huff_tree->elements : 256;
1915
if (!(huff_tree->code =
1916
(ulonglong*) my_malloc(elements*
1917
(sizeof(ulonglong) + sizeof(uchar)),
1918
MYF(MY_WME | MY_ZEROFILL))))
1920
huff_tree->code_len=(uchar*) (huff_tree->code+elements);
1921
make_traverse_code_tree(huff_tree, huff_tree->root,
1922
8 * sizeof(ulonglong), LL(0));
1929
static void make_traverse_code_tree(HUFF_TREE *huff_tree,
1930
HUFF_ELEMENT *element,
1931
uint size, ulonglong code)
1934
if (!element->a.leaf.null)
1936
chr=element->a.leaf.element_nr;
1937
huff_tree->code_len[chr]= (uchar) (8 * sizeof(ulonglong) - size);
1938
huff_tree->code[chr]= (code >> size);
1939
if (huff_tree->height < 8 * sizeof(ulonglong) - size)
1940
huff_tree->height= 8 * sizeof(ulonglong) - size;
1945
make_traverse_code_tree(huff_tree,element->a.nod.left,size,code);
1946
make_traverse_code_tree(huff_tree, element->a.nod.right, size,
1947
code + (((ulonglong) 1) << size));
1954
Convert a value into binary digits.
1959
length The number of low order bits to convert.
1962
The result string is in static storage. It is reused on every call.
1963
So you cannot use it twice in one expression.
1966
A pointer to a static NUL-terminated string.
1969
static char *bindigits(ulonglong value, uint bits)
1971
static char digits[72];
1975
DBUG_ASSERT(idx < sizeof(digits));
1977
*(ptr++)= '0' + ((char) (value >> (--idx)) & (char) 1);
1984
Convert a value into hexadecimal digits.
1991
The result string is in static storage. It is reused on every call.
1992
So you cannot use it twice in one expression.
1995
A pointer to a static NUL-terminated string.
1998
static char *hexdigits(ulonglong value)
2000
static char digits[20];
2002
uint idx= 2 * sizeof(value); /* Two hex digits per byte. */
2004
DBUG_ASSERT(idx < sizeof(digits));
2007
if ((*(ptr++)= '0' + ((char) (value >> (4 * (--idx))) & (char) 0xf)) > '9')
2008
*(ptr - 1)+= 'a' - '9' - 1;
2015
/* Write header to new packed data file */
2017
static int write_header(PACK_MRG_INFO *mrg,uint head_length,uint trees,
2018
my_off_t tot_elements,my_off_t filelength)
2020
uchar *buff= (uchar*) file_buffer.pos;
2022
bzero(buff,HEAD_LENGTH);
2023
memcpy_fixed(buff,maria_pack_file_magic,4);
2024
int4store(buff+4,head_length);
2025
int4store(buff+8, mrg->min_pack_length);
2026
int4store(buff+12,mrg->max_pack_length);
2027
int4store(buff+16,tot_elements);
2028
int4store(buff+20,intervall_length);
2029
int2store(buff+24,trees);
2030
buff[26]=(char) mrg->ref_length;
2031
/* Save record pointer length */
2032
buff[27]= (uchar) maria_get_pointer_length((ulonglong) filelength,2);
2035
(void)(my_seek(file_buffer.file,0L,MY_SEEK_SET,MYF(0)));
2036
return my_write(file_buffer.file,(const uchar *) file_buffer.pos,HEAD_LENGTH,
2037
MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0;
2040
/* Write fieldinfo to new packed file */
2042
static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees)
2045
uint huff_tree_bits;
2046
huff_tree_bits=max_bit(trees ? trees-1 : 0);
2048
DBUG_PRINT("info", (" "));
2049
DBUG_PRINT("info", ("column types:"));
2050
DBUG_PRINT("info", ("FIELD_NORMAL 0"));
2051
DBUG_PRINT("info", ("FIELD_SKIP_ENDSPACE 1"));
2052
DBUG_PRINT("info", ("FIELD_SKIP_PRESPACE 2"));
2053
DBUG_PRINT("info", ("FIELD_SKIP_ZERO 3"));
2054
DBUG_PRINT("info", ("FIELD_BLOB 4"));
2055
DBUG_PRINT("info", ("FIELD_CONSTANT 5"));
2056
DBUG_PRINT("info", ("FIELD_INTERVALL 6"));
2057
DBUG_PRINT("info", ("FIELD_ZERO 7"));
2058
DBUG_PRINT("info", ("FIELD_VARCHAR 8"));
2059
DBUG_PRINT("info", ("FIELD_CHECK 9"));
2060
DBUG_PRINT("info", (" "));
2061
DBUG_PRINT("info", ("pack type as a set of flags:"));
2062
DBUG_PRINT("info", ("PACK_TYPE_SELECTED 1"));
2063
DBUG_PRINT("info", ("PACK_TYPE_SPACE_FIELDS 2"));
2064
DBUG_PRINT("info", ("PACK_TYPE_ZERO_FILL 4"));
2065
DBUG_PRINT("info", (" "));
2068
(void)(printf("\n"));
2069
(void)(printf("column types:\n"));
2070
(void)(printf("FIELD_NORMAL 0\n"));
2071
(void)(printf("FIELD_SKIP_ENDSPACE 1\n"));
2072
(void)(printf("FIELD_SKIP_PRESPACE 2\n"));
2073
(void)(printf("FIELD_SKIP_ZERO 3\n"));
2074
(void)(printf("FIELD_BLOB 4\n"));
2075
(void)(printf("FIELD_CONSTANT 5\n"));
2076
(void)(printf("FIELD_INTERVALL 6\n"));
2077
(void)(printf("FIELD_ZERO 7\n"));
2078
(void)(printf("FIELD_VARCHAR 8\n"));
2079
(void)(printf("FIELD_CHECK 9\n"));
2080
(void)(printf("\n"));
2081
(void)(printf("pack type as a set of flags:\n"));
2082
(void)(printf("PACK_TYPE_SELECTED 1\n"));
2083
(void)(printf("PACK_TYPE_SPACE_FIELDS 2\n"));
2084
(void)(printf("PACK_TYPE_ZERO_FILL 4\n"));
2085
(void)(printf("\n"));
2087
for (i=0 ; i++ < fields ; counts++)
2089
write_bits((ulonglong) (int) counts->field_type, 5);
2090
write_bits(counts->pack_type,6);
2091
if (counts->pack_type & PACK_TYPE_ZERO_FILL)
2092
write_bits(counts->max_zero_fill,5);
2094
write_bits(counts->length_bits,5);
2095
write_bits((ulonglong) counts->tree->tree_number - 1, huff_tree_bits);
2096
DBUG_PRINT("info", ("column: %3u type: %2u pack: %2u zero: %4u "
2097
"lbits: %2u tree: %2u length: %4u",
2098
i , counts->field_type, counts->pack_type,
2099
counts->max_zero_fill, counts->length_bits,
2100
counts->tree->tree_number, counts->field_length));
2102
(void)(printf("column: %3u type: %2u pack: %2u zero: %4u lbits: %2u "
2103
"tree: %2u length: %4u\n", i , counts->field_type,
2104
counts->pack_type, counts->max_zero_fill, counts->length_bits,
2105
counts->tree->tree_number, counts->field_length));
2111
/* Write all huff_trees to new datafile. Return tot count of
2112
elements in all trees
2113
Returns 0 on error */
2115
static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees)
2121
uint *packed_tree,*offset,length;
2124
/* Find the highest number of elements in the trees. */
2125
for (i=length=0 ; i < trees ; i++)
2126
if (huff_tree[i].tree_number > 0 && huff_tree[i].elements > length)
2127
length=huff_tree[i].elements;
2129
Allocate a buffer for packing a decode tree. Two numbers per element
2130
(left child and right child).
2132
if (!(packed_tree=(uint*) my_alloca(sizeof(uint)*length*2)))
2134
my_error(EE_OUTOFMEMORY,MYF(ME_BELL),sizeof(uint)*length*2);
2138
DBUG_PRINT("info", (" "));
2140
(void)(printf("\n"));
2143
for (elements=0; trees-- ; huff_tree++)
2145
/* Skip columns that have been joined with other columns. */
2146
if (huff_tree->tree_number == 0)
2147
continue; /* Deleted tree */
2149
DBUG_PRINT("info", (" "));
2151
(void)(printf("\n"));
2152
/* Count the total number of elements (byte codes or column values). */
2153
elements+=huff_tree->elements;
2154
huff_tree->max_offset=2;
2155
/* Build a tree of offsets and codes for decoding in 'packed_tree'. */
2156
if (huff_tree->elements <= 1)
2159
offset=make_offset_code_tree(huff_tree,huff_tree->root,packed_tree);
2161
/* This should be the same as 'length' above. */
2162
huff_tree->offset_bits=max_bit(huff_tree->max_offset);
2165
Since we check this during collecting the distinct column values,
2166
this should never happen.
2168
if (huff_tree->max_offset >= IS_OFFSET)
2169
{ /* This should be impossible */
2170
(void)(fprintf(stderr, "Tree offset got too big: %d, aborted\n",
2171
huff_tree->max_offset));
2172
my_afree((uchar*) packed_tree);
2176
DBUG_PRINT("info", ("pos: %lu elements: %u tree-elements: %lu "
2178
(ulong) (file_buffer.pos - file_buffer.buffer),
2179
huff_tree->elements, (ulong) (offset - packed_tree),
2180
huff_tree->char_bits));
2181
if (!huff_tree->counts->tree_buff)
2183
/* We do a uchar compression on this column. Mark with bit 0. */
2185
write_bits(huff_tree->min_chr,8);
2186
write_bits(huff_tree->elements,9);
2187
write_bits(huff_tree->char_bits,5);
2188
write_bits(huff_tree->offset_bits,5);
2193
int_length=(uint) (huff_tree->counts->tree_pos -
2194
huff_tree->counts->tree_buff);
2195
/* We have distinct column values for this column. Mark with bit 1. */
2197
write_bits(huff_tree->elements,15);
2198
write_bits(int_length,16);
2199
write_bits(huff_tree->char_bits,5);
2200
write_bits(huff_tree->offset_bits,5);
2201
intervall_length+=int_length;
2203
DBUG_PRINT("info", ("tree: %2u elements: %4u char_bits: %2u "
2204
"offset_bits: %2u %s: %5u codelen: %2u",
2205
tree_no, huff_tree->elements, huff_tree->char_bits,
2206
huff_tree->offset_bits, huff_tree->counts->tree_buff ?
2207
"bufflen" : "min_chr", huff_tree->counts->tree_buff ?
2208
int_length : huff_tree->min_chr, huff_tree->height));
2210
(void)(printf("tree: %2u elements: %4u char_bits: %2u offset_bits: %2u "
2211
"%s: %5u codelen: %2u\n", tree_no, huff_tree->elements,
2212
huff_tree->char_bits, huff_tree->offset_bits,
2213
huff_tree->counts->tree_buff ? "bufflen" : "min_chr",
2214
huff_tree->counts->tree_buff ? int_length :
2215
huff_tree->min_chr, huff_tree->height));
2217
/* Check that the code tree length matches the element count. */
2218
length=(uint) (offset-packed_tree);
2219
if (length != huff_tree->elements*2-2)
2221
(void)(fprintf(stderr, "error: Huff-tree-length: %d != calc_length: %d\n",
2222
length, huff_tree->elements * 2 - 2));
2227
for (i=0 ; i < length ; i++)
2229
if (packed_tree[i] & IS_OFFSET)
2230
write_bits(packed_tree[i] - IS_OFFSET+ (1 << huff_tree->offset_bits),
2231
huff_tree->offset_bits+1);
2233
write_bits(packed_tree[i]-huff_tree->min_chr,huff_tree->char_bits+1);
2234
DBUG_PRINT("info", ("tree[0x%04x]: %s0x%04x",
2235
i, (packed_tree[i] & IS_OFFSET) ?
2236
" -> " : "", (packed_tree[i] & IS_OFFSET) ?
2237
packed_tree[i] - IS_OFFSET + i : packed_tree[i]));
2239
(void)(printf("tree[0x%04x]: %s0x%04x\n",
2240
i, (packed_tree[i] & IS_OFFSET) ? " -> " : "",
2241
(packed_tree[i] & IS_OFFSET) ?
2242
packed_tree[i] - IS_OFFSET + i : packed_tree[i]));
2247
Display coding tables and check their correctness.
2249
codes= huff_tree->counts->tree_buff ? huff_tree->elements : 256;
2250
for (i= 0; i < codes; i++)
2257
if (! (len= huff_tree->code_len[i]))
2259
DBUG_PRINT("info", ("code[0x%04x]: 0x%s bits: %2u bin: %s", i,
2260
hexdigits(huff_tree->code[i]), huff_tree->code_len[i],
2261
bindigits(huff_tree->code[i],
2262
huff_tree->code_len[i])));
2264
(void)(printf("code[0x%04x]: 0x%s bits: %2u bin: %s\n", i,
2265
hexdigits(huff_tree->code[i]), huff_tree->code_len[i],
2266
bindigits(huff_tree->code[i], huff_tree->code_len[i])));
2268
/* Check that the encode table decodes correctly. */
2272
DBUG_EXECUTE_IF("forcechkerr1", len--;);
2273
DBUG_EXECUTE_IF("forcechkerr2", bits= 8 * sizeof(code););
2274
DBUG_EXECUTE_IF("forcechkerr3", idx= length;);
2279
(void)(fflush(stdout));
2280
(void)(fprintf(stderr, "error: code 0x%s with %u bits not found\n",
2281
hexdigits(huff_tree->code[i]), huff_tree->code_len[i]));
2286
code|= (huff_tree->code[i] >> (--len)) & 1;
2288
if (bits > 8 * sizeof(code))
2290
(void)(fflush(stdout));
2291
(void)(fprintf(stderr, "error: Huffman code too long: %u/%u\n",
2292
bits, (uint) (8 * sizeof(code))));
2296
idx+= (uint) code & 1;
2299
(void)(fflush(stdout));
2300
(void)(fprintf(stderr, "error: illegal tree offset: %u/%u\n",
2305
if (packed_tree[idx] & IS_OFFSET)
2306
idx+= packed_tree[idx] & ~IS_OFFSET;
2308
break; /* Hit a leaf. This contains the result value. */
2313
DBUG_EXECUTE_IF("forcechkerr4", packed_tree[idx]++;);
2314
if (packed_tree[idx] != i)
2316
(void)(fflush(stdout));
2317
(void)(fprintf(stderr, "error: decoded value 0x%04x should be: 0x%04x\n",
2318
packed_tree[idx], i));
2322
} /*end for (codes)*/
2326
/* Write column values in case of distinct column value compression. */
2327
if (huff_tree->counts->tree_buff)
2329
for (i=0 ; i < int_length ; i++)
2331
write_bits((ulonglong) (uchar) huff_tree->counts->tree_buff[i], 8);
2332
DBUG_PRINT("info", ("column_values[0x%04x]: 0x%02x",
2333
i, (uchar) huff_tree->counts->tree_buff[i]));
2335
(void)(printf("column_values[0x%04x]: 0x%02x\n",
2336
i, (uchar) huff_tree->counts->tree_buff[i]));
2341
DBUG_PRINT("info", (" "));
2343
(void)(printf("\n"));
2344
my_afree((uchar*) packed_tree);
2347
(void)(fprintf(stderr, "Error: Generated decode trees are corrupt. Stop.\n"));
2354
static uint *make_offset_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element,
2359
prev_offset= offset;
2361
'a.leaf.null' takes the same place as 'a.nod.left'. If this is null,
2362
then there is no left child and, hence no right child either. This
2363
is a property of a binary tree. An element is either a node with two
2364
childs, or a leaf without childs.
2366
The current element is always a node with two childs. Go left first.
2368
if (!element->a.nod.left->a.leaf.null)
2370
/* Store the uchar code or the index of the column value. */
2371
prev_offset[0] =(uint) element->a.nod.left->a.leaf.element_nr;
2377
Recursively traverse the tree to the left. Mark it as an offset to
2378
another tree node (in contrast to a uchar code or column value index).
2380
prev_offset[0]= IS_OFFSET+2;
2381
offset=make_offset_code_tree(huff_tree,element->a.nod.left,offset+2);
2384
/* Now, check the right child. */
2385
if (!element->a.nod.right->a.leaf.null)
2387
/* Store the uchar code or the index of the column value. */
2388
prev_offset[1]=element->a.nod.right->a.leaf.element_nr;
2394
Recursively traverse the tree to the right. Mark it as an offset to
2395
another tree node (in contrast to a uchar code or column value index).
2397
uint temp=(uint) (offset-prev_offset-1);
2398
prev_offset[1]= IS_OFFSET+ temp;
2399
if (huff_tree->max_offset < temp)
2400
huff_tree->max_offset = temp;
2401
return make_offset_code_tree(huff_tree,element->a.nod.right,offset);
2405
/* Get number of bits neaded to represent value */
2407
static uint max_bit(register uint value)
2417
static int compress_maria_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
2420
uint i,max_calc_length,pack_ref_length,min_record_length,max_record_length;
2421
uint intervall,field_length,max_pack_length,pack_blob_length, null_bytes;
2422
my_off_t record_count;
2424
ulong length,pack_length;
2425
uchar *record,*pos,*end_pos,*record_pos,*start_pos;
2426
HUFF_COUNTS *count,*end_count;
2428
MARIA_HA *isam_file=mrg->file[0];
2429
uint pack_version= (uint) isam_file->s->pack.version;
2430
DBUG_ENTER("compress_maria_file");
2432
/* Allocate a buffer for the records (excluding blobs). */
2433
if (!(record=(uchar*) my_alloca(isam_file->s->base.reclength)))
2436
end_count=huff_counts+isam_file->s->base.fields;
2437
min_record_length= (uint) ~0;
2438
max_record_length=0;
2439
null_bytes= isam_file->s->base.null_bytes;
2442
Calculate the maximum number of bits required to pack the records.
2443
Remember to understand 'max_zero_fill' as 'min_zero_fill'.
2444
The tree height determines the maximum number of bits per value.
2445
Some fields skip leading or trailing spaces or zeroes. The skipped
2446
number of bytes is encoded by 'length_bits' bits.
2447
Empty blobs and varchar are encoded with a single 1 bit. Other blobs
2448
and varchar get a leading 0 bit.
2450
max_calc_length= null_bytes;
2451
for (i= 0 ; i < isam_file->s->base.fields ; i++)
2453
if (!(huff_counts[i].pack_type & PACK_TYPE_ZERO_FILL))
2454
huff_counts[i].max_zero_fill=0;
2455
if (huff_counts[i].field_type == FIELD_CONSTANT ||
2456
huff_counts[i].field_type == FIELD_ZERO ||
2457
huff_counts[i].field_type == FIELD_CHECK)
2459
if (huff_counts[i].field_type == FIELD_INTERVALL)
2460
max_calc_length+=huff_counts[i].tree->height;
2461
else if (huff_counts[i].field_type == FIELD_BLOB ||
2462
huff_counts[i].field_type == FIELD_VARCHAR)
2463
max_calc_length+=huff_counts[i].tree->height*huff_counts[i].max_length + huff_counts[i].length_bits +1;
2466
(huff_counts[i].field_length - huff_counts[i].max_zero_fill)*
2467
huff_counts[i].tree->height+huff_counts[i].length_bits;
2469
max_calc_length= (max_calc_length + 7) / 8;
2470
pack_ref_length= _ma_calc_pack_length(pack_version, max_calc_length);
2472
/* 'max_blob_length' is the max length of all blobs of a record. */
2473
pack_blob_length= isam_file->s->base.blobs ?
2474
_ma_calc_pack_length(pack_version, mrg->max_blob_length) : 0;
2475
max_pack_length=pack_ref_length+pack_blob_length;
2477
DBUG_PRINT("fields", ("==="));
2479
while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE)
2481
ulong tot_blob_length=0;
2484
if (flush_buffer((ulong) max_calc_length + (ulong) max_pack_length +
2487
record_pos= (uchar*) file_buffer.pos;
2488
file_buffer.pos+= max_pack_length;
2491
/* Copy null bits 'as is' */
2492
memcpy(file_buffer.pos, record, null_bytes);
2493
file_buffer.pos+= null_bytes;
2495
for (start_pos=record+null_bytes, count= huff_counts;
2499
end_pos=start_pos+(field_length=count->field_length);
2502
DBUG_PRINT("fields", ("column: %3lu type: %2u pack: %2u zero: %4u "
2503
"lbits: %2u tree: %2u length: %4u",
2504
(ulong) (count - huff_counts + 1),
2506
count->pack_type, count->max_zero_fill,
2507
count->length_bits, count->tree->tree_number,
2508
count->field_length));
2510
/* Check if the column contains spaces only. */
2511
if (count->pack_type & PACK_TYPE_SPACE_FIELDS)
2513
for (pos=start_pos ; *pos == ' ' && pos < end_pos; pos++) ;
2516
DBUG_PRINT("fields",
2517
("PACK_TYPE_SPACE_FIELDS spaces only, bits: 1"));
2518
DBUG_PRINT("fields", ("---"));
2523
DBUG_PRINT("fields",
2524
("PACK_TYPE_SPACE_FIELDS not only spaces, bits: 1"));
2527
end_pos-=count->max_zero_fill;
2528
field_length-=count->max_zero_fill;
2530
switch (count->field_type) {
2531
case FIELD_SKIP_ZERO:
2532
if (!memcmp((uchar*) start_pos,zero_string,field_length))
2534
DBUG_PRINT("fields", ("FIELD_SKIP_ZERO zeroes only, bits: 1"));
2539
DBUG_PRINT("fields", ("FIELD_SKIP_ZERO not only zeroes, bits: 1"));
2543
DBUG_PRINT("fields", ("FIELD_NORMAL %lu bytes",
2544
(ulong) (end_pos - start_pos)));
2545
for ( ; start_pos < end_pos ; start_pos++)
2547
DBUG_PRINT("fields",
2548
("value: 0x%02x code: 0x%s bits: %2u bin: %s",
2550
hexdigits(tree->code[(uchar) *start_pos]),
2551
(uint) tree->code_len[(uchar) *start_pos],
2552
bindigits(tree->code[(uchar) *start_pos],
2553
(uint) tree->code_len[(uchar) *start_pos])));
2554
write_bits(tree->code[(uchar) *start_pos],
2555
(uint) tree->code_len[(uchar) *start_pos]);
2558
case FIELD_SKIP_ENDSPACE:
2559
for (pos=end_pos ; pos > start_pos && pos[-1] == ' ' ; pos--) ;
2560
length= (ulong) (end_pos - pos);
2561
if (count->pack_type & PACK_TYPE_SELECTED)
2563
if (length > count->min_space)
2565
DBUG_PRINT("fields",
2566
("FIELD_SKIP_ENDSPACE more than min_space, bits: 1"));
2567
DBUG_PRINT("fields",
2568
("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u",
2569
length, field_length, count->length_bits));
2571
write_bits(length,count->length_bits);
2575
DBUG_PRINT("fields",
2576
("FIELD_SKIP_ENDSPACE not more than min_space, "
2584
DBUG_PRINT("fields",
2585
("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u",
2586
length, field_length, count->length_bits));
2587
write_bits(length,count->length_bits);
2589
/* Encode all significant bytes. */
2590
DBUG_PRINT("fields", ("FIELD_SKIP_ENDSPACE %lu bytes",
2591
(ulong) (pos - start_pos)));
2592
for ( ; start_pos < pos ; start_pos++)
2594
DBUG_PRINT("fields",
2595
("value: 0x%02x code: 0x%s bits: %2u bin: %s",
2597
hexdigits(tree->code[(uchar) *start_pos]),
2598
(uint) tree->code_len[(uchar) *start_pos],
2599
bindigits(tree->code[(uchar) *start_pos],
2600
(uint) tree->code_len[(uchar) *start_pos])));
2601
write_bits(tree->code[(uchar) *start_pos],
2602
(uint) tree->code_len[(uchar) *start_pos]);
2606
case FIELD_SKIP_PRESPACE:
2607
for (pos=start_pos ; pos < end_pos && pos[0] == ' ' ; pos++) ;
2608
length= (ulong) (pos - start_pos);
2609
if (count->pack_type & PACK_TYPE_SELECTED)
2611
if (length > count->min_space)
2613
DBUG_PRINT("fields",
2614
("FIELD_SKIP_PRESPACE more than min_space, bits: 1"));
2615
DBUG_PRINT("fields",
2616
("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u",
2617
length, field_length, count->length_bits));
2619
write_bits(length,count->length_bits);
2623
DBUG_PRINT("fields",
2624
("FIELD_SKIP_PRESPACE not more than min_space, "
2632
DBUG_PRINT("fields",
2633
("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u",
2634
length, field_length, count->length_bits));
2635
write_bits(length,count->length_bits);
2637
/* Encode all significant bytes. */
2638
DBUG_PRINT("fields", ("FIELD_SKIP_PRESPACE %lu bytes",
2639
(ulong) (end_pos - start_pos)));
2640
for (start_pos=pos ; start_pos < end_pos ; start_pos++)
2642
DBUG_PRINT("fields",
2643
("value: 0x%02x code: 0x%s bits: %2u bin: %s",
2645
hexdigits(tree->code[(uchar) *start_pos]),
2646
(uint) tree->code_len[(uchar) *start_pos],
2647
bindigits(tree->code[(uchar) *start_pos],
2648
(uint) tree->code_len[(uchar) *start_pos])));
2649
write_bits(tree->code[(uchar) *start_pos],
2650
(uint) tree->code_len[(uchar) *start_pos]);
2653
case FIELD_CONSTANT:
2656
DBUG_PRINT("fields", ("FIELD_CONSTANT/ZERO/CHECK"));
2659
case FIELD_INTERVALL:
2661
pos=(uchar*) tree_search(&count->int_tree, start_pos,
2662
count->int_tree.custom_arg);
2663
intervall=(uint) (pos - count->tree_buff)/field_length;
2664
DBUG_PRINT("fields", ("FIELD_INTERVALL"));
2665
DBUG_PRINT("fields", ("index: %4u code: 0x%s bits: %2u",
2666
intervall, hexdigits(tree->code[intervall]),
2667
(uint) tree->code_len[intervall]));
2668
write_bits(tree->code[intervall],(uint) tree->code_len[intervall]);
2673
ulong blob_length= _ma_calc_blob_length(field_length-
2674
portable_sizeof_char_ptr,
2676
/* Empty blobs are encoded with a single 1 bit. */
2679
DBUG_PRINT("fields", ("FIELD_BLOB empty, bits: 1"));
2684
uchar *blob,*blob_end;
2685
DBUG_PRINT("fields", ("FIELD_BLOB not empty, bits: 1"));
2687
/* Write the blob length. */
2688
DBUG_PRINT("fields", ("FIELD_BLOB %lu bytes, bits: %2u",
2689
blob_length, count->length_bits));
2690
write_bits(blob_length,count->length_bits);
2691
memcpy_fixed(&blob,end_pos-portable_sizeof_char_ptr,
2693
blob_end=blob+blob_length;
2694
/* Encode the blob bytes. */
2695
for ( ; blob < blob_end ; blob++)
2697
DBUG_PRINT("fields",
2698
("value: 0x%02x code: 0x%s bits: %2u bin: %s",
2699
(uchar) *blob, hexdigits(tree->code[(uchar) *blob]),
2700
(uint) tree->code_len[(uchar) *blob],
2701
bindigits(tree->code[(uchar) *start_pos],
2702
(uint)tree->code_len[(uchar) *start_pos])));
2703
write_bits(tree->code[(uchar) *blob],
2704
(uint) tree->code_len[(uchar) *blob]);
2706
tot_blob_length+=blob_length;
2713
uint var_pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1);
2714
ulong col_length= (var_pack_length == 1 ?
2715
(uint) *(uchar*) start_pos :
2716
uint2korr(start_pos));
2717
/* Empty varchar are encoded with a single 1 bit. */
2720
DBUG_PRINT("fields", ("FIELD_VARCHAR empty, bits: 1"));
2721
write_bits(1,1); /* Empty varchar */
2725
uchar *end= start_pos + var_pack_length + col_length;
2726
DBUG_PRINT("fields", ("FIELD_VARCHAR not empty, bits: 1"));
2728
/* Write the varchar length. */
2729
DBUG_PRINT("fields", ("FIELD_VARCHAR %lu bytes, bits: %2u",
2730
col_length, count->length_bits));
2731
write_bits(col_length,count->length_bits);
2732
/* Encode the varchar bytes. */
2733
for (start_pos+= var_pack_length ; start_pos < end ; start_pos++)
2735
DBUG_PRINT("fields",
2736
("value: 0x%02x code: 0x%s bits: %2u bin: %s",
2738
hexdigits(tree->code[(uchar) *start_pos]),
2739
(uint) tree->code_len[(uchar) *start_pos],
2740
bindigits(tree->code[(uchar) *start_pos],
2741
(uint)tree->code_len[(uchar) *start_pos])));
2742
write_bits(tree->code[(uchar) *start_pos],
2743
(uint) tree->code_len[(uchar) *start_pos]);
2750
case FIELD_enum_val_count:
2751
abort(); /* Impossible */
2753
start_pos+=count->max_zero_fill;
2754
DBUG_PRINT("fields", ("---"));
2757
length=(ulong) ((uchar*) file_buffer.pos - record_pos) - max_pack_length;
2758
pack_length= _ma_save_pack_length(pack_version, record_pos, length);
2759
if (pack_blob_length)
2760
pack_length+= _ma_save_pack_length(pack_version,
2761
record_pos + pack_length,
2763
DBUG_PRINT("fields", ("record: %lu length: %lu blob-length: %lu "
2764
"length-bytes: %lu", (ulong) record_count, length,
2765
tot_blob_length, pack_length));
2766
DBUG_PRINT("fields", ("==="));
2768
/* Correct file buffer if the header was smaller */
2769
if (pack_length != max_pack_length)
2771
bmove(record_pos+pack_length,record_pos+max_pack_length,length);
2772
file_buffer.pos-= (max_pack_length-pack_length);
2774
if (length < (ulong) min_record_length)
2775
min_record_length=(uint) length;
2776
if (length > (ulong) max_record_length)
2777
max_record_length=(uint) length;
2779
if (write_loop && record_count % WRITE_COUNT == 0)
2781
(void)(printf("%lu\r", (ulong) record_count));
2782
(void)(fflush(stdout));
2785
else if (error != HA_ERR_RECORD_DELETED)
2788
if (error == HA_ERR_END_OF_FILE)
2792
(void)(fprintf(stderr, "%s: Got error %d reading records\n",
2793
my_progname, error));
2796
(void)(printf("wrote %s records.\n", llstr((longlong) record_count, llbuf)));
2798
my_afree((uchar*) record);
2799
mrg->ref_length=max_pack_length;
2800
mrg->min_pack_length=max_record_length ? min_record_length : 0;
2801
mrg->max_pack_length=max_record_length;
2802
DBUG_RETURN(error || error_on_write || flush_buffer(~(ulong) 0));
2806
static char *make_new_name(char *new_name, char *old_name)
2808
return fn_format(new_name,old_name,"",DATA_TMP_EXT,2+4);
2811
static char *make_old_name(char *new_name, char *old_name)
2813
return fn_format(new_name,old_name,"",OLD_EXT,2+4);
2816
/* rutines for bit writing buffer */
2818
static void init_file_buffer(File file, pbool read_buffer)
2820
file_buffer.file=file;
2821
file_buffer.buffer= (uchar*) my_malloc(ALIGN_SIZE(RECORD_CACHE_SIZE),
2823
file_buffer.end=file_buffer.buffer+ALIGN_SIZE(RECORD_CACHE_SIZE)-8;
2824
file_buffer.pos_in_file=0;
2829
file_buffer.pos=file_buffer.end;
2834
file_buffer.pos=file_buffer.buffer;
2835
file_buffer.bits=BITS_SAVED;
2837
file_buffer.bitbucket= 0;
2841
static int flush_buffer(ulong neaded_length)
2846
file_buffer.end is 8 bytes lower than the real end of the buffer.
2847
This is done so that the end-of-buffer condition does not need to be
2848
checked for every uchar (see write_bits()). Consequently,
2849
file_buffer.pos can become greater than file_buffer.end. The
2850
algorithms in the other functions ensure that there will never be
2851
more than 8 bytes written to the buffer without an end-of-buffer
2852
check. So the buffer cannot be overrun. But we need to check for the
2853
near-to-buffer-end condition to avoid a negative result, which is
2854
casted to unsigned and thus becomes giant.
2856
if ((file_buffer.pos < file_buffer.end) &&
2857
((ulong) (file_buffer.end - file_buffer.pos) > neaded_length))
2859
length=(ulong) (file_buffer.pos-file_buffer.buffer);
2860
file_buffer.pos=file_buffer.buffer;
2861
file_buffer.pos_in_file+=length;
2864
if (error_on_write|| my_write(file_buffer.file,
2865
(const uchar*) file_buffer.buffer,
2867
MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
2873
if (neaded_length != ~(ulong) 0 &&
2874
(ulong) (file_buffer.end-file_buffer.buffer) < neaded_length)
2877
neaded_length+=256; /* some margin */
2878
tmp= my_realloc((char*) file_buffer.buffer, neaded_length,MYF(MY_WME));
2881
file_buffer.pos= ((uchar*) tmp +
2882
(ulong) (file_buffer.pos - file_buffer.buffer));
2883
file_buffer.buffer= (uchar*) tmp;
2884
file_buffer.end= (uchar*) (tmp+neaded_length-8);
2890
static void end_file_buffer(void)
2892
my_free((uchar*) file_buffer.buffer,MYF(0));
2895
/* output `bits` low bits of `value' */
2897
static void write_bits(register ulonglong value, register uint bits)
2899
DBUG_ASSERT(((bits < 8 * sizeof(value)) && ! (value >> bits)) ||
2900
(bits == 8 * sizeof(value)));
2902
if ((file_buffer.bits-= (int) bits) >= 0)
2904
file_buffer.bitbucket|= value << file_buffer.bits;
2908
reg3 ulonglong bit_buffer;
2909
bits= (uint) -file_buffer.bits;
2910
bit_buffer= (file_buffer.bitbucket |
2911
((bits != 8 * sizeof(value)) ? (value >> bits) : 0));
2912
#if BITS_SAVED == 64
2913
*file_buffer.pos++= (uchar) (bit_buffer >> 56);
2914
*file_buffer.pos++= (uchar) (bit_buffer >> 48);
2915
*file_buffer.pos++= (uchar) (bit_buffer >> 40);
2916
*file_buffer.pos++= (uchar) (bit_buffer >> 32);
2918
*file_buffer.pos++= (uchar) (bit_buffer >> 24);
2919
*file_buffer.pos++= (uchar) (bit_buffer >> 16);
2920
*file_buffer.pos++= (uchar) (bit_buffer >> 8);
2921
*file_buffer.pos++= (uchar) (bit_buffer);
2923
if (bits != 8 * sizeof(value))
2924
value&= (((ulonglong) 1) << bits) - 1;
2925
if (file_buffer.pos >= file_buffer.end)
2926
(void)(flush_buffer(~ (ulong) 0));
2927
file_buffer.bits=(int) (BITS_SAVED - bits);
2928
file_buffer.bitbucket= value << (BITS_SAVED - bits);
2933
/* Flush bits in bit_buffer to buffer */
2935
static void flush_bits(void)
2938
ulonglong bit_buffer;
2940
bits= file_buffer.bits & ~7;
2941
bit_buffer= file_buffer.bitbucket >> bits;
2942
bits= BITS_SAVED - bits;
2946
*file_buffer.pos++= (uchar) (bit_buffer >> bits);
2948
if (file_buffer.pos >= file_buffer.end)
2949
(void)(flush_buffer(~ (ulong) 0));
2950
file_buffer.bits= BITS_SAVED;
2951
file_buffer.bitbucket= 0;
2955
/****************************************************************************
2956
** functions to handle the joined files
2957
****************************************************************************/
2959
static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg,
2960
my_off_t new_length,
2963
MARIA_SHARE *share=isam_file->s;
2964
uint options=mi_uint2korr(share->state.header.options);
2966
DBUG_ENTER("save_state");
2968
options|= HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA;
2969
mi_int2store(share->state.header.options,options);
2970
/* Save the original file type of we have to undo the packing later */
2971
share->state.header.org_data_file_type= share->state.header.data_file_type;
2972
share->state.header.data_file_type= COMPRESSED_RECORD;
2974
share->state.state.data_file_length=new_length;
2975
share->state.state.del=0;
2976
share->state.state.empty=0;
2977
share->state.dellink= HA_OFFSET_ERROR;
2978
share->state.split=(ha_rows) mrg->records;
2979
share->state.version=(ulong) time((time_t*) 0);
2980
if (share->base.born_transactional)
2981
share->state.create_rename_lsn= share->state.is_of_horizon=
2982
share->state.skip_redo_lsn= LSN_REPAIRED_BY_MARIA_CHK;
2983
if (! maria_is_all_keys_active(share->state.key_map, share->base.keys))
2986
Some indexes are disabled, cannot use current key_file_length value
2987
as an estimate of upper bound of index file size. Use packed data file
2990
share->state.state.key_file_length= new_length;
2993
If there are no disabled indexes, keep key_file_length value from
2994
original file so "maria_chk -rq" can use this value (this is necessary
2995
because index size cannot be easily calculated for fulltext keys)
2997
maria_clear_all_keys_active(share->state.key_map);
2998
for (key=0 ; key < share->base.keys ; key++)
2999
share->state.key_root[key]= HA_OFFSET_ERROR;
3000
share->state.key_del= HA_OFFSET_ERROR;
3001
share->state.state.checksum= crc; /* Save crc in file */
3002
share->changed=1; /* Force write of header */
3003
share->state.open_count=0;
3004
share->global_changed=0;
3005
(void)(my_chsize(share->kfile.file, share->base.keystart, 0, MYF(0)));
3006
if (share->base.keys)
3008
DBUG_RETURN(_ma_state_info_write_sub(share->kfile.file,
3009
&share->state, (1 + 2)));
3013
static int save_state_mrg(File file,PACK_MRG_INFO *mrg,my_off_t new_length,
3016
MARIA_STATE_INFO state;
3017
MARIA_HA *isam_file=mrg->file[0];
3019
DBUG_ENTER("save_state_mrg");
3021
state= isam_file->s->state;
3022
options= (mi_uint2korr(state.header.options) | HA_OPTION_COMPRESS_RECORD |
3023
HA_OPTION_READ_ONLY_DATA);
3024
mi_int2store(state.header.options,options);
3025
/* Save the original file type of we have to undo the packing later */
3026
state.header.org_data_file_type= state.header.data_file_type;
3027
state.header.data_file_type= COMPRESSED_RECORD;
3029
state.state.data_file_length=new_length;
3031
state.state.empty=0;
3032
state.state.records=state.split=(ha_rows) mrg->records;
3033
state.create_rename_lsn= state.is_of_horizon= state.skip_redo_lsn=
3034
LSN_REPAIRED_BY_MARIA_CHK;
3036
/* See comment above in save_state about key_file_length handling. */
3037
if (mrg->src_file_has_indexes_disabled)
3039
isam_file->s->state.state.key_file_length=
3040
max(isam_file->s->state.state.key_file_length, new_length);
3042
state.dellink= HA_OFFSET_ERROR;
3043
state.version=(ulong) time((time_t*) 0);
3044
maria_clear_all_keys_active(state.key_map);
3045
state.state.checksum=crc;
3046
if (isam_file->s->base.keys)
3048
state.changed=STATE_CHANGED | STATE_NOT_ANALYZED; /* Force check of table */
3049
DBUG_RETURN (_ma_state_info_write_sub(file,&state,1+2));
3053
/* reset for mrg_rrnd */
3055
static void mrg_reset(PACK_MRG_INFO *mrg)
3059
maria_extra(*mrg->current, HA_EXTRA_NO_CACHE, 0);
3064
static int mrg_rrnd(PACK_MRG_INFO *info,uchar *buf)
3067
MARIA_HA *isam_info;
3072
isam_info= *(info->current=info->file);
3073
info->end=info->current+info->count;
3074
maria_reset(isam_info);
3075
maria_extra(isam_info, HA_EXTRA_CACHE, 0);
3076
if ((error= maria_scan_init(isam_info)))
3080
isam_info= *info->current;
3084
if (!(error= maria_scan(isam_info, buf)) ||
3085
error != HA_ERR_END_OF_FILE)
3087
maria_scan_end(isam_info);
3088
maria_extra(isam_info,HA_EXTRA_NO_CACHE, 0);
3089
if (info->current+1 == info->end)
3090
return(HA_ERR_END_OF_FILE);
3092
isam_info= *info->current;
3093
filepos=isam_info->s->pack.header_length;
3094
maria_reset(isam_info);
3095
maria_extra(isam_info,HA_EXTRA_CACHE, 0);
3096
if ((error= maria_scan_init(isam_info)))
3102
static int mrg_close(PACK_MRG_INFO *mrg)
3106
DBUG_ENTER("mrg_close");
3108
for (i=0 ; i < mrg->count ; i++)
3109
error|=maria_close(mrg->file[i]);
3111
my_free((uchar*) mrg->file,MYF(0));
3116
#if !defined(DBUG_OFF)
3118
Fake the counts to get big Huffman codes.
3122
huff_counts A pointer to the counts array.
3123
end_count A pointer past the counts array.
3127
Huffman coding works by removing the two least frequent values from
3128
the list of values and add a new value with the sum of their
3129
incidences in a loop until only one value is left. Every time a
3130
value is reused for a new value, it gets one more bit for its
3131
encoding. Hence, the least frequent values get the longest codes.
3133
To get a maximum code length for a value, two of the values must
3134
have an incidence of 1. As their sum is 2, the next infrequent value
3135
must have at least an incidence of 2, then 4, 8, 16 and so on. This
3136
means that one needs 2**n bytes (values) for a code length of n
3137
bits. However, using more distinct values forces the use of longer
3138
codes, or reaching the code length with less total bytes (values).
3140
To get 64(32)-bit codes, I sort the counts by decreasing incidence.
3141
I assign counts of 1 to the two most frequent values, a count of 2
3142
for the next one, then 4, 8, and so on until 2**64-1(2**30-1). All
3143
the remaining values get 1. That way every possible uchar has an
3144
assigned code, though not all codes are used if not all uchar values
3145
are present in the column.
3147
This strategy would work with distinct column values too, but
3148
requires that at least 64(32) values are present. To make things
3149
easier here, I cancel all distinct column values and force byte
3150
compression for all columns.
3156
static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count)
3159
my_off_t *cur_count_p;
3160
my_off_t *end_count_p;
3161
my_off_t **cur_sort_p;
3162
my_off_t **end_sort_p;
3163
my_off_t *sort_counts[256];
3165
DBUG_ENTER("fakebigcodes");
3167
for (count= huff_counts; count < end_count; count++)
3170
Remove distinct column values.
3172
if (huff_counts->tree_buff)
3174
my_free((uchar*) huff_counts->tree_buff, MYF(0));
3175
delete_tree(&huff_counts->int_tree);
3176
huff_counts->tree_buff= NULL;
3177
DBUG_PRINT("fakebigcodes", ("freed distinct column values"));
3181
Sort counts by decreasing incidence.
3183
cur_count_p= count->counts;
3184
end_count_p= cur_count_p + 256;
3185
cur_sort_p= sort_counts;
3186
while (cur_count_p < end_count_p)
3187
*(cur_sort_p++)= cur_count_p++;
3188
(void) my_qsort(sort_counts, 256, sizeof(my_off_t*), (qsort_cmp) fakecmp);
3191
Assign faked counts.
3193
cur_sort_p= sort_counts;
3194
#if SIZEOF_LONG_LONG > 4
3195
end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 1;
3197
end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 2;
3199
/* Most frequent value gets a faked count of 1. */
3200
**(cur_sort_p++)= 1;
3202
while (cur_sort_p < end_sort_p)
3204
**(cur_sort_p++)= total;
3207
/* Set the last value. */
3208
**(cur_sort_p++)= --total;
3210
Set the remaining counts.
3212
end_sort_p= sort_counts + 256;
3213
while (cur_sort_p < end_sort_p)
3214
**(cur_sort_p++)= 1;
3221
Compare two counts for reverse sorting.
3226
count2 Another count.
3234
static int fakecmp(my_off_t **count1, my_off_t **count2)
3236
return ((**count1 < **count2) ? 1 :
3237
(**count1 > **count2) ? -1 : 0);
3241
#include "ma_check_standalone.h"