1
/* xdelta 3 - delta compression tools and library
2
* Copyright (C) 2002, 2006, 2007. Joshua P. MacDonald
4
* This program is free software; you can redistribute it and/or modify
5
* it under the terms of the GNU General Public License as published by
6
* the Free Software Foundation; either version 2 of the License, or
7
* (at your option) any later version.
9
* This program is distributed in the hope that it will be useful,
10
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
* GNU General Public License for more details.
14
* You should have received a copy of the GNU General Public License
15
* along with this program; if not, write to the Free Software
16
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
#ifndef _XDELTA3_DJW_H_
20
#define _XDELTA3_DJW_H_
22
/* The following people deserve much credit for the algorithms and techniques contained in
26
Bzip2 sources, implementation of the multi-table Huffman technique.
28
Jean-loup Gailly and Mark Adler and L. Peter Deutsch
29
Zlib source code, RFC 1951
31
Daniel S. Hirschberg and Debra A. LeLewer
32
"Efficient Decoding of Prefix Codes"
33
Communications of the ACM, April 1990 33(4).
36
Program bred3.c, bexp3 and accompanying documents bred3.ps, huff.ps.
37
This contains the idea behind the multi-table Huffman and 1-2 coding techniques.
38
ftp://ftp.cl.cam.ac.uk/users/djw3/
42
/* OPT: during the multi-table iteration, pick the worst-overall performing table and
43
* replace it with exactly the frequencies of the worst-overall performing sector or
44
* N-worst performing sectors. */
46
/* REF: See xdfs-0.222 and xdfs-0.226 for some old experiments with the Bzip prefix coding
47
* strategy. xdfs-0.256 contains the last of the other-format tests, including RFC1950
48
* and the RFC1950+MTF tests. */
50
#define DJW_MAX_CODELEN 32 /* Maximum length of an alphabet code. */
52
#define DJW_TOTAL_CODES (DJW_MAX_CODELEN+2) /* [RUN_0, RUN_1, 1-DJW_MAX_CODELEN] */
54
#define RUN_0 0 /* Symbols used in MTF+1/2 coding. */
57
#define DJW_BASIC_CODES 5 /* Number of code lengths always encoded (djw_encode_basic array) */
58
#define DJW_RUN_CODES 2 /* Number of run codes */
59
#define DJW_EXTRA_12OFFSET 7 /* Offset of extra codes */
60
#define DJW_EXTRA_CODES 15 /* Number of optionally encoded code lengths (djw_encode_extra array) */
61
#define DJW_EXTRA_CODE_BITS 4 /* Number of bits to code [0-DJW_EXTRA_CODES] */
63
#define DJW_MAX_GROUPS 8 /* Max number of group coding tables */
64
#define DJW_GROUP_BITS 3 /* Number of bits to code [1-DJW_MAX_GROUPS] */
66
#define DJW_SECTORSZ_MULT 5 /* Multiplier for encoded sectorsz */
67
#define DJW_SECTORSZ_BITS 5 /* Number of bits to code group size */
68
#define DJW_SECTORSZ_MAX ((1 << DJW_SECTORSZ_BITS) * DJW_SECTORSZ_MULT)
70
#define DJW_MAX_ITER 6 /* Maximum number of iterations to find group tables. */
71
#define DJW_MIN_IMPROVEMENT 20 /* Minimum number of bits an iteration must reduce coding by. */
73
#define DJW_MAX_CLCLEN 15 /* Maximum code length of a prefix code length */
74
#define DJW_CLCLEN_BITS 4 /* Number of bits to code [0-DJW_MAX_CLCLEN] */
76
#define DJW_MAX_GBCLEN 7 /* Maximum code length of a group selector */
77
#define DJW_GBCLEN_BITS 3 /* Number of bits to code [0-DJW_MAX_GBCLEN]
78
* @!@ Actually, should never have zero code lengths here, or
79
* else a group went unused. Write a test for this: if a group
80
* goes unused, eliminate it? */
82
#define EFFICIENCY_BITS 16 /* It has to save at least this many bits... */
84
typedef struct _djw_stream djw_stream;
85
typedef struct _djw_heapen djw_heapen;
86
typedef struct _djw_prefix djw_prefix;
87
typedef uint32_t djw_weight;
89
/* To enable Huffman tuning code... */
91
#define TUNE_HUFFMAN 0
95
#define xd3_real_encode_huff xd3_encode_huff
99
static uint xd3_bitsof_output (xd3_output *output, bit_state *bstate);
102
static djw_weight tune_freq[DJW_TOTAL_CODES];
103
static uint8_t tune_clen[DJW_MAX_GROUPS][ALPHABET_SIZE];
104
static usize_t tune_prefix_bits;
105
static usize_t tune_select_bits;
106
static usize_t tune_encode_bits;
129
/* Each Huffman table consists of 256 "code length" (CLEN) codes, which are themselves
130
* Huffman coded after eliminating repeats and move-to-front coding. The prefix consists
131
* of all the CLEN codes in djw_encode_basic plus a 4-bit value stating how many of the
132
* djw_encode_extra codes are actually coded (the rest are presumed zero, or unused CLEN
135
* These values of these two arrays were arrived at by studying the distribution of min
136
* and max clen over a collection of DATA, INST, and ADDR inputs. The goal is to specify
137
* the order of djw_extra_codes that is most likely to minimize the number of extra codes
138
* that must be encoded.
140
* Results: 158896 sections were counted by compressing files (window size 512K) listed
141
* with: `find / -type f ( -user jmacd -o -perm +444 )`
143
* The distribution of CLEN codes for each efficient invocation of the secondary
144
* compressor (taking the best number of groups/sector size) was recorded. Then we look at
145
* the distribution of min and max clen values, counting the number of times the value
146
* C_low is less than the min and C_high is greater than the max. Values >= C_high and <=
147
* C_low will not have their lengths coded. The results are sorted and the least likely
148
* 15 are placed into the djw_encode_extra[] array in order. These values are used as
149
* the initial MTF ordering.
180
static const uint8_t djw_encode_12extra[DJW_EXTRA_CODES] =
182
9, 10, 3, 11, 2, 12, 13, 1, 14, 15, 16, 17, 18, 19, 20
185
static const uint8_t djw_encode_12basic[DJW_BASIC_CODES] =
190
/*********************************************************************/
192
/*********************************************************************/
194
static djw_stream* djw_alloc (xd3_stream *stream /*, int alphabet_size */);
195
static void djw_init (djw_stream *h);
196
static void djw_destroy (xd3_stream *stream,
200
static int xd3_encode_huff (xd3_stream *stream,
201
djw_stream *sec_stream,
207
static int xd3_decode_huff (xd3_stream *stream,
208
djw_stream *sec_stream,
209
const uint8_t **input,
210
const uint8_t *const input_end,
212
const uint8_t *const output_end);
214
/*********************************************************************/
216
/*********************************************************************/
219
djw_alloc (xd3_stream *stream)
221
return xd3_alloc (stream, sizeof (djw_stream), 1);
225
djw_init (djw_stream *h)
227
/* Fields are initialized prior to use. */
231
djw_destroy (xd3_stream *stream,
234
xd3_free (stream, h);
238
/*********************************************************************/
240
/*********************************************************************/
243
heap_less (const djw_heapen *a, const djw_heapen *b)
245
return a->freq < b->freq ||
246
(a->freq == b->freq &&
247
a->depth < b->depth);
251
heap_insert (uint *heap, const djw_heapen *ents, uint p, const uint e)
253
/* Insert ents[e] into next slot heap[p] */
254
uint pp = p/2; /* P's parent */
256
while (heap_less (& ents[e], & ents[heap[pp]]))
266
static INLINE djw_heapen*
267
heap_extract (uint *heap, const djw_heapen *ents, uint heap_last)
269
uint smallest = heap[1];
272
/* Caller decrements heap_last, so heap_last+1 is the replacement elt. */
273
heap[1] = heap[heap_last+1];
276
for (p = 1; ; p = pc)
280
/* Reached bottom of heap */
281
if (pc > heap_last) { break; }
283
/* See if second child is smaller. */
284
if (pc < heap_last && heap_less (& ents[heap[pc+1]], & ents[heap[pc]])) { pc += 1; }
286
/* If pc is not smaller than p, heap property re-established. */
287
if (! heap_less (& ents[heap[pc]], & ents[heap[p]])) { break; }
294
return (djw_heapen*) & ents[smallest];
299
heap_check (uint *heap, djw_heapen *ents, uint heap_last)
302
for (i = 1; i <= heap_last; i += 1)
304
/* Heap property: child not less than parent */
305
XD3_ASSERT (! heap_less (& ents[heap[i]], & ents[heap[i/2]]));
310
/*********************************************************************/
312
/*********************************************************************/
314
static INLINE usize_t
315
djw_update_mtf (uint8_t *mtf, usize_t mtf_i)
318
usize_t sym = mtf[mtf_i];
320
for (k = mtf_i; k != 0; k -= 1) { mtf[k] = mtf[k-1]; }
327
djw_update_1_2 (int *mtf_run, usize_t *mtf_i, uint8_t *mtfsym, djw_weight *freq)
333
/* Offset by 1, since any number of RUN_ symbols implies run>0... */
336
code = (*mtf_run & 1) ? RUN_1 : RUN_0;
338
mtfsym[(*mtf_i)++] = code;
342
while (*mtf_run >= 1);
348
djw_init_clen_mtf_1_2 (uint8_t *clmtf)
353
for (i = 0; i < DJW_BASIC_CODES; i += 1) { clmtf[cl_i++] = djw_encode_12basic[i]; }
354
for (i = 0; i < DJW_EXTRA_CODES; i += 1) { clmtf[cl_i++] = djw_encode_12extra[i]; }
357
/*********************************************************************/
359
/*********************************************************************/
362
djw_build_prefix (const djw_weight *freq, uint8_t *clen, int asize, int maxlen)
364
/* Heap with 0th entry unused, prefix tree with up to ALPHABET_SIZE-1 internal nodes,
365
* never more than ALPHABET_SIZE entries actually in the heap (minimum weight subtrees
366
* during prefix construction). First ALPHABET_SIZE entries are the actual symbols,
367
* next ALPHABET_SIZE-1 are internal nodes. */
368
djw_heapen ents[ALPHABET_SIZE * 2];
369
uint heap[ALPHABET_SIZE + 1];
371
uint heap_last; /* Index of the last _valid_ heap entry. */
372
uint ents_size; /* Number of entries, including 0th fake entry */
373
int overflow; /* Number of code lengths that overflow */
377
IF_DEBUG (uint32_t first_bits = 0);
379
/* Insert real symbol frequences. */
380
for (i = 0; i < asize; i += 1)
382
ents[i+1].freq = freq[i];
387
/* The loop is re-entered each time an overflow occurs. Re-initialize... */
393
/* 0th entry terminates the while loop in heap_insert (its the parent of the smallest
394
* element, always less-than) */
400
for (i = 0; i < asize; i += 1, ents_size += 1)
402
ents[ents_size].depth = 0;
403
ents[ents_size].parent = 0;
405
if (ents[ents_size].freq != 0)
407
heap_insert (heap, ents, ++heap_last, ents_size);
411
IF_DEBUG (heap_check (heap, ents, heap_last));
413
/* Must be at least one symbol, or else we can't get here. */
414
XD3_ASSERT (heap_last != 0);
416
/* If there is only one symbol, fake a second to prevent zero-length codes. */
417
if (unlikely (heap_last == 1))
419
/* Pick either the first or last symbol. */
420
int s = freq[0] ? asize-1 : 0;
425
/* Build prefix tree. */
426
while (heap_last > 1)
428
djw_heapen *h1 = heap_extract (heap, ents, --heap_last);
429
djw_heapen *h2 = heap_extract (heap, ents, --heap_last);
431
ents[ents_size].freq = h1->freq + h2->freq;
432
ents[ents_size].depth = 1 + max (h1->depth, h2->depth);
433
ents[ents_size].parent = 0;
435
h1->parent = h2->parent = ents_size;
437
heap_insert (heap, ents, ++heap_last, ents_size++);
439
IF_DEBUG (heap_check (heap, ents, heap_last));
442
/* Now compute prefix code lengths, counting parents. */
443
for (i = 1; i < asize+1; i += 1)
447
if (ents[i].freq != 0)
451
while ((p = ents[p].parent) != 0) { b += 1; }
453
if (b > maxlen) { overflow = 1; }
455
total_bits += b * freq[i-1];
458
/* clen is 0-origin, unlike ents. */
462
IF_DEBUG (if (first_bits == 0) first_bits = total_bits);
466
IF_DEBUG (if (first_bits != total_bits)
468
DP(RINT "code length overflow changed %u bits\n", (usize_t)(total_bits - first_bits));
473
/* OPT: There is a non-looping way to fix overflow shown in zlib, but this is easier
474
* (for now), as done in bzip2. */
475
for (i = 1; i < asize+1; i += 1)
477
ents[i].freq = ents[i].freq / 2 + 1;
484
djw_build_codes (uint *codes, const uint8_t *clen, int asize DEBUG_ARG (int abs_max))
487
int min_clen = DJW_MAX_CODELEN;
491
for (i = 0; i < asize; i += 1)
493
if (clen[i] > 0 && clen[i] < min_clen)
498
max_clen = max (max_clen, (int) clen[i]);
501
XD3_ASSERT (max_clen <= abs_max);
503
for (l = min_clen; l <= max_clen; l += 1)
505
for (i = 0; i < asize; i += 1)
507
if (clen[i] == l) { codes[i] = code++; }
514
/*********************************************************************/
516
/*********************************************************************/
518
djw_compute_mtf_1_2 (djw_prefix *prefix,
520
djw_weight *freq_out, /* freak out! */
525
usize_t size = prefix->scount;
529
memset (freq_out, 0, sizeof (freq_out[0]) * (nsym+1));
531
for (i = 0; i < size; )
533
/* OPT: Bzip optimizes this algorithm a little by effectively checking j==0 before
535
sym = prefix->symbol[i++];
537
for (j = 0; mtf[j] != sym; j += 1) { }
539
XD3_ASSERT (j < nsym);
541
for (k = j; k >= 1; k -= 1) { mtf[k] = mtf[k-1]; }
553
djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out);
556
/* Non-zero symbols are offset by RUN_1 */
557
prefix->mtfsym[mtf_i++] = j+RUN_1;
558
freq_out[j+RUN_1] += 1;
563
djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out);
566
prefix->mcount = mtf_i;
570
djw_count_freqs (djw_weight *freq, xd3_output *input)
575
memset (freq, 0, sizeof (freq[0]) * ALPHABET_SIZE);
577
/* Freqency counting. OPT: can be accomplished beforehand. */
578
for (in = input; in; in = in->next_page)
580
const uint8_t *p = in->base;
581
const uint8_t *p_max = p + in->next;
585
do { freq[*p++] += 1; } while (p < p_max);
590
for (i = 0; i < ALPHABET_SIZE; i += 1) { DP(RINT "%u ", freq[i]); }
597
djw_compute_multi_prefix (int groups,
598
uint8_t clen[DJW_MAX_GROUPS][ALPHABET_SIZE],
603
prefix->scount = ALPHABET_SIZE;
604
memcpy (prefix->symbol, clen[0], ALPHABET_SIZE);
606
for (gp = 1; gp < groups; gp += 1)
608
for (i = 0; i < ALPHABET_SIZE; i += 1)
610
if (clen[gp][i] == 0)
615
prefix->symbol[prefix->scount++] = clen[gp][i];
621
djw_compute_prefix_1_2 (djw_prefix *prefix, djw_weight *freq)
623
uint8_t clmtf[DJW_MAX_CODELEN+1];
625
djw_init_clen_mtf_1_2 (clmtf);
627
djw_compute_mtf_1_2 (prefix, clmtf, freq, DJW_MAX_CODELEN+1);
631
djw_encode_prefix (xd3_stream *stream,
638
djw_weight clfreq[DJW_TOTAL_CODES];
639
uint8_t clclen[DJW_TOTAL_CODES];
640
uint clcode[DJW_TOTAL_CODES];
642
IF_TUNE (memset (clfreq, 0, sizeof (clfreq)));
644
/* Move-to-front encode prefix symbols, count frequencies */
645
djw_compute_prefix_1_2 (prefix, clfreq);
648
djw_build_prefix (clfreq, clclen, DJW_TOTAL_CODES, DJW_MAX_CLCLEN);
649
djw_build_codes (clcode, clclen, DJW_TOTAL_CODES DEBUG_ARG (DJW_MAX_CLCLEN));
651
/* Compute number of extra codes beyond basic ones for this template. */
652
num_to_encode = DJW_TOTAL_CODES;
653
while (num_to_encode > DJW_EXTRA_12OFFSET && clclen[num_to_encode-1] == 0) { num_to_encode -= 1; }
654
XD3_ASSERT (num_to_encode - DJW_EXTRA_12OFFSET < (1 << DJW_EXTRA_CODE_BITS));
656
/* Encode: # of extra codes */
657
if ((ret = xd3_encode_bits (stream, output, bstate, DJW_EXTRA_CODE_BITS,
658
num_to_encode - DJW_EXTRA_12OFFSET))) { return ret; }
660
/* Encode: MTF code lengths */
661
for (i = 0; i < num_to_encode; i += 1)
663
if ((ret = xd3_encode_bits (stream, output, bstate, DJW_CLCLEN_BITS, clclen[i]))) { return ret; }
666
/* Encode: CLEN code lengths */
667
for (i = 0; i < prefix->mcount; i += 1)
669
usize_t mtf_sym = prefix->mtfsym[i];
670
usize_t bits = clclen[mtf_sym];
671
usize_t code = clcode[mtf_sym];
673
if ((ret = xd3_encode_bits (stream, output, bstate, bits, code))) { return ret; }
676
IF_TUNE (memcpy (tune_freq, clfreq, sizeof (clfreq)));
682
djw_compute_selector_1_2 (djw_prefix *prefix,
684
djw_weight *gbest_freq)
686
uint8_t grmtf[DJW_MAX_GROUPS];
689
for (i = 0; i < groups; i += 1) { grmtf[i] = i; }
691
djw_compute_mtf_1_2 (prefix, grmtf, gbest_freq, groups);
695
xd3_encode_howmany_groups (xd3_stream *stream,
699
usize_t *ret_sector_size)
701
usize_t cfg_groups = 0;
702
usize_t cfg_sector_size = 0;
703
usize_t sugg_groups = 0;
704
usize_t sugg_sector_size = 0;
706
if (cfg->ngroups != 0)
708
if (cfg->ngroups < 0 || cfg->ngroups > DJW_MAX_GROUPS)
710
stream->msg = "invalid secondary encoder group number";
714
cfg_groups = cfg->ngroups;
717
if (cfg->sector_size != 0)
719
if (cfg->sector_size < DJW_SECTORSZ_MULT || cfg->sector_size > DJW_SECTORSZ_MAX || (cfg->sector_size % DJW_SECTORSZ_MULT) != 0)
721
stream->msg = "invalid secondary encoder sector size";
725
cfg_sector_size = cfg->sector_size;
728
if (cfg_groups == 0 || cfg_sector_size == 0)
730
/* These values were found empirically using xdelta3-tune around version
732
switch (cfg->data_type)
735
if (input_size < 1000) { sugg_groups = 1; sugg_sector_size = 0; }
736
else if (input_size < 4000) { sugg_groups = 2; sugg_sector_size = 10; }
737
else if (input_size < 7000) { sugg_groups = 3; sugg_sector_size = 10; }
738
else if (input_size < 10000) { sugg_groups = 4; sugg_sector_size = 10; }
739
else if (input_size < 25000) { sugg_groups = 5; sugg_sector_size = 10; }
740
else if (input_size < 50000) { sugg_groups = 7; sugg_sector_size = 20; }
741
else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 30; }
742
else { sugg_groups = 8; sugg_sector_size = 70; }
745
if (input_size < 7000) { sugg_groups = 1; sugg_sector_size = 0; }
746
else if (input_size < 10000) { sugg_groups = 2; sugg_sector_size = 50; }
747
else if (input_size < 25000) { sugg_groups = 3; sugg_sector_size = 50; }
748
else if (input_size < 50000) { sugg_groups = 6; sugg_sector_size = 40; }
749
else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 40; }
750
else { sugg_groups = 8; sugg_sector_size = 40; }
753
if (input_size < 9000) { sugg_groups = 1; sugg_sector_size = 0; }
754
else if (input_size < 25000) { sugg_groups = 2; sugg_sector_size = 130; }
755
else if (input_size < 50000) { sugg_groups = 3; sugg_sector_size = 130; }
756
else if (input_size < 100000) { sugg_groups = 5; sugg_sector_size = 130; }
757
else { sugg_groups = 7; sugg_sector_size = 130; }
763
cfg_groups = sugg_groups;
766
if (cfg_sector_size == 0)
768
cfg_sector_size = sugg_sector_size;
772
if (cfg_groups != 1 && cfg_sector_size == 0)
774
switch (cfg->data_type)
777
cfg_sector_size = 20;
780
cfg_sector_size = 50;
783
cfg_sector_size = 130;
788
(*ret_groups) = cfg_groups;
789
(*ret_sector_size) = cfg_sector_size;
791
XD3_ASSERT (cfg_groups > 0 && cfg_groups <= DJW_MAX_GROUPS);
792
XD3_ASSERT (cfg_groups == 1 || (cfg_sector_size >= DJW_SECTORSZ_MULT && cfg_sector_size <= DJW_SECTORSZ_MAX));
798
xd3_real_encode_huff (xd3_stream *stream,
805
usize_t groups, sector_size;
806
bit_state bstate = BIT_STATE_ENCODE_INIT;
811
usize_t initial_offset = output->next;
812
djw_weight real_freq[ALPHABET_SIZE];
813
uint8_t *gbest = NULL; /* Dynamic allocations: could put these in djw_stream. */
814
uint8_t *gbest_mtf = NULL;
816
input_bytes = djw_count_freqs (real_freq, input);
817
input_bits = input_bytes * 8;
819
XD3_ASSERT (input_bytes > 0);
821
if ((ret = xd3_encode_howmany_groups (stream, cfg, input_bytes, & groups, & sector_size)))
829
/* Sometimes we dynamically decide there are too many groups. Arrive here. */
830
output->next = initial_offset;
831
xd3_bit_state_encode_init (& bstate);
834
/* Encode: # of groups (3 bits) */
835
if ((ret = xd3_encode_bits (stream, & output, & bstate, DJW_GROUP_BITS, groups-1))) { goto failure; }
839
/* Single Huffman group. */
840
uint code[ALPHABET_SIZE]; /* Codes */
841
IF_TUNE (uint8_t *clen = tune_clen[0];)
842
IF_NTUNE (uint8_t clen[ALPHABET_SIZE];)
843
uint8_t prefix_mtfsym[ALPHABET_SIZE];
847
djw_build_prefix (real_freq, clen, ALPHABET_SIZE, DJW_MAX_CODELEN);
848
djw_build_codes (code, clen, ALPHABET_SIZE DEBUG_ARG (DJW_MAX_CODELEN));
850
if (encode_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; }
853
prefix.mtfsym = prefix_mtfsym;
854
prefix.symbol = clen;
855
prefix.scount = ALPHABET_SIZE;
857
if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix))) { goto failure; }
859
if (encode_bits + (8 * output->next) + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; }
861
IF_TUNE (tune_prefix_bits = xd3_bitsof_output (output, & bstate));
862
IF_TUNE (tune_select_bits = 0);
863
IF_TUNE (tune_encode_bits = encode_bits);
866
for (in = input; in; in = in->next_page)
868
const uint8_t *p = in->base;
869
const uint8_t *p_max = p + in->next;
874
usize_t bits = clen[sym];
876
IF_DEBUG (encode_bits -= bits);
878
if ((ret = xd3_encode_bits (stream, & output, & bstate, bits, code[sym]))) { goto failure; }
883
XD3_ASSERT (encode_bits == 0);
888
djw_weight evolve_freq[DJW_MAX_GROUPS][ALPHABET_SIZE];
889
#if TUNE_HUFFMAN == 0
890
uint8_t evolve_clen[DJW_MAX_GROUPS][ALPHABET_SIZE];
892
#define evolve_clen tune_clen
894
djw_weight left = input_bytes;
898
usize_t sym1 = 0, sym2 = 0, s;
899
usize_t gcost[DJW_MAX_GROUPS];
900
uint gbest_code[DJW_MAX_GROUPS+2];
901
uint8_t gbest_clen[DJW_MAX_GROUPS+2];
902
usize_t gbest_max = 1 + (input_bytes - 1) / sector_size;
907
IF_DEBUG1 (usize_t gcount[DJW_MAX_GROUPS]);
909
/* Encode: sector size (5 bits) */
910
if ((ret = xd3_encode_bits (stream, & output, & bstate,
911
DJW_SECTORSZ_BITS, (sector_size/DJW_SECTORSZ_MULT)-1))) { goto failure; }
913
/* Dynamic allocation. */
916
if ((gbest = xd3_alloc (stream, gbest_max, 1)) == NULL) { ret = ENOMEM; goto failure; }
919
if (gbest_mtf == NULL)
921
if ((gbest_mtf = xd3_alloc (stream, gbest_max, 1)) == NULL) { ret = ENOMEM; goto failure; }
924
/* OPT: Some of the inner loops can be optimized, as shown in bzip2 */
926
/* Generate initial code length tables. */
927
for (gp = 0; gp < groups; gp += 1)
930
djw_weight goal = left / (groups - gp);
932
IF_DEBUG1 (usize_t nz = 0);
934
/* Due to the single-code granularity of this distribution, it may be that we
935
* can't generate a distribution for each group. In that case subtract one
936
* group and try again. If (inefficient), we're testing group behavior, so
937
* don't mess things up. */
938
if (goal == 0 && !cfg->inefficient)
940
IF_DEBUG1 (DP(RINT "too many groups (%u), dropping one\n", groups));
945
/* Sum == goal is possible when (cfg->inefficient)... */
948
XD3_ASSERT (sym2 < ALPHABET_SIZE);
949
IF_DEBUG1 (nz += real_freq[sym2] != 0);
950
sum += real_freq[sym2++];
953
IF_DEBUG1(DP(RINT "group %u has symbols %u..%u (%u non-zero) (%u/%u = %.3f)\n",
954
gp, sym1, sym2, nz, sum, input_bytes, sum / (double)input_bytes););
956
for (s = 0; s < ALPHABET_SIZE; s += 1)
958
evolve_clen[gp][s] = (s >= sym1 && s <= sym2) ? 1 : 16;
969
memset (evolve_freq, 0, sizeof (evolve_freq[0]) * groups);
970
IF_DEBUG1 (memset (gcount, 0, sizeof (gcount[0]) * groups));
972
/* For each input page (loop is irregular to allow non-pow2-size group size. */
976
/* For each group-size sector. */
979
const uint8_t *p0 = p;
980
xd3_output *in0 = in;
984
/* Select best group for each sector, update evolve_freq. */
985
memset (gcost, 0, sizeof (gcost[0]) * groups);
987
/* For each byte in sector. */
988
for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
990
/* For each group. */
991
for (gp = 0; gp < groups; gp += 1)
993
gcost[gp] += evolve_clen[gp][*p];
996
/* Check end-of-input-page. */
998
if (++p - in->base == in->next) \
1000
in = in->next_page; \
1001
if (in == NULL) { break; } \
1008
/* Find min cost group for this sector */
1010
for (gp = 0; gp < groups; gp += 1)
1012
if (gcost[gp] < best) { best = gcost[gp]; winner = gp; }
1015
XD3_ASSERT(gbest_no < gbest_max);
1016
gbest[gbest_no++] = winner;
1017
IF_DEBUG1 (gcount[winner] += 1);
1022
/* Update group frequencies. */
1023
for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
1025
evolve_freq[winner][*p] += 1;
1032
XD3_ASSERT (gbest_no == gbest_max);
1034
/* Recompute code lengths. */
1036
for (gp = 0; gp < groups; gp += 1)
1039
uint8_t evolve_zero[ALPHABET_SIZE];
1042
memset (evolve_zero, 0, sizeof (evolve_zero));
1044
/* Cannot allow a zero clen when the real frequency is non-zero. Note: this
1045
* means we are going to encode a fairly long code for these unused entries. An
1046
* improvement would be to implement a NOTUSED code for when these are actually
1047
* zero, but this requires another data structure (evolve_zero) since we don't
1048
* know when evolve_freq[i] == 0... Briefly tested, looked worse. */
1049
for (i = 0; i < ALPHABET_SIZE; i += 1)
1051
if (evolve_freq[gp][i] == 0 && real_freq[i] != 0)
1053
evolve_freq[gp][i] = 1;
1059
encode_bits += djw_build_prefix (evolve_freq[gp], evolve_clen[gp], ALPHABET_SIZE, DJW_MAX_CODELEN);
1061
/* The above faking of frequencies does not matter for the last iteration, but
1062
* we don't know when that is yet. However, it also breaks the encode_bits
1063
* computation. Necessary for accuracy, and for the (encode_bits==0) assert
1064
* after all bits are output. */
1067
IF_DEBUG1 (usize_t save_total = encode_bits);
1069
for (i = 0; i < ALPHABET_SIZE; i += 1)
1071
if (evolve_zero[i]) { encode_bits -= evolve_clen[gp][i]; }
1074
IF_DEBUG1 (DP(RINT "evolve_zero reduced %u bits in group %u\n", save_total - encode_bits, gp));
1079
DP(RINT "pass %u total bits: %u group uses: ", niter, encode_bits);
1080
for (gp = 0; gp < groups; gp += 1) { DP(RINT "%u ", gcount[gp]); }
1083
/* End iteration. (The following assertion proved invalid.) */
1084
/*XD3_ASSERT (niter == 1 || best_bits >= encode_bits);*/
1086
IF_DEBUG1 (if (niter > 1 && best_bits < encode_bits) {
1087
DP(RINT "iteration lost %u bits\n", encode_bits - best_bits); });
1089
if (niter == 1 || (niter < DJW_MAX_ITER && (best_bits - encode_bits) >= DJW_MIN_IMPROVEMENT))
1091
best_bits = encode_bits;
1095
/* Efficiency check. */
1096
if (encode_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; }
1098
IF_DEBUG1 (DP(RINT "djw compression: %u -> %0.3f\n", input_bytes, encode_bits / 8.0));
1100
/* Encode: prefix */
1102
uint8_t prefix_symbol[DJW_MAX_GROUPS * ALPHABET_SIZE];
1103
uint8_t prefix_mtfsym[DJW_MAX_GROUPS * ALPHABET_SIZE];
1104
uint8_t prefix_repcnt[DJW_MAX_GROUPS * ALPHABET_SIZE];
1107
prefix.symbol = prefix_symbol;
1108
prefix.mtfsym = prefix_mtfsym;
1109
prefix.repcnt = prefix_repcnt;
1111
djw_compute_multi_prefix (groups, evolve_clen, & prefix);
1112
if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix))) { goto failure; }
1115
/* Encode: selector frequencies */
1117
djw_weight gbest_freq[DJW_MAX_GROUPS+1];
1118
djw_prefix gbest_prefix;
1121
gbest_prefix.scount = gbest_no;
1122
gbest_prefix.symbol = gbest;
1123
gbest_prefix.mtfsym = gbest_mtf;
1125
djw_compute_selector_1_2 (& gbest_prefix, groups, gbest_freq);
1128
djw_build_prefix (gbest_freq, gbest_clen, groups+1, DJW_MAX_GBCLEN);
1129
djw_build_codes (gbest_code, gbest_clen, groups+1 DEBUG_ARG (DJW_MAX_GBCLEN));
1131
IF_TUNE (tune_prefix_bits = xd3_bitsof_output (output, & bstate));
1132
IF_TUNE (tune_select_bits = select_bits);
1133
IF_TUNE (tune_encode_bits = encode_bits);
1135
for (i = 0; i < groups+1; i += 1)
1137
if ((ret = xd3_encode_bits (stream, & output, & bstate, DJW_GBCLEN_BITS, gbest_clen[i]))) { goto failure; }
1140
for (i = 0; i < gbest_prefix.mcount; i += 1)
1142
usize_t gp_mtf = gbest_mtf[i];
1143
usize_t gp_sel_bits = gbest_clen[gp_mtf];
1144
usize_t gp_sel_code = gbest_code[gp_mtf];
1146
XD3_ASSERT (gp_mtf < groups+1);
1148
if ((ret = xd3_encode_bits (stream, & output, & bstate, gp_sel_bits, gp_sel_code))) { goto failure; }
1150
IF_DEBUG (select_bits -= gp_sel_bits);
1153
XD3_ASSERT (select_bits == 0);
1156
/* Efficiency check. */
1157
if (encode_bits + select_bits + (8 * output->next) + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; }
1161
uint evolve_code[DJW_MAX_GROUPS][ALPHABET_SIZE];
1164
/* Build code tables for each group. */
1165
for (gp = 0; gp < groups; gp += 1)
1167
djw_build_codes (evolve_code[gp], evolve_clen[gp], ALPHABET_SIZE DEBUG_ARG (DJW_MAX_CODELEN));
1170
/* Now loop over the input. */
1176
/* For each sector. */
1177
usize_t gp_best = gbest[sector];
1178
uint *gp_codes = evolve_code[gp_best];
1179
uint8_t *gp_clens = evolve_clen[gp_best];
1181
XD3_ASSERT (sector < gbest_no);
1185
/* Encode the sector data. */
1186
for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
1189
usize_t bits = gp_clens[sym];
1190
usize_t code = gp_codes[sym];
1192
IF_DEBUG (encode_bits -= bits);
1194
if ((ret = xd3_encode_bits (stream, & output, & bstate, bits, code))) { goto failure; }
1201
XD3_ASSERT (select_bits == 0);
1202
XD3_ASSERT (encode_bits == 0);
1208
ret = xd3_flush_bits (stream, & output, & bstate);
1213
stream->msg = "secondary compression was inefficient";
1219
xd3_free (stream, gbest);
1220
xd3_free (stream, gbest_mtf);
1223
#endif /* XD3_ENCODER */
1225
/*********************************************************************/
1227
/*********************************************************************/
1230
djw_build_decoder (xd3_stream *stream,
1233
const uint8_t *clen,
1242
uint nr_clen [DJW_MAX_CODELEN+2];
1243
uint tmp_base[DJW_MAX_CODELEN+2];
1247
/* Assumption: the two temporary arrays are large enough to hold abs_max. */
1248
XD3_ASSERT (abs_max <= DJW_MAX_CODELEN);
1250
/* This looks something like the start of zlib's inftrees.c */
1251
memset (nr_clen, 0, sizeof (nr_clen[0]) * (abs_max+1));
1253
/* Count number of each code length */
1258
/* Caller _must_ check that values are in-range. Most of the time
1259
* the caller decodes a specific number of bits, which imply the max value, and the
1260
* other time the caller decodes a huffman value, which must be in-range. Therefore,
1261
* its an assertion and this function cannot otherwise fail. */
1262
XD3_ASSERT (*ci <= abs_max);
1268
/* Compute min, max. */
1269
for (i = 1; i <= abs_max; i += 1) { if (nr_clen[i]) { break; } }
1271
for (i = abs_max; i != 0; i -= 1) { if (nr_clen[i]) { break; } }
1274
/* Fill the BASE, LIMIT table. */
1275
tmp_base[min_clen] = 0;
1277
limit[min_clen] = nr_clen[min_clen] - 1;
1278
for (i = min_clen + 1; i <= max_clen; i += 1)
1280
uint last_limit = ((limit[i-1] + 1) << 1);
1281
tmp_base[i] = tmp_base[i-1] + nr_clen[i-1];
1282
limit[i] = last_limit + nr_clen[i] - 1;
1283
base[i] = last_limit - tmp_base[i];
1286
/* Fill the inorder array, canonically ordered codes. */
1288
for (i = 0; i < asize; i += 1)
1290
if ((l = *ci++) != 0)
1292
inorder[tmp_base[l]++] = i;
1296
*min_clenp = min_clen;
1297
*max_clenp = max_clen;
1301
djw_decode_symbol (xd3_stream *stream,
1303
const uint8_t **input,
1304
const uint8_t *input_end,
1305
const uint8_t *inorder,
1316
/* OPT: Supposedly a small lookup table improves speed here... */
1318
/* Code outline is similar to xd3_decode_bits... */
1319
if (bstate->cur_mask == 0x100) { goto next_byte; }
1325
if (bits == max_clen) { goto corrupt; }
1330
if (bstate->cur_byte & bstate->cur_mask) { code |= 1; }
1332
bstate->cur_mask <<= 1;
1334
if (bits >= min_clen && code <= limit[bits]) { goto done; }
1336
while (bstate->cur_mask != 0x100);
1340
if (*input == input_end)
1342
stream->msg = "secondary decoder end of input";
1343
return XD3_INTERNAL;
1346
bstate->cur_byte = *(*input)++;
1347
bstate->cur_mask = 1;
1352
if (base[bits] <= code)
1354
usize_t offset = code - base[bits];
1356
if (offset <= max_sym)
1358
IF_DEBUG2 (DP(RINT "(j) %u ", code));
1359
*sym = inorder[offset];
1365
stream->msg = "secondary decoder invalid code";
1366
return XD3_INTERNAL;
1370
djw_decode_clclen (xd3_stream *stream,
1372
const uint8_t **input,
1373
const uint8_t *input_end,
1374
uint8_t *cl_inorder,
1382
uint8_t cl_clen[DJW_TOTAL_CODES];
1383
usize_t num_codes, value;
1386
/* How many extra code lengths to encode. */
1387
if ((ret = xd3_decode_bits (stream, bstate, input, input_end, DJW_EXTRA_CODE_BITS, & num_codes))) { return ret; }
1389
num_codes += DJW_EXTRA_12OFFSET;
1391
/* Read num_codes. */
1392
for (i = 0; i < num_codes; i += 1)
1394
if ((ret = xd3_decode_bits (stream, bstate, input, input_end, DJW_CLCLEN_BITS, & value))) { return ret; }
1399
/* Set the rest to zero. */
1400
for (; i < DJW_TOTAL_CODES; i += 1) { cl_clen[i] = 0; }
1402
/* No need to check for in-range clen values, because: */
1403
XD3_ASSERT (1 << DJW_CLCLEN_BITS == DJW_MAX_CLCLEN + 1);
1405
/* Build the code-length decoder. */
1406
djw_build_decoder (stream, DJW_TOTAL_CODES, DJW_MAX_CLCLEN,
1407
cl_clen, cl_inorder, cl_base, cl_limit, cl_minlen, cl_maxlen);
1409
/* Initialize the MTF state. */
1410
djw_init_clen_mtf_1_2 (cl_mtf);
1416
djw_decode_1_2 (xd3_stream *stream,
1418
const uint8_t **input,
1419
const uint8_t *input_end,
1420
const uint8_t *inorder,
1427
usize_t skip_offset,
1430
usize_t n = 0, rep = 0, mtf = 0, s = 0;
1435
/* Special case inside generic code: CLEN only: If not the first group, we already
1436
* know the zero frequencies. */
1437
if (skip_offset != 0 && n >= skip_offset && values[n-skip_offset] == 0)
1443
/* Repeat last symbol. */
1446
values[n++] = mtfvals[0];
1451
/* Symbol following last repeat code. */
1454
usize_t sym = djw_update_mtf (mtfvals, mtf);
1460
/* Decode next symbol/repeat code. */
1461
if ((ret = djw_decode_symbol (stream, bstate, input, input_end,
1462
inorder, base, limit, *minlen, *maxlen,
1463
& mtf, DJW_TOTAL_CODES))) { return ret; }
1468
rep = ((mtf + 1) << s);
1474
/* Remove the RUN_1 MTF offset. */
1480
/* If (rep != 0) there were too many codes received. */
1483
stream->msg = "secondary decoder invalid repeat code";
1484
return XD3_INTERNAL;
1491
djw_decode_prefix (xd3_stream *stream,
1493
const uint8_t **input,
1494
const uint8_t *input_end,
1495
const uint8_t *cl_inorder,
1496
const uint *cl_base,
1497
const uint *cl_limit,
1498
const uint *cl_minlen,
1499
const uint *cl_maxlen,
1504
return djw_decode_1_2 (stream, bstate, input, input_end,
1505
cl_inorder, cl_base, cl_limit, cl_minlen, cl_maxlen, cl_mtf,
1506
ALPHABET_SIZE * groups, ALPHABET_SIZE, clen);
1510
xd3_decode_huff (xd3_stream *stream,
1512
const uint8_t **input_pos,
1513
const uint8_t *const input_end,
1514
uint8_t **output_pos,
1515
const uint8_t *const output_end)
1517
const uint8_t *input = *input_pos;
1518
uint8_t *output = *output_pos;
1519
bit_state bstate = BIT_STATE_DECODE_INIT;
1520
uint8_t *sel_group = NULL;
1522
usize_t output_bytes = (output_end - output);
1523
usize_t sector_size;
1527
/* Invalid input. */
1528
if (output_bytes == 0)
1530
stream->msg = "secondary decoder invalid input";
1531
return XD3_INTERNAL;
1534
/* Decode: number of groups */
1535
if ((ret = xd3_decode_bits (stream, & bstate, & input, input_end, DJW_GROUP_BITS, & groups))) { goto fail; }
1541
/* Decode: group size */
1542
if ((ret = xd3_decode_bits (stream, & bstate, & input, input_end, DJW_SECTORSZ_BITS, & sector_size))) { goto fail; }
1544
sector_size = (sector_size + 1) * DJW_SECTORSZ_MULT;
1548
/* Default for groups == 1 */
1549
sector_size = output_bytes;
1552
sectors = 1 + (output_bytes - 1) / sector_size;
1554
/* @!@ In the case of groups==1, lots of extra stack space gets used here. Could
1555
* dynamically allocate this memory, which would help with excess parameter passing,
1556
* too. Passing too many parameters in this file, simplify it! */
1558
/* Outer scope: per-group symbol decoder tables. */
1560
uint8_t inorder[DJW_MAX_GROUPS][ALPHABET_SIZE];
1561
uint base [DJW_MAX_GROUPS][DJW_MAX_CODELEN+2];
1562
uint limit [DJW_MAX_GROUPS][DJW_MAX_CODELEN+2];
1563
uint minlen [DJW_MAX_GROUPS];
1564
uint maxlen [DJW_MAX_GROUPS];
1566
/* Nested scope: code length decoder tables. */
1568
uint8_t clen [DJW_MAX_GROUPS][ALPHABET_SIZE];
1569
uint8_t cl_inorder[DJW_TOTAL_CODES];
1570
uint cl_base [DJW_MAX_CLCLEN+2];
1571
uint cl_limit [DJW_MAX_CLCLEN+2];
1572
uint8_t cl_mtf [DJW_TOTAL_CODES];
1576
/* Compute the code length decoder. */
1577
if ((ret = djw_decode_clclen (stream, & bstate, & input, input_end,
1578
cl_inorder, cl_base, cl_limit, & cl_minlen,
1579
& cl_maxlen, cl_mtf))) { goto fail; }
1581
/* Now decode each group decoder. */
1582
if ((ret = djw_decode_prefix (stream, & bstate, & input, input_end,
1583
cl_inorder, cl_base, cl_limit,
1584
& cl_minlen, & cl_maxlen, cl_mtf,
1585
groups, clen[0]))) { goto fail; }
1587
/* Prepare the actual decoding tables. */
1588
for (gp = 0; gp < groups; gp += 1)
1590
djw_build_decoder (stream, ALPHABET_SIZE, DJW_MAX_CODELEN,
1591
clen[gp], inorder[gp], base[gp], limit[gp],
1592
& minlen[gp], & maxlen[gp]);
1596
/* Decode: selector clens. */
1598
uint8_t sel_inorder[DJW_MAX_GROUPS+2];
1599
uint sel_base [DJW_MAX_GBCLEN+2];
1600
uint sel_limit [DJW_MAX_GBCLEN+2];
1601
uint8_t sel_mtf [DJW_MAX_GROUPS+2];
1605
/* Setup group selection. */
1608
uint8_t sel_clen[DJW_MAX_GROUPS+1];
1610
for (gp = 0; gp < groups+1; gp += 1)
1614
if ((ret = xd3_decode_bits (stream, & bstate, & input, input_end, DJW_GBCLEN_BITS, & value))) { goto fail; }
1616
sel_clen[gp] = value;
1620
if ((sel_group = xd3_alloc (stream, sectors, 1)) == NULL) { ret = ENOMEM; goto fail; }
1622
djw_build_decoder (stream, groups+1, DJW_MAX_GBCLEN, sel_clen,
1623
sel_inorder, sel_base, sel_limit, & sel_minlen, & sel_maxlen);
1625
if ((ret = djw_decode_1_2 (stream, & bstate, & input, input_end,
1626
sel_inorder, sel_base, sel_limit, & sel_minlen, & sel_maxlen, sel_mtf,
1627
sectors, 0, sel_group))) { goto fail; }
1630
/* Now decode each sector. */
1632
uint8_t *gp_inorder = inorder[0]; /* Initialize for (groups==1) case. */
1633
uint *gp_base = base[0];
1634
uint *gp_limit = limit[0];
1635
uint gp_minlen = minlen[0];
1636
uint gp_maxlen = maxlen[0];
1639
for (c = 0; c < sectors; c += 1)
1647
XD3_ASSERT (gp < groups);
1649
gp_inorder = inorder[gp];
1651
gp_limit = limit[gp];
1652
gp_minlen = minlen[gp];
1653
gp_maxlen = maxlen[gp];
1656
XD3_ASSERT (output_end - output > 0);
1658
/* Decode next sector. */
1659
n = min (sector_size, (usize_t) (output_end - output));
1665
if ((ret = djw_decode_symbol (stream, & bstate, & input, input_end,
1666
gp_inorder, gp_base, gp_limit, gp_minlen, gp_maxlen,
1667
& sym, ALPHABET_SIZE))) { goto fail; }
1677
IF_REGRESSION (if ((ret = xd3_test_clean_bits (stream, & bstate))) { goto fail; });
1678
XD3_ASSERT (ret == 0);
1681
xd3_free (stream, sel_group);
1683
(*input_pos) = input;
1684
(*output_pos) = output;
1688
/*********************************************************************/
1690
/*********************************************************************/
1692
#if TUNE_HUFFMAN && XD3_ENCODER
1694
#include "xdelta3-fgk.h"
1697
xd3_bitsof_output (xd3_output *output, bit_state *bstate)
1700
uint m = bstate->cur_mask;
1708
return x + 8 * xd3_sizeof_output (output);
1711
static const char* xd3_sect_type (xd3_section_type type)
1715
case DATA_SECTION: return "DATA";
1716
case INST_SECTION: return "INST";
1717
case ADDR_SECTION: return "ADDR";
1723
xd3_encode_huff (xd3_stream *stream,
1726
xd3_output *unused_output,
1730
int input_size = xd3_sizeof_output (input);
1732
const char *sect_type = xd3_sect_type (cfg->data_type);
1734
usize_t output_size;
1736
if (hdr == 0) { hdr = 1; DP(RINT "____ SECT INSZ SECTORSZ GPNO OUTSZ PREFIX SELECT ENCODE\n"); }
1738
DP(RINT "SECTION %s %u\n", sect_type, input_size);
1742
int best_size = 99999999;
1743
usize_t best_prefix = 0, best_select = 0, best_encode = 0, best_sector_size = 0;
1745
const char *t12 = "12";
1746
usize_t clen_count[DJW_MAX_CODELEN+1];
1747
djw_weight best_freq[DJW_TOTAL_CODES];
1749
for (cfg->ngroups = 1; cfg->ngroups <= /*1*/ DJW_MAX_GROUPS; cfg->ngroups += 1)
1751
for (cfg->sector_size = 10; cfg->sector_size <= DJW_SECTORSZ_MAX; cfg->sector_size += 10)
1753
output = xd3_alloc_output (stream, NULL);
1755
if ((ret = xd3_real_encode_huff (stream, h, input, output, cfg))) { goto fail; }
1757
output_size = xd3_sizeof_output (output);
1759
if (output_size < best_size)
1761
best_size = output_size;
1762
best_gpno = cfg->ngroups;
1763
best_prefix = tune_prefix_bits;
1764
best_select = tune_select_bits;
1765
best_encode = tune_encode_bits;
1766
best_sector_size = cfg->sector_size;
1767
memset (clen_count, 0, sizeof (clen_count));
1769
for (gp = 0; gp < cfg->ngroups; gp += 1)
1771
for (i = 0; i < ALPHABET_SIZE; i += 1)
1773
clen_count[tune_clen[gp][i]] += 1;
1777
memcpy (best_freq, tune_freq, sizeof (tune_freq));
1779
XD3_ASSERT (sizeof (tune_freq) == sizeof (mtf_freq));
1784
DP(RINT "COMP%s %u %u %u %u %u %u\n",
1785
t12, cfg->ngroups, cfg->sector_size,
1786
output_size, tune_prefix_bits, tune_select_bits, tune_encode_bits);
1791
DP(RINT "COMP%s %u %u %u %u %u %u\n",
1792
t12, cfg->ngroups, cfg->sector_size,
1793
input_size, 0, 0, 0);
1796
xd3_free_output (stream, output);
1798
XD3_ASSERT (ret == 0 || ret == XD3_NOSECOND);
1800
if (cfg->ngroups == 1) { break; }
1806
DP(RINT "BEST%s %u %u %u %u %u %u\n",
1807
t12, best_gpno, best_sector_size,
1808
best_size, best_prefix, best_select, best_encode);
1811
DP(RINT "CLEN%s ", t12);
1812
for (i = 1; i <= DJW_MAX_CODELEN; i += 1)
1814
DP(RINT "%u ", clen_count[i]);
1818
DP(RINT "FREQ%s ", t12);
1819
for (i = 0; i < DJW_TOTAL_CODES; i += 1)
1821
DP(RINT "%u ", tune_freq[i]);
1828
/* Compare to split single-table windows. */
1834
for (parts = 2; parts <= DJW_MAX_GROUPS; parts += 1)
1836
usize_t part_size = input_size / parts;
1837
xd3_output *inp = input, *partin, *partin_head;
1839
usize_t part_total = 0;
1841
if (part_size < 1000) { break; }
1843
for (i = 0; i < parts; i += 1)
1847
partin = partin_head = xd3_alloc_output (stream, NULL);
1848
output = xd3_alloc_output (stream, NULL);
1850
for (inc = 0; ((i < parts-1) && inc < part_size) ||
1851
((i == parts-1) && inp != NULL); )
1857
take = min (part_size - inc, inp->next - off);
1861
take = inp->next - off;
1864
ret = xd3_emit_bytes (stream, & partin, inp->base + off, take);
1869
if (off == inp->next)
1871
inp = inp->next_page;
1876
ret = xd3_real_encode_huff (stream, h, partin_head, output, cfg);
1878
part_total += xd3_sizeof_output (output);
1880
xd3_free_output (stream, partin_head);
1881
xd3_free_output (stream, output);
1883
XD3_ASSERT (ret == 0 || ret == XD3_NOSECOND);
1885
if (ret == XD3_NOSECOND)
1891
if (ret != XD3_NOSECOND)
1893
DP(RINT "PART %u %u\n", parts, part_total);
1898
/* Compare to FGK */
1900
fgk_stream *fgk = fgk_alloc (stream);
1904
output = xd3_alloc_output (stream, NULL);
1906
ret = xd3_encode_fgk (stream, fgk, input, output, NULL);
1908
output_size = xd3_sizeof_output (output);
1909
xd3_free_output (stream, output);
1910
fgk_destroy (stream, fgk);
1912
XD3_ASSERT (ret == 0);
1914
DP(RINT "FGK %u\n", output_size);
1917
DP(RINT "END_SECTION %s %u\n", sect_type, input_size);