1
1
/* trees.c -- output deflated data using Huffman coding
2
* Copyright (C) 1995-2005 Jean-loup Gailly
2
* Copyright (C) 1995-2012 Jean-loup Gailly
3
* detect_data_type() function provided freely by Cosmin Truta, 2006
3
4
* For conditions of distribution and use, see copyright notice in zlib.h
73
74
* probability, to avoid transmitting the lengths for unused bit length codes.
76
#define Buf_size (8 * 2*sizeof(char))
77
/* Number of bits used within bi_buf. (bi_buf might be implemented on
78
* more than 16 bits on some systems.)
81
77
/* ===========================================================================
82
78
* Local data. These are initialized only once.
150
146
local int build_bl_tree OF((deflate_state *s));
151
147
local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
153
local void compress_block OF((deflate_state *s, ct_data *ltree,
155
local void set_data_type OF((deflate_state *s));
149
local void compress_block OF((deflate_state *s, const ct_data *ltree,
150
const ct_data *dtree));
151
local int detect_data_type OF((deflate_state *s));
156
152
local unsigned bi_reverse OF((unsigned value, int length));
157
153
local void bi_windup OF((deflate_state *s));
158
154
local void bi_flush OF((deflate_state *s));
203
199
* unused bits in value.
205
201
if (s->bi_valid > (int)Buf_size - length) {
206
s->bi_buf |= (value << s->bi_valid);
202
s->bi_buf |= (ush)value << s->bi_valid;
207
203
put_short(s, s->bi_buf);
208
204
s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
209
205
s->bi_valid += length - Buf_size;
211
s->bi_buf |= value << s->bi_valid;
207
s->bi_buf |= (ush)value << s->bi_valid;
212
208
s->bi_valid += length;
218
214
{ int len = length;\
219
215
if (s->bi_valid > (int)Buf_size - len) {\
220
216
int val = value;\
221
s->bi_buf |= (val << s->bi_valid);\
217
s->bi_buf |= (ush)val << s->bi_valid;\
222
218
put_short(s, s->bi_buf);\
223
219
s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
224
220
s->bi_valid += len - Buf_size;\
226
s->bi_buf |= (value) << s->bi_valid;\
222
s->bi_buf |= (ush)(value) << s->bi_valid;\
227
223
s->bi_valid += len;\
250
246
if (static_init_done) return;
252
248
/* For some embedded targets, global variables are not initialized: */
249
#ifdef NO_INIT_GLOBAL_POINTERS
253
250
static_l_desc.static_tree = static_ltree;
254
251
static_l_desc.extra_bits = extra_lbits;
255
252
static_d_desc.static_tree = static_dtree;
256
253
static_d_desc.extra_bits = extra_dbits;
257
254
static_bl_desc.extra_bits = extra_blbits;
259
257
/* Initialize the mapping length (0..255) -> length code (0..28) */
348
346
static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5));
351
fprintf(header, "const uch _dist_code[DIST_CODE_LEN] = {\n");
349
fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n");
352
350
for (i = 0; i < DIST_CODE_LEN; i++) {
353
351
fprintf(header, "%2u%s", _dist_code[i],
354
352
SEPARATOR(i, DIST_CODE_LEN-1, 20));
357
fprintf(header, "const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");
356
"const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");
358
357
for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) {
359
358
fprintf(header, "%2u%s", _length_code[i],
360
359
SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20));
864
862
/* ===========================================================================
865
863
* Send a stored block
867
void _tr_stored_block(s, buf, stored_len, eof)
865
void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last)
868
866
deflate_state *s;
869
867
charf *buf; /* input block */
870
868
ulg stored_len; /* length of input block */
871
int eof; /* true if this is the last block for a file */
869
int last; /* one if this is the last block for a file */
873
send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */
871
send_bits(s, (STORED_BLOCK<<1)+last, 3); /* send block type */
875
873
s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
876
874
s->compressed_len += (stored_len + 4) << 3;
881
879
/* ===========================================================================
880
* Flush the bits in the bit buffer to pending output (leaves at most 7 bits)
882
void ZLIB_INTERNAL _tr_flush_bits(s)
888
/* ===========================================================================
882
889
* Send one empty static block to give enough lookahead for inflate.
883
890
* This takes 10 bits, of which 7 may remain in the bit buffer.
884
* The current inflate code requires 9 bits of lookahead. If the
885
* last two codes for the previous block (real code plus EOB) were coded
886
* on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode
887
* the last real code. In this case we send two empty static blocks instead
888
* of one. (There are no problems if the previous block is stored or fixed.)
889
* To simplify the code, we assume the worst case of last real code encoded
892
void ZLIB_INTERNAL _tr_align(s)
893
893
deflate_state *s;
895
895
send_bits(s, STATIC_TREES<<1, 3);
898
898
s->compressed_len += 10L; /* 3 for block type, 7 for EOB */
901
/* Of the 10 bits for the empty block, we have already sent
902
* (10 - bi_valid) bits. The lookahead for the last real code (before
903
* the EOB of the previous block) was thus at least one plus the length
904
* of the EOB plus what we have just sent of the empty static block.
906
if (1 + s->last_eob_len + 10 - s->bi_valid < 9) {
907
send_bits(s, STATIC_TREES<<1, 3);
908
send_code(s, END_BLOCK, static_ltree);
910
s->compressed_len += 10L;
917
903
/* ===========================================================================
918
904
* Determine the best encoding for the current block: dynamic trees, static
919
905
* trees or store, and output the encoded block to the zip file.
921
void _tr_flush_block(s, buf, stored_len, eof)
907
void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
922
908
deflate_state *s;
923
909
charf *buf; /* input block, or NULL if too old */
924
910
ulg stored_len; /* length of input block */
925
int eof; /* true if this is the last block for a file */
911
int last; /* one if this is the last block for a file */
927
913
ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
928
914
int max_blindex = 0; /* index of last bit length code of non zero freq */
931
917
if (s->level > 0) {
933
919
/* Check if the file is binary or text */
934
if (stored_len > 0 && s->strm->data_type == Z_UNKNOWN)
920
if (s->strm->data_type == Z_UNKNOWN)
921
s->strm->data_type = detect_data_type(s);
937
923
/* Construct the literal and distance trees */
938
924
build_tree(s, (tree_desc *)(&(s->l_desc)));
978
964
* successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
979
965
* transform a block into a stored block.
981
_tr_stored_block(s, buf, stored_len, eof);
967
_tr_stored_block(s, buf, stored_len, last);
983
969
#ifdef FORCE_STATIC
984
970
} else if (static_lenb >= 0) { /* force static trees */
986
972
} else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) {
988
send_bits(s, (STATIC_TREES<<1)+eof, 3);
989
compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree);
974
send_bits(s, (STATIC_TREES<<1)+last, 3);
975
compress_block(s, (const ct_data *)static_ltree,
976
(const ct_data *)static_dtree);
991
978
s->compressed_len += 3 + s->static_len;
994
send_bits(s, (DYN_TREES<<1)+eof, 3);
981
send_bits(s, (DYN_TREES<<1)+last, 3);
995
982
send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
997
compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree);
984
compress_block(s, (const ct_data *)s->dyn_ltree,
985
(const ct_data *)s->dyn_dtree);
999
987
s->compressed_len += 3 + s->opt_len;
1011
999
s->compressed_len += 7; /* align on byte boundary */
1014
1002
Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
1015
s->compressed_len-7*eof));
1003
s->compressed_len-7*last));
1018
1006
/* ===========================================================================
1019
1007
* Save the match info and tally the frequency counts. Return true if
1020
1008
* the current block must be flushed.
1022
int _tr_tally (s, dist, lc)
1010
int ZLIB_INTERNAL _tr_tally (s, dist, lc)
1023
1011
deflate_state *s;
1024
1012
unsigned dist; /* distance of matched string */
1025
1013
unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */
1072
1060
local void compress_block(s, ltree, dtree)
1073
1061
deflate_state *s;
1074
ct_data *ltree; /* literal tree */
1075
ct_data *dtree; /* distance tree */
1062
const ct_data *ltree; /* literal tree */
1063
const ct_data *dtree; /* distance tree */
1077
1065
unsigned dist; /* distance of matched string */
1078
1066
int lc; /* match length or unmatched char (if dist == 0) */
1114
1102
} while (lx < s->last_lit);
1116
1104
send_code(s, END_BLOCK, ltree);
1117
s->last_eob_len = ltree[END_BLOCK].Len;
1120
1107
/* ===========================================================================
1121
* Set the data type to BINARY or TEXT, using a crude approximation:
1122
* set it to Z_TEXT if all symbols are either printable characters (33 to 255)
1123
* or white spaces (9 to 13, or 32); or set it to Z_BINARY otherwise.
1108
* Check if the data type is TEXT or BINARY, using the following algorithm:
1109
* - TEXT if the two conditions below are satisfied:
1110
* a) There are no non-portable control characters belonging to the
1111
* "black list" (0..6, 14..25, 28..31).
1112
* b) There is at least one printable character belonging to the
1113
* "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
1114
* - BINARY otherwise.
1115
* - The following partially-portable control characters form a
1116
* "gray list" that is ignored in this detection algorithm:
1117
* (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
1124
1118
* IN assertion: the fields Freq of dyn_ltree are set.
1126
local void set_data_type(s)
1120
local int detect_data_type(s)
1127
1121
deflate_state *s;
1123
/* black_mask is the bit mask of black-listed bytes
1124
* set bits 0..6, 14..25, and 28..31
1125
* 0xf3ffc07f = binary 11110011111111111100000001111111
1127
unsigned long black_mask = 0xf3ffc07fUL;
1131
for (n = 0; n < 9; n++)
1130
/* Check for non-textual ("black-listed") bytes. */
1131
for (n = 0; n <= 31; n++, black_mask >>= 1)
1132
if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0))
1135
/* Check for textual ("white-listed") bytes. */
1136
if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0
1137
|| s->dyn_ltree[13].Freq != 0)
1139
for (n = 32; n < LITERALS; n++)
1132
1140
if (s->dyn_ltree[n].Freq != 0)
1135
for (n = 14; n < 32; n++)
1136
if (s->dyn_ltree[n].Freq != 0)
1138
s->strm->data_type = (n == 32) ? Z_TEXT : Z_BINARY;
1143
/* There are no "black-listed" or "white-listed" bytes:
1144
* this stream either is empty or has tolerated ("gray-listed") bytes only.
1141
1149
/* ===========================================================================