2
* Copyright (c) 2008 Joerg Sonnenberger
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
8
* 1. Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
10
* 2. Redistributions in binary form must reproduce the above copyright
11
* notice, this list of conditions and the following disclaimer in the
12
* documentation and/or other materials provided with the distribution.
14
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17
* IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
* Copyright (c) 1985, 1986, 1992, 1993
28
* The Regents of the University of California. All rights reserved.
30
* This code is derived from software contributed to Berkeley by
31
* Diomidis Spinellis and James A. Woods, derived from original
32
* work by Spencer Thomas and Joseph Orost.
34
* Redistribution and use in source and binary forms, with or without
35
* modification, are permitted provided that the following conditions
37
* 1. Redistributions of source code must retain the above copyright
38
* notice, this list of conditions and the following disclaimer.
39
* 2. Redistributions in binary form must reproduce the above copyright
40
* notice, this list of conditions and the following disclaimer in the
41
* documentation and/or other materials provided with the distribution.
42
* 3. Neither the name of the University nor the names of its contributors
43
* may be used to endorse or promote products derived from this software
44
* without specific prior written permission.
46
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59
#include "archive_platform.h"
61
__FBSDID("$FreeBSD: head/lib/libarchive/archive_write_set_compression_compress.c 201111 2009-12-28 03:33:05Z kientzle $");
74
#include "archive_private.h"
75
#include "archive_write_private.h"
77
#define HSIZE 69001 /* 95% occupancy */
78
#define HSHIFT 8 /* 8 - trunc(log2(HSIZE / 65536)) */
79
#define CHECK_GAP 10000 /* Ratio check interval. */
81
#define MAXCODE(bits) ((1 << (bits)) - 1)
84
* the next two codes should not be changed lightly, as they must not
85
* lie within the contiguous general code space.
87
#define FIRST 257 /* First free entry. */
88
#define CLEAR 256 /* Table clear output code. */
91
off_t in_count, out_count, checkpoint;
93
int code_len; /* Number of bits/code. */
94
int cur_maxcode; /* Maximum code, given n_bits. */
95
int max_maxcode; /* Should NEVER generate this code. */
97
unsigned short codetab [HSIZE];
98
int first_free; /* First unused entry. */
101
int cur_code, cur_fcode;
104
unsigned char bit_buf;
106
unsigned char *compressed;
107
size_t compressed_buffer_size;
108
size_t compressed_offset;
111
static int archive_compressor_compress_finish(struct archive_write *);
112
static int archive_compressor_compress_init(struct archive_write *);
113
static int archive_compressor_compress_write(struct archive_write *,
114
const void *, size_t);
117
* Allocate, initialize and return a archive object.
120
archive_write_set_compression_compress(struct archive *_a)
122
struct archive_write *a = (struct archive_write *)_a;
123
__archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC,
124
ARCHIVE_STATE_NEW, "archive_write_set_compression_compress");
125
a->compressor.init = &archive_compressor_compress_init;
126
a->archive.compression_code = ARCHIVE_COMPRESSION_COMPRESS;
127
a->archive.compression_name = "compress";
135
archive_compressor_compress_init(struct archive_write *a)
138
struct private_data *state;
140
a->archive.compression_code = ARCHIVE_COMPRESSION_COMPRESS;
141
a->archive.compression_name = "compress";
143
if (a->bytes_per_block < 4) {
144
archive_set_error(&a->archive, EINVAL,
145
"Can't write Compress header as single block");
146
return (ARCHIVE_FATAL);
149
if (a->client_opener != NULL) {
150
ret = (a->client_opener)(&a->archive, a->client_data);
151
if (ret != ARCHIVE_OK)
155
state = (struct private_data *)malloc(sizeof(*state));
157
archive_set_error(&a->archive, ENOMEM,
158
"Can't allocate data for compression");
159
return (ARCHIVE_FATAL);
161
memset(state, 0, sizeof(*state));
163
state->compressed_buffer_size = a->bytes_per_block;
164
state->compressed = malloc(state->compressed_buffer_size);
166
if (state->compressed == NULL) {
167
archive_set_error(&a->archive, ENOMEM,
168
"Can't allocate data for compression buffer");
170
return (ARCHIVE_FATAL);
173
a->compressor.write = archive_compressor_compress_write;
174
a->compressor.finish = archive_compressor_compress_finish;
176
state->max_maxcode = 0x10000; /* Should NEVER generate this code. */
177
state->in_count = 0; /* Length of input. */
179
state->bit_offset = 0;
180
state->out_count = 3; /* Includes 3-byte header mojo. */
181
state->compress_ratio = 0;
182
state->checkpoint = CHECK_GAP;
184
state->cur_maxcode = MAXCODE(state->code_len);
185
state->first_free = FIRST;
187
memset(state->hashtab, 0xff, sizeof(state->hashtab));
189
/* Prime output buffer with a gzip header. */
190
state->compressed[0] = 0x1f; /* Compress */
191
state->compressed[1] = 0x9d;
192
state->compressed[2] = 0x90; /* Block mode, 16bit max */
193
state->compressed_offset = 3;
195
a->compressor.data = state;
200
* Output the given code.
202
* code: A n_bits-bit integer. If == -1, then EOF. This assumes
203
* that n_bits =< (long)wordsize - 1.
205
* Outputs code to the file.
207
* Chars are 8 bits long.
209
* Maintain a BITS character long buffer (so that 8 codes will
210
* fit in it exactly). Use the VAX insv instruction to insert each
211
* code in turn. When the buffer fills up empty it and start over.
214
static unsigned char rmask[9] =
215
{0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
218
output_byte(struct archive_write *a, unsigned char c)
220
struct private_data *state = a->compressor.data;
221
ssize_t bytes_written;
223
state->compressed[state->compressed_offset++] = c;
226
if (state->compressed_buffer_size == state->compressed_offset) {
227
bytes_written = (a->client_writer)(&a->archive,
229
state->compressed, state->compressed_buffer_size);
230
if (bytes_written <= 0)
231
return ARCHIVE_FATAL;
232
a->archive.raw_position += bytes_written;
233
state->compressed_offset = 0;
240
output_code(struct archive_write *a, int ocode)
242
struct private_data *state = a->compressor.data;
243
int bits, ret, clear_flg, bit_offset;
245
clear_flg = ocode == CLEAR;
248
* Since ocode is always >= 8 bits, only need to mask the first
251
bit_offset = state->bit_offset % 8;
252
state->bit_buf |= (ocode << bit_offset) & 0xff;
253
output_byte(a, state->bit_buf);
255
bits = state->code_len - (8 - bit_offset);
256
ocode >>= 8 - bit_offset;
257
/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
259
output_byte(a, ocode & 0xff);
264
state->bit_offset += state->code_len;
265
state->bit_buf = ocode & rmask[bits];
266
if (state->bit_offset == state->code_len * 8)
267
state->bit_offset = 0;
270
* If the next entry is going to be too big for the ocode size,
271
* then increase it, if possible.
273
if (clear_flg || state->first_free > state->cur_maxcode) {
275
* Write the whole buffer, because the input side won't
276
* discover the size increase until after it has read it.
278
if (state->bit_offset > 0) {
279
while (state->bit_offset < state->code_len * 8) {
280
ret = output_byte(a, state->bit_buf);
281
if (ret != ARCHIVE_OK)
283
state->bit_offset += 8;
288
state->bit_offset = 0;
292
state->cur_maxcode = MAXCODE(state->code_len);
295
if (state->code_len == 16)
296
state->cur_maxcode = state->max_maxcode;
298
state->cur_maxcode = MAXCODE(state->code_len);
306
output_flush(struct archive_write *a)
308
struct private_data *state = a->compressor.data;
311
/* At EOF, write the rest of the buffer. */
312
if (state->bit_offset % 8) {
313
state->code_len = (state->bit_offset % 8 + 7) / 8;
314
ret = output_byte(a, state->bit_buf);
315
if (ret != ARCHIVE_OK)
323
* Write data to the compressed stream.
326
archive_compressor_compress_write(struct archive_write *a, const void *buff,
329
struct private_data *state;
333
const unsigned char *bp;
335
state = (struct private_data *)a->compressor.data;
336
if (a->client_writer == NULL) {
337
archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
338
"No write callback is registered? "
339
"This is probably an internal programming error.");
340
return (ARCHIVE_FATAL);
348
if (state->in_count == 0) {
349
state->cur_code = *bp++;
357
state->cur_fcode = (c << 16) + state->cur_code;
358
i = ((c << HSHIFT) ^ state->cur_code); /* Xor hashing. */
360
if (state->hashtab[i] == state->cur_fcode) {
361
state->cur_code = state->codetab[i];
364
if (state->hashtab[i] < 0) /* Empty slot. */
366
/* Secondary hash (after G. Knott). */
375
if (state->hashtab[i] == state->cur_fcode) {
376
state->cur_code = state->codetab[i];
379
if (state->hashtab[i] >= 0)
382
ret = output_code(a, state->cur_code);
383
if (ret != ARCHIVE_OK)
386
if (state->first_free < state->max_maxcode) {
387
state->codetab[i] = state->first_free++; /* code -> hashtable */
388
state->hashtab[i] = state->cur_fcode;
391
if (state->in_count < state->checkpoint)
394
state->checkpoint = state->in_count + CHECK_GAP;
396
if (state->in_count <= 0x007fffff)
397
ratio = state->in_count * 256 / state->out_count;
398
else if ((ratio = state->out_count / 256) == 0)
401
ratio = state->in_count / ratio;
403
if (ratio > state->compress_ratio)
404
state->compress_ratio = ratio;
406
state->compress_ratio = 0;
407
memset(state->hashtab, 0xff, sizeof(state->hashtab));
408
state->first_free = FIRST;
409
ret = output_code(a, CLEAR);
410
if (ret != ARCHIVE_OK)
420
* Finish the compression...
423
archive_compressor_compress_finish(struct archive_write *a)
425
ssize_t block_length, target_block_length, bytes_written;
427
struct private_data *state;
430
state = (struct private_data *)a->compressor.data;
431
if (a->client_writer == NULL) {
432
archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
433
"No write callback is registered? "
434
"This is probably an internal programming error.");
439
/* By default, always pad the uncompressed data. */
440
if (a->pad_uncompressed) {
441
while (state->in_count % a->bytes_per_block != 0) {
442
tocopy = a->bytes_per_block -
443
(state->in_count % a->bytes_per_block);
444
if (tocopy > a->null_length)
445
tocopy = a->null_length;
446
ret = archive_compressor_compress_write(a, a->nulls,
448
if (ret != ARCHIVE_OK)
453
ret = output_code(a, state->cur_code);
454
if (ret != ARCHIVE_OK)
456
ret = output_flush(a);
457
if (ret != ARCHIVE_OK)
460
/* Optionally, pad the final compressed block. */
461
block_length = state->compressed_offset;
463
/* Tricky calculation to determine size of last block. */
464
if (a->bytes_in_last_block <= 0)
465
/* Default or Zero: pad to full block */
466
target_block_length = a->bytes_per_block;
468
/* Round length to next multiple of bytes_in_last_block. */
469
target_block_length = a->bytes_in_last_block *
470
( (block_length + a->bytes_in_last_block - 1) /
471
a->bytes_in_last_block);
472
if (target_block_length > a->bytes_per_block)
473
target_block_length = a->bytes_per_block;
474
if (block_length < target_block_length) {
475
memset(state->compressed + state->compressed_offset, 0,
476
target_block_length - block_length);
477
block_length = target_block_length;
480
/* Write the last block */
481
bytes_written = (a->client_writer)(&a->archive, a->client_data,
482
state->compressed, block_length);
483
if (bytes_written <= 0)
486
a->archive.raw_position += bytes_written;
489
free(state->compressed);