2
* Copyright (c) 2003-2004 Tim Kientzle
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
8
* 1. Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer
10
* in this position and unchanged.
11
* 2. Redistributions in binary form must reproduce the above copyright
12
* notice, this list of conditions and the following disclaimer in the
13
* documentation and/or other materials provided with the distribution.
15
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18
* IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
* This code borrows heavily from "compress" source code, which is
29
* protected by the following copyright. (Clause 3 dropped by request
34
* Copyright (c) 1985, 1986, 1992, 1993
35
* The Regents of the University of California. All rights reserved.
37
* This code is derived from software contributed to Berkeley by
38
* Diomidis Spinellis and James A. Woods, derived from original
39
* work by Spencer Thomas and Joseph Orost.
41
* Redistribution and use in source and binary forms, with or without
42
* modification, are permitted provided that the following conditions
44
* 1. Redistributions of source code must retain the above copyright
45
* notice, this list of conditions and the following disclaimer.
46
* 2. Redistributions in binary form must reproduce the above copyright
47
* notice, this list of conditions and the following disclaimer in the
48
* documentation and/or other materials provided with the distribution.
49
* 4. Neither the name of the University nor the names of its contributors
50
* may be used to endorse or promote products derived from this software
51
* without specific prior written permission.
53
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
67
#include "archive_platform.h"
68
__FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_compression_compress.c,v 1.3 2004/10/17 23:40:10 kientzle Exp $");
76
#include "archive_private.h"
79
* Because LZW decompression is pretty simple, I've just implemented
80
* the whole decompressor here (cribbing from "compress" source code,
81
* of course), rather than relying on an external library. I have
82
* made an effort to clarify and simplify the algorithm, so the
83
* names and structure here don't exactly match those used by compress.
87
/* Input variables. */
88
const unsigned char *next_in;
92
size_t bytes_in_section;
94
/* Output variables. */
95
size_t uncompressed_buffer_size;
96
void *uncompressed_buffer;
97
unsigned char *read_next; /* Data for client. */
98
unsigned char *next_out; /* Where to write new data. */
99
size_t avail_out; /* Space at end of buffer. */
101
/* Decompression status variables. */
103
int end_of_stream; /* EOF status. */
104
int maxcode; /* Largest code. */
105
int maxcode_bits; /* Length of largest code. */
106
int section_end_code; /* When to increase bits. */
107
int bits; /* Current code length. */
108
int oldcode; /* Previous code. */
109
int finbyte; /* Last byte of prev code. */
112
int free_ent; /* Next dictionary entry. */
113
unsigned char suffix[65536];
114
uint16_t prefix[65536];
117
* Scratch area for expanding dictionary entries. Note:
118
* "worst" case here comes from compressing /dev/zero: the
119
* last code in the dictionary will code a sequence of
120
* 65536-256 zero bytes. Thus, we need stack space to expand
121
* a 65280-byte dictionary entry. (Of course, 32640:1
122
* compression could also be considered the "best" case. ;-)
124
unsigned char *stackp;
125
unsigned char stack[65300];
128
static int bid(const void *, size_t);
129
static int finish(struct archive *);
130
static int init(struct archive *, const void *, size_t);
131
static ssize_t read_ahead(struct archive *, const void **, size_t);
132
static ssize_t read_consume(struct archive *, size_t);
133
static int getbits(struct archive *, struct private_data *, int n);
134
static int next_code(struct archive *a, struct private_data *state);
137
archive_read_support_compression_compress(struct archive *a)
139
return (__archive_read_register_compression(a, bid, init));
143
* Test whether we can handle this data.
145
* This logic returns zero if any part of the signature fails. It
146
* also tries to Do The Right Thing if a very short buffer prevents us
147
* from verifying as much as we would like.
150
bid(const void *buff, size_t len)
152
const unsigned char *buffer;
160
if (buffer[0] != 037) /* Verify first ID byte. */
164
return (bits_checked);
166
if (buffer[1] != 0235) /* Verify second ID byte. */
170
return (bits_checked);
176
return (bits_checked);
180
* Setup the callbacks.
183
init(struct archive *a, const void *buff, size_t n)
185
struct private_data *state;
188
a->compression_code = ARCHIVE_COMPRESSION_COMPRESS;
189
a->compression_name = "compress (.Z)";
191
a->compression_read_ahead = read_ahead;
192
a->compression_read_consume = read_consume;
193
a->compression_finish = finish;
195
state = malloc(sizeof(*state));
197
archive_set_error(a, ENOMEM,
198
"Can't allocate data for %s decompression",
199
a->compression_name);
200
return (ARCHIVE_FATAL);
202
memset(state, 0, sizeof(*state));
204
state->uncompressed_buffer_size = 64 * 1024;
205
state->uncompressed_buffer = malloc(state->uncompressed_buffer_size);
207
if (state->uncompressed_buffer == NULL) {
208
archive_set_error(a, ENOMEM,
209
"Can't allocate %s decompression buffers",
210
a->compression_name);
214
state->next_in = buff;
216
state->read_next = state->next_out = state->uncompressed_buffer;
217
state->avail_out = state->uncompressed_buffer_size;
219
code = getbits(a, state, 8);
223
code = getbits(a, state, 8);
227
code = getbits(a, state, 8);
228
state->maxcode_bits = code & 0x1f;
229
state->maxcode = (1 << state->maxcode_bits);
230
state->use_reset_code = code & 0x80;
232
/* Initialize decompressor. */
233
state->free_ent = 256;
234
state->stackp = state->stack;
235
if (state->use_reset_code)
238
state->section_end_code = (1<<state->bits) - 1;
240
for (code = 255; code >= 0; code--) {
241
state->prefix[code] = 0;
242
state->suffix[code] = code;
245
a->compression_data = state;
251
return (ARCHIVE_FATAL);
255
* Return a block of data from the decompression buffer. Decompress more
259
read_ahead(struct archive *a, const void **p, size_t min)
261
struct private_data *state;
262
int read_avail, was_avail, ret;
264
state = a->compression_data;
266
if (!a->client_reader) {
267
archive_set_error(a, ARCHIVE_ERRNO_PROGRAMMER,
268
"No read callback is registered? "
269
"This is probably an internal programming error.");
270
return (ARCHIVE_FATAL);
273
read_avail = state->next_out - state->read_next;
275
if (read_avail < (int)min && state->end_of_stream) {
276
if (state->end_of_stream == ARCHIVE_EOF)
282
if (read_avail < (int)min) {
283
memmove(state->uncompressed_buffer, state->read_next,
285
state->read_next = state->uncompressed_buffer;
286
state->next_out = state->read_next + read_avail;
288
= state->uncompressed_buffer_size - read_avail;
290
while (read_avail < (int)state->uncompressed_buffer_size
291
&& !state->end_of_stream) {
292
if (state->stackp > state->stack) {
293
*state->next_out++ = *--state->stackp;
297
ret = next_code(a, state);
298
if (ret == ARCHIVE_EOF)
299
state->end_of_stream = ret;
300
else if (ret != ARCHIVE_OK)
306
*p = state->read_next;
311
* Mark a previously-returned block of data as read.
314
read_consume(struct archive *a, size_t n)
316
struct private_data *state;
318
state = a->compression_data;
319
a->file_position += n;
320
state->read_next += n;
321
if (state->read_next > state->next_out)
322
__archive_errx(1, "Request to consume too many "
323
"bytes from compress decompressor");
328
* Clean up the decompressor.
331
finish(struct archive *a)
333
struct private_data *state;
336
state = a->compression_data;
339
free(state->uncompressed_buffer);
342
a->compression_data = NULL;
343
if (a->client_closer != NULL)
344
(a->client_closer)(a, a->client_data);
350
* Process the next code and fill the stack with the expansion
351
* of the code. Returns ARCHIVE_FATAL if there is a fatal I/O or
352
* format error, ARCHIVE_EOF if we hit end of data, ARCHIVE_OK otherwise.
355
next_code(struct archive *a, struct private_data *state)
359
static int debug_buff[1024];
360
static unsigned debug_index;
362
code = newcode = getbits(a, state, state->bits);
366
debug_buff[debug_index++] = code;
367
if (debug_index >= sizeof(debug_buff)/sizeof(debug_buff[0]))
370
/* If it's a reset code, reset the dictionary. */
371
if ((code == 256) && state->use_reset_code) {
373
* The original 'compress' implementation blocked its
374
* I/O in a manner that resulted in junk bytes being
375
* inserted after every reset. The next section skips
376
* this junk. (Yes, the number of *bytes* to skip is
377
* a function of the current *bit* length.)
379
int skip_bytes = state->bits -
380
(state->bytes_in_section % state->bits);
381
skip_bytes %= state->bits;
382
state->bits_avail = 0; /* Discard rest of this byte. */
383
while (skip_bytes-- > 0) {
384
code = getbits(a, state, 8);
388
/* Now, actually do the reset. */
389
state->bytes_in_section = 0;
391
state->section_end_code = (1 << state->bits) - 1;
392
state->free_ent = 257;
394
return (next_code(a, state));
397
if (code > state->free_ent) {
398
/* An invalid code is a fatal error. */
399
archive_set_error(a, -1, "Invalid compressed data");
400
return (ARCHIVE_FATAL);
403
/* Special case for KwKwK string. */
404
if (code >= state->free_ent) {
405
*state->stackp++ = state->finbyte;
406
code = state->oldcode;
409
/* Generate output characters in reverse order. */
410
while (code >= 256) {
411
*state->stackp++ = state->suffix[code];
412
code = state->prefix[code];
414
*state->stackp++ = state->finbyte = code;
416
/* Generate the new entry. */
417
code = state->free_ent;
418
if (code < state->maxcode && state->oldcode >= 0) {
419
state->prefix[code] = state->oldcode;
420
state->suffix[code] = state->finbyte;
423
if (state->free_ent > state->section_end_code) {
425
state->bytes_in_section = 0;
426
if (state->bits == state->maxcode_bits)
427
state->section_end_code = state->maxcode;
429
state->section_end_code = (1 << state->bits) - 1;
432
/* Remember previous code. */
433
state->oldcode = newcode;
438
* Return next 'n' bits from stream.
440
* -1 indicates end of available data.
443
getbits(struct archive *a, struct private_data *state, int n)
446
static const int mask[] = {
447
0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff,
448
0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
452
while (state->bits_avail < n) {
453
if (state->avail_in <= 0) {
454
ret = (a->client_reader)(a, a->client_data,
455
(const void **)&state->next_in);
457
return (ARCHIVE_FATAL);
459
return (ARCHIVE_EOF);
460
a->raw_position += ret;
461
state->avail_in = ret;
463
state->bit_buffer |= *state->next_in++ << state->bits_avail;
465
state->bits_avail += 8;
466
state->bytes_in_section++;
469
code = state->bit_buffer;
470
state->bit_buffer >>= n;
471
state->bits_avail -= n;
473
return (code & mask[n]);