1
/* gun.c -- simple gunzip to give an example of the use of inflateBack()
2
* Copyright (C) 2003, 2005, 2008, 2010 Mark Adler
3
* For conditions of distribution and use, see copyright notice in zlib.h
4
Version 1.6 17 January 2010 Mark Adler */
7
1.0 16 Feb 2003 First version for testing of inflateBack()
8
1.1 21 Feb 2005 Decompress concatenated gzip streams
9
Remove use of "this" variable (C++ keyword)
10
Fix return value for in()
11
Improve allocation failure checking
12
Add typecasting for void * structures
13
Add -h option for command version and usage
14
Add a bunch of comments
15
1.2 20 Mar 2005 Add Unix compress (LZW) decompression
16
Copy file attributes from input file to output file
17
1.3 12 Jun 2005 Add casts for error messages [Oberhumer]
18
1.4 8 Dec 2006 LZW decompression speed improvements
19
1.5 9 Feb 2008 Avoid warning in latest version of gcc
20
1.6 17 Jan 2010 Avoid signed/unsigned comparison warnings
24
gun [ -t ] [ name ... ]
26
decompresses the data in the named gzip files. If no arguments are given,
27
gun will decompress from stdin to stdout. The names must end in .gz, -gz,
28
.z, -z, _z, or .Z. The uncompressed data will be written to a file name
29
with the suffix stripped. On success, the original file is deleted. On
30
failure, the output file is deleted. For most failures, the command will
31
continue to process the remaining names on the command line. A memory
32
allocation failure will abort the command. If -t is specified, then the
33
listed files or stdin will be tested as gzip files for integrity (without
34
checking for a proper suffix), no output will be written, and no files
37
Like gzip, gun allows concatenated gzip streams and will decompress them,
38
writing all of the uncompressed data to the output. Unlike gzip, gun allows
39
an empty file on input, and will produce no error writing an empty output
42
gun will also decompress files made by Unix compress, which uses LZW
43
compression. These files are automatically detected by virtue of their
44
magic header bytes. Since the end of a Unix compress stream is marked by the
45
end-of-file, they cannot be concatenated. If a Unix compress stream is
46
encountered in an input file, it is the last stream in that file.
48
Like gunzip and uncompress, the file attributes of the original compressed
49
file are maintained in the final uncompressed file, to the extent that the
50
user permissions allow it.
52
On my Mac OS X PowerPC G4, gun is almost twice as fast as gunzip (version
53
1.2.4) is on the same file, when gun is linked with zlib 1.2.2. Also the
54
LZW decompression provided by gun is about twice as fast as the standard
55
Unix uncompress command.
58
/* external functions and related types and constants */
59
#include <stdio.h> /* fprintf() */
60
#include <stdlib.h> /* malloc(), free() */
61
#include <string.h> /* strerror(), strcmp(), strlen(), memcpy() */
62
#include <errno.h> /* errno */
63
#include <fcntl.h> /* open() */
64
#include <unistd.h> /* read(), write(), close(), chown(), unlink() */
65
#include <sys/types.h>
66
#include <sys/stat.h> /* stat(), chmod() */
67
#include <utime.h> /* utime() */
68
#include "zlib.h" /* inflateBackInit(), inflateBack(), */
69
/* inflateBackEnd(), crc32() */
71
/* function declaration */
74
/* buffer constants */
75
#define SIZE 32768U /* input and output buffer sizes */
76
#define PIECE 16384 /* limits i/o chunks for 16-bit int case */
78
/* structure for infback() to pass to input function in() -- it maintains the
79
input file and a buffer of size SIZE */
85
/* Load input buffer, assumed to be empty, and return bytes loaded and a
86
pointer to them. read() is called until the buffer is full, or until it
87
returns end-of-file or error. Return 0 on error. */
88
local unsigned in(void *in_desc, unsigned char **buf)
93
struct ind *me = (struct ind *)in_desc;
100
if ((unsigned)ret > SIZE - len)
101
ret = (int)(SIZE - len);
102
ret = (int)read(me->infile, next, ret);
109
} while (ret != 0 && len < SIZE);
113
/* structure for infback() to pass to output function out() -- it maintains the
114
output file, a running CRC-32 check on the output and the total number of
115
bytes output, both for checking against the gzip trailer. (The length in
116
the gzip trailer is stored modulo 2^32, so it's ok if a long is 32 bits and
117
the output is greater than 4 GB.) */
120
int check; /* true if checking crc and total */
125
/* Write output buffer and update the CRC-32 and total bytes written. write()
126
is called until all of the output is written or an error is encountered.
127
On success out() returns 0. For a write failure, out() returns 1. If the
128
output file descriptor is -1, then nothing is written.
130
local int out(void *out_desc, unsigned char *buf, unsigned len)
133
struct outd *me = (struct outd *)out_desc;
136
me->crc = crc32(me->crc, buf, len);
139
if (me->outfile != -1)
142
if ((unsigned)ret > len)
144
ret = (int)write(me->outfile, buf, ret);
153
/* next input byte macro for use inside lunpipe() and gunpipe() */
154
#define NEXT() (have ? 0 : (have = in(indp, &next)), \
155
last = have ? (have--, (int)(*next++)) : -1)
157
/* memory for gunpipe() and lunpipe() --
158
the first 256 entries of prefix[] and suffix[] are never used, could
159
have offset the index, but it's faster to waste the memory */
160
unsigned char inbuf[SIZE]; /* input buffer */
161
unsigned char outbuf[SIZE]; /* output buffer */
162
unsigned short prefix[65536]; /* index to LZW prefix string */
163
unsigned char suffix[65536]; /* one-character LZW suffix */
164
unsigned char match[65280 + 2]; /* buffer for reversed match or gzip
165
32K sliding window */
167
/* throw out what's left in the current bits byte buffer (this is a vestigial
168
aspect of the compressed data format derived from an implementation that
169
made use of a special VAX machine instruction!) */
170
#define FLUSHCODE() \
174
if (chunk > have) { \
180
if (chunk > have) { \
190
/* Decompress a compress (LZW) file from indp to outfile. The compress magic
191
header (two bytes) has already been read and verified. There are have bytes
192
of buffered input at next. strm is used for passing error information back
195
lunpipe() will return Z_OK on success, Z_BUF_ERROR for an unexpected end of
196
file, read error, or write error (a write error indicated by strm->next_in
197
not equal to Z_NULL), or Z_DATA_ERROR for invalid input.
199
local int lunpipe(unsigned have, unsigned char *next, struct ind *indp,
200
int outfile, z_stream *strm)
202
int last; /* last byte read by NEXT(), or -1 if EOF */
203
unsigned chunk; /* bytes left in current chunk */
204
int left; /* bits left in rem */
205
unsigned rem; /* unused bits from input */
206
int bits; /* current bits per code */
207
unsigned code; /* code, table traversal index */
208
unsigned mask; /* mask for current bits codes */
209
int max; /* maximum bits per code for this stream */
210
unsigned flags; /* compress flags, then block compress flag */
211
unsigned end; /* last valid entry in prefix/suffix tables */
212
unsigned temp; /* current code */
213
unsigned prev; /* previous code */
214
unsigned final; /* last character written for previous code */
215
unsigned stack; /* next position for reversed string */
216
unsigned outcnt; /* bytes in output buffer */
217
struct outd outd; /* output structure */
221
outd.outfile = outfile;
224
/* process remainder of compress header -- a flags byte */
229
strm->msg = (char *)"unknown lzw flags set";
233
if (max < 9 || max > 16) {
234
strm->msg = (char *)"lzw bits out of range";
237
if (max == 9) /* 9 doesn't really mean 9 */
239
flags &= 0x80; /* true if block compress */
244
end = flags ? 256 : 255;
246
/* set up: get first 9-bit code, which is the first decompressed byte, but
247
don't create a table entry until the next code */
248
if (NEXT() == -1) /* no compressed data is ok */
250
final = prev = (unsigned)last; /* low 8 bits of code */
251
if (NEXT() == -1) /* missing a bit */
253
if (last & 1) { /* code must be < 256 */
254
strm->msg = (char *)"invalid lzw code";
257
rem = (unsigned)last >> 1; /* remaining 7 bits */
259
chunk = bits - 2; /* 7 bytes left in this chunk */
260
outbuf[0] = (unsigned char)final; /* write first decompressed byte */
266
/* if the table will be full after this, increment the code size */
267
if (end >= mask && bits < max) {
274
/* get a code of length bits */
275
if (chunk == 0) /* decrement chunk modulo bits */
277
code = rem; /* low bits of code */
278
if (NEXT() == -1) { /* EOF is end of compressed data */
279
/* write remaining buffered output */
280
if (outcnt && out(&outd, outbuf, outcnt)) {
281
strm->next_in = outbuf; /* signal write error */
286
code += (unsigned)last << left; /* middle (or high) bits of code */
289
if (bits > left) { /* need more bits */
290
if (NEXT() == -1) /* can't end in middle of code */
292
code += (unsigned)last << left; /* high bits of code */
296
code &= mask; /* mask to current code length */
297
left -= bits; /* number of unused bits */
298
rem = (unsigned)last >> (8 - left); /* unused bits from last byte */
300
/* process clear code (256) */
301
if (code == 256 && flags) {
303
bits = 9; /* initialize bits and mask */
305
end = 255; /* empty table */
306
continue; /* get next code */
309
/* special code to reuse last match */
310
temp = code; /* save the current code */
312
/* Be picky on the allowed code here, and make sure that the code
313
we drop through (prev) will be a valid index so that random
314
input does not cause an exception. The code != end + 1 check is
315
empirically derived, and not checked in the original uncompress
316
code. If this ever causes a problem, that check could be safely
317
removed. Leaving this check in greatly improves gun's ability
318
to detect random or corrupted input after a compress header.
319
In any case, the prev > end check must be retained. */
320
if (code != end + 1 || prev > end) {
321
strm->msg = (char *)"invalid lzw code";
324
match[stack++] = (unsigned char)final;
328
/* walk through linked list to generate output in reverse order */
330
while (code >= 256) {
335
match[stack++] = (unsigned char)code;
338
/* link new table entry */
341
prefix[end] = (unsigned short)prev;
342
suffix[end] = (unsigned char)final;
345
/* set previous code for next iteration */
348
/* write output in forward order */
349
while (stack > SIZE - outcnt) {
350
while (outcnt < SIZE)
351
outbuf[outcnt++] = match[--stack];
352
if (out(&outd, outbuf, outcnt)) {
353
strm->next_in = outbuf; /* signal write error */
360
outbuf[outcnt++] = *--p;
364
/* loop for next code with final and prev as the last match, rem and
365
left provide the first 0..7 bits of the next code, end is the last
370
/* Decompress a gzip file from infile to outfile. strm is assumed to have been
371
successfully initialized with inflateBackInit(). The input file may consist
372
of a series of gzip streams, in which case all of them will be decompressed
373
to the output file. If outfile is -1, then the gzip stream(s) integrity is
374
checked and nothing is written.
376
The return value is a zlib error code: Z_MEM_ERROR if out of memory,
377
Z_DATA_ERROR if the header or the compressed data is invalid, or if the
378
trailer CRC-32 check or length doesn't match, Z_BUF_ERROR if the input ends
379
prematurely or a write error occurs, or Z_ERRNO if junk (not another gzip
380
stream) follows a valid gzip stream.
382
local int gunpipe(z_stream *strm, int infile, int outfile)
384
int ret, first, last;
385
unsigned have, flags, len;
386
unsigned char *next = NULL;
387
struct ind ind, *indp;
390
/* setup input buffer */
395
/* decompress concatenated gzip streams */
396
have = 0; /* no input data read in yet */
397
first = 1; /* looking for first gzip header */
398
strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */
400
/* look for the two magic header bytes for a gzip stream */
403
break; /* empty gzip stream is ok */
405
if (last != 31 || (NEXT() != 139 && last != 157)) {
406
strm->msg = (char *)"incorrect header check";
407
ret = first ? Z_DATA_ERROR : Z_ERRNO;
408
break; /* not a gzip or compress header */
410
first = 0; /* next non-header is junk */
412
/* process a compress (LZW) file -- can't be concatenated after this */
414
ret = lunpipe(have, next, indp, outfile, strm);
418
/* process remainder of gzip header */
420
if (NEXT() != 8) { /* only deflate method allowed */
421
if (last == -1) break;
422
strm->msg = (char *)"unknown compression method";
426
flags = NEXT(); /* header flags */
427
NEXT(); /* discard mod time, xflgs, os */
433
if (last == -1) break;
435
strm->msg = (char *)"unknown header flags set";
439
if (flags & 4) { /* extra field */
441
len += (unsigned)(NEXT()) << 8;
442
if (last == -1) break;
446
if (NEXT() == -1) break;
449
if (last == -1) break;
453
if (flags & 8) /* file name */
454
while (NEXT() != 0 && last != -1)
456
if (flags & 16) /* comment */
457
while (NEXT() != 0 && last != -1)
459
if (flags & 2) { /* header crc */
463
if (last == -1) break;
466
outd.outfile = outfile;
468
outd.crc = crc32(0L, Z_NULL, 0);
471
/* decompress data to output */
472
strm->next_in = next;
473
strm->avail_in = have;
474
ret = inflateBack(strm, in, indp, out, &outd);
475
if (ret != Z_STREAM_END) break;
476
next = strm->next_in;
477
have = strm->avail_in;
478
strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */
482
if (NEXT() != (int)(outd.crc & 0xff) ||
483
NEXT() != (int)((outd.crc >> 8) & 0xff) ||
484
NEXT() != (int)((outd.crc >> 16) & 0xff) ||
485
NEXT() != (int)((outd.crc >> 24) & 0xff)) {
488
strm->msg = (char *)"incorrect data check";
493
if (NEXT() != (int)(outd.total & 0xff) ||
494
NEXT() != (int)((outd.total >> 8) & 0xff) ||
495
NEXT() != (int)((outd.total >> 16) & 0xff) ||
496
NEXT() != (int)((outd.total >> 24) & 0xff)) {
499
strm->msg = (char *)"incorrect length check";
505
/* go back and look for another gzip stream */
508
/* clean up and return */
512
/* Copy file attributes, from -> to, as best we can. This is best effort, so
513
no errors are reported. The mode bits, including suid, sgid, and the sticky
514
bit are copied (if allowed), the owner's user id and group id are copied
515
(again if allowed), and the access and modify times are copied. */
516
local void copymeta(char *from, char *to)
521
/* get all of from's Unix meta data, return if not a regular file */
522
if (stat(from, &was) != 0 || (was.st_mode & S_IFMT) != S_IFREG)
525
/* set to's mode bits, ignore errors */
526
(void)chmod(to, was.st_mode & 07777);
528
/* copy owner's user and group, ignore errors */
529
(void)chown(to, was.st_uid, was.st_gid);
531
/* copy access and modify times, ignore errors */
532
when.actime = was.st_atime;
533
when.modtime = was.st_mtime;
534
(void)utime(to, &when);
537
/* Decompress the file inname to the file outname, or if test is true, just
538
decompress without writing and check the gzip trailer for integrity. If
539
inname is NULL or an empty string, read from stdin. If outname is NULL or
540
an empty string, write to stdout. strm is a pre-initialized inflateBack
541
structure. When appropriate, copy the file attributes from inname to
544
gunzip() returns 1 if there is an out-of-memory error or an unexpected
545
return code from gunpipe(). Otherwise it returns 0.
547
local int gunzip(z_stream *strm, char *inname, char *outname, int test)
553
if (inname == NULL || *inname == 0) {
555
infile = 0; /* stdin */
558
infile = open(inname, O_RDONLY, 0);
560
fprintf(stderr, "gun cannot open %s\n", inname);
566
else if (outname == NULL || *outname == 0) {
568
outfile = 1; /* stdout */
571
outfile = open(outname, O_CREAT | O_TRUNC | O_WRONLY, 0666);
574
fprintf(stderr, "gun cannot create %s\n", outname);
581
ret = gunpipe(strm, infile, outfile);
582
if (outfile > 2) close(outfile);
583
if (infile > 2) close(infile);
585
/* interpret result */
589
if (infile > 2 && outfile > 2) {
590
copymeta(inname, outname); /* copy attributes */
594
fprintf(stderr, "gun warning: trailing garbage ignored in %s\n",
598
if (outfile > 2) unlink(outname);
599
fprintf(stderr, "gun data error on %s: %s\n", inname, strm->msg);
602
if (outfile > 2) unlink(outname);
603
fprintf(stderr, "gun out of memory error--aborting\n");
606
if (outfile > 2) unlink(outname);
607
if (strm->next_in != Z_NULL) {
608
fprintf(stderr, "gun write error on %s: %s\n",
609
outname, strerror(errno));
612
fprintf(stderr, "gun read error on %s: %s\n",
613
inname, strerror(errno));
616
fprintf(stderr, "gun unexpected end of file on %s\n",
621
if (outfile > 2) unlink(outname);
622
fprintf(stderr, "gun internal error--aborting\n");
628
/* Process the gun command line arguments. See the command syntax near the
629
beginning of this source file. */
630
int main(int argc, char **argv)
634
unsigned char *window;
637
/* initialize inflateBack state for repeated use */
638
window = match; /* reuse LZW match buffer */
639
strm.zalloc = Z_NULL;
641
strm.opaque = Z_NULL;
642
ret = inflateBackInit(&strm, 15, window);
644
fprintf(stderr, "gun out of memory error--aborting\n");
648
/* decompress each file to the same name with the suffix removed */
652
if (argc && strcmp(*argv, "-h") == 0) {
653
fprintf(stderr, "gun 1.6 (17 Jan 2010)\n");
654
fprintf(stderr, "Copyright (C) 2003-2010 Mark Adler\n");
655
fprintf(stderr, "usage: gun [-t] [file1.gz [file2.Z ...]]\n");
658
if (argc && strcmp(*argv, "-t") == 0) {
668
len = (int)strlen(*argv);
669
if (strcmp(*argv + len - 3, ".gz") == 0 ||
670
strcmp(*argv + len - 3, "-gz") == 0)
672
else if (strcmp(*argv + len - 2, ".z") == 0 ||
673
strcmp(*argv + len - 2, "-z") == 0 ||
674
strcmp(*argv + len - 2, "_z") == 0 ||
675
strcmp(*argv + len - 2, ".Z") == 0)
678
fprintf(stderr, "gun error: no gz type on %s--skipping\n",
682
outname = malloc(len + 1);
683
if (outname == NULL) {
684
fprintf(stderr, "gun out of memory error--aborting\n");
688
memcpy(outname, *argv, len);
691
ret = gunzip(&strm, *argv, outname, test);
692
if (outname != NULL) free(outname);
694
} while (argv++, --argc);
696
ret = gunzip(&strm, NULL, NULL, test);
699
inflateBackEnd(&strm);