5
* compress.c - File compression ala IEEE Computer, June 1984.
7
* Authors: Spencer W. Thomas (decvax!utah-cs!thomas)
8
* Jim McKie (decvax!mcvax!jim)
9
* Steve Davies (decvax!vax135!petsd!peora!srd)
10
* Ken Turkowski (decvax!decwrl!turtlevax!ken)
11
* James A. Woods (decvax!ihnp4!ames!jaw)
12
* Joe Orost (decvax!vax135!petsd!joe)
14
* July, 1992, Jim Arnold
15
* Modified uncompress code to work as a discipline under sfio.
16
* Didn't need compression code and deleted it.
18
* Kiem-Phong Vo (03/18/1998)
19
* Small interface modifications to conform with other disciplines.
22
/*****************************************************************
23
* Algorithm from "A Technique for High Performance Data Compression",
24
* Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
27
* Modified Lempel-Ziv method (LZW). Basically finds common
28
* substrings and replaces them with a variable size code. This is
29
* deterministic, and can be done on the fly. Thus, the decompression
30
* procedure needs no input table, but tracks the way the table was built.
37
#define INIT_BITS 9 /* initial number of bits/code */
40
/* Defines for third byte of header.
41
* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is
42
* a fourth header byte (for expansion). If the BLOCK_MASK is set,
43
* the CODE_CLEAR is disabled. Instead of flushing all the codes,
44
* they are simply overwritten.
48
#define BLOCK_MASK 0x80
51
/* The next two codes should not be changed lightly, as they must not
52
* lie within the contiguous general code space.
55
#define CODE_FIRST 257 /* first free entry */
56
#define CODE_CLEAR 256 /* table clear output code */
57
#define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
60
/* A code_int must hold 2**LZWBITS non-negative values, and also -1
64
typedef long int code_int;
68
typedef unsigned char char_type;
75
int n_bits; /* number of bits/code */
76
int maxbits; /* user settable max # bits/code */
78
code_int maxcode; /* maximum code, given n_bits */
79
code_int maxmaxcode; /* should NEVER generate this code */
80
code_int free_ent; /* first unused entry */
87
int gc_offset; /* getcode() */
88
int gc_size; /* getcode() */
89
char_type* gc_buf; /* getcode() */
92
char io_buf[LZWBITS + 8192];
93
char_type de_stack[8000];
94
char_type tab_suffix [1 << LZWBITS];
95
unsigned short tab_prefix [1 << LZWBITS];
99
static char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
102
static int peek(Sfio_t* f, char_type** bufp, int count, reg LZW_Disc* disc)
104
static int peek(f, bufp, count, disc)
117
if ((io_sz = disc->io_end - disc->io_ptr) < count)
119
memcpy(disc->io_buf + LZWBITS - io_sz, disc->io_ptr, io_sz);
120
disc->io_ptr = (char_type *)disc->io_buf + LZWBITS - io_sz;
121
j = sfrd(f, disc->io_buf + LZWBITS, sizeof disc->io_buf - LZWBITS, (Sfdisc_t *)disc);
125
disc->io_end = disc->io_ptr + io_sz;
127
*bufp = disc->io_ptr;
130
disc->io_ptr += count;
135
/*****************************************************************
138
* Read one code from the standard input. If EOF, return -1.
142
* code or -1 is returned.
146
static code_int getcode(Sfio_t* f, LZW_Disc* disc)
148
static code_int getcode(f, disc)
157
if ( disc->clear_flg > 0
158
|| disc->gc_offset >= disc->gc_size
159
|| disc->free_ent > disc->maxcode ) {
161
* If the next entry will be too big for the current code
162
* size, then we must increase the size. This implies reading
163
* a new buffer full, too.
165
if ( disc->free_ent > disc->maxcode ) {
166
if (++disc->n_bits > disc->maxbits)
168
if ( disc->n_bits == disc->maxbits )
169
disc->maxcode = disc->maxmaxcode;
171
disc->maxcode = MAXCODE(disc->n_bits);
173
if ( disc->clear_flg > 0) {
174
disc->maxcode = MAXCODE (disc->n_bits = INIT_BITS);
177
disc->gc_size = peek(f, &disc->gc_buf, disc->n_bits, disc);
178
if ( disc->gc_size <= 0 )
179
return -1; /* end of file */
181
/* Round size down to integral number of codes */
182
disc->gc_size = (disc->gc_size << 3) - (disc->n_bits - 1);
185
r_off = disc->gc_offset;
188
* Get to the first byte.
192
/* Get first part (low order bits) */
193
code = (*bp++ >> r_off);
195
r_off = 8 - r_off; /* now, offset into code word */
196
/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
198
code |= *bp++ << r_off;
202
/* high order bits. */
203
code |= (*bp & rmask[bits]) << r_off;
204
disc->gc_offset += disc->n_bits;
210
static int lzwExcept(Sfio_t* f, int type, Void_t* data, Sfdisc_t* disc)
212
static int lzwExcept(f, type, data, disc)
219
if(type == SF_FINAL || type == SF_DPOP)
226
* Uncompress. This routine adapts to the codes in the
227
* file building the "string" table on-the-fly; requiring no table to
228
* be stored in the compressed file. The tables used herein are shared
229
* with those of the compress() routine. See the definitions above.
233
ssize_t lzwRead(Sfio_t* f, Void_t* iobuf, size_t iocnt, Sfdisc_t* sfdisc)
235
ssize_t lzwRead(f, iobuf, iocnt, sfdisc)
242
LZW_Disc *disc = (LZW_Disc *)sfdisc;
243
reg char_type *stackp;
245
char *ioend = (char*)iobuf + iocnt;
246
register char *ioptr = iobuf;
247
#define END_REGS {disc->code=code;disc->stackp=stackp;}
248
#define BEGIN_REGS {code=disc->code;stackp=disc->stackp;}
259
/* Check the magic number */
260
if (peek(f, &p, 3, disc) != 3
261
|| *p++ != (char_type)0x1f
262
|| *p++ != (char_type)0x9d)
263
return disc->init = -1;
264
disc->maxbits = *p; /* set -b from file */
265
disc->block_compress = disc->maxbits & BLOCK_MASK;
266
disc->maxbits &= BIT_MASK;
267
disc->maxmaxcode = 1 << disc->maxbits;
268
if(disc->maxbits > LZWBITS)
269
return disc->init = -1;
273
* As above, initialize the first 256 entries in the table.
275
disc->maxcode = MAXCODE(disc->n_bits = INIT_BITS);
276
for ( code = 255; code >= 0; code-- ) {
277
disc->tab_prefix[code] = 0;
278
disc->tab_suffix[code] = (char_type)code;
280
disc->free_ent = ((disc->block_compress) ? CODE_FIRST : 256 );
282
stackp = disc->de_stack;
283
disc->finchar = disc->oldcode = getcode(f, disc);
284
if(disc->oldcode == -1) /* EOF already? */
287
return 0; /* Get out of here */
289
*ioptr++ = (char)disc->finchar;
290
if ((code = getcode(f, disc)) < 0)
299
if (stackp <= disc->de_stack)
301
if ( (code == CODE_CLEAR) && disc->block_compress ) {
302
for ( code = 255; code >= 0; code-- )
303
disc->tab_prefix[code] = 0;
305
disc->free_ent = CODE_FIRST - 1;
306
if ( (code = getcode (f, disc)) == -1 )
313
* Special case for KwKwK string.
315
if ( code >= disc->free_ent ) {
316
*stackp++ = disc->finchar;
317
code = disc->oldcode;
321
* Generate output characters in reverse order
323
while ( code >= 256 ) {
324
*stackp++ = disc->tab_suffix[code];
325
code = disc->tab_prefix[code];
327
*stackp++ = disc->finchar = disc->tab_suffix[code];
331
* And put them out in forward order
340
*ioptr++ = *--stackp;
341
}while ( stackp > disc->de_stack );
344
* Generate the new entry.
346
if ( (code=disc->free_ent) < disc->maxmaxcode ) {
347
disc->tab_prefix[code] = (unsigned short)disc->oldcode;
348
disc->tab_suffix[code] = disc->finchar;
349
disc->free_ent = code+1;
352
* Remember previous code.
354
disc->oldcode = disc->incode;
355
} while ((code = getcode(f, disc)) >= 0);
357
return ioptr - (char*)iobuf;
361
static Sfoff_t lzwSeek(Sfio_t* f, Sfoff_t offset, int whence, Sfdisc_t* disc)
363
static Sfoff_t lzwSeek(f, offset, whence, disc)
370
return (Sfoff_t)(-1);
375
static ssize_t lzwWrite(Sfio_t* f, const Void_t* buf, size_t count, Sfdisc_t* disc)
377
static ssize_t lzwWrite(f, buf, count, disc)
384
return (ssize_t)(-1);
389
int sfdclzw(Sfio_t* f)
397
if (!(lz = (LZW_Disc *)malloc(sizeof(LZW_Disc))) )
399
lz->disc.readf = lzwRead;
400
lz->disc.writef = lzwWrite;
401
lz->disc.seekf = lzwSeek;
402
lz->disc.exceptf = lzwExcept;
407
lz->io_ptr = (char_type *)lz->io_buf + LZWBITS;
408
lz->io_end = (char_type *)lz->io_buf + LZWBITS;
410
if(sfdisc(f, (Sfdisc_t*)lz) != (Sfdisc_t*)lz)