/* ~zulcss/samba/server-dailies-3.4
 *
 * 1 by Chuck Short
 * Initial version
 */
/* gzjoin -- command to join gzip files into one gzip file

  Copyright (C) 2004 Mark Adler, all rights reserved
  version 1.0, 11 Dec 2004

  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the author be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.

  Mark Adler    madler@alumni.caltech.edu
 */

/*
 * Change history:
 *
 * 1.0  11 Dec 2004     - First version
 * 1.1  12 Jun 2005     - Changed ssize_t to long for portability
 */

/*
   gzjoin takes one or more gzip files on the command line and writes out a
   single gzip file that will uncompress to the concatenation of the
   uncompressed data from the individual gzip files.  gzjoin does this without
   having to recompress any of the data and without having to calculate a new
   crc32 for the concatenated uncompressed data.  gzjoin does however have to
   decompress all of the input data in order to find the bits in the compressed
   data that need to be modified to concatenate the streams.

   gzjoin does not do an integrity check on the input gzip files other than
   checking the gzip header and decompressing the compressed data.  They are
   otherwise assumed to be complete and correct.

   Each joint between gzip files removes at least 18 bytes of previous trailer
   and subsequent header, and inserts an average of about three bytes to the
   compressed data in order to connect the streams.  The output gzip file
   has a minimal ten-byte gzip header with no file name or modification time.

   This program was written to illustrate the use of the Z_BLOCK option of
   inflate() and the crc32_combine() function.  gzjoin will not compile with
   versions of zlib earlier than 1.2.3.
 */

#include <stdio.h>      /* fputs(), fprintf(), fwrite(), putc() */
#include <stdlib.h>     /* exit(), malloc(), free() */
#include <fcntl.h>      /* open() */
#include <unistd.h>     /* close(), read(), lseek() */
#include "zlib.h"
    /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */

/* every helper in this file is declared "local", i.e. with internal linkage */
#define local static

/* exit with an error (return a value to allow use in an expression) */
65
local int bail(char *why1, char *why2)
66
{
67
    fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2);
68
    exit(1);
69
    return 0;
70
}
71
72
/* -- simple buffered file input with access to the buffer -- */

/* Size in bytes of each input buffer and of the scratch output buffer.  It
   must be a power of two because bskip() computes a remainder with
   "skip & (CHUNK - 1)", and it must fit in an unsigned because buffer counts
   are kept in unsigned variables. */
#define CHUNK 32768         /* must be a power of two and fit in unsigned */

/* bin buffered input file type: a file descriptor together with a CHUNK-byte
   buffer and a cursor (next, left) over the data currently held in it */
typedef struct {
    char *name;             /* name of file for error messages */
    int fd;                 /* file descriptor */
    unsigned left;          /* bytes remaining at next */
    unsigned char *next;    /* next byte to read */
    unsigned char *buf;     /* allocated buffer of length CHUNK */
} bin;

/* close a buffered file and free allocated memory */
86
local void bclose(bin *in)
87
{
88
    if (in != NULL) {
89
        if (in->fd != -1)
90
            close(in->fd);
91
        if (in->buf != NULL)
92
            free(in->buf);
93
        free(in);
94
    }
95
}
96
97
/* open a buffered file for input, return a pointer to type bin, or NULL on
98
   failure */
99
local bin *bopen(char *name)
100
{
101
    bin *in;
102
103
    in = malloc(sizeof(bin));
104
    if (in == NULL)
105
        return NULL;
106
    in->buf = malloc(CHUNK);
107
    in->fd = open(name, O_RDONLY, 0);
108
    if (in->buf == NULL || in->fd == -1) {
109
        bclose(in);
110
        return NULL;
111
    }
112
    in->left = 0;
113
    in->next = in->buf;
114
    in->name = name;
115
    return in;
116
}
117
118
/* load buffer from file, return -1 on read error, 0 or 1 on success, with
119
   1 indicating that end-of-file was reached */
120
local int bload(bin *in)
121
{
122
    long len;
123
124
    if (in == NULL)
125
        return -1;
126
    if (in->left != 0)
127
        return 0;
128
    in->next = in->buf;
129
    do {
130
        len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left);
131
        if (len < 0)
132
            return -1;
133
        in->left += (unsigned)len;
134
    } while (len != 0 && in->left < CHUNK);
135
    return len == 0 ? 1 : 0;
136
}
137
138
/* Get a byte from the file, bail if end of file.  The buffer is refilled
   with bload() when it is empty; if it is still empty afterwards, bail() is
   called and the program exits.  The value is the byte as an int in 0..255.
   The argument is evaluated more than once, so it must not have side
   effects. */
#define bget(in) ((in)->left ? 0 : bload(in), \
                  (in)->left ? ((in)->left--, *((in)->next)++) : \
                    bail("unexpected end of file on ", (in)->name))

/* get a four-byte little-endian unsigned integer from file */
144
local unsigned long bget4(bin *in)
145
{
146
    unsigned long val;
147
148
    val = bget(in);
149
    val += (unsigned long)(bget(in)) << 8;
150
    val += (unsigned long)(bget(in)) << 16;
151
    val += (unsigned long)(bget(in)) << 24;
152
    return val;
153
}
154
155
/* skip bytes in file */
156
local void bskip(bin *in, unsigned skip)
157
{
158
    /* check pointer */
159
    if (in == NULL)
160
        return;
161
162
    /* easy case -- skip bytes in buffer */
163
    if (skip <= in->left) {
164
        in->left -= skip;
165
        in->next += skip;
166
        return;
167
    }
168
169
    /* skip what's in buffer, discard buffer contents */
170
    skip -= in->left;
171
    in->left = 0;
172
173
    /* seek past multiples of CHUNK bytes */
174
    if (skip > CHUNK) {
175
        unsigned left;
176
177
        left = skip & (CHUNK - 1);
178
        if (left == 0) {
179
            /* exact number of chunks: seek all the way minus one byte to check
180
               for end-of-file with a read */
181
            lseek(in->fd, skip - 1, SEEK_CUR);
182
            if (read(in->fd, in->buf, 1) != 1)
183
                bail("unexpected end of file on ", in->name);
184
            return;
185
        }
186
187
        /* skip the integral chunks, update skip with remainder */
188
        lseek(in->fd, skip - left, SEEK_CUR);
189
        skip = left;
190
    }
191
192
    /* read more input and skip remainder */
193
    bload(in);
194
    if (skip > in->left)
195
        bail("unexpected end of file on ", in->name);
196
    in->left -= skip;
197
    in->next += skip;
198
}
199
200
/* -- end of buffered input functions -- */

/* skip the gzip header from file in */
203
local void gzhead(bin *in)
204
{
205
    int flags;
206
207
    /* verify gzip magic header and compression method */
208
    if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8)
209
        bail(in->name, " is not a valid gzip file");
210
211
    /* get and verify flags */
212
    flags = bget(in);
213
    if ((flags & 0xe0) != 0)
214
        bail("unknown reserved bits set in ", in->name);
215
216
    /* skip modification time, extra flags, and os */
217
    bskip(in, 6);
218
219
    /* skip extra field if present */
220
    if (flags & 4) {
221
        unsigned len;
222
223
        len = bget(in);
224
        len += (unsigned)(bget(in)) << 8;
225
        bskip(in, len);
226
    }
227
228
    /* skip file name if present */
229
    if (flags & 8)
230
        while (bget(in) != 0)
231
            ;
232
233
    /* skip comment if present */
234
    if (flags & 16)
235
        while (bget(in) != 0)
236
            ;
237
238
    /* skip header crc if present */
239
    if (flags & 2)
240
        bskip(in, 2);
241
}
242
243
/* write a four-byte little-endian unsigned integer to out */
244
local void put4(unsigned long val, FILE *out)
245
{
246
    putc(val & 0xff, out);
247
    putc((val >> 8) & 0xff, out);
248
    putc((val >> 16) & 0xff, out);
249
    putc((val >> 24) & 0xff, out);
250
}
251
252
/* Load up zlib stream from buffered input, bail if end of file */
253
local void zpull(z_streamp strm, bin *in)
254
{
255
    if (in->left == 0)
256
        bload(in);
257
    if (in->left == 0)
258
        bail("unexpected end of file on ", in->name);
259
    strm->avail_in = in->left;
260
    strm->next_in = in->next;
261
}
262
263
/* Write header for gzip file to out and initialize trailer. */
264
local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out)
265
{
266
    fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
267
    *crc = crc32(0L, Z_NULL, 0);
268
    *tot = 0;
269
}
270
271
/* Copy the compressed data from name, zeroing the last block bit of the last
272
   block if clr is true, and adding empty blocks as needed to get to a byte
273
   boundary.  If clr is false, then the last block becomes the last block of
274
   the output, and the gzip trailer is written.  crc and tot maintains the
275
   crc and length (modulo 2^32) of the output for the trailer.  The resulting
276
   gzip file is written to out.  gzinit() must be called before the first call
277
   of gzcopy() to write the gzip header and to initialize crc and tot. */
278
local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot,
279
                  FILE *out)
280
{
281
    int ret;                /* return value from zlib functions */
282
    int pos;                /* where the "last block" bit is in byte */
283
    int last;               /* true if processing the last block */
284
    bin *in;                /* buffered input file */
285
    unsigned char *start;   /* start of compressed data in buffer */
286
    unsigned char *junk;    /* buffer for uncompressed data -- discarded */
287
    z_off_t len;            /* length of uncompressed data (support > 4 GB) */
288
    z_stream strm;          /* zlib inflate stream */
289
290
    /* open gzip file and skip header */
291
    in = bopen(name);
292
    if (in == NULL)
293
        bail("could not open ", name);
294
    gzhead(in);
295
296
    /* allocate buffer for uncompressed data and initialize raw inflate
297
       stream */
298
    junk = malloc(CHUNK);
299
    strm.zalloc = Z_NULL;
300
    strm.zfree = Z_NULL;
301
    strm.opaque = Z_NULL;
302
    strm.avail_in = 0;
303
    strm.next_in = Z_NULL;
304
    ret = inflateInit2(&strm, -15);
305
    if (junk == NULL || ret != Z_OK)
306
        bail("out of memory", "");
307
308
    /* inflate and copy compressed data, clear last-block bit if requested */
309
    len = 0;
310
    zpull(&strm, in);
311
    start = strm.next_in;
312
    last = start[0] & 1;
313
    if (last && clr)
314
        start[0] &= ~1;
315
    strm.avail_out = 0;
316
    for (;;) {
317
        /* if input used and output done, write used input and get more */
318
        if (strm.avail_in == 0 && strm.avail_out != 0) {
319
            fwrite(start, 1, strm.next_in - start, out);
320
            start = in->buf;
321
            in->left = 0;
322
            zpull(&strm, in);
323
        }
324
325
        /* decompress -- return early when end-of-block reached */
326
        strm.avail_out = CHUNK;
327
        strm.next_out = junk;
328
        ret = inflate(&strm, Z_BLOCK);
329
        switch (ret) {
330
        case Z_MEM_ERROR:
331
            bail("out of memory", "");
332
        case Z_DATA_ERROR:
333
            bail("invalid compressed data in ", in->name);
334
        }
335
336
        /* update length of uncompressed data */
337
        len += CHUNK - strm.avail_out;
338
339
        /* check for block boundary (only get this when block copied out) */
340
        if (strm.data_type & 128) {
341
            /* if that was the last block, then done */
342
            if (last)
343
                break;
344
345
            /* number of unused bits in last byte */
346
            pos = strm.data_type & 7;
347
348
            /* find the next last-block bit */
349
            if (pos != 0) {
350
                /* next last-block bit is in last used byte */
351
                pos = 0x100 >> pos;
352
                last = strm.next_in[-1] & pos;
353
                if (last && clr)
354
                    strm.next_in[-1] &= ~pos;
355
            }
356
            else {
357
                /* next last-block bit is in next unused byte */
358
                if (strm.avail_in == 0) {
359
                    /* don't have that byte yet -- get it */
360
                    fwrite(start, 1, strm.next_in - start, out);
361
                    start = in->buf;
362
                    in->left = 0;
363
                    zpull(&strm, in);
364
                }
365
                last = strm.next_in[0] & 1;
366
                if (last && clr)
367
                    strm.next_in[0] &= ~1;
368
            }
369
        }
370
    }
371
372
    /* update buffer with unused input */
373
    in->left = strm.avail_in;
374
    in->next = strm.next_in;
375
376
    /* copy used input, write empty blocks to get to byte boundary */
377
    pos = strm.data_type & 7;
378
    fwrite(start, 1, in->next - start - 1, out);
379
    last = in->next[-1];
380
    if (pos == 0 || !clr)
381
        /* already at byte boundary, or last file: write last byte */
382
        putc(last, out);
383
    else {
384
        /* append empty blocks to last byte */
385
        last &= ((0x100 >> pos) - 1);       /* assure unused bits are zero */
386
        if (pos & 1) {
387
            /* odd -- append an empty stored block */
388
            putc(last, out);
389
            if (pos == 1)
390
                putc(0, out);               /* two more bits in block header */
391
            fwrite("\0\0\xff\xff", 1, 4, out);
392
        }
393
        else {
394
            /* even -- append 1, 2, or 3 empty fixed blocks */
395
            switch (pos) {
396
            case 6:
397
                putc(last | 8, out);
398
                last = 0;
399
            case 4:
400
                putc(last | 0x20, out);
401
                last = 0;
402
            case 2:
403
                putc(last | 0x80, out);
404
                putc(0, out);
405
            }
406
        }
407
    }
408
409
    /* update crc and tot */
410
    *crc = crc32_combine(*crc, bget4(in), len);
411
    *tot += (unsigned long)len;
412
413
    /* clean up */
414
    inflateEnd(&strm);
415
    free(junk);
416
    bclose(in);
417
418
    /* write trailer if this is the last gzip file */
419
    if (!clr) {
420
        put4(*crc, out);
421
        put4(*tot, out);
422
    }
423
}
424
425
/* Join the gzip files named on the command line and write the result to
   stdout.  With no arguments a usage message is printed to stderr and the
   exit status is 0.  Errors while processing the inputs end the program with
   status 1 from inside the helpers; a failure to write the output is detected
   here after the last file and reported the same way, so a truncated output
   is never accompanied by a zero exit status. */
int main(int argc, char **argv)
{
    unsigned long crc, tot;     /* running crc and total uncompressed length */

    /* skip command name */
    argc--;
    argv++;

    /* show usage if no arguments */
    if (argc == 0) {
        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
              stderr);
        return 0;
    }

    /* join gzip files on command line and write to stdout -- the decremented
       argc is passed as clr, so it is zero only for the last file, which
       therefore keeps its last-block bit and is followed by the trailer */
    gzinit(&crc, &tot, stdout);
    while (argc--)
        gzcopy(*argv++, argc, &crc, &tot, stdout);

    /* flush and make sure that everything was actually written */
    if (fflush(stdout) != 0 || ferror(stdout))
        bail("write error on ", "stdout");

    /* done */
    return 0;
}