2
* Copyright (c) 2009 Michihiro NAKAJIMA
3
* Copyright (c) 2003-2008 Tim Kientzle and Miklos Vajna
6
* Redistribution and use in source and binary forms, with or without
7
* modification, are permitted provided that the following conditions
9
* 1. Redistributions of source code must retain the above copyright
10
* notice, this list of conditions and the following disclaimer.
11
* 2. Redistributions in binary form must reproduce the above copyright
12
* notice, this list of conditions and the following disclaimer in the
13
* documentation and/or other materials provided with the distribution.
15
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18
* IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
#include "archive_platform.h"
29
__FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_compression_xz.c 201167 2009-12-29 06:06:20Z kientzle $");
51
#include "archive_endian.h"
52
#include "archive_private.h"
53
#include "archive_read_private.h"
55
#if HAVE_LZMA_H && HAVE_LIBLZMA
59
unsigned char *out_block;
60
size_t out_block_size;
62
char eof; /* True = found end of compressed data. */
65
/* Combined lzma/xz filter */
66
static ssize_t xz_filter_read(struct archive_read_filter *, const void **);
67
static int xz_filter_close(struct archive_read_filter *);
68
static int xz_lzma_bidder_init(struct archive_read_filter *);
70
#elif HAVE_LZMADEC_H && HAVE_LIBLZMADEC
73
lzmadec_stream stream;
74
unsigned char *out_block;
75
size_t out_block_size;
77
char eof; /* True = found end of compressed data. */
80
/* Lzma-only filter */
81
static ssize_t lzma_filter_read(struct archive_read_filter *, const void **);
82
static int lzma_filter_close(struct archive_read_filter *);
86
* Note that we can detect xz and lzma compressed files even if we
87
* can't decompress them. (In fact, we like detecting them because we
88
* can give better error messages.) So the bid framework here gets
89
* compiled even if no lzma library is available.
91
static int xz_bidder_bid(struct archive_read_filter_bidder *,
92
struct archive_read_filter *);
93
static int xz_bidder_init(struct archive_read_filter *);
94
static int lzma_bidder_bid(struct archive_read_filter_bidder *,
95
struct archive_read_filter *);
96
static int lzma_bidder_init(struct archive_read_filter *);
99
archive_read_support_compression_xz(struct archive *_a)
101
struct archive_read *a = (struct archive_read *)_a;
102
struct archive_read_filter_bidder *bidder = __archive_read_get_bidder(a);
104
archive_clear_error(_a);
106
return (ARCHIVE_FATAL);
109
bidder->bid = xz_bidder_bid;
110
bidder->init = xz_bidder_init;
111
bidder->options = NULL;
113
#if HAVE_LZMA_H && HAVE_LIBLZMA
116
archive_set_error(_a, ARCHIVE_ERRNO_MISC,
117
"Using external unxz program for xz decompression");
118
return (ARCHIVE_WARN);
123
archive_read_support_compression_lzma(struct archive *_a)
125
struct archive_read *a = (struct archive_read *)_a;
126
struct archive_read_filter_bidder *bidder = __archive_read_get_bidder(a);
128
archive_clear_error(_a);
130
return (ARCHIVE_FATAL);
133
bidder->bid = lzma_bidder_bid;
134
bidder->init = lzma_bidder_init;
135
bidder->options = NULL;
137
#if HAVE_LZMA_H && HAVE_LIBLZMA
139
#elif HAVE_LZMADEC_H && HAVE_LIBLZMADEC
142
archive_set_error(_a, ARCHIVE_ERRNO_MISC,
143
"Using external unlzma program for lzma decompression");
144
return (ARCHIVE_WARN);
149
* Test whether we can handle this data.
152
xz_bidder_bid(struct archive_read_filter_bidder *self,
153
struct archive_read_filter *filter)
155
const unsigned char *buffer;
159
(void)self; /* UNUSED */
161
buffer = __archive_read_filter_ahead(filter, 6, &avail);
166
* Verify Header Magic Bytes : FD 37 7A 58 5A 00
169
if (buffer[0] != 0xFD)
172
if (buffer[1] != 0x37)
175
if (buffer[2] != 0x7A)
178
if (buffer[3] != 0x58)
181
if (buffer[4] != 0x5A)
184
if (buffer[5] != 0x00)
188
return (bits_checked);
192
* Test whether we can handle this data.
194
* <sigh> LZMA has a rather poor file signature. Zeros do not
195
* make good signature bytes as a rule, and the only non-zero byte
196
* here is an ASCII character. For example, an uncompressed tar
197
* archive whose first file is ']' would satisfy this check. It may
198
* be necessary to exclude LZMA from compression_all() because of
199
* this. Clients of libarchive would then have to explicitly enable
200
* LZMA checking instead of (or in addition to) compression_all() when
201
* they have other evidence (file name, command-line option) to go on.
204
lzma_bidder_bid(struct archive_read_filter_bidder *self,
205
struct archive_read_filter *filter)
207
const unsigned char *buffer;
210
uint64_t uncompressed_size;
213
(void)self; /* UNUSED */
215
buffer = __archive_read_filter_ahead(filter, 14, &avail);
219
/* First byte of raw LZMA stream is commonly 0x5d.
220
* The first byte is a special number, which consists of
221
* three parameters of LZMA compression, a number of literal
222
* context bits(which is from 0 to 8, default is 3), a number
223
* of literal pos bits(which is from 0 to 4, default is 0),
224
* a number of pos bits(which is from 0 to 4, default is 2).
225
* The first byte is made by
226
* (pos bits * 5 + literal pos bits) * 9 + literal context bits,
227
* and so the default value in this field is
228
* (2 * 5 + 0) * 9 + 3 = 0x5d.
229
* lzma of LZMA SDK has options to change those parameters.
230
* This means the range of this field is from 0 to 224. And lzma of
231
* XZ Utils with option -e records 0x5e in this field. */
232
/* NOTE: If this checking of the first byte increases false
233
* recognition, we should allow only 0x5d and 0x5e for the first
234
* byte of LZMA stream. */
236
if (buffer[0] > (4 * 5 + 4) * 9 + 8)
238
/* Most likely value in the first byte of LZMA stream. */
239
if (buffer[0] == 0x5d || buffer[0] == 0x5e)
242
/* Sixth through thirteenth bytes are uncompressed size,
243
* stored in little-endian order. `-1' means uncompressed
244
* size is unknown and lzma of XZ Utils always records `-1'
246
uncompressed_size = archive_le64dec(buffer+5);
247
if (uncompressed_size == (uint64_t)ARCHIVE_LITERAL_LL(-1))
250
/* Second through fifth bytes are dictionary size, stored in
251
* little-endian order. The minimum dictionary size is
252
* 1 << 12(4KiB) which the lzma of LZMA SDK uses with option
253
* -d12 and the maximum dictionary size is 1 << 27(128MiB)
254
* which the one uses with option -d27.
255
* NOTE: A comment of LZMA SDK source code says this dictionary
256
* range is from 1 << 12 to 1 << 30. */
257
dicsize = archive_le32dec(buffer+1);
259
case 0x00001000:/* lzma of LZMA SDK option -d12. */
260
case 0x00002000:/* lzma of LZMA SDK option -d13. */
261
case 0x00004000:/* lzma of LZMA SDK option -d14. */
262
case 0x00008000:/* lzma of LZMA SDK option -d15. */
263
case 0x00010000:/* lzma of XZ Utils option -0 and -1.
264
* lzma of LZMA SDK option -d16. */
265
case 0x00020000:/* lzma of LZMA SDK option -d17. */
266
case 0x00040000:/* lzma of LZMA SDK option -d18. */
267
case 0x00080000:/* lzma of XZ Utils option -2.
268
* lzma of LZMA SDK option -d19. */
269
case 0x00100000:/* lzma of XZ Utils option -3.
270
* lzma of LZMA SDK option -d20. */
271
case 0x00200000:/* lzma of XZ Utils option -4.
272
* lzma of LZMA SDK option -d21. */
273
case 0x00400000:/* lzma of XZ Utils option -5.
274
* lzma of LZMA SDK option -d22. */
275
case 0x00800000:/* lzma of XZ Utils option -6.
276
* lzma of LZMA SDK option -d23. */
277
case 0x01000000:/* lzma of XZ Utils option -7.
278
* lzma of LZMA SDK option -d24. */
279
case 0x02000000:/* lzma of XZ Utils option -8.
280
* lzma of LZMA SDK option -d25. */
281
case 0x04000000:/* lzma of XZ Utils option -9.
282
* lzma of LZMA SDK option -d26. */
283
case 0x08000000:/* lzma of LZMA SDK option -d27. */
287
/* If a memory usage for encoding was not enough on
288
* the platform where LZMA stream was made, lzma of
289
* XZ Utils automatically decreased the dictionary
290
* size to enough memory for encoding by 1Mi bytes
292
if (dicsize <= 0x03F00000 && dicsize >= 0x00300000 &&
293
(dicsize & ((1 << 20)-1)) == 0 &&
294
bits_checked == 8 + 64) {
298
/* Otherwise dictionary size is unlikely. But it is
299
* possible that someone makes lzma stream with
300
* liblzma/LZMA SDK using a custom dictionary size. */
304
/* TODO: The above test is still very weak. It would be
305
* good to do better. */
307
return (bits_checked);
310
#if HAVE_LZMA_H && HAVE_LIBLZMA
313
* liblzma 4.999.7 and later support both lzma and xz streams.
316
xz_bidder_init(struct archive_read_filter *self)
318
self->code = ARCHIVE_COMPRESSION_XZ;
320
return (xz_lzma_bidder_init(self));
324
lzma_bidder_init(struct archive_read_filter *self)
326
self->code = ARCHIVE_COMPRESSION_LZMA;
328
return (xz_lzma_bidder_init(self));
332
* Setup the callbacks.
335
xz_lzma_bidder_init(struct archive_read_filter *self)
337
static const size_t out_block_size = 64 * 1024;
339
struct private_data *state;
342
state = (struct private_data *)calloc(sizeof(*state), 1);
343
out_block = (unsigned char *)malloc(out_block_size);
344
if (state == NULL || out_block == NULL) {
345
archive_set_error(&self->archive->archive, ENOMEM,
346
"Can't allocate data for xz decompression");
349
return (ARCHIVE_FATAL);
353
state->out_block_size = out_block_size;
354
state->out_block = out_block;
355
self->read = xz_filter_read;
356
self->skip = NULL; /* not supported */
357
self->close = xz_filter_close;
359
state->stream.avail_in = 0;
361
state->stream.next_out = state->out_block;
362
state->stream.avail_out = state->out_block_size;
364
/* Initialize compression library.
365
* TODO: I don't know what value is best for memlimit.
366
* maybe, it needs to check memory size which
367
* running system has.
369
if (self->code == ARCHIVE_COMPRESSION_XZ)
370
ret = lzma_stream_decoder(&(state->stream),
371
(1U << 30),/* memlimit */
374
ret = lzma_alone_decoder(&(state->stream),
375
(1U << 30));/* memlimit */
380
/* Library setup failed: Choose an error message and clean up. */
383
archive_set_error(&self->archive->archive, ENOMEM,
384
"Internal error initializing compression library: "
385
"Cannot allocate memory");
387
case LZMA_OPTIONS_ERROR:
388
archive_set_error(&self->archive->archive,
390
"Internal error initializing compression library: "
391
"Invalid or unsupported options");
394
archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
395
"Internal error initializing lzma library");
399
free(state->out_block);
402
return (ARCHIVE_FATAL);
406
* Return the next block of decompressed data.
409
xz_filter_read(struct archive_read_filter *self, const void **p)
411
struct private_data *state;
416
state = (struct private_data *)self->data;
418
/* Empty our output buffer. */
419
state->stream.next_out = state->out_block;
420
state->stream.avail_out = state->out_block_size;
422
/* Try to fill the output buffer. */
423
while (state->stream.avail_out > 0 && !state->eof) {
424
state->stream.next_in =
425
__archive_read_filter_ahead(self->upstream, 1, &avail_in);
426
if (state->stream.next_in == NULL && avail_in < 0)
427
return (ARCHIVE_FATAL);
428
state->stream.avail_in = avail_in;
430
/* Decompress as much as we can in one pass. */
431
ret = lzma_code(&(state->stream),
432
(state->stream.avail_in == 0)? LZMA_FINISH: LZMA_RUN);
434
case LZMA_STREAM_END: /* Found end of stream. */
437
case LZMA_OK: /* Decompressor made some progress. */
438
__archive_read_filter_consume(self->upstream,
439
avail_in - state->stream.avail_in);
442
archive_set_error(&self->archive->archive, ENOMEM,
443
"Lzma library error: Cannot allocate memory");
444
return (ARCHIVE_FATAL);
445
case LZMA_MEMLIMIT_ERROR:
446
archive_set_error(&self->archive->archive, ENOMEM,
447
"Lzma library error: Out of memory");
448
return (ARCHIVE_FATAL);
449
case LZMA_FORMAT_ERROR:
450
archive_set_error(&self->archive->archive,
452
"Lzma library error: format not recognized");
453
return (ARCHIVE_FATAL);
454
case LZMA_OPTIONS_ERROR:
455
archive_set_error(&self->archive->archive,
457
"Lzma library error: Invalid options");
458
return (ARCHIVE_FATAL);
459
case LZMA_DATA_ERROR:
460
archive_set_error(&self->archive->archive,
462
"Lzma library error: Corrupted input data");
463
return (ARCHIVE_FATAL);
465
archive_set_error(&self->archive->archive,
467
"Lzma library error: No progress is possible");
468
return (ARCHIVE_FATAL);
470
/* Return an error. */
471
archive_set_error(&self->archive->archive,
473
"Lzma decompression failed: Unknown error");
474
return (ARCHIVE_FATAL);
478
decompressed = state->stream.next_out - state->out_block;
479
state->total_out += decompressed;
480
if (decompressed == 0)
483
*p = state->out_block;
484
return (decompressed);
488
* Clean up the decompressor.
491
xz_filter_close(struct archive_read_filter *self)
493
struct private_data *state;
495
state = (struct private_data *)self->data;
496
lzma_end(&(state->stream));
497
free(state->out_block);
504
#if HAVE_LZMADEC_H && HAVE_LIBLZMADEC
507
* If we have the older liblzmadec library, then we can handle
508
* LZMA streams but not XZ streams.
512
* Setup the callbacks.
515
lzma_bidder_init(struct archive_read_filter *self)
517
static const size_t out_block_size = 64 * 1024;
519
struct private_data *state;
520
ssize_t ret, avail_in;
522
self->code = ARCHIVE_COMPRESSION_LZMA;
525
state = (struct private_data *)calloc(sizeof(*state), 1);
526
out_block = (unsigned char *)malloc(out_block_size);
527
if (state == NULL || out_block == NULL) {
528
archive_set_error(&self->archive->archive, ENOMEM,
529
"Can't allocate data for lzma decompression");
532
return (ARCHIVE_FATAL);
536
state->out_block_size = out_block_size;
537
state->out_block = out_block;
538
self->read = lzma_filter_read;
539
self->skip = NULL; /* not supported */
540
self->close = lzma_filter_close;
542
/* Prime the lzma library with 18 bytes of input. */
543
state->stream.next_in = (unsigned char *)(uintptr_t)
544
__archive_read_filter_ahead(self->upstream, 18, &avail_in);
545
if (state->stream.next_in == NULL)
546
return (ARCHIVE_FATAL);
547
state->stream.avail_in = avail_in;
548
state->stream.next_out = state->out_block;
549
state->stream.avail_out = state->out_block_size;
551
/* Initialize compression library. */
552
ret = lzmadec_init(&(state->stream));
553
__archive_read_filter_consume(self->upstream,
554
avail_in - state->stream.avail_in);
555
if (ret == LZMADEC_OK)
558
/* Library setup failed: Clean up. */
559
archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
560
"Internal error initializing lzma library");
562
/* Override the error message if we know what really went wrong. */
564
case LZMADEC_HEADER_ERROR:
565
archive_set_error(&self->archive->archive,
567
"Internal error initializing compression library: "
570
case LZMADEC_MEM_ERROR:
571
archive_set_error(&self->archive->archive, ENOMEM,
572
"Internal error initializing compression library: "
577
free(state->out_block);
580
return (ARCHIVE_FATAL);
584
* Return the next block of decompressed data.
587
lzma_filter_read(struct archive_read_filter *self, const void **p)
589
struct private_data *state;
591
ssize_t avail_in, ret;
593
state = (struct private_data *)self->data;
595
/* Empty our output buffer. */
596
state->stream.next_out = state->out_block;
597
state->stream.avail_out = state->out_block_size;
599
/* Try to fill the output buffer. */
600
while (state->stream.avail_out > 0 && !state->eof) {
601
state->stream.next_in = (unsigned char *)(uintptr_t)
602
__archive_read_filter_ahead(self->upstream, 1, &avail_in);
603
if (state->stream.next_in == NULL && avail_in < 0)
604
return (ARCHIVE_FATAL);
605
state->stream.avail_in = avail_in;
607
/* Decompress as much as we can in one pass. */
608
ret = lzmadec_decode(&(state->stream), avail_in == 0);
610
case LZMADEC_STREAM_END: /* Found end of stream. */
613
case LZMADEC_OK: /* Decompressor made some progress. */
614
__archive_read_filter_consume(self->upstream,
615
avail_in - state->stream.avail_in);
617
case LZMADEC_BUF_ERROR: /* Insufficient input data? */
618
archive_set_error(&self->archive->archive,
620
"Insufficient compressed data");
621
return (ARCHIVE_FATAL);
623
/* Return an error. */
624
archive_set_error(&self->archive->archive,
626
"Lzma decompression failed");
627
return (ARCHIVE_FATAL);
631
decompressed = state->stream.next_out - state->out_block;
632
state->total_out += decompressed;
633
if (decompressed == 0)
636
*p = state->out_block;
637
return (decompressed);
641
* Clean up the decompressor.
644
lzma_filter_close(struct archive_read_filter *self)
646
struct private_data *state;
649
state = (struct private_data *)self->data;
651
switch (lzmadec_end(&(state->stream))) {
655
archive_set_error(&(self->archive->archive),
657
"Failed to clean up %s compressor",
658
self->archive->archive.compression_name);
662
free(state->out_block);
671
* If we have no suitable library on this system, we can't actually do
672
* the decompression. We can, however, still detect compressed
673
* archives and emit a useful message.
677
lzma_bidder_init(struct archive_read_filter *self)
681
r = __archive_read_program(self, "unlzma");
682
/* Note: We set the format here even if __archive_read_program()
683
* above fails. We do, after all, know what the format is
684
* even if we weren't able to read it. */
685
self->code = ARCHIVE_COMPRESSION_LZMA;
690
#endif /* HAVE_LZMADEC_H */
694
xz_bidder_init(struct archive_read_filter *self)
698
r = __archive_read_program(self, "unxz");
699
/* Note: We set the format here even if __archive_read_program()
700
* above fails. We do, after all, know what the format is
701
* even if we weren't able to read it. */
702
self->code = ARCHIVE_COMPRESSION_XZ;
708
#endif /* HAVE_LZMA_H */