2
* Branch/Call/Jump (BCJ) filter decoders
4
* Authors: Lasse Collin <lasse.collin@tukaani.org>
5
* Igor Pavlov <http://7-zip.org/>
7
* This file has been put into the public domain.
8
* You can do whatever you want with this file.
14
* The rest of the file is inside this ifdef. It makes things a little more
15
* convenient when building without support for any BCJ filters.
20
/* Type of the BCJ filter being used */
22
BCJ_X86 = 4, /* x86 or x86-64 */
23
BCJ_POWERPC = 5, /* Big endian only */
24
BCJ_IA64 = 6, /* Big or little endian */
25
BCJ_ARM = 7, /* Little endian only */
26
BCJ_ARMTHUMB = 8, /* Little endian only */
27
BCJ_SPARC = 9 /* Big or little endian */
31
* Return value of the next filter in the chain. We need to preserve
32
* this information across calls, because we must not call the next
33
* filter anymore once it has returned XZ_STREAM_END.
37
/* True if we are operating in single-call mode. */
41
* Absolute position relative to the beginning of the uncompressed
42
* data (in a single .xz Block). We care only about the lowest 32
43
* bits so this doesn't need to be uint64_t even with big files.
47
/* x86 filter state */
48
uint32_t x86_prev_mask;
50
/* Temporary space to hold the variables from struct xz_buf */
56
/* Amount of already filtered data in the beginning of buf */
59
/* Total amount of data currently stored in buf */
63
* Buffer to hold a mix of filtered and unfiltered data. This
64
* needs to be big enough to hold Alignment + 2 * Look-ahead:
66
* Type Alignment Look-ahead
80
* This is used to test the most significant byte of a memory address
81
* in an x86 instruction.
83
static inline int INIT bcj_x86_test_msbyte(uint8_t b)
85
return b == 0x00 || b == 0xFF;
88
static size_t INIT bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
90
static const bool_t mask_to_allowed_status[8]
91
= { true, true, true, false, true, false, false, false };
93
static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
96
size_t prev_pos = (size_t)-1;
97
uint32_t prev_mask = s->x86_prev_mask;
107
for (i = 0; i < size; ++i) {
108
if ((buf[i] & 0xFE) != 0xE8)
111
prev_pos = i - prev_pos;
115
prev_mask = (prev_mask << (prev_pos - 1)) & 7;
116
if (prev_mask != 0) {
117
b = buf[i + 4 - mask_to_bit_num[prev_mask]];
118
if (!mask_to_allowed_status[prev_mask]
119
|| bcj_x86_test_msbyte(b)) {
121
prev_mask = (prev_mask << 1) | 1;
129
if (bcj_x86_test_msbyte(buf[i + 4])) {
130
src = get_unaligned_le32(buf + i + 1);
132
dest = src - (s->pos + (uint32_t)i + 5);
136
j = mask_to_bit_num[prev_mask] * 8;
137
b = (uint8_t)(dest >> (24 - j));
138
if (!bcj_x86_test_msbyte(b))
141
src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
145
dest |= (uint32_t)0 - (dest & 0x01000000);
146
put_unaligned_le32(dest, buf + i + 1);
149
prev_mask = (prev_mask << 1) | 1;
153
prev_pos = i - prev_pos;
154
s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
159
#ifdef XZ_DEC_POWERPC
160
static size_t INIT bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
165
for (i = 0; i + 4 <= size; i += 4) {
166
instr = get_unaligned_be32(buf + i);
167
if ((instr & 0xFC000003) == 0x48000001) {
169
instr -= s->pos + (uint32_t)i;
172
put_unaligned_be32(instr, buf + i);
181
static size_t INIT bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
183
static const uint8_t branch_table[32] = {
184
0, 0, 0, 0, 0, 0, 0, 0,
185
0, 0, 0, 0, 0, 0, 0, 0,
186
4, 4, 6, 6, 0, 0, 7, 7,
187
4, 4, 0, 0, 4, 4, 0, 0
191
* The local variables take a little bit stack space, but it's less
192
* than what LZMA2 decoder takes, so it doesn't make sense to reduce
193
* stack usage here without doing that for the LZMA2 decoder too.
200
/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
203
/* Bitwise offset of the instruction indicated by slot */
206
/* bit_pos split into byte and bit parts */
210
/* Address part of an instruction */
213
/* Mask used to detect which instructions to convert */
216
/* 41-bit instruction stored somewhere in the lowest 48 bits */
219
/* Instruction normalized with bit_res for easier manipulation */
222
for (i = 0; i + 16 <= size; i += 16) {
223
mask = branch_table[buf[i] & 0x1F];
224
for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
225
if (((mask >> slot) & 1) == 0)
228
byte_pos = bit_pos >> 3;
229
bit_res = bit_pos & 7;
231
for (j = 0; j < 6; ++j)
232
instr |= (uint64_t)(buf[i + j + byte_pos])
235
norm = instr >> bit_res;
237
if (((norm >> 37) & 0x0F) == 0x05
238
&& ((norm >> 9) & 0x07) == 0) {
239
addr = (norm >> 13) & 0x0FFFFF;
240
addr |= ((uint32_t)(norm >> 36) & 1) << 20;
242
addr -= s->pos + (uint32_t)i;
245
norm &= ~((uint64_t)0x8FFFFF << 13);
246
norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
247
norm |= (uint64_t)(addr & 0x100000)
250
instr &= (1 << bit_res) - 1;
251
instr |= norm << bit_res;
253
for (j = 0; j < 6; j++)
254
buf[i + j + byte_pos]
255
= (uint8_t)(instr >> (8 * j));
265
static size_t INIT bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
270
for (i = 0; i + 4 <= size; i += 4) {
271
if (buf[i + 3] == 0xEB) {
272
addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
273
| ((uint32_t)buf[i + 2] << 16);
275
addr -= s->pos + (uint32_t)i + 8;
277
buf[i] = (uint8_t)addr;
278
buf[i + 1] = (uint8_t)(addr >> 8);
279
buf[i + 2] = (uint8_t)(addr >> 16);
287
#ifdef XZ_DEC_ARMTHUMB
288
static size_t INIT bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
293
for (i = 0; i + 4 <= size; i += 2) {
294
if ((buf[i + 1] & 0xF8) == 0xF0
295
&& (buf[i + 3] & 0xF8) == 0xF8) {
296
addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
297
| ((uint32_t)buf[i] << 11)
298
| (((uint32_t)buf[i + 3] & 0x07) << 8)
299
| (uint32_t)buf[i + 2];
301
addr -= s->pos + (uint32_t)i + 4;
303
buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
304
buf[i] = (uint8_t)(addr >> 11);
305
buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
306
buf[i + 2] = (uint8_t)addr;
316
static size_t INIT bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
321
for (i = 0; i + 4 <= size; i += 4) {
322
instr = get_unaligned_be32(buf + i);
323
if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
325
instr -= s->pos + (uint32_t)i;
327
instr = ((uint32_t)0x40000000 - (instr & 0x400000))
328
| 0x40000000 | (instr & 0x3FFFFF);
329
put_unaligned_be32(instr, buf + i);
338
* Apply the selected BCJ filter. Update *pos and s->pos to match the amount
339
* of data that got filtered.
341
* NOTE: This is implemented as a switch statement to avoid using function
342
* pointers, which could be problematic in the kernel boot code, which must
343
* avoid pointers to static data (at least on x86).
345
static void INIT bcj_apply(struct xz_dec_bcj *s,
346
uint8_t *buf, size_t *pos, size_t size)
356
filtered = bcj_x86(s, buf, size);
359
#ifdef XZ_DEC_POWERPC
361
filtered = bcj_powerpc(s, buf, size);
366
filtered = bcj_ia64(s, buf, size);
371
filtered = bcj_arm(s, buf, size);
374
#ifdef XZ_DEC_ARMTHUMB
376
filtered = bcj_armthumb(s, buf, size);
381
filtered = bcj_sparc(s, buf, size);
385
/* Never reached but silence compiler warnings. */
395
* Flush pending filtered data from temp to the output buffer.
396
* Move the remaining mixture of possibly filtered and unfiltered
397
* data to the beginning of temp.
399
static void INIT bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
403
copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
404
memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
405
b->out_pos += copy_size;
407
s->temp.filtered -= copy_size;
408
s->temp.size -= copy_size;
409
memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
413
* The BCJ filter functions are primitive in sense that they process the
414
* data in chunks of 1-16 bytes. To hide this issue, this function does
417
XZ_EXTERN enum xz_ret INIT xz_dec_bcj_run(struct xz_dec_bcj *s,
418
struct xz_dec_lzma2 *lzma2,
424
* Flush pending already filtered data to the output buffer. Return
425
* immediatelly if we couldn't flush everything, or if the next
426
* filter in the chain had already returned XZ_STREAM_END.
428
if (s->temp.filtered > 0) {
430
if (s->temp.filtered > 0)
433
if (s->ret == XZ_STREAM_END)
434
return XZ_STREAM_END;
438
* If we have more output space than what is currently pending in
439
* temp, copy the unfiltered data from temp to the output buffer
440
* and try to fill the output buffer by decoding more data from the
441
* next filter in the chain. Apply the BCJ filter on the new data
442
* in the output buffer. If everything cannot be filtered, copy it
443
* to temp and rewind the output buffer position accordingly.
445
* This needs to be always run when temp.size == 0 to handle a special
446
* case where the output buffer is full and the next filter has no
447
* more output coming but hasn't returned XZ_STREAM_END yet.
449
if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
450
out_start = b->out_pos;
451
memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
452
b->out_pos += s->temp.size;
454
s->ret = xz_dec_lzma2_run(lzma2, b);
455
if (s->ret != XZ_STREAM_END
456
&& (s->ret != XZ_OK || s->single_call))
459
bcj_apply(s, b->out, &out_start, b->out_pos);
462
* As an exception, if the next filter returned XZ_STREAM_END,
463
* we can do that too, since the last few bytes that remain
464
* unfiltered are meant to remain unfiltered.
466
if (s->ret == XZ_STREAM_END)
467
return XZ_STREAM_END;
469
s->temp.size = b->out_pos - out_start;
470
b->out_pos -= s->temp.size;
471
memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
474
* If there wasn't enough input to the next filter to fill
475
* the output buffer with unfiltered data, there's no point
476
* to try decoding more data to temp.
478
if (b->out_pos + s->temp.size < b->out_size)
483
* We have unfiltered data in temp. If the output buffer isn't full
484
* yet, try to fill the temp buffer by decoding more data from the
485
* next filter. Apply the BCJ filter on temp. Then we hopefully can
486
* fill the actual output buffer by copying filtered data from temp.
487
* A mix of filtered and unfiltered data may be left in temp; it will
488
* be taken care on the next call to this function.
490
if (b->out_pos < b->out_size) {
491
/* Make b->out{,_pos,_size} temporarily point to s->temp. */
493
s->out_pos = b->out_pos;
494
s->out_size = b->out_size;
495
b->out = s->temp.buf;
496
b->out_pos = s->temp.size;
497
b->out_size = sizeof(s->temp.buf);
499
s->ret = xz_dec_lzma2_run(lzma2, b);
501
s->temp.size = b->out_pos;
503
b->out_pos = s->out_pos;
504
b->out_size = s->out_size;
506
if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
509
bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
512
* If the next filter returned XZ_STREAM_END, we mark that
513
* everything is filtered, since the last unfiltered bytes
514
* of the stream are meant to be left as is.
516
if (s->ret == XZ_STREAM_END)
517
s->temp.filtered = s->temp.size;
520
if (s->temp.filtered > 0)
527
XZ_EXTERN struct xz_dec_bcj *INIT xz_dec_bcj_create(bool_t single_call)
529
struct xz_dec_bcj *s = malloc(sizeof(*s));
531
s->single_call = single_call;
536
XZ_EXTERN enum xz_ret INIT xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
542
#ifdef XZ_DEC_POWERPC
551
#ifdef XZ_DEC_ARMTHUMB
560
/* Unsupported Filter ID */
561
return XZ_OPTIONS_ERROR;
567
s->x86_prev_mask = 0;
568
s->temp.filtered = 0;